Discussion:
[PATCH 0/3] nilfs2 additional updates
Ryusuke Konishi
2014-02-23 16:58:38 UTC
Permalink
Hi Andrew,

Please queue this patchset for the next merge window.

Andreas's two patches will add FITRIM ioctl support to nilfs. The
original cover letter can be seen at:

[1] http://marc.info/?l=linux-nilfs&m=139314496625648
"[PATCH v4 0/2] nilfs2: add support for FITRIM ioctl"

My patch adds missing range checks of metadata sizes read from disk,
and prevents potential memory access violations due to invalid sizes.
This is for the issue pointed out in the following comment.

[2] http://marc.info/?l=linux-nilfs&m=139153213412866
"Re: [PATCH 3/4] nilfs2: add nilfs_sufile_set_suinfo to update
segment usage"

Thanks in advance,
Ryusuke Konishi
--
Andreas Rohner (2):
nilfs2: add nilfs_sufile_trim_fs to trim clean segs
nilfs2: add FITRIM ioctl support for nilfs2

Ryusuke Konishi (1):
nilfs2: verify metadata sizes read from disk

fs/nilfs2/cpfile.c | 12 ++++
fs/nilfs2/dat.c | 12 ++++
fs/nilfs2/ioctl.c | 45 +++++++++++++
fs/nilfs2/sufile.c | 164 +++++++++++++++++++++++++++++++++++++++++++++
fs/nilfs2/sufile.h | 1 +
fs/nilfs2/the_nilfs.c | 10 +++
include/linux/nilfs2_fs.h | 8 +++
7 files changed, 252 insertions(+)

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ryusuke Konishi
2014-02-23 16:58:41 UTC
Permalink
Add code to check sizes of on-disk data of metadata files such as
inode size, segment usage size, DAT entry size, and checkpoint size.
Although these sizes are read from disk, the current implementation
doesn't check them.

If these sizes are not sane on disk, it can cause out-of-range access
to metadata or memory access overrun on metadata block buffers due to
overflow in sundry calculations.

Both lower limit and upper limit of metadata sizes are verified to
prevent these issues.

Signed-off-by: Ryusuke Konishi <***@lab.ntt.co.jp>
---
fs/nilfs2/cpfile.c | 12 ++++++++++++
fs/nilfs2/dat.c | 12 ++++++++++++
fs/nilfs2/sufile.c | 12 ++++++++++++
fs/nilfs2/the_nilfs.c | 10 ++++++++++
include/linux/nilfs2_fs.h | 8 ++++++++
5 files changed, 54 insertions(+)

diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index deaa3d3..0d58075 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -942,6 +942,18 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
struct inode *cpfile;
int err;

+ if (cpsize > sb->s_blocksize) {
+ printk(KERN_ERR
+ "NILFS: too large checkpoint size: %zu bytes.\n",
+ cpsize);
+ return -EINVAL;
+ } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) {
+ printk(KERN_ERR
+ "NILFS: too small checkpoint size: %zu bytes.\n",
+ cpsize);
+ return -EINVAL;
+ }
+
cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO);
if (unlikely(!cpfile))
return -ENOMEM;
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index fa0f803..0d5fada 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -484,6 +484,18 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size,
struct nilfs_dat_info *di;
int err;

+ if (entry_size > sb->s_blocksize) {
+ printk(KERN_ERR
+ "NILFS: too large DAT entry size: %zu bytes.\n",
+ entry_size);
+ return -EINVAL;
+ } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) {
+ printk(KERN_ERR
+ "NILFS: too small DAT entry size: %zu bytes.\n",
+ entry_size);
+ return -EINVAL;
+ }
+
dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO);
if (unlikely(!dat))
return -ENOMEM;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 84e384d..2a869c3 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -1169,6 +1169,18 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
void *kaddr;
int err;

+ if (susize > sb->s_blocksize) {
+ printk(KERN_ERR
+ "NILFS: too large segment usage size: %zu bytes.\n",
+ susize);
+ return -EINVAL;
+ } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) {
+ printk(KERN_ERR
+ "NILFS: too small segment usage size: %zu bytes.\n",
+ susize);
+ return -EINVAL;
+ }
+
sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO);
if (unlikely(!sufile))
return -ENOMEM;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 94c451c..8ba8229 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -399,6 +399,16 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
return -EINVAL;

nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size);
+ if (nilfs->ns_inode_size > nilfs->ns_blocksize) {
+ printk(KERN_ERR "NILFS: too large inode size: %d bytes.\n",
+ nilfs->ns_inode_size);
+ return -EINVAL;
+ } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) {
+ printk(KERN_ERR "NILFS: too small inode size: %d bytes.\n",
+ nilfs->ns_inode_size);
+ return -EINVAL;
+ }
+
nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);

nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 1fb465f..ff3fea3 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -82,6 +82,8 @@ struct nilfs_inode {
__le32 i_pad;
};

+#define NILFS_MIN_INODE_SIZE 128
+
/**
* struct nilfs_super_root - structure of super root
* @sr_sum: check sum
@@ -482,6 +484,8 @@ struct nilfs_dat_entry {
__le64 de_rsv;
};

+#define NILFS_MIN_DAT_ENTRY_SIZE 32
+
/**
* struct nilfs_snapshot_list - snapshot list
* @ssl_next: next checkpoint number on snapshot list
@@ -520,6 +524,8 @@ struct nilfs_checkpoint {
struct nilfs_inode cp_ifile_inode;
};

+#define NILFS_MIN_CHECKPOINT_SIZE (64 + NILFS_MIN_INODE_SIZE)
+
/* checkpoint flags */
enum {
NILFS_CHECKPOINT_SNAPSHOT,
@@ -615,6 +621,8 @@ struct nilfs_segment_usage {
__le32 su_flags;
};

+#define NILFS_MIN_SEGMENT_USAGE_SIZE 16
+
/* segment usage flag */
enum {
NILFS_SEGMENT_USAGE_ACTIVE,
--
1.7.9.3
Ryusuke Konishi
2014-02-23 16:58:40 UTC
Permalink
From: Andreas Rohner <***@gmx.net>

This patch adds support for the FITRIM ioctl, which enables user space
tools to issue TRIM/DISCARD requests to the underlying device. Every
clean segment within the specified range will be discarded.

Signed-off-by: Andreas Rohner <***@gmx.net>
Signed-off-by: Ryusuke Konishi <***@lab.ntt.co.jp>
---
fs/nilfs2/ioctl.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)

diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index c19a231..422fb54 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1072,6 +1072,48 @@ out:
}

/**
+ * nilfs_ioctl_trim_fs() - trim ioctl handle function
+ * @inode: inode object
+ * @argp: pointer on argument from userspace
+ *
+ * Decription: nilfs_ioctl_trim_fs is the FITRIM ioctl handle function. It
+ * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which
+ * performs the actual trim operation.
+ *
+ * Return Value: On success, 0 is returned or negative error code, otherwise.
+ */
+static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
+{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+ struct request_queue *q = bdev_get_queue(nilfs->ns_bdev);
+ struct fstrim_range range;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&range, argp, sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity);
+
+ down_read(&nilfs->ns_segctor_sem);
+ ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range);
+ up_read(&nilfs->ns_segctor_sem);
+
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(argp, &range, sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/**
* nilfs_ioctl_set_alloc_range - limit range of segments to be allocated
* @inode: inode object
* @argp: pointer on argument from userspace
@@ -1296,6 +1338,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return nilfs_ioctl_resize(inode, filp, argp);
case NILFS_IOCTL_SET_ALLOC_RANGE:
return nilfs_ioctl_set_alloc_range(inode, argp);
+ case FITRIM:
+ return nilfs_ioctl_trim_fs(inode, argp);
default:
return -ENOTTY;
}
@@ -1327,6 +1371,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case NILFS_IOCTL_SYNC:
case NILFS_IOCTL_RESIZE:
case NILFS_IOCTL_SET_ALLOC_RANGE:
+ case FITRIM:
break;
default:
return -ENOIOCTLCMD;
--
1.7.9.3
Ryusuke Konishi
2014-02-23 16:58:39 UTC
Permalink
From: Andreas Rohner <***@gmx.net>

This patch adds the nilfs_sufile_trim_fs function, which takes a
fstrim_range structure and calls blkdev_issue_discard for every
clean segment in the specified range. The range is truncated to
file system block boundaries.

Signed-off-by: Andreas Rohner <***@gmx.net>
Signed-off-by: Ryusuke Konishi <***@lab.ntt.co.jp>
---
fs/nilfs2/sufile.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nilfs2/sufile.h | 1 +
2 files changed, 153 insertions(+)

diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 5628b99..84e384d 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -1001,6 +1001,158 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
}

/**
+ * nilfs_sufile_trim_fs() - trim ioctl handle function
+ * @sufile: inode of segment usage file
+ * @range: fstrim_range structure
+ *
+ * start: First Byte to trim
+ * len: number of Bytes to trim from start
+ * minlen: minimum extent length in Bytes
+ *
+ * Decription: nilfs_sufile_trim_fs goes through all segments containing bytes
+ * from start to start+len. start is rounded up to the next block boundary
+ * and start+len is rounded down. For each clean segment blkdev_issue_discard
+ * function is invoked.
+ *
+ * Return Value: On success, 0 is returned or negative error code, otherwise.
+ */
+int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
+{
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ struct buffer_head *su_bh;
+ struct nilfs_segment_usage *su;
+ void *kaddr;
+ size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size;
+ sector_t seg_start, seg_end, start_block, end_block;
+ sector_t start = 0, nblocks = 0;
+ u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0;
+ int ret = 0;
+ unsigned int sects_per_block;
+
+ sects_per_block = (1 << nilfs->ns_blocksize_bits) /
+ bdev_logical_block_size(nilfs->ns_bdev);
+ len = range->len >> nilfs->ns_blocksize_bits;
+ minlen = range->minlen >> nilfs->ns_blocksize_bits;
+ max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment);
+
+ if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits)
+ return -EINVAL;
+
+ start_block = (range->start + nilfs->ns_blocksize - 1) >>
+ nilfs->ns_blocksize_bits;
+
+ /*
+ * range->len can be very large (actually, it is set to
+ * ULLONG_MAX by default) - truncate upper end of the range
+ * carefully so as not to overflow.
+ */
+ if (max_blocks - start_block < len)
+ end_block = max_blocks - 1;
+ else
+ end_block = start_block + len - 1;
+
+ segnum = nilfs_get_segnum_of_block(nilfs, start_block);
+ segnum_end = nilfs_get_segnum_of_block(nilfs, end_block);
+
+ down_read(&NILFS_MDT(sufile)->mi_sem);
+
+ while (segnum <= segnum_end) {
+ n = nilfs_sufile_segment_usages_in_block(sufile, segnum,
+ segnum_end);
+
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
+ &su_bh);
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ goto out_sem;
+ /* hole */
+ segnum += n;
+ continue;
+ }
+
+ kaddr = kmap_atomic(su_bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum,
+ su_bh, kaddr);
+ for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) {
+ if (!nilfs_segment_usage_clean(su))
+ continue;
+
+ nilfs_get_segment_range(nilfs, segnum, &seg_start,
+ &seg_end);
+
+ if (!nblocks) {
+ /* start new extent */
+ start = seg_start;
+ nblocks = seg_end - seg_start + 1;
+ continue;
+ }
+
+ if (start + nblocks == seg_start) {
+ /* add to previous extent */
+ nblocks += seg_end - seg_start + 1;
+ continue;
+ }
+
+ /* discard previous extent */
+ if (start < start_block) {
+ nblocks -= start_block - start;
+ start = start_block;
+ }
+
+ if (nblocks >= minlen) {
+ kunmap_atomic(kaddr);
+
+ ret = blkdev_issue_discard(nilfs->ns_bdev,
+ start * sects_per_block,
+ nblocks * sects_per_block,
+ GFP_NOFS, 0);
+ if (ret < 0) {
+ put_bh(su_bh);
+ goto out_sem;
+ }
+
+ ndiscarded += nblocks;
+ kaddr = kmap_atomic(su_bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(
+ sufile, segnum, su_bh, kaddr);
+ }
+
+ /* start new extent */
+ start = seg_start;
+ nblocks = seg_end - seg_start + 1;
+ }
+ kunmap_atomic(kaddr);
+ put_bh(su_bh);
+ }
+
+
+ if (nblocks) {
+ /* discard last extent */
+ if (start < start_block) {
+ nblocks -= start_block - start;
+ start = start_block;
+ }
+ if (start + nblocks > end_block + 1)
+ nblocks = end_block - start + 1;
+
+ if (nblocks >= minlen) {
+ ret = blkdev_issue_discard(nilfs->ns_bdev,
+ start * sects_per_block,
+ nblocks * sects_per_block,
+ GFP_NOFS, 0);
+ if (!ret)
+ ndiscarded += nblocks;
+ }
+ }
+
+out_sem:
+ up_read(&NILFS_MDT(sufile)->mi_sem);
+
+ range->len = ndiscarded << nilfs->ns_blocksize_bits;
+ return ret;
+}
+
+/**
* nilfs_sufile_read - read or get sufile inode
* @sb: super block instance
* @susize: size of a segment usage entry
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 366003c..b8afd72 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -66,6 +66,7 @@ void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_inode *raw_inode, struct inode **inodep);
+int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range);

/**
* nilfs_sufile_scrap - make a segment garbage
--
1.7.9.3
Loading...