[Tux3] Patch: Freestanding kernel unit test for block operations

Thu Jan 15 20:44:01 PST 2009

Hi all,

Here is something a little bit different: a freestanding kernel unit
test for some of the primitive block operations I have talked about
lately, as a complete filesystem all in one file.  This is built on
"hackfs", a 200 line filesystem that is essentially a cut and paste
of RAMFS, reformatted and slightly rearranged by me to be more compact
and readable.  Basically, to get out of the way and just provide the
minimal support to be a complete filesystem, minus backing store.  One
difference from RAMFS: it is a block filesystem associated with a block
device, which makes it easy to try out IO transfer code ideas like the
ones being developed here.

Unit testing is rare or non-existent in kernel code, mainly because of
the difficulty of setting up isolated tests.  Traditionally, we test by
hacking full systems, and just keep modifying the full system as it
gets more and more complex.  But sometimes that strategy just isn't
very good at squeezing out corner cases.  Now, here is a solution.

This modified hackfs contains blockget and blockread, which have been
tested, and fork_buffer, which has not (but soon will be).  It also
contains the vecio and syncio convenience wrappers for submit_bio,
which make bio transfers very convenient indeed.  With these simple
tools, we can strip away a great deal of complexity, and get right down
to the business of transferring data, without needing help from complex
libraries.

I applied this patch to 2.6.26.5 and built with uml:

   make linux ARCH=um CONFIG_HACKFS=y && ./linux ubda=tuxroot ubdb=testdev 

This patch should work on a wide range of recent kernels.  It is quite
conservative about the kernel features it uses, which have not changed a
lot recently.

After booting, mount hackfs:

   mount -t hackfs /dev/ubdb /mnt

and the test() function will hexdump the first 256 bytes of block 4 of
testdev (1K blocks, so byte offset 4096), which will show you the Tux3
magic number if it's a Tux3 volume.  After that, the thing to do is hack
up a unit test for fork_buffer and see what happens.  Or just start
writing your own filesystem!

Regards,

Daniel

diff --git a/fs/Kconfig b/fs/Kconfig
index 2694648..a351533 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1529,6 +1529,14 @@ config SYSV_FS
 	  If you haven't heard about all of this before, it's safe to say N.
 
 
+config HACKFS
+	tristate "Hackfs, a useless filesystem"
+	help
+	  To compile this filesystem as a module, choose M here: the module will
+	  be called hackfs.
+
+	  If unsure, run away.
+
 config UFS_FS
 	tristate "UFS file system support (read only)"
 	depends on BLOCK
diff --git a/fs/Makefile b/fs/Makefile
index 1e7a11b..1f2a77f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -119,3 +119,4 @@ obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_GFS2_FS)           += gfs2/
+obj-$(CONFIG_HACKFS)		+= hackfs/hackfs.o
diff --git a/fs/hackfs/hackfs.c b/fs/hackfs/hackfs.c
new file mode 100644
index 0000000..afaac69
--- /dev/null
+++ b/fs/hackfs/hackfs.c
@@ -0,0 +1,412 @@
+/*
+ * Hackfs, a Useless Filesystem
+ * Copyright (c) 2008, Daniel Phillips
+ * Portions obligingly contributed (c) 2008 by Maciej Zenczykowski
+ * Portions shamelessly hijacked from ramfs
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/backing-dev.h>
+#include <linux/completion.h>
+#include <linux/module.h>
+
+/* BIO wrapper unit test */
+
+#include <linux/buffer_head.h>
+/* Kernel-side stand-in for assert(): a failed condition goes to BUG_ON */
+#define assert(cond) BUG_ON(!(cond))
+/* A delta tag is two bits wide */
+#define DELTA_MASK 3
+/* The same mask, shifted to start at BH_PrivateStart within b_state */
+#define DELTA_STATE_MASK (DELTA_MASK << BH_PrivateStart)
+
+/* Return the delta tag kept in the b_state bits starting at BH_PrivateStart. */
+unsigned bufdelta(struct buffer_head *buffer)
+{
+	unsigned long shifted = buffer->b_state >> BH_PrivateStart;
+
+	return shifted & DELTA_MASK;
+}
+
+/*
+ * Store @delta as the delta tag of @buffer, leaving every other state bit
+ * as it was.  Only the bits selected by DELTA_MASK are stored, so an
+ * out-of-range value cannot spill into neighbouring b_state bits.
+ *
+ * NOTE(review): this is a plain read-modify-write of b_state; it looks like
+ * callers have to serialize against concurrent bit operations on the same
+ * buffer - confirm.
+ */
+void set_bufdelta(struct buffer_head *buffer, unsigned delta)
+{
+	unsigned long tag = (unsigned long)(delta & DELTA_MASK) << BH_PrivateStart;
+
+	buffer->b_state = (buffer->b_state & ~DELTA_STATE_MASK) | tag;
+}
+
+/*
+ * One buffer list per delta tag value.  Each head is initialised to an
+ * empty list here; a zero-filled list_head has NULL links and cannot be
+ * handed to list_move_tail.
+ */
+struct list_head delta_list[4] = {
+	LIST_HEAD_INIT(delta_list[0]), LIST_HEAD_INIT(delta_list[1]),
+	LIST_HEAD_INIT(delta_list[2]), LIST_HEAD_INIT(delta_list[3]),
+};
+
+/*
+ * Fork the page under @buffer.
+ *
+ * A freshly allocated page receives a copy of the old page's data and takes
+ * the old page's slot in the mapping's radix tree.  The buffer heads that
+ * were on the old page (the ones callers hold) are moved onto the fresh
+ * page; the newly created buffer heads are left on the old page.  For each
+ * dirty buffer, the existing head is retagged with the current delta and
+ * queued on the inode dirty list, while its twin keeps the earlier delta
+ * tag and is queued on that delta's list.
+ *
+ * Returns 0 on success, or when the buffer already carries the current
+ * delta; -ENOMEM if no page could be allocated; or the error reported by
+ * read_mapping_page.
+ *
+ * NOTE(review): untested sketch.  As far as this review can tell the new
+ * page never gets ->mapping set or is marked uptodate, references taken by
+ * read_mapping_page are not dropped, and page_address() presumes lowmem
+ * pages - confirm before relying on this.
+ */
+int fork_buffer(struct buffer_head *buffer)
+{
+	/* Stand-in for the per-inode dirty list (Tux3 would use tuxnode->dirty) */
+	static LIST_HEAD(inode_dirty_list);
+	/* BH_* are bit numbers, b_state holds bit flags: build a real mask */
+	const unsigned long keep = (1UL << BH_Uptodate) | (1UL << BH_Dirty);
+	struct page *oldpage = buffer->b_page, *newpage;
+	struct address_space *mapping = oldpage->mapping;
+	struct buffer_head *oldbuf, *oldlist, *newbuf;
+	unsigned newdelta = 1 & DELTA_MASK;
+	/* Both pages must carry the same number of buffers for the walk below */
+	unsigned blocksize = buffer->b_size;
+	void **slot;
+
+	// Use read_mapping_page to bring the full page uptodate
+	// Take the page lock (protects the buffer list)
+	lock_page(oldpage);
+	while (!PageUptodate(oldpage)) {
+		unlock_page(oldpage);
+		oldpage = read_mapping_page(mapping, oldpage->index, NULL);
+		if (IS_ERR(oldpage))
+			return PTR_ERR(oldpage);
+		lock_page(oldpage);
+	}
+
+	// The fork happened while waiting for the page lock?
+	if (bufdelta(buffer) == newdelta) {
+		unlock_page(oldpage);
+		return 0;
+	}
+
+	// Allocate a new page and put buffers on it
+	newpage = alloc_pages(GFP_KERNEL, 0);
+	if (!newpage) {
+		unlock_page(oldpage);
+		return -ENOMEM;
+	}
+	create_empty_buffers(newpage, blocksize, 0);
+
+	// Copy page data
+	memcpy(page_address(newpage), page_address(oldpage), PAGE_CACHE_SIZE);
+
+	// Walk the two buffer lists together
+	oldbuf = oldlist = (void *)oldpage->private;
+	newbuf = (void *)newpage->private;
+	do {
+		newbuf->b_state = oldbuf->b_state & keep;
+		// Swap page ownership; set_bh_page keeps b_data in step with b_page
+		set_bh_page(newbuf, oldpage, bh_offset(newbuf));
+		set_bh_page(oldbuf, newpage, bh_offset(oldbuf));
+		if (buffer_dirty(oldbuf)) {
+			unsigned olddelta = bufdelta(oldbuf);
+			assert(olddelta != newdelta);
+
+			// Set old buffer dirty in the current delta
+			list_move_tail(&oldbuf->b_assoc_buffers, &inode_dirty_list);
+			set_bufdelta(oldbuf, newdelta);
+
+			// Add new buffer to the earlier delta list
+			list_move_tail(&newbuf->b_assoc_buffers, delta_list + olddelta);
+			set_bufdelta(newbuf, olddelta);
+		}
+		oldbuf = oldbuf->b_this_page;
+		newbuf = newbuf->b_this_page;
+	} while (oldbuf != oldlist);
+
+	// Swap the page buffer lists
+	oldpage->private = newpage->private;
+	newpage->private = (unsigned long)oldlist;
+	newpage->index = oldpage->index;
+
+	// Replace page in radix tree
+	spin_lock_irq(&mapping->tree_lock);
+	slot = radix_tree_lookup_slot(&mapping->page_tree, oldpage->index);
+	radix_tree_replace_slot(slot, newpage);
+	spin_unlock_irq(&mapping->tree_lock);
+	get_page(newpage);
+	// Unlock before dropping the reference so the page is still ours
+	unlock_page(oldpage);
+	put_page(oldpage);
+	return 0;
+}
+
+typedef sector_t block_t;
+
+/*
+ * Return buffer number @which of @page, counting from the head returned by
+ * page_buffers and following the b_this_page links.
+ */
+struct buffer_head *page_buffer(struct page *page, unsigned which)
+{
+	struct buffer_head *bh;
+
+	for (bh = page_buffers(page); which; which--)
+		bh = bh->b_this_page;
+	return bh;
+}
+
+/*
+ * Find or create the page cache page covering @block of @mapping, attach
+ * empty buffers to it if it has none, and return the buffer for @block with
+ * its reference count raised.  The block size is taken from the host
+ * inode's i_blkbits.  The page is unlocked and the page reference dropped
+ * before returning.  No I/O is issued here.
+ *
+ * Returns ERR_PTR(-ENOMEM) if no page could be obtained.
+ */
+struct buffer_head *blockget(struct address_space *mapping, block_t block)
+{
+	unsigned blockbits = mapping->host->i_blkbits;
+	unsigned subshift = PAGE_CACHE_SHIFT - blockbits;
+	/* Selects the block's position within its page; built unsigned to avoid shifting -1 */
+	unsigned submask = (1U << subshift) - 1;
+	struct page *page = grab_cache_page(mapping, block >> subshift);
+	struct buffer_head *buffer;
+
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << blockbits, 0);
+	buffer = page_buffer(page, block & submask);
+	get_bh(buffer);
+	unlock_page(page);
+	page_cache_release(page);
+	return buffer;
+}
+
+/*
+ * Build a bio for @dev starting at @sector from the @vecs entries of @vec
+ * and submit it with direction @rw.  @endio becomes the bio's completion
+ * handler and @data its private pointer.
+ *
+ * Returns 0 once the bio has been submitted, or -ENOMEM if no bio could be
+ * allocated.
+ */
+static int vecio(int rw, struct block_device *dev, sector_t sector,
+	bio_end_io_t endio, void *data, unsigned vecs, struct bio_vec *vec)
+{
+	struct bio *bio = bio_alloc(GFP_KERNEL, vecs);
+	struct bio_vec *end = vec + vecs;
+
+	if (!bio)
+		return -ENOMEM;
+
+	bio->bi_bdev = dev;
+	bio->bi_sector = sector;
+	bio->bi_end_io = endio;
+	bio->bi_private = data;
+
+	/* Append each segment and account for its length */
+	for (; vec < end; vec++) {
+		bio->bi_io_vec[bio->bi_vcnt++] = *vec;
+		bio->bi_size += vec->bv_len;
+	}
+
+	submit_bio(rw, bio);
+	return 0;
+}
+
+/*
+ * State shared between syncio and its end_io handler.  It lives on the
+ * stack of the waiting task, so the handler signals through a completion:
+ * a bare flag plus wake_up would let the waiter return and pop its stack
+ * frame while the handler is still touching the wait queue.
+ */
+struct biosync {
+	struct completion done;	/* signalled when the bio has finished */
+	int err;		/* error code handed to the end_io handler */
+};
+
+/* end_io handler for syncio: drop the bio, record the result, wake the waiter */
+static void biosync_endio(struct bio *bio, int err)
+{
+	struct biosync *sync = bio->bi_private;
+	bio_put(bio);
+	sync->err = err;
+	complete(&sync->done);
+}
+
+/*
+ * Submit the @vecs entries of @vec as one bio via vecio and sleep until it
+ * has completed.
+ *
+ * Returns the error from vecio if the bio could not be submitted, otherwise
+ * the error code reported to the end_io handler (0 on success).
+ */
+static int syncio(int rw, struct block_device *dev, sector_t sector, unsigned vecs, struct bio_vec *vec)
+{
+	struct biosync sync = { .err = 0 };
+	int err;
+
+	init_completion(&sync.done);
+	err = vecio(rw, dev, sector, biosync_endio, &sync, vecs, vec);
+	if (err)
+		return err;
+	wait_for_completion(&sync.done);
+	return sync.err;
+}
+
+/*
+ * Transfer one buffer synchronously between its page and the block device
+ * of the superblock that owns the page's mapping.  @write is passed to
+ * syncio as the transfer direction.  The device sector is derived from the
+ * page index and the buffer's offset within the page, in 512-byte units.
+ *
+ * After a successful read the buffer is marked uptodate, so that blockread
+ * does not repeat the transfer on the next lookup.
+ *
+ * Returns 0 on success or the error reported by syncio.
+ */
+int dev_blockio(struct buffer_head *buffer, int write)
+{
+	struct page *page = buffer->b_page;
+	unsigned offset = offset_in_page(buffer->b_data);
+	sector_t sector = ((sector_t)page->index << (PAGE_CACHE_SHIFT - 9)) + (offset >> 9);
+	struct bio_vec vec = {
+		.bv_page = page, .bv_offset = offset,
+		.bv_len = buffer->b_size };
+	int err = syncio(write, page->mapping->host->i_sb->s_bdev, sector, 1, &vec);
+
+	if (!err && !write)
+		set_buffer_uptodate(buffer);
+	return err;
+}
+
+typedef int (blockio_t)(struct buffer_head *buffer, int write);
+
+/*
+ * Get the buffer for @block of @mapping and, unless it is already uptodate,
+ * read it through the blockio_t function stored in the host inode's
+ * i_private.
+ *
+ * Returns the referenced buffer, the ERR_PTR from blockget, or an ERR_PTR
+ * carrying the read error (the buffer reference is dropped in that case).
+ */
+struct buffer_head *blockread(struct address_space *mapping, block_t block)
+{
+	struct buffer_head *bh = blockget(mapping, block);
+	blockio_t *io;
+	int err;
+
+	if (IS_ERR(bh) || buffer_uptodate(bh))
+		return bh;
+
+	io = (blockio_t *)mapping->host->i_private;
+	err = io(bh, READ);
+	if (!err)
+		return bh;
+
+	brelse(bh);
+	return ERR_PTR(err);
+}
+
+/*
+ * Print @size bytes starting at @data through printk, sixteen per row.
+ * Each row shows the address of its first byte, the bytes in hex, padding
+ * to keep a short last row aligned, and the same bytes as quoted text.
+ * Bytes below space and from 127 (DEL) upwards are shown as '.'.
+ */
+void hexdump(void *data, unsigned size)
+{
+	const unsigned width = 16;
+	unsigned char *row = data;
+
+	while (size) {
+		unsigned n = size < width ? size : width;
+		unsigned i;
+
+		printk("%p:  ", row);
+		for (i = 0; i < n; i++)
+			printk("%02hx ", row[i]);
+		/* Three columns of padding for every byte missing from the row */
+		printk("%*.s  \"", (int)(width - n) * 3, "");
+		for (i = 0; i < n; i++) {
+			int c = row[i];
+			printk("%c", c < ' ' || c >= 127 ? '.' : c);
+		}
+		printk("\"\n");
+		row += n;
+		size -= n;
+	}
+}
+
+/*
+ * Mount-time unit test: read block 4 (with 1K blocks) of the device behind
+ * @sb through blockread, using a scratch inode whose i_private points at
+ * dev_blockio, and hexdump the first 0x100 bytes of it.
+ *
+ * Returns 0 on success, -ENOMEM if no inode could be allocated, or the
+ * error from blockread.  The buffer reference and the inode are released
+ * before returning.
+ */
+static int test(struct super_block *sb)
+{
+	struct inode *inode = new_inode(sb);
+	struct buffer_head *buffer;
+	int err = 0;
+
+	if (!inode)
+		return -ENOMEM;
+	inode->i_private = dev_blockio;
+	inode->i_blkbits = 10;
+
+	buffer = blockread(inode->i_mapping, 4);
+	if (IS_ERR(buffer)) {
+		err = PTR_ERR(buffer);
+	} else {
+		hexdump(buffer->b_data, 0x100);
+		brelse(buffer);
+	}
+	iput(inode);
+	return err;
+}
+
+/* Hackfs */
+
+/* Address space operations installed on every hackfs inode mapping */
+const struct address_space_operations hack_aops = {
+	.set_page_dirty	= __set_page_dirty_no_writeback,
+	.readpage	= simple_readpage,
+	.write_begin	= simple_write_begin,
+	.write_end	= simple_write_end,
+};
+
+/* File operations for regular files: generic kernel helpers throughout */
+const struct file_operations hack_file_ops = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.aio_read	= generic_file_aio_read,
+	.write		= do_sync_write,
+	.aio_write	= generic_file_aio_write,
+	.mmap		= generic_file_mmap,
+	.fsync		= simple_sync_file,
+	.splice_read	= generic_file_splice_read,
+};
+
+/* Inode operations for regular files: only getattr is provided */
+const struct inode_operations hack_inode_ops = {
+	.getattr	= simple_getattr,
+};
+
+struct inode *hack_get_inode(struct super_block *sb, int mode, dev_t dev);
+
+/*
+ * Create an inode of type @mode (device number @dev) and bind it to
+ * @dentry under @dir.  When the parent has the setgid bit, the new inode
+ * takes the parent's group, and a new directory gets the setgid bit too.
+ * The parent's mtime and ctime are updated.
+ *
+ * Returns 0, or -ENOSPC if hack_get_inode produced no inode.
+ */
+static int hack_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+	struct inode *inode = hack_get_inode(dir->i_sb, mode, dev);
+	int setgid = dir->i_mode & S_ISGID;
+
+	if (!inode)
+		return -ENOSPC;
+
+	if (setgid)
+		inode->i_gid = dir->i_gid;
+	if (setgid && S_ISDIR(mode))
+		inode->i_mode |= S_ISGID;
+
+	d_instantiate(dentry, inode);
+	dget(dentry); /* Pin the dentry, kill by d_genocide */
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	return 0;
+}
+
+/*
+ * Create a directory through hack_mknod; on success the parent's link
+ * count is incremented.  Returns the result of hack_mknod.
+ */
+static int hack_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int err = hack_mknod(dir, dentry, mode | S_IFDIR, 0);
+
+	if (err)
+		return err;
+	inc_nlink(dir);
+	return 0;
+}
+
+/* Create a regular file through hack_mknod; @nd is not used. */
+static int hack_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+{
+	int filemode = mode | S_IFREG;
+
+	return hack_mknod(dir, dentry, filemode, 0);
+}
+
+/*
+ * Create a symbolic link inode holding @symname (including its terminating
+ * NUL) and bind it to @dentry under @dir.  The link takes the parent's
+ * group when the parent has the setgid bit, and the parent's mtime and
+ * ctime are updated.
+ *
+ * Returns 0, -ENOSPC if no inode was produced, or the error from
+ * page_symlink (the inode is released in that case).
+ */
+static int hack_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	struct inode *inode = hack_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+	int err;
+
+	if (!inode)
+		return -ENOSPC;
+
+	err = page_symlink(inode, symname, strlen(symname) + 1);
+	if (err) {
+		iput(inode);
+		return err;
+	}
+
+	if (dir->i_mode & S_ISGID)
+		inode->i_gid = dir->i_gid;
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	return 0;
+}
+
+/*
+ * Inode operations for directories: creation goes through the hack_*
+ * functions above, everything else uses the simple_* helpers.
+ */
+static const struct inode_operations hack_dir_ops = {
+	.lookup		= simple_lookup,
+	.create		= hack_create,
+	.mknod		= hack_mknod,
+	.mkdir		= hack_mkdir,
+	.symlink	= hack_symlink,
+	.link		= simple_link,
+	.unlink		= simple_unlink,
+	.rmdir		= simple_rmdir,
+	.rename		= simple_rename,
+};
+
+/* Backing device info shared by all hackfs inode mappings */
+static struct backing_dev_info hack_backing_dev_info = {
+	.capabilities =
+		BDI_CAP_NO_ACCT_AND_WRITEBACK |
+		BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
+		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
+};
+
+/*
+ * Allocate a new inode on @sb and initialise it for @mode: ownership comes
+ * from the current task's fsuid/fsgid, all three timestamps are set to the
+ * current time, and the mapping gets the hackfs address space operations
+ * and backing device info.  Regular files, directories and symlinks
+ * receive their operation tables; any other type is handed to
+ * init_special_inode together with @dev.
+ *
+ * Returns the inode, or NULL if new_inode failed.
+ */
+struct inode *hack_get_inode(struct super_block *sb, int mode, dev_t dev)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (!inode)
+		return NULL;
+
+	inode->i_mode = mode;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_blocks = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+	inode->i_mapping->a_ops = &hack_aops;
+	inode->i_mapping->backing_dev_info = &hack_backing_dev_info;
+	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &hack_inode_ops;
+		inode->i_fop = &hack_file_ops;
+		break;
+	case S_IFDIR:
+		inode->i_op = &hack_dir_ops;
+		inode->i_fop = &simple_dir_operations;
+		inc_nlink(inode); /* links = 2 for "." entry */
+		break;
+	case S_IFLNK:
+		inode->i_op = &page_symlink_inode_operations;
+		break;
+	default:
+		init_special_inode(inode, mode, dev);
+		break;
+	}
+	return inode;
+}
+
+/* Superblock operations: generic helpers only */
+static const struct super_operations hack_super_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+};
+
+/*
+ * Fill in @sb at mount time: set the size limits, magic number and
+ * operations, create the root directory inode and dentry, then run the
+ * block I/O unit test.  @data and @silent are not used.
+ *
+ * Returns -ENOMEM if the root inode or dentry cannot be created, otherwise
+ * whatever test() returns.
+ */
+static int hack_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *rootdir;
+
+	sb->s_magic = 0x4841434b;	/* "HACK" in ASCII */
+	sb->s_op = &hack_super_ops;
+	sb->s_time_gran = 1;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+
+	rootdir = hack_get_inode(sb, S_IFDIR | 0755, 0);
+	if (!rootdir)
+		return -ENOMEM;
+
+	sb->s_root = d_alloc_root(rootdir);
+	if (!sb->s_root) {
+		iput(rootdir);
+		return -ENOMEM;
+	}
+	return test(sb);
+}
+
+/* Mount entry point: hand off to get_sb_bdev with hack_fill_super as the filler. */
+static int hack_get_sb(struct file_system_type *fs_type, int flags,
+	const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	int err = get_sb_bdev(fs_type, flags, dev_name, data, hack_fill_super, mnt);
+
+	return err;
+}
+
+/*
+ * Unmount: run d_genocide on the root dentry, if there is one, to release
+ * the dentries pinned with dget at creation time, then tear down the
+ * superblock with kill_block_super.
+ */
+void hack_kill_sb(struct super_block *sb)
+{
+	struct dentry *root = sb->s_root;
+
+	if (root)
+		d_genocide(root);
+	kill_block_super(sb);
+}
+
+/* Filesystem type registered under the name "hackfs"; it requires a block device */
+static struct file_system_type hackfs = {
+	.owner		= THIS_MODULE,
+	.name		= "hackfs",
+	.fs_flags	= FS_REQUIRES_DEV,
+	.get_sb		= hack_get_sb,
+	.kill_sb	= hack_kill_sb,
+};
+
+/* Module load: register the filesystem type and report the result. */
+static int __init hack_init(void)
+{
+	int err = register_filesystem(&hackfs);
+
+	return err;
+}
+
+/* Module unload: unregister the filesystem type; the result is not checked. */
+static void __exit hack_exit(void)
+{
+	struct file_system_type *fstype = &hackfs;
+
+	unregister_filesystem(fstype);
+}
+
+/* Hook hack_init and hack_exit up as the module entry and exit points */
+module_init(hack_init)
+module_exit(hack_exit)
+MODULE_LICENSE("GPL");

_______________________________________________
Tux3 mailing list
Tux3 at tux3.org
http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3