[Tux3] Patch: Freestanding kernel unit test for block operations

Daniel Phillips phillips at phunq.net
Sun Jan 18 00:12:41 PST 2009


This revision introduces blockdirty, which calls fork_buffer if the
buffer needs to be forked and otherwise just sets it dirty.  The
Tux3 inode infrastructure was partially ported so that these functions
compile exactly as they will appear in the Tux3 code.  This might be
of some interest to those learning the vfs: it shows the mechanism
for allocating and using specialized inodes, which requires
alloc_inode and destroy_inode methods in place of the drop_inode method
used by ramfs, plus a container_of wrapper to obtain the specialized
inode from a pointer to the generic vfs inode embedded in it.  Here, we
just need a per-inode buffer dirty list, which a generic vfs inode does
not have.
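
For those readers, the pattern boils down to embedding the generic vfs
inode in a filesystem-private structure and converting between the two
with container_of.  A minimal sketch of the idea, using illustrative
names (myfs_inode, MYFS_I) rather than the exact code in the patch
below:

#include <linux/fs.h>
#include <linux/slab.h>

struct myfs_inode {
	struct list_head dirty;		/* filesystem-private state */
	struct inode vfs_inode;		/* generic vfs inode, embedded */
};

static inline struct myfs_inode *MYFS_I(struct inode *inode)
{
	return container_of(inode, struct myfs_inode, vfs_inode);
}

/* ->alloc_inode: allocate the containing structure, hand the embedded
 * vfs inode back to the vfs */
static struct inode *myfs_alloc_inode(struct super_block *sb)
{
	struct myfs_inode *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
	if (!mi)
		return NULL;
	inode_init_once(&mi->vfs_inode);
	INIT_LIST_HEAD(&mi->dirty);
	return &mi->vfs_inode;
}

/* ->destroy_inode: free the containing structure, not the vfs pointer */
static void myfs_destroy_inode(struct inode *inode)
{
	kfree(MYFS_I(inode));
}

Production filesystems normally allocate from a kmem_cache with
inode_init_once as the slab constructor; plain kmalloc keeps the sketch
(and the unit test below) short.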

A couple more reference count bugs were fixed.  These functions appear
nearly ready to be put to work in Tux3, after we have a userspace
prototype of atomic commit, which is the next project.

diff --git a/fs/Kconfig b/fs/Kconfig
index 2694648..a351533 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1529,6 +1529,14 @@ config SYSV_FS
 	  If you haven't heard about all of this before, it's safe to say N.
 
 
+config HACKFS
+	tristate "Hackfs, a useless filesystem"
+	help
+	  To compile this filesystem as a module, choose M here: the module will
+	  be called hackfs.
+
+	  If unsure, run away.
+
 config UFS_FS
 	tristate "UFS file system support (read only)"
 	depends on BLOCK
diff --git a/fs/Makefile b/fs/Makefile
index 1e7a11b..1f2a77f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -119,3 +119,4 @@ obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_GFS2_FS)           += gfs2/
+obj-$(CONFIG_HACKFS)		+= hackfs/hackfs.o
diff --git a/fs/hackfs/hackfs.c b/fs/hackfs/hackfs.c
new file mode 100644
index 0000000..8b3c919
--- /dev/null
+++ b/fs/hackfs/hackfs.c
@@ -0,0 +1,469 @@
+/*
+ * Hackfs, a Useless Filesystem
+ * Copyright (c) 2008, Daniel Phillips
+ * Portions obligingly contributed (c) 2008 by Maciej Zenczykowski
+ * Portions shamelessly hijacked from ramfs
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
+
+/* BIO wrapper unit test */
+
+#include <linux/buffer_head.h>
+#define assert(cond) BUG_ON(!(cond))
+#define DELTA_MASK 3
+#define DELTA_STATE_MASK (DELTA_MASK << BH_PrivateStart)
+
+struct sb { unsigned delta; } static_sb = { .delta = 0 };
+typedef struct { struct list_head dirty; struct inode vfs_inode; } tuxnode_t;
+
+static inline tuxnode_t *tux_inode(struct inode *inode)
+{
+	return container_of(inode, tuxnode_t, vfs_inode);
+}
+
+static inline struct sb *tux_sb(struct super_block *super)
+{
+	return super->s_fs_info;
+}
+
+static struct inode *tux3_alloc_inode(struct super_block *sb)
+{
+	tuxnode_t *tuxnode = kmalloc(sizeof(*tuxnode), GFP_KERNEL);
+	struct inode *inode = &tuxnode->vfs_inode;
+	inode_init_once(inode);
+	INIT_LIST_HEAD(&tuxnode->dirty);
+	return inode;
+}
+
+static void tux3_destroy_inode(struct inode *inode)
+{
+	kfree(tux_inode(inode));
+}
+
+unsigned bufdelta(struct buffer_head *buffer)
+{
+	return (buffer->b_state >> BH_PrivateStart) & DELTA_MASK;
+}
+
+void set_bufdelta(struct buffer_head *buffer, unsigned delta)
+{
+	buffer->b_state = (buffer->b_state & ~DELTA_STATE_MASK) | (delta << BH_PrivateStart);
+}
+
+struct list_head delta_list[4];
+
+int fork_buffer(struct buffer_head *buffer)
+{
+	struct page *oldpage = buffer->b_page;
+	struct address_space *mapping = oldpage->mapping;
+	struct inode *inode = mapping->host;
+	struct list_head *inode_dirty_list = &tux_inode(inode)->dirty;
+	unsigned newdelta = tux_sb(inode->i_sb)->delta & DELTA_MASK;
+	unsigned blocksize = inode->i_sb->s_blocksize;
+
+	// Use read_mapping_page to bring the full page uptodate
+	if (!PageUptodate(oldpage)) {
+		oldpage = read_mapping_page(mapping, oldpage->index, NULL);
+		if (IS_ERR(oldpage))
+			return PTR_ERR(oldpage);
+		put_page(oldpage); // drop refcount from read_mapping_page
+	}
+
+	// Take the page lock (protects the buffer list)
+	lock_page(oldpage);
+
+	// The fork happened while waiting for the page lock?
+	if (bufdelta(buffer) == newdelta) {
+		unlock_page(oldpage);
+		return 0;
+	}
+
+	// Allocate a new page and put buffers on it
+	struct page *newpage = alloc_pages(GFP_KERNEL, 0);
+	newpage->mapping = oldpage->mapping;
+	newpage->index = oldpage->index;
+	create_empty_buffers(newpage, blocksize, 0);
+
+	// Copy page data
+	printk("copy page from %p to %p\n", page_address(oldpage), page_address(newpage));
+	printk("page count old %u new %u\n", atomic_read(&oldpage->_count), atomic_read(&newpage->_count));
+	memcpy(page_address(newpage), page_address(oldpage), PAGE_CACHE_SIZE);
+	SetPageUptodate(newpage);
+
+	// Walk the two buffer lists together
+	struct buffer_head *oldbuf = (void *)oldpage->private, *oldlist = oldbuf;
+	struct buffer_head *newbuf = (void *)newpage->private;
+	do {
+		newbuf->b_state = oldbuf->b_state & ((1 << BH_Uptodate) | (1 << BH_Dirty));
+		newbuf->b_page = oldpage;
+		oldbuf->b_page = newpage;
+		if (buffer_dirty(oldbuf)) {
+			unsigned olddelta = bufdelta(oldbuf);
+			assert(olddelta != newdelta);
+
+			// Set old buffer dirty in current delta
+			list_move_tail(&oldbuf->b_assoc_buffers, inode_dirty_list);
+			set_bufdelta(oldbuf, newdelta);
+
+			// Add new buffer to earlier delta list
+			list_move_tail(&newbuf->b_assoc_buffers, delta_list + newdelta);
+			set_bufdelta(newbuf, olddelta);
+		}
+		oldbuf = oldbuf->b_this_page;
+		newbuf = newbuf->b_this_page;
+	} while (oldbuf != oldlist);
+
+	// Swap the page buffer lists
+	oldpage->private = newpage->private;
+	newpage->private = (unsigned long)oldlist;
+
+	// Replace page in radix tree
+	spin_lock_irq(&mapping->tree_lock);
+	void **slot = radix_tree_lookup_slot(&mapping->page_tree, oldpage->index);
+	radix_tree_replace_slot(slot, newpage);
+	spin_unlock_irq(&mapping->tree_lock);
+	get_page(newpage);
+	put_page(oldpage);
+	unlock_page(oldpage);
+	printk("page count old %u new %u\n", atomic_read(&oldpage->_count), atomic_read(&newpage->_count));
+	return 0;
+}
+
+int blockdirty(struct buffer_head *buffer)
+{
+	struct page *page = buffer->b_page;
+	struct inode *inode = page->mapping->host;
+	unsigned newdelta = tux_sb(inode->i_sb)->delta & DELTA_MASK;
+	lock_page(page);
+	if (bufdelta(buffer) != newdelta) {
+		unlock_page(page);
+		return fork_buffer(buffer);
+	}
+	list_move_tail(&buffer->b_assoc_buffers, &tux_inode(inode)->dirty);
+	set_bufdelta(buffer, newdelta);
+	unlock_page(page);
+	return 0;
+}
+
+typedef sector_t block_t;
+
+struct buffer_head *page_buffer(struct page *page, unsigned which)
+{
+	struct buffer_head *buffer = page_buffers(page);
+	while (which--)
+		buffer = buffer->b_this_page;
+	return buffer;
+}
+
+struct buffer_head *blockget(struct address_space *mapping, block_t block)
+{
+	unsigned blockbits = mapping->host->i_blkbits;
+	unsigned subshift = PAGE_CACHE_SHIFT - blockbits;
+	struct page *page = grab_cache_page(mapping, block >> subshift);
+	struct buffer_head *buffer;
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << blockbits, 0);
+	buffer = page_buffer(page, block & ~(-1 << subshift));
+	get_bh(buffer);
+	unlock_page(page);
+	page_cache_release(page);
+	return buffer;
+}
+
+typedef int (blockio_t)(struct buffer_head *buffer, int write);
+
+struct buffer_head *blockread(struct address_space *mapping, block_t block)
+{
+	struct buffer_head *buffer = blockget(mapping, block);
+	if (!IS_ERR(buffer) && !buffer_uptodate(buffer)) {
+		lock_buffer(buffer);
+		int err = ((blockio_t *)mapping->host->i_private)(buffer, READ);
+		if (err) {
+			unlock_buffer(buffer);
+			brelse(buffer);
+			return ERR_PTR(err);
+		}
+		unlock_buffer(buffer);
+	}
+	return buffer;
+}
+
+static int vecio(int rw, struct block_device *dev, sector_t sector,
+	bio_end_io_t endio, void *data, unsigned vecs, struct bio_vec *vec)
+{
+	struct bio *bio = bio_alloc(GFP_KERNEL, vecs);
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_bdev = dev;
+	bio->bi_sector = sector;
+	bio->bi_end_io = endio;
+	bio->bi_private = data;
+	while (vecs--) {
+		bio->bi_io_vec[bio->bi_vcnt] = *vec++;
+		bio->bi_size += bio->bi_io_vec[bio->bi_vcnt++].bv_len;
+	}
+	submit_bio(rw, bio);
+	return 0;
+}
+
+struct biosync { wait_queue_head_t wait; int done, err; };
+
+static void biosync_endio(struct bio *bio, int err)
+{
+	struct biosync *sync = bio->bi_private;
+	bio_put(bio);
+	sync->err = err;
+	sync->done = 1;
+	wake_up(&sync->wait);
+}
+
+static int syncio(int rw, struct block_device *dev, sector_t sector, unsigned vecs, struct bio_vec *vec)
+{
+	struct biosync sync = { .wait = __WAIT_QUEUE_HEAD_INITIALIZER(sync.wait) };
+	if (!(sync.err = vecio(rw, dev, sector, biosync_endio, &sync, vecs, vec)))
+		wait_event(sync.wait, sync.done);
+	return sync.err;
+}
+
+int dev_blockio(struct buffer_head *buffer, int write)
+{
+	struct page *page = buffer->b_page;
+	unsigned offset = offset_in_page(buffer->b_data);
+	int err = syncio(write, page->mapping->host->i_sb->s_bdev,
+		((sector_t)page->index << (PAGE_CACHE_SHIFT - 9)) + (offset >> 9),
+		1, &(struct bio_vec){
+			.bv_page = page, .bv_offset = offset,
+			.bv_len = buffer->b_size });
+	if (!err)
+		set_buffer_uptodate(buffer);
+	return err;
+}
+
+void hexdump(void *data, unsigned size)
+{
+	while (size) {
+		unsigned char *p;
+		int w = 16, n = size < w? size: w, pad = w - n;
+		printk("%p:  ", data);
+		for (p = data; p < (unsigned char *)data + n;)
+			printk("%02hx ", *p++);
+		printk("%*.s  \"", pad*3, "");
+		for (p = data; p < (unsigned char *)data + n;) {
+			int c = *p++;
+			printk("%c", c < ' ' || c > 127 ? '.' : c);
+		}
+		printk("\"\n");
+		data += w;
+		size -= n;
+	}
+}
+
+/* Hackfs */
+
+const struct address_space_operations hack_aops = {
+	.readpage = simple_readpage,
+	.write_begin = simple_write_begin,
+	.write_end = simple_write_end,
+	.set_page_dirty = __set_page_dirty_no_writeback,
+};
+
+static int test(struct super_block *sb)
+{
+	struct inode *inode = new_inode(sb);
+	inode->i_mapping->a_ops = &hack_aops;
+	inode->i_private = dev_blockio;
+	inode->i_blkbits = 10;
+	struct buffer_head *buffer = blockread(inode->i_mapping, 4);
+	fork_buffer(buffer);
+//	hexdump(buffer->b_data, 0x100);
+	iput(inode);
+	return 0;
+}
+
+const struct file_operations hack_file_ops = {
+        .read = do_sync_read,
+        .write = do_sync_write,
+        .aio_read = generic_file_aio_read,
+        .aio_write = generic_file_aio_write,
+        .splice_read = generic_file_splice_read,
+        .llseek = generic_file_llseek,
+        .fsync = simple_sync_file,
+        .mmap = generic_file_mmap,
+};
+
+const struct inode_operations hack_inode_ops = {
+        .getattr = simple_getattr,
+};
+
+struct inode *hack_get_inode(struct super_block *sb, int mode, dev_t dev);
+
+static int hack_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+	struct inode *inode = hack_get_inode(dir->i_sb, mode, dev);
+	if (!inode)
+		return -ENOSPC;
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			inode->i_mode |= S_ISGID;
+	}
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	d_instantiate(dentry, inode);
+	dget(dentry); /* Pin the dentry, kill by d_genocide */
+	return 0;
+}
+
+static int hack_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int retval = hack_mknod(dir, dentry, mode | S_IFDIR, 0);
+	if (!retval)
+		inc_nlink(dir);
+	return retval;
+}
+
+static int hack_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+{
+	return hack_mknod(dir, dentry, mode | S_IFREG, 0);
+}
+
+static int hack_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	int err;
+	struct inode *inode = hack_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+	if (!inode)
+		return -ENOSPC;
+	if ((err = page_symlink(inode, symname, strlen(symname) + 1))) {
+		iput(inode);
+		return err;
+	}
+	if (dir->i_mode & S_ISGID)
+		inode->i_gid = dir->i_gid;
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	return 0;
+}
+
+static const struct inode_operations hack_dir_ops = {
+	.create		= hack_create,
+	.lookup		= simple_lookup,
+	.link		= simple_link,
+	.unlink		= simple_unlink,
+	.symlink	= hack_symlink,
+	.mkdir		= hack_mkdir,
+	.rmdir		= simple_rmdir,
+	.mknod		= hack_mknod,
+	.rename		= simple_rename,
+};
+
+static struct backing_dev_info hack_backing_dev_info = {
+	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
+		BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | BDI_CAP_READ_MAP |
+		BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP
+};
+
+struct inode *hack_get_inode(struct super_block *sb, int mode, dev_t dev)
+{
+	struct inode *inode = new_inode(sb);
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = current->fsuid;
+		inode->i_gid = current->fsgid;
+		inode->i_blocks = 0;
+		inode->i_mapping->a_ops = &hack_aops;
+		inode->i_mapping->backing_dev_info = &hack_backing_dev_info;
+		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		switch (mode & S_IFMT) {
+		default:
+			init_special_inode(inode, mode, dev);
+			break;
+		case S_IFREG:
+			inode->i_op = &hack_inode_ops;
+			inode->i_fop = &hack_file_ops;
+			break;
+		case S_IFDIR:
+			inode->i_op = &hack_dir_ops;
+			inode->i_fop = &simple_dir_operations;
+			inc_nlink(inode); /* links = 2 for "." entry */
+			break;
+		case S_IFLNK:
+			inode->i_op = &page_symlink_inode_operations;
+			break;
+		}
+	}
+	return inode;
+}
+
+static const struct super_operations hack_super_ops = {
+	.alloc_inode	= tux3_alloc_inode,
+	.destroy_inode	= tux3_destroy_inode,
+	.statfs = simple_statfs,
+};
+
+static int hack_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode;
+	struct dentry *root;
+
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize_bits = 10;
+	sb->s_blocksize = 1 << sb->s_blocksize_bits;
+	sb->s_magic = 0x4841434b;
+	sb->s_op = &hack_super_ops;
+	sb->s_time_gran = 1;
+	inode = hack_get_inode(sb, S_IFDIR | 0755, 0);
+	if (!inode)
+		return -ENOMEM;
+
+	root = d_alloc_root(inode);
+	if (!root) {
+		iput(inode);
+		return -ENOMEM;
+	}
+	sb->s_root = root;
+	sb->s_fs_info = &static_sb;
+	return test(sb);
+}
+
+static int hack_get_sb(struct file_system_type *fs_type, int flags,
+	const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data, hack_fill_super, mnt);
+}
+
+void hack_kill_sb(struct super_block *sb)
+{
+	if (sb->s_root)
+		d_genocide(sb->s_root);
+	kill_block_super(sb);
+}
+
+static struct file_system_type hackfs = {
+	.name = "hackfs",
+	.fs_flags = FS_REQUIRES_DEV,
+	.get_sb = hack_get_sb,
+	.kill_sb = hack_kill_sb,
+	.owner = THIS_MODULE,
+};
+
+static int __init hack_init(void)
+{
+	return register_filesystem(&hackfs);
+}
+
+static void __exit hack_exit(void)
+{
+	unregister_filesystem(&hackfs);
+}
+
+module_init(hack_init)
+module_exit(hack_exit)
+MODULE_LICENSE("GPL");

_______________________________________________
Tux3 mailing list
Tux3 at tux3.org
http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3


