Here is a simple implementation of delayed allocation for Tux3.<br><br>Delayed allocation defers block allocation from prepare-write (write-begin) time to page writeback time. It is a powerful technique that is implemented by several filesystems, such as XFS, ext4, and btrfs.<br>
<br>Unlike ext4's delalloc, this implementation is independent of the extent tree structure. <br><br>Signed-off by XiaoFeng Liu.<br><br>---<br><br> balloc.c | 1 <br> filemap.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++<br>
inode.c | 2 -<br> super.c | 7 ++++<br> trace.h | 11 ++++++<br> tux3.h | 30 +++++++++++++++++<br> 6 files changed, 158 insertions(+), 1 deletion(-)<br><br><br>diff -pNur tux-orig/balloc.c tux-hack/balloc.c<br>
--- tux-orig/balloc.c 2009-01-14 20:00:22.000000000 +0800<br>+++ tux-hack/balloc.c 2009-01-15 14:29:26.000000000 +0800<br>@@ -281,6 +281,7 @@ int bfree(struct sb *sb, block_t start, <br> clear_bits(bufdata(buffer), start, blocks);<br>
brelse_dirty(buffer);<br> sb->freeblocks += blocks;<br>+ tux3_release_blocks(sb, blocks);<br> //set_sb_dirty(sb);<br> mutex_unlock(&sb->bitmap->i_mutex);<br> return 0;<br>diff -pNur tux-orig/filemap.c tux-hack/filemap.c<br>
--- tux-orig/filemap.c 2009-01-14 20:00:22.000000000 +0800<br>+++ tux-hack/filemap.c 2009-01-15 14:25:07.000000000 +0800<br>@@ -505,4 +505,112 @@ const struct address_space_operations tu<br> .sync_page = block_sync_page,<br>
.write_begin = tux3_vol_write_begin,<br> };<br>+<br>+<br>+/* <br>+ * Tux3's delayed allocation<br>+ * Note: support blocksize == pagesize only<br>+ * Written by XiaoFeng LIU <<a href="mailto:xfengliu@mail.ustc.edu.cn">xfengliu@mail.ustc.edu.cn</a>><br>
+ */<br>+<br>+/* proof of concept */<br>+#define NR_RESERV_BLOCKS 32<br>+<br>+static int tux3_da_reserve_blocks(struct super_block *sb, int count)<br>+{<br>+ long free_blocks;<br>+ struct sb *sbi = tux_sb(sb);<br>
+ free_blocks = percpu_counter_read_positive(freeblocks_counter(sbi));<br>+ xtrace("freeblocks_counter %ld", free_blocks);<br>+<br>+ if (free_blocks < count + NR_RESERV_BLOCKS)<br>+ return -ENOSPC;<br>
+ percpu_counter_sub(freeblocks_counter(sbi), count);<br>+ return 0;<br>+}<br>+<br>+static void tux3_da_release_blocks(struct super_block *sb, int count)<br>+{<br>+ struct sb *sbi = tux_sb(sb);<br>+ if (count) {<br>
+ percpu_counter_add(freeblocks_counter(sbi), count);<br>+ sb->s_dirt = 1;<br>+ }<br>+}<br>+<br>+static int tux3_get_block_delay(struct inode *inode, sector_t iblock,<br>+ struct buffer_head *bh_rslt, int create)<br>
+{<br>+ return tux3_get_block(inode, iblock, bh_rslt, 0);<br>+}<br>+<br>+/*<br>+ * a get_block() called at the writeout time.<br>+ */<br>+static int tux3_get_block_write(struct inode *inode, sector_t iblock,<br>+ struct buffer_head *bh_rslt, int create)<br>
+{<br>+ pgoff_t index = (pgoff_t) (iblock >> (PAGE_CACHE_SHIFT - inode->i_blkbits));<br>+ struct page *page = find_get_page(inode->i_mapping, index);<br>+<br>+ /* the page should be here, and dirty */<br>
+ if (unlikely(!page)) {<br>+ xtrace("find_get_page ret NULL.");<br>+ goto out;<br>+ }<br>+ if (create && PageChecked(page)) {<br>+ ClearPageChecked(page);<br>+ tux3_da_release_blocks(inode->i_sb, 1);<br>
+ }<br>+ if (page)<br>+ page_cache_release(page);<br>+<br>+out:<br>+ return tux3_get_block(inode, iblock, bh_rslt, create);<br>+}<br>+<br>+static int tux3_da_write_begin(struct file *file, struct address_space *mapping,<br>
+ loff_t pos, unsigned len, unsigned flags,<br>+ struct page **pagep, void **fsdata)<br>+{<br>+ return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, <br>+ tux3_get_block_delay); <br>
+}<br>+<br>+static int tux3_da_write_end(struct file *file, struct address_space *mapping,<br>+ loff_t pos, unsigned len, unsigned copied,<br>+ struct page *page, void *fsdata)<br>+{<br>+ if (!PageChecked(page)) {<br>
+ int ret = tux3_da_reserve_blocks(mapping->host->i_sb, 1);<br>+ if (ret)<br>+ return ret;<br>+ SetPageChecked(page);<br>+ }<br>+ return nobh_write_end(file, mapping, pos, len, copied, page, fsdata);<br>
+}<br>+<br>+static int tux3_da_writepage(struct page *page, struct writeback_control *wbc)<br>+{<br>+ return nobh_writepage(page, tux3_get_block_write, wbc);<br>+}<br>+static int tux3_da_writepages(struct address_space *mapping,<br>
+ struct writeback_control *wbc)<br>+{<br>+ return mpage_writepages(mapping, wbc, tux3_get_block_write);<br>+}<br>+<br>+const struct address_space_operations tux_da_aops = {<br>+ .readpage = tux3_readpage,<br>
+ .readpages = tux3_readpages,<br>+ .writepage = tux3_da_writepage,<br>+ .writepages = tux3_da_writepages,<br>+ .sync_page = block_sync_page,<br>+ .write_begin = tux3_da_write_begin,<br>
+ .write_end = tux3_da_write_end,<br>+ .bmap = tux3_bmap,<br>+ .direct_IO = tux3_direct_IO,<br>+ .migratepage = buffer_migrate_page,<br>+};<br>+<br> #endif /* __KERNEL__ */<br>diff -pNur tux-orig/inode.c tux-hack/inode.c<br>
--- tux-orig/inode.c 2009-01-14 20:00:22.000000000 +0800<br>+++ tux-hack/inode.c 2009-01-15 15:29:43.000000000 +0800<br>@@ -438,7 +438,7 @@ static void tux_setup_inode(struct inode<br> case S_IFREG:<br> inode->i_op = &tux_file_iops;<br>
inode->i_fop = &tux_file_fops;<br>- inode->i_mapping->a_ops = &tux_aops;<br>+ inode->i_mapping->a_ops = &tux_da_aops;<br> break;<br> case S_IFDIR:<br> inode->i_op = &tux_dir_iops;<br>
diff -pNur tux-orig/super.c tux-hack/super.c<br>--- tux-orig/super.c 2009-01-14 20:00:22.000000000 +0800<br>+++ tux-hack/super.c 2009-01-15 14:27:53.000000000 +0800<br>@@ -106,6 +106,9 @@ static void tux3_put_super(struct super_<br>
iput(sbi->volmap);<br> iput(sbi->logmap);<br> <br>+ /* destroy block allocation info */<br>+ tux3_balloc_info_destroy(sbi);<br>+ <br> sb->s_fs_info = NULL;<br> kfree(sbi);<br> }<br>@@ -172,6 +175,10 @@ static int tux3_fill_super(struct super_<br>
err = tux_load_sb(sb, silent);<br> if (err)<br> goto error;<br>+<br>+ /* initialize block allocation info */<br>+ tux3_balloc_info_init(sbi);<br>+ <br> printk("%s: sb %p, ops %p, depth %Lu, block %Lu, entries_per_leaf %d\n",<br>
__func__,<br> sbi->itable.sb, sbi->itable.ops,<br>diff -pNur tux-orig/trace.h tux-hack/trace.h<br>--- tux-orig/trace.h 2009-01-14 20:00:22.000000000 +0800<br>
+++ tux-hack/trace.h 2009-01-15 15:04:49.000000000 +0800<br>@@ -22,4 +22,15 @@<br> die(100); \<br> } while (0)<br> <br>+<br>+#ifdef __KERNEL__<br>+/* debug macro, xiaofeng */<br>+#define xtrace(f, a...) { \<br>
+ printk ("(%s, %d): %s:", \<br>+ __FILE__, __LINE__, __FUNCTION__); \<br>+ printk (f, ## a); \<br>+ printk ("\n"); \<br>
+ }<br>+<br>+#endif<br> #endif<br>diff -pNur tux-orig/tux3.h tux-hack/tux3.h<br>--- tux-orig/tux3.h 2009-01-14 20:00:22.000000000 +0800<br>+++ tux-hack/tux3.h 2009-01-15 15:34:37.000000000 +0800<br>
@@ -9,6 +9,8 @@<br> #include <linux/fs.h><br> #include <linux/buffer_head.h><br> #include <linux/mutex.h><br>+#include <linux/mm.h><br>+#include <linux/percpu_counter.h><br> <br> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)<br>
#include <linux/cred.h> // fsuid<br>@@ -213,6 +215,13 @@ struct cursor {<br> } path[];<br> };<br> <br>+/* Tux3 block allocation information */<br>+struct tux3_balloc_info {<br>+ struct percpu_counter freeblocks_counter;<br>
+ /* nextalloc_counter, and others */<br>+};<br>+#define freeblocks_counter(sbi) (&sbi->balloc_info.freeblocks_counter)<br>+<br> /* Tux3-specific sb is a handle for the entire volume state */<br> <br> struct sb {<br>
@@ -241,6 +250,7 @@ struct sb {<br> struct mutex loglock; /* serialize log entries (spinlock me) */<br> #ifdef __KERNEL__<br> struct super_block *vfs_sb; /* Generic kernel superblock */<br>+ struct tux3_balloc_info balloc_info; /* control info for block allocation */<br>
#else<br> struct dev *dev; /* userspace block device */<br> #endif<br>@@ -620,6 +630,25 @@ static inline struct inode *buffer_inode<br> return buffer->b_page->mapping->host;<br> }<br> <br>+static inline void tux3_balloc_info_init(struct sb* sbi)<br>
+{<br>+ percpu_counter_init(freeblocks_counter(sbi), sbi->freeblocks);<br>+}<br>+static inline void tux3_balloc_info_destroy(struct sb* sbi)<br>+{<br>+ percpu_counter_destroy(freeblocks_counter(sbi));<br>+}<br>+<br>
+static inline void tux3_release_blocks(struct sb* sbi, int count)<br>+{<br>+ percpu_counter_add(freeblocks_counter(sbi), count);<br>+}<br>+<br>+static inline void tux3_reserve_blocks(struct sb* sbi, int count)<br>+{<br>
+ percpu_counter_sub(freeblocks_counter(sbi), count);<br>+}<br>+<br> /* btree.c */<br> struct buffer_head *cursor_leafbuf(struct cursor *cursor);<br> void release_cursor(struct cursor *cursor);<br>@@ -678,6 +707,7 @@ int tux3_get_block(struct inode *inode, <br>
extern const struct address_space_operations tux_aops;<br> extern const struct address_space_operations tux_blk_aops;<br> extern const struct address_space_operations tux_vol_aops;<br>+extern const struct address_space_operations tux_da_aops;<br>
<br> /* iattr.c */<br> unsigned encode_asize(unsigned bits);<br><br>