[Tux3] Patch : Data Deduplication in Userspace

Gaurav Tungatkar gauravstt at gmail.com
Tue Feb 24 10:33:13 PST 2009


Hi,

The prototype for data deduplication for tux3 in userspace is finally complete. The implementation closely follows the design
doc posted on the mailing list earlier. It has been developed with the 25th Jan 2009 snapshot as the base. The complete Tux3 
code with data deduplication in userspace can be downloaded/ cloned from http://bitbucket.org/kushal/tux3_dedup/ . 
The code uses the openssl SHA1 implementation and requires openssl libs to be installed.
The dleaf part of the tux3graph output after copying the same file twice with and without deduplication are also attached along with the patch. 
Keeping in mind archival storage as the main application of deduplication edits and deletes have not been implemented yet. 

The performance results for the tests performed are as followed -

1) For copying the same 100mb file 3 times -
Without deduplication - each copy takes around 3.8 secs with a total of 76803 blocks
With deduplication - first copy takes 4.18 and the later two take 2.5 secs with a total of 26229 blocks

2) Copying the daily snapshots of tux3 repo for 16 days -
Without deduplication - 3327 blocks
With deduplication - 1968 blocks


TODO -- 

1) Make bucket handling per inode rather than a global bucket
2) Collision handling
3) Kernel porting

We have started work on all the above.

The patch for deduplication in userspace against changeset 58e077f83dc1 of the official repository (http://hg.tux3.org/tux3/) is as follows

diff -r 58e077f83dc1 user/Makefile
--- a/user/Makefile	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/Makefile	Tue Feb 24 17:28:24 2009 +0530
@@ -7,7 +7,7 @@
 endif
 
 CFLAGS += -std=gnu99 -Wall -g -rdynamic -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64
-CFLAGS += -Wall -Wextra -Werror
+CFLAGS += -Wall -Wextra -Werror -lssl
 CFLAGS += -Wno-unused-parameter -Wno-sign-compare -Wno-missing-field-initializers
 CFLAGS += $(UCFLAGS)
 
@@ -19,8 +19,8 @@
 
 TESTDIR = .
 
-testbin = buffer balloc dleaf ileaf iattr xattr btree dir filemap inode commit
-binaries = $(testbin) tux3 tux3fuse tux3graph
+testbin = buffer balloc dleaf ileaf iattr xattr btree dir filemap inode commit dedup
+binaries = $(testbin) tux3 tux3fuse
 
 ifeq ($(shell pkg-config fuse && echo found), found)
 	binaries += tux3fuse
@@ -37,7 +37,7 @@
 commitdeps	= $(inodedeps)
 dirdeps		= dir.c kernel/dir.c
 dleafdeps	= balloc-dummy.c kernel/dleaf.c
-filemapdeps	= $(dirdeps) kernel/log.c kernel/xattr.c kernel/dleaf.c \
+filemapdeps	= $(dirdeps) $(dedupdeps) kernel/log.c kernel/xattr.c kernel/dleaf.c \
 	kernel/btree.c kernel/iattr.c kernel/ileaf.c kernel/balloc.c \
 	filemap.c kernel/filemap.c
 iattrdeps	= btree-dummy.c kernel/iattr.c
@@ -47,6 +47,7 @@
 inodedeps	= $(filemapdeps) inode.c kernel/inode.c super.c
 superdeps	= $(inodedeps) kernel/commit.c super.c kernel/super.c
 xattrdeps	= kernel/balloc.c $(xattrcommondeps)
+dedupdeps	= kernel/dedup.c dedup.c
 
 all: $(binaries)
 tests: buffertest balloctest committest dleaftest ileaftest btreetest dirtest iattrtest xattrtest filemaptest inodetest
@@ -66,6 +67,7 @@
 ileaf.o: $(basedeps) $(ileafdeps)
 inode.o: $(basedeps) $(inodedeps)
 xattr.o:$(basedeps) $(xattrdeps)
+dedup.o: $(basedeps) $(dedupdeps)
 # programs
 tux3.o:$(basedeps) $(superdeps)
 tux3fuse.o:$(basedeps) $(superdeps)
@@ -82,6 +84,7 @@
 inode: vfs.o inode.o
 filemap: vfs.o filemap.o
 commit: vfs.o commit.o
+dedup: vfs.o dedup.o
 
 .c.o:
 	$(CC) $(CFLAGS) -Dbuild_$(<:.c=) -c -o $@ $<
diff -r 58e077f83dc1 user/commit.c
--- a/user/commit.c	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/commit.c	Tue Feb 24 17:28:24 2009 +0530
@@ -9,7 +9,7 @@
  */
 
 #ifndef trace
-#define trace trace_on
+#define trace trace_off
 #endif
 
 #include "inode.c"
diff -r 58e077f83dc1 user/dedup.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/user/dedup.c	Tue Feb 24 17:28:24 2009 +0530
@@ -0,0 +1,7 @@
+#include "btree.c"
+#include "tux3.h"
+#include "kernel/dedup.c"
+#include "hexdump.c"
+
+
+
diff -r 58e077f83dc1 user/filemap.c
--- a/user/filemap.c	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/filemap.c	Tue Feb 24 17:28:24 2009 +0530
@@ -2,7 +2,7 @@
 #include "diskio.h"
 
 #ifndef trace
-#define trace trace_on
+#define trace trace_off
 #endif
 
 #include "kernel/log.c"
@@ -91,14 +91,18 @@
 	int err = 0;
 	for (int i = 0, index = start; !err && i < segs; i++) {
 		int hole = map[i].state == SEG_HOLE;
-		trace_on("extent 0x%Lx/%x => %Lx", (L)index, map[i].count, (L)map[i].block);
+		trace("extent 0x%Lx/%x => %Lx state => %Lx", (L)index, map[i].count, (L)map[i].block, (L)map[i].state);
 		for (int j = 0; !err && j < map[i].count; j++) {
 			block_t block = map[i].block + j;
 			buffer = blockget(mapping(inode), index + j);
-			trace_on("block 0x%Lx => %Lx", (L)bufindex(buffer), (L)block);
+			trace("block 0x%Lx => %Lx", (L)bufindex(buffer), (L)block);
 			if (write) {
-				err = diskwrite(dev->fd, bufdata(buffer), sb->blocksize, block << dev->bits);
-			} else {
+				if (map[i].state != SEG_DUP) /* DREAMZ */
+					err = diskwrite(dev->fd, bufdata(buffer), sb->blocksize, block << dev->bits);
+				else
+					warn("Duplicate block not written");
+					
+			}else {
 				if (hole)
 					memset(bufdata(buffer), 0, sb->blocksize);
 				else
diff -r 58e077f83dc1 user/kernel/commit.c
--- a/user/kernel/commit.c	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/kernel/commit.c	Tue Feb 24 17:28:24 2009 +0530
@@ -12,6 +12,7 @@
 int unpack_sb(struct sb *sb, struct disksuper *super, struct root *iroot, int silent)
 {
 	u64 iroot_val = from_be_u64(super->iroot);
+	u64 hroot_val = from_be_u64(super->hroot);/*  DREAMZ */
 	if (memcmp(super->magic, (char[])SB_MAGIC, sizeof(super->magic))) {
 		if (!silent)
 			printf("invalid superblock [%Lx]\n",
@@ -35,8 +36,11 @@
 	sb->atomgen = from_be_u32(super->atomgen);
 	sb->freeatom = from_be_u32(super->freeatom);
 	sb->dictsize = from_be_u64(super->dictsize);
+	sb->writebucket = from_be_u64(super->writebucket);
+	sb->readbucket = from_be_u64(super->readbucket);
 
 	*iroot = unpack_root(iroot_val);
+	sb->htable.root = unpack_root(hroot_val);
 
 	return 0;
 }
@@ -51,4 +55,7 @@
 	super->freeatom = to_be_u32(sb->freeatom); // probably does not belong here
 	super->dictsize = to_be_u64(sb->dictsize); // probably does not belong here
 	super->iroot = to_be_u64(pack_root(&itable_btree(sb)->root));
+	super->hroot = to_be_u64(pack_root(&sb->htable.root));/*  DREAMZ  */
+	super->writebucket = to_be_u64(sb->writebucket);
+	super->readbucket = to_be_u64(sb->readbucket);
 }
diff -r 58e077f83dc1 user/kernel/dedup.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/user/kernel/dedup.c	Tue Feb 24 17:28:24 2009 +0530
@@ -0,0 +1,265 @@
+#include "tux3.h"
+#include <openssl/sha.h>
+#ifdef trace
+#undef trace
+#endif
+#define trace trace_on
+
+
+struct hleaf {
+	u16 magic;
+	u32 count;
+	struct hleaf_entry { u64 key; block_t block; u16 offset; }entries[];
+};
+
+struct bucket {
+	u16 count;
+	struct bucket_entry { 
+		unsigned char sha_hash[SHA_DIGEST_LENGTH];
+		block_t block;
+		u16 refcount;
+	}entries[];
+};
+			       
+static inline struct hleaf *to_hleaf(vleaf *leaf)
+{
+	return leaf;
+}
+
+int hleaf_init(struct btree *btree,vleaf *leaf)
+{
+	*to_hleaf(leaf) = (struct hleaf){ .count = 0 , .magic = 0xdade };
+	return 0;
+}
+
+
+
+static void hleaf_btree_init(struct btree *btree)
+{
+	struct sb *sb = btree->sb;
+	btree->entries_per_leaf = (sb->blocksize - offsetof(struct hleaf,entries)) / sizeof(struct hleaf_entry);
+}
+
+int hleaf_sniff(struct btree *btree, vleaf *leaf)
+{
+	return to_hleaf(leaf)->magic == 0xdade;
+}
+
+tuxkey_t hleaf_split(struct btree *btree, tuxkey_t key, vleaf *from, vleaf *into)
+{
+	assert(hleaf_sniff(btree, from));
+	struct hleaf *leaf = from;
+	unsigned at = leaf->count / 2;
+	if (leaf->count && key > leaf->entries[leaf->count - 1].key)
+		at = leaf->count;
+	unsigned tail = leaf->count - at;
+	hleaf_init(btree, into);
+	veccopy(to_hleaf(into)->entries, leaf->entries + at, tail);
+	to_hleaf(into)->count = tail;
+	leaf->count = at;
+	return tail ? to_hleaf(into)->entries[0].key : key;
+}
+
+unsigned hleaf_free(struct btree *btree, vleaf *leaf)
+{
+	return btree->entries_per_leaf - to_hleaf(leaf)->count;
+}
+
+unsigned hleaf_seek(struct btree *btree, tuxkey_t key, struct hleaf *leaf)
+{
+	unsigned at = 0;
+	while (at < leaf->count && leaf->entries[at].key < key)
+		at++;
+	return at;
+}
+
+void *hleaf_resize(struct btree *btree, tuxkey_t key, vleaf *data, unsigned one)
+{
+	assert(hleaf_sniff(btree, data));
+	struct hleaf *leaf = data;
+	unsigned at = hleaf_seek(btree, key, leaf);
+	if (at < leaf->count && leaf->entries[at].key == key)
+		goto out;
+	if (hleaf_free(btree, leaf) < one)
+		return NULL;
+	vecmove(leaf->entries + at + one, leaf->entries + at, leaf->count++ - at);
+out:
+	return leaf->entries + at;
+}
+
+void hleaf_dump(struct btree *btree, vleaf *data)
+{
+	struct hleaf *leaf = data;
+	struct hleaf_entry *entry, *limit = leaf->entries + leaf->count;
+	for (entry = leaf->entries; entry < limit; entry++) {
+		printf(" %llu", entry->key); 
+		printf(" %llu", entry->block);
+	}
+	trace(" (%x free)\n", hleaf_free(btree, leaf));
+}
+
+block_t bucket_lookup(struct sb *sb, unsigned char *hash)
+{
+	trace("In read bucket %Lx",(L)sb->readbucket);
+	int k;
+	struct bucket_entry *entry;
+	block_t block;
+	struct buffer_head *buffer = sb_bread(sb, sb->readbucket);
+	struct bucket *bck = (struct bucket *)bufdata(buffer);
+	for (int i = 0;i < bck->count;i++) {
+		entry = bck->entries + i;
+		for(k = 0;k < 20;k++) {
+			if (hash[k] == entry->sha_hash[k])
+				continue;
+			else 
+				break;
+		}
+		
+		if(k == 20) {
+			entry->refcount++;
+			block = entry->block;
+			trace("Found block %Lx",(L)block);
+			brelse_dirty(buffer);
+			return block;
+		}
+		
+	}      
+	brelse(buffer);
+	trace("Not found in read bucket %Lx", (L)sb->readbucket);
+	return -1;
+}
+
+
+
+void make_hash_entry(struct sb *sb, unsigned char *hash, block_t block)
+{
+  
+	trace("Making hash entry for block %Lx", (L)block);
+	struct buffer_head *buffer = sb_bread(sb, sb->writebucket);
+	struct bucket *bck = (struct bucket *)bufdata(buffer);
+	struct bucket_entry *entry;
+	entry = bck->entries + bck->count ;
+ 	entry->refcount = 1; 
+ 	entry->block = block; 
+ 	memcpy(entry->sha_hash,hash,SHA_DIGEST_LENGTH); 
+ 	bck->count ++; 
+	brelse_dirty(buffer);
+}
+
+
+
+void init_writebucket(struct btree *btree)
+{
+	int err = btree->ops->balloc(btree->sb, 1, &btree->sb->writebucket);
+	if(err)
+		warn("Failed to initialize write bucket");
+	trace("Initialised new write bucket %Lx", (L)btree->sb->writebucket);
+	struct buffer_head *buffer = sb_bread(btree->sb, btree->sb->writebucket);
+	memset(bufdata(buffer), 0, bufsize(buffer));
+	struct bucket *bck = (struct bucket *)bufdata(buffer);
+	bck->count = 0;
+	brelse_dirty(buffer);
+}
+
+block_t htree_lookup(struct btree *btree, u64 sh, unsigned char *hash)
+{
+	u64 offset;
+	block_t bckno;
+	struct sb *sb = btree->sb;
+	struct cursor *cursor = alloc_cursor(btree,20);
+	if (!cursor)
+		return -ENOMEM;
+	down_write(&btree->lock);
+	tuxkey_t key = sh;
+	unsigned at;
+	if (probe(btree, key, cursor))
+		error("probe for %Lx failed", (L)key);
+	
+	at = hleaf_seek(btree, key, bufdata(cursor_leafbuf(cursor))); 
+
+	struct hleaf *leaf = (struct hleaf *)bufdata(cursor_leafbuf(cursor));
+	struct hleaf_entry *temp = leaf->entries + at;
+	
+	if(temp->key == sh) {
+		block_t block;
+		offset = temp->offset;
+		bckno = temp->block;
+		struct buffer_head *buffer = sb_bread(sb, bckno);
+		struct bucket *bck =(struct bucket *) bufdata(buffer);
+		struct bucket_entry *entry;
+		entry = bck->entries + offset;
+		entry->refcount++;
+		block = entry->block;
+		btree->sb->readbucket = bckno;
+		trace("Found entry in tree");
+		trace("Changed read bucket to %Lx", (L)bckno);
+		brelse_dirty(buffer);
+		release_cursor(cursor);
+		free(cursor);
+		up_write(&btree->lock);
+		return block;
+	}	
+	
+	trace("Entry not found in tree");
+	struct hleaf_entry *entry = (struct hleaf_entry *)tree_expand(btree, key, 1, cursor);
+	struct buffer_head *buffer = sb_bread(sb, sb->writebucket);
+        struct bucket *bck = (struct bucket *)bufdata(buffer);
+        u16 count = bck->count;
+	int flag = 0;
+        if(count >= 100) {		
+		brelse_dirty(buffer);
+		init_writebucket(&sb->htable);
+		count = 0;
+		flag = 1;
+       	}
+	entry->block = sb->writebucket; 
+	entry->key = key;
+	entry->offset = count;
+	if (flag != 1)
+		brelse(buffer);
+	mark_buffer_dirty(cursor_leafbuf(cursor));
+	release_cursor(cursor);
+	free_cursor(cursor);
+	up_write(&btree->lock);
+	return -1; 
+}
+
+/* ALGORITHM FOR DEDUPLICATION */
+/* 1. Perform hash lookup in the current readbucket. */
+/* 2. If a match is found,  */
+/*      -Increment refernce count for that entry */
+/* 	-Return the duplicate block number to be mapped */
+/* 3.Else, */
+/* 	-Performed lookup in the hash tree to get the corresponding bucket number. */
+/* 	-If an entry is found in the hash tree, then the current readbucket is written back and */
+/* 	 the bucket in the matched entry is loaded into memory as the current */
+/* 	 read bucket.  */
+/* 	-Else, */
+/* 	     -an entry for the particular block is added to the hash tree */
+/* 	      and an entry is added in the current writebucket with reference count as 1. */
+
+
+block_t hash_lookup(struct sb *sb, unsigned char *hash)
+{
+	block_t block = -1;
+	u64 sh;
+	if((block = bucket_lookup(sb,hash)) == -1) {
+		for(int k=0; k<8; k++) {
+				sh = sh << 8;
+				sh = sh | (hash[k]);
+		}
+		block = htree_lookup(&sb->htable, sh, hash);
+	}   
+	return block;
+}
+
+
+struct btree_ops htable_ops = {
+	.btree_init = hleaf_btree_init,
+	.leaf_init = hleaf_init,
+	.leaf_split = hleaf_split,
+	.leaf_resize = hleaf_resize,
+	.leaf_sniff = hleaf_sniff,
+	.leaf_free = hleaf_free,
+	.balloc = balloc,
+};
diff -r 58e077f83dc1 user/kernel/filemap.c
--- a/user/kernel/filemap.c	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/kernel/filemap.c	Tue Feb 24 17:28:24 2009 +0530
@@ -9,13 +9,16 @@
  */
 
 #include "tux3.h"
-
+#include<openssl/sha.h>
+#include<string.h>
+#include "dedup.c"
 #ifndef trace
-#define trace trace_on
+#define trace trace_off
 #endif
 
 #define SEG_HOLE	(1 << 0)
 #define SEG_NEW		(1 << 1)
+#define SEG_DUP         (1 << 2)
 
 struct seg { block_t block; unsigned count; unsigned state; };
 
@@ -32,6 +35,7 @@
 {
 	struct sb *sb = tux_sb(inode->i_sb);
 	struct btree *btree = &tux_inode(inode)->btree;
+	unsigned char *hash;
 	int segs = 0;
 
 	assert(max_segs > 0);
@@ -62,7 +66,7 @@
 	if (limit > next_key(cursor, btree->root.depth))
 		limit = next_key(cursor, btree->root.depth);
 	struct dleaf *leaf = bufdata(cursor_leafbuf(cursor));
-	dleaf_dump(btree, leaf);
+	//dleaf_dump(btree, leaf);
 
 	struct dwalk *walk = &(struct dwalk){ };
 	block_t index = start, seg_start, block;
@@ -129,34 +133,58 @@
 		map[0].count = count;
 		map[0].state = SEG_HOLE;
 	}
+	block_t blk = -1;
+	struct buffer_head *buffer;
 	for (int i = 0; i < segs; i++) {
 		if (map[i].state == SEG_HOLE) {
-			count = map[i].count;
-			if ((err = balloc(sb, count, &block))) { // goal ???
-				/*
-				 * Out of space on file data allocation.  It happens.  Tread
-				 * carefully.  We have not stored anything in the btree yet,
-				 * so we free what we allocated so far.  We need to leave the
-				 * user with a nice ENOSPC return and all metadata consistent
-				 * on disk.  We better have reserved everything we need for
-				 * metadata, just giving up is not an option.
-				 */
-				/*
-				 * Alternatively, we can go ahead and try to record just what
-				 * we successfully allocated, then if the update fails on no
-				 * space for btree splits, free just the blocks for extents
-				 * we failed to store.
-				 */
-				segs = err;
-				goto out_create;
+			if( inode->inum > 4 && inode->inum != 10 && inode->inum != 13) {
+				buffer = (blockget(mapping(inode),start)); /* DREAMZ */
+				hash = (unsigned char *)malloc(sizeof(unsigned char) * SHA_DIGEST_LENGTH);
+				hash = SHA1(bufdata(buffer),inode->i_sb->blocksize,hash); 
+				brelse(buffer);
+				blk = hash_lookup(sb, hash);
 			}
-			trace("fill in %Lx/%i ", (L)block, count);
-			map[i] = (struct seg){
-				.block = block,
-				.count = count,
-				/* if create == 2, buffer should be dirty */
-				.state = create == 2 ? 0 : SEG_NEW,
-			};
+			if(blk == -1){
+				count = map[i].count;
+				if ((err = balloc(sb, count, &block))) { // goal ???
+					/*
+					 * Out of space on file data allocation.  It happens.  Tread
+					 * carefully.  We have not stored anything in the btree yet,
+					 * so we free what we allocated so far.  We need to leave the
+					 * user with a nice ENOSPC return and all metadata consistent
+					 * on disk.  We better have reserved everything we need for
+					 * metadata, just giving up is not an option.
+					 */
+					/*
+					 * Alternatively, we can go ahead and try to record just what
+					 * we successfully allocated, then if the update fails on no
+					 * space for btree splits, free just the blocks for extents
+					 * we failed to store.
+					 */
+					segs = err;
+					goto out_create;
+				}
+				if(inode->inum > 4 && inode->inum != 13 && inode->inum != 10){
+					make_hash_entry(sb, hash, block);
+					free(hash);
+				}
+				trace("fill in %Lx/%i ", (L)block, count);
+				map[i] = (struct seg){
+					.block = block,
+					.count = count,
+					/* if create == 2, buffer should be dirty */
+					.state = create == 2 ? 0 : SEG_NEW,				
+				};
+			
+			}
+			else{
+				if(inode->inum > 4 && inode->inum != 13 && inode->inum != 10)
+					free(hash);
+				trace("Duplicate found");
+				map[i] = (struct seg){ .block = blk, .count = count, .state = SEG_DUP, };
+		        }
+			
+			
 		}
 	}
 	/* Go back to region start and pack in new segs */
@@ -191,9 +219,9 @@
 			continue;
 		}
 		trace("pack 0x%Lx => %Lx/%x", (L)index, (L)map[i].block, map[i].count);
-		dleaf_dump(btree, leaf);
+		//dleaf_dump(btree, leaf);
 		dwalk_add(&headwalk, index, make_extent(map[i].block, map[i].count));
-		dleaf_dump(btree, leaf);
+		//dleaf_dump(btree, leaf);
 		index += map[i].count;
 	}
 	if (tail) {
diff -r 58e077f83dc1 user/kernel/tux3.h
--- a/user/kernel/tux3.h	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/kernel/tux3.h	Tue Feb 24 17:28:24 2009 +0530
@@ -189,6 +189,7 @@
 	be_u64 flags;		/* Need to assign some flags */
 	be_u64 iroot;		/* Root of the inode table btree */
 	be_u64 aroot;		/* The atime table is a file now, delete on next format rev */
+	be_u64 hroot;           /*Root of the htable btree DREAMZ */
 	be_u16 blockbits;	/* Shift to get volume block size */
 	be_u16 unused1;		/* Throw away on next format rev */
 	be_u32 unused2;		/* Throw away on next format rev */
@@ -199,6 +200,9 @@
 	be_u32 freeatom;	/* Beginning of persistent free atom list in atable */
 	be_u32 atomgen;		/* Next atom number if there are no free atoms */
 	be_u64 dictsize;	/* Size of the atom dictionary instead if i_size */
+	block_t readbucket;      /* points to block number of current read bucket*/
+	block_t writebucket;    /* points to block number of current write bucket */
+	
 };
 
 struct root {
@@ -251,6 +255,7 @@
 	struct disksuper super;
 	struct inode *volmap;	/* Volume metadata cache (like blockdev).
 				 * Note, ->btree is the btree for itable. */
+	struct btree htable;    /* Cached root of the hash table DREAMZ */
 	struct inode *bitmap;	/* allocation bitmap special file */
 	struct inode *rootdir;	/* root directory special file */
 	struct inode *vtable;	/* version table special file */
@@ -273,6 +278,8 @@
 	unsigned char *logpos, *logtop; /* Where to emit next log entry */
 	struct mutex loglock;	/* serialize log entries (spinlock me) */
 	struct stash defree;	/* defer extent frees until affer commit */
+	block_t readbucket;      /* points to block number of current read bucket*/
+	block_t writebucket;    /* points to block number of current write bucket */
 #ifdef __KERNEL__
 	struct super_block *vfs_sb; /* Generic kernel superblock */
 #else
diff -r 58e077f83dc1 user/super.c
--- a/user/super.c	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/super.c	Tue Feb 24 17:28:24 2009 +0530
@@ -11,7 +11,7 @@
 #include "tux3.h"
 
 #ifndef trace
-#define trace trace_on
+#define trace trace_off
 #endif
 
 #include "kernel/commit.c"
@@ -27,6 +27,7 @@
 	if (err)
 		return err;
 	init_btree(itable_btree(sb), sb, iroot, &itable_ops);
+	init_btree(&sb->htable, sb, sb->htable.root, &htable_ops);
 	return 0;
 }
 
@@ -90,11 +91,15 @@
 	int reserve = 1 << (sb->blockbits > 13 ? 0 : 13 - sb->blockbits);
 	for (int i = 0; i < reserve; i++) {
 		block_t block = balloc_from_range(sb, i, 1, 1);
-		trace("reserve %Lx", (L)block); // error ???
+		trace_on("reserve %Lx", (L)block); // error ???
 	}
 
 	trace("create inode table");
 	err = new_btree(itable_btree(sb), sb, &itable_ops);
+	if (err)
+		goto eek;
+	trace("create hash table");/*  DREAMZ */
+	err = new_btree(&sb->htable, sb, &htable_ops);
 	if (err)
 		goto eek;
 	sb->bitmap->i_size = (sb->volblocks + 7) >> 3;
@@ -119,6 +124,10 @@
 	sb->atomgen = 1; // atom 0 not allowed, means end of atom freelist
 	if (make_inode(sb->atable, TUX_ATABLE_INO))
 		goto eek;
+	
+	init_writebucket(&sb->htable); /* DREAMZ */
+	sb->readbucket = sb->writebucket;
+	
 	if ((err = sync_super(sb)))
 		goto eek;
 
diff -r 58e077f83dc1 user/tux3fuse.c
--- a/user/tux3fuse.c	Sat Jan 24 15:04:49 2009 -0800
+++ b/user/tux3fuse.c	Tue Feb 24 17:28:24 2009 +0530
@@ -38,7 +38,7 @@
 //#include <sys/xattr.h>
 #include "trace.h"
 #include "tux3.h"
-
+#include<execinfo.h>
 #define FUSE_USE_VERSION 27
 #include <fuse.h>
 #include <fuse/fuse_lowlevel.h>
@@ -74,7 +74,9 @@
 static void tux3_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
 {
 	trace("tux3_lookup(%Lx, '%s')", (L)parent, name);
-	struct inode *inode = tuxopen(sb->rootdir, name, strlen(name));
+	struct inode *parent_ino;
+	parent_ino = open_fuse_ino(parent);
+	struct inode *inode = tuxopen(parent_ino, name, strlen(name));
 
 	if (inode) {
 		struct fuse_entry_param ep = {
@@ -120,7 +122,6 @@
 static void tux3_read(fuse_req_t req, fuse_ino_t ino, size_t size,
 	off_t offset, struct fuse_file_info *fi)
 {
-	trace("tux3_read(%Lx)", (L)ino);
 	struct inode *inode = (struct inode *)(unsigned long)fi->fh;
 	struct file *file = &(struct file){ .f_inode = inode };
 
@@ -167,9 +168,10 @@
 	mode_t mode, struct fuse_file_info *fi)
 {
 	const struct fuse_ctx *ctx = fuse_req_ctx(req);
-
+	struct inode *parent_ino;
+	parent_ino = open_fuse_ino(parent);
 	trace("tux3_create(%Lx, '%s', uid = %u, gid = %u, mode = %o)", (L)parent, name, ctx->uid, ctx->gid, mode);
-	struct inode *inode = tuxcreate(sb->rootdir, name, strlen(name),
+	struct inode *inode = tuxcreate(parent_ino, name, strlen(name),
 		&(struct tux_iattr){ .uid = ctx->uid, .gid = ctx->gid, .mode = mode });
 	if (inode) {
 		struct fuse_entry_param fep = {
@@ -190,6 +192,10 @@
 			.attr_timeout = 0.0,
 			.entry_timeout = 0.0,
 		};
+		tuxsync(inode);
+		if(parent_ino->inum != TUX_ROOTDIR_INO)
+			tuxsync(parent_ino);
+		
 
 		fi->fh = (uint64_t)(unsigned long)inode;
 		fuse_reply_create(req, &fep, fi);
@@ -200,8 +206,46 @@
 
 static void tux3_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode)
 {
-	warn("not implemented");
-	fuse_reply_err(req, ENOSYS);
+	struct inode *parent_ino;
+	parent_ino = open_fuse_ino(parent);
+
+	const struct fuse_ctx *ctx = fuse_req_ctx(req);
+	
+	mode = mode | 1000000;
+	trace("tux3_mkdir(%Lx, '%s', uid = %u, gid = %u, mode = %o)", (L)parent, name, ctx->uid, ctx->gid, mode);
+	struct inode *inode = tuxcreate(parent_ino, name, strlen(name),
+		&(struct tux_iattr){ .uid = ctx->uid, .gid = ctx->gid, .mode = mode });
+	
+	
+	if (inode) {
+		struct fuse_entry_param fep = {
+			.attr = {
+				.st_ino   = inode->inum,
+				.st_mode  = inode->i_mode,
+				.st_ctim = inode->i_ctime,
+				.st_mtim = inode->i_mtime,
+				.st_atim = inode->i_atime,
+				.st_size  = inode->i_size,
+				.st_uid   = inode->i_uid,
+				.st_gid   = inode->i_gid,
+				.st_nlink = inode->i_nlink,
+			},
+
+			.ino = inode->inum,
+			.generation = 1,
+			.attr_timeout = 0.0,
+			.entry_timeout = 0.0,
+		};
+		
+		tuxclose(inode);
+		if(parent_ino->inum != TUX_ROOTDIR_INO)
+			tuxsync(parent_ino);
+		
+		sync_super(inode->i_sb);
+
+		fuse_reply_entry(req, &fep);
+	} else 
+		fuse_reply_err(req, ENOMEM);
 }
 
 static void tux3_write(fuse_req_t req, fuse_ino_t ino, const char *buf,
@@ -225,8 +269,8 @@
 	}
 
 	tuxsync(inode);
-	if ((errno = -sync_super(sb)))
-		goto eek;
+	/*if ((errno = -sync_super(sb)))
+	  goto eek;*/
 
 	fuse_reply_write(req, written);
 	return;
@@ -283,6 +327,7 @@
 	if (ino != FUSE_ROOT_ID) {
 		struct inode *inode = (struct inode *)(unsigned long)fi->fh;
 		tuxclose(inode);
+		sync_super(sb);
 	}
 	fuse_reply_err(req, 0); /* Success */
 }
@@ -307,7 +352,7 @@
 static void tux3_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
 	struct fuse_file_info *fi)
 {
-	trace("tux3_readdir(%Lx)", (L)ino);
+	trace_on("tux3_readdir(%Lx)", (L)ino);
 	struct inode *inode = (struct inode *)(unsigned long)fi->fh;
 	struct file *dirfile = &(struct file){ .f_inode = inode, .f_pos = offset };
 	char dirent[TUX_NAME_LEN + 1];
@@ -704,10 +749,9 @@
 	char *mountpoint;
 	int foreground;
 	int err = -1;
-
 	if (argc < 3)
 		error("usage: %s <volname> <mountpoint>", argv[0]);
-
+	
 	if (fuse_parse_cmdline(&args, &mountpoint, NULL, &foreground) != -1)
 	{
 		struct fuse_chan *fc = fuse_mount(mountpoint, &args);
@@ -725,13 +769,16 @@
 					fuse_session_add_chan(fs, fc);
 					fuse_daemonize(foreground);
 					err = fuse_session_loop(fs);
+					sync_super(sb);
 					fuse_remove_signal_handlers(fs);
 					fuse_session_remove_chan(fc);
 				}
 
 				fuse_session_destroy(fs);
 			}
-
+			fprintf(stderr,"\nTotal Number of blocks == %Lu",(L)sb->volblocks );
+			fprintf(stderr,"\nFree blocks available  == %Lu",(L)sb->freeblocks);
+			fprintf(stderr,"\nTotal blocks used      == %Lu\n\n",(L)(sb->volblocks - sb->freeblocks));
 			fuse_unmount(mountpoint, fc);
 		}
 	}

Regards,
Gaurav Tungatkar
Kushal Dalmia
Amey Magar
Chinmay Kamat


-------------- next part --------------
A non-text attachment was scrubbed...
Name: dedup.diff
Type: text/x-patch
Size: 24577 bytes
Desc: not available
URL: <http://phunq.net/pipermail/tux3/attachments/20090225/75b66dc3/attachment.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: withdedup.jpg
Type: image/jpeg
Size: 120464 bytes
Desc: not available
URL: <http://phunq.net/pipermail/tux3/attachments/20090225/75b66dc3/attachment.jpg>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: wodedup.jpg
Type: image/jpeg
Size: 120040 bytes
Desc: not available
URL: <http://phunq.net/pipermail/tux3/attachments/20090225/75b66dc3/attachment-0001.jpg>
-------------- next part --------------
_______________________________________________
Tux3 mailing list
Tux3 at tux3.org
http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3


More information about the Tux3 mailing list