[Tux3] Patch: Put get_segs back together again

Sat Dec 27 20:40:20 PST 2008

The find_segs/fill_segs factoring of get_segs served its purpose in
allowing us to focus on developing a clean model for what is arguably
the heart of Tux3.  But while get_segs presents a very nice interface,
terse, efficient and well suited to its purpose, the internal glue
between find_segs and fill_segs is looking increasingly awkward.  This
patch (not committed yet) puts them back together again.

 filemap.c        |   14 ++-----
 kernel/filemap.c |   99 ++++++++++++++++++++++---------------------------------
 2 files changed, 44 insertions(+), 69 deletions(-)

The result is a somewhat rambling 180 line function, which is not too
bad considering that it does a similar job to ext2_get_block in
ext2/inode.c.  Check that out, and keep in mind that ext2 does not have
to worry about extents or btrees: its ->get_block works one data block
at a time in a simple radix tree.

One reason our code is shorter is, btree.c and dleaf.c hide a lot of
the work behind nice, abstract interfaces.  Thanks to Hirofumi for
digging into these and turning my messy prototypes into solid,
readable code!

One thing about ext2_get_block is, its SMP locking is much more
lightweight and parallel than the crude rw semaphore locking I just
added.  It uses RW spinlocks (sparingly) and a mutex to exclude against
truncate.  As we improve our locking strategy over time, Ext2 is the
efficiency benchmark to measure against.

Filemap is going to evolve a lot over the next phase of development,
Improvements that I think we should do before submitting for review
include:

  - Detect opportunities to merge physically contiguous segments
    together (->get_block can currently generate lots of wasteful
    single block extents)

  - Split segments up into multiple extents to handle extent size
    limit.

  - Add support for redirect on write, needed for atomic commit.

And the big one, a little further away, is:

  - Integrate algorithms from version.c

There will also be many opportunities for optimization.  For example,
many ->get_block calls could be handled by incrementing a single
field in an extent, so it would be nice to have a path that does that,
and falls back to the fully general code we have now to handle more
complex cases.

Hopefully, this patch moves in the direction of making it easier to
add all those shiny new things.

diff -r d2e1390e571d user/filemap.c

--- a/user/filemap.c	Sat Dec 27 18:22:55 2008 -0800
+++ b/user/filemap.c	Sat Dec 27 19:19:14 2008 -0800
@@ -179,16 +179,10 @@ int main(int argc, char *argv[])
 	struct seg segvec[64];
 	int segs;
 
-	if (0) {
-		for (int i = 0; i < 1; i++) {
-			struct cursor *cursor = alloc_cursor(&inode->btree, 1);
-			struct dwalk seek[2] = { };
-			unsigned overlap[2];
-			segs = find_segs(cursor, 2*i, 2*i + 1, segvec, 2, seek, overlap);
-			show_segs(segvec, segs);
-			segs = fill_segs(cursor, 2*i, 2*i + 1, segvec, segs, seek, overlap);
-			show_segs(segvec, segs);
-		}
+	if (1) {
+		for (int i = 0; i < 10; i++)
+			segs = get_segs(inode, 2*i, 1, segvec, 2, 1);
+		show_segs(segvec, segs);
 		struct delete_info delinfo = { .key = 0, };
 		segs = tree_chop(&inode->btree, &delinfo, 0);
 		assert(!segs);
diff -r d2e1390e571d user/kernel/filemap.c
--- a/user/kernel/filemap.c	Sat Dec 27 18:22:55 2008 -0800
+++ b/user/kernel/filemap.c	Sat Dec 27 19:19:14 2008 -0800
@@ -18,14 +18,25 @@ void show_segs(struct seg seglist[], uns
 	printf("\n");
 }
 
-static int find_segs(struct cursor *cursor, block_t start, block_t limit,
-	struct seg seg[], unsigned max_segs, struct dwalk seek[2], unsigned overlap[2])
+static int get_segs(struct inode *inode, block_t start, block_t count, struct seg seg[], unsigned max_segs, int create)
 {
+	struct cursor *cursor = alloc_cursor(&tux_inode(inode)->btree, 1); /* +1 for new depth */
+	if (!cursor)
+		return -ENOMEM;
+
+	if (create)
+		down_write(&cursor->btree->lock);
+	else
+		down_read(&cursor->btree->lock);
+
 	assert(max_segs > 0);
+	block_t limit = start + count;
 	trace("--- index %Lx, limit %Lx ---", (L)start, (L)limit);
 	struct btree *btree = cursor->btree;
 	struct sb *sb = btree->sb;
+	struct dwalk seek[2] = { };
 	int err;
+
 	if (!btree->root.depth)
 		return 0;
 
@@ -71,36 +82,18 @@ static int find_segs(struct cursor *curs
 			dwalk_next(walk);
  		}
 	}
-	trace("\n");
 	seek[1] = *walk;
-	if (segs) {
-		block_t below = start - seg_start;
-		block_t above = index - min(index, limit);
-		seg[0].block += below;
-		seg[0].count -= below;
-		seg[segs - 1].count -= above;
-		if (overlap) {
-			overlap[0] = below;
-			overlap[1] = above;
-		}
-	}
-	return segs;
-}
+	assert(segs);
+	block_t below = start - seg_start;
+	block_t above = index - min(index, limit);
+	seg[0].block += below;
+	seg[0].count -= below;
+	seg[segs - 1].count -= above;
 
-/*
- * This interface has no way of telling how long the seg vector is in case segs
- * has to be increased.  If ENOSPC, has no way of telling how may segs were
- * successfully allocated and recorded in the btree.  Sucks, still.
- */
+	if (!create)
+		goto out_release;
 
-static int fill_segs(struct cursor *cursor, block_t start, block_t limit,
-	struct seg seg[], int segs, struct dwalk seek[2], unsigned overlap[2])
-{
-	struct btree *btree = cursor->btree;
-	struct sb *sb = btree->sb;
-	struct dleaf *leaf = bufdata(cursor_leafbuf(cursor));
 	struct dleaf *tail = NULL;
-	unsigned below = overlap[0], above = overlap[1];
 	tuxkey_t tailkey;
 
 	if (!dwalk_end(&seek[1])) {
@@ -130,7 +123,8 @@ static int fill_segs(struct cursor *curs
 				 * space for btree splits, free just the blocks for extents
 				 * we failed to store.
 				 */
-				return -ENOSPC;
+				segs = -ENOSPC;
+				goto out_create;
 			}
 			seg[i] = (struct seg){ .block = block, .count = count, .state = SEG_NEW, };
 		}
@@ -142,8 +136,8 @@ static int fill_segs(struct cursor *curs
 			mark_buffer_dirty(cursor_leafbuf(cursor));
 			struct buffer_head *newbuf = new_leaf(btree);
 			if (!newbuf) {
-				release_cursor(cursor);
-				return -ENOMEM;
+				segs = -ENOMEM;
+				goto out_create;
 			}
 			/*
 			 * ENOSPC on btree index split could leave the cache state
@@ -172,46 +166,33 @@ static int fill_segs(struct cursor *curs
 		index += seg[i].count;
 	}
 	if (tail) {
-		if (dleaf_need(btree, tail) < dleaf_free(btree, leaf)) {
+		if (dleaf_need(btree, tail) < dleaf_free(btree, leaf))
 			dleaf_merge(btree, leaf, tail);
-			dleaf_dump(btree, leaf);
-		} else {
+		else {
 			mark_buffer_dirty(cursor_leafbuf(cursor));
 			assert(dleaf_groups(tail) >= 1);
 			/* Tail does not fit, add it as a new btree leaf */
 			struct buffer_head *newbuf = new_leaf(btree);
 			if (!newbuf) {
-				release_cursor(cursor);
-				return -ENOMEM;
+				segs = -ENOMEM;
+				goto out_create;
 			}
 			memcpy(bufdata(newbuf), tail, sb->blocksize);
-			btree_insert_leaf(cursor, tailkey, newbuf);
+			if ((err = btree_insert_leaf(cursor, tailkey, newbuf))) {
+				free(tail);
+				segs = err;
+				goto out_unlock;
+			}
+;
 		}
-		free(tail);
 	}
 	mark_buffer_dirty(cursor_leafbuf(cursor));
-//eek:
+out_create:
+	if (tail)
+		free(tail);
+out_release:
 	release_cursor(cursor);
-	return segs;
-}
-
-static int get_segs(struct inode *inode, block_t start, block_t count, struct seg segvec[], unsigned max_segs, int create)
-{
-	struct cursor *cursor = alloc_cursor(&tux_inode(inode)->btree, 1); /* +1 for new depth */
-	if (!cursor)
-		return -ENOMEM;
-
-	if (create)
-		down_write(&cursor->btree->lock);
-	else
-		down_read(&cursor->btree->lock);
-	unsigned overlap[2];
-	struct dwalk seek[2] = { };
-	int segs = find_segs(cursor, start, start + count, segvec, max_segs, seek, overlap);
-	if (segs > 0 && create)
-		segs = fill_segs(cursor, start, start + count, segvec, segs, seek, overlap);
-	if (segs >= 0)
-		release_cursor(cursor);
+out_unlock:
 	if (create)
 		up_write(&cursor->btree->lock);
 	else

_______________________________________________
Tux3 mailing list
Tux3 at tux3.org
http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3