summaryrefslogtreecommitdiffstats
path: root/src/cmd/venti/buildindex.c
diff options
context:
space:
mode:
authorrsc <devnull@localhost>2003-11-23 17:54:58 +0000
committerrsc <devnull@localhost>2003-11-23 17:54:58 +0000
commit7a4ee46d253e291044bba2d0c54b818b67ac013c (patch)
tree7bdcaf69a15ecd24c057a697936b67bbde93e00b /src/cmd/venti/buildindex.c
parent4fbfdd7acd4bf4fc71b1329230e05fc761907566 (diff)
downloadplan9port-7a4ee46d253e291044bba2d0c54b818b67ac013c.tar.gz
plan9port-7a4ee46d253e291044bba2d0c54b818b67ac013c.zip
Initial stab at Venti.
Diffstat (limited to 'src/cmd/venti/buildindex.c')
-rw-r--r--src/cmd/venti/buildindex.c145
1 files changed, 145 insertions, 0 deletions
diff --git a/src/cmd/venti/buildindex.c b/src/cmd/venti/buildindex.c
new file mode 100644
index 00000000..952e75dd
--- /dev/null
+++ b/src/cmd/venti/buildindex.c
@@ -0,0 +1,145 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int
+writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
+{
+ ISect *is;
+
+ is = findisect(ix, buck);
+ if(is == nil){
+ seterr(EAdmin, "bad math in writebucket");
+ return -1;
+ }
+ if(buck < is->start || buck >= is->stop)
+ seterr(EAdmin, "index write out of bounds: %d not in [%d,%d)\n",
+ buck, is->start, is->stop);
+ buck -= is->start;
+ qlock(&stats.lock);
+ stats.indexwrites++;
+ qunlock(&stats.lock);
+ packibucket(ib, b->data);
+ return writepart(is->part, is->blockbase + ((u64int)buck << is->blocklog), b->data, is->blocksize);
+}
+
+static int
+buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
+{
+ IEStream *ies;
+ IBucket ib, zib;
+ ZBlock *z, *b;
+ u32int next, buck;
+ int ok;
+ u64int found = 0;
+
+//ZZZ make buffer size configurable
+ b = alloczblock(ix->blocksize, 0);
+ z = alloczblock(ix->blocksize, 1);
+ ies = initiestream(part, off, clumps, 64*1024);
+ if(b == nil || z == nil || ies == nil){
+ ok = 0;
+ goto breakout;
+ return -1;
+ }
+ ok = 0;
+ next = 0;
+ ib.data = b->data + IBucketSize;
+ zib.data = z->data + IBucketSize;
+ zib.n = 0;
+ zib.next = 0;
+ for(;;){
+ buck = buildbucket(ix, ies, &ib);
+ found += ib.n;
+ if(zero){
+ for(; next != buck; next++){
+ if(next == ix->buckets){
+ if(buck != TWID32){
+ fprint(2, "bucket out of range\n");
+ ok = -1;
+ }
+ goto breakout;
+ }
+ if(writebucket(ix, next, &zib, z) < 0){
+ fprint(2, "can't write zero bucket to buck=%d: %r", next);
+ ok = -1;
+ }
+ }
+ }
+ if(buck >= ix->buckets){
+ if(buck == TWID32)
+ break;
+ fprint(2, "bucket out of range\n");
+ ok = -1;
+ goto breakout;
+ }
+ if(writebucket(ix, buck, &ib, b) < 0){
+ fprint(2, "bad bucket found=%lld: %r\n", found);
+ ok = -1;
+ }
+ next = buck + 1;
+ }
+breakout:;
+ fprint(2, "constructed index with %lld entries\n", found);
+ freeiestream(ies);
+ freezblock(z);
+ freezblock(b);
+ return ok;
+}
+
+void
+usage(void)
+{
+ fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
+ threadexitsall(0);
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+ Part *part;
+ u64int clumps, base;
+ u32int bcmem;
+ int zero;
+
+ zero = 1;
+ bcmem = 0;
+ ARGBEGIN{
+ case 'B':
+ bcmem = unittoull(ARGF());
+ break;
+ case 'Z':
+ zero = 0;
+ break;
+ default:
+ usage();
+ break;
+ }ARGEND
+
+ if(argc != 2)
+ usage();
+
+ if(initventi(argv[0]) < 0)
+ sysfatal("can't init venti: %r");
+
+ if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
+ bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+ fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+ initdcache(bcmem);
+
+ fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);
+
+ part = initpart(argv[1], 1);
+ if(part == nil)
+ sysfatal("can't initialize temporary partition: %r");
+
+ clumps = sortrawientries(mainindex, part, &base);
+ if(clumps == TWID64)
+ sysfatal("can't build sorted index: %r");
+ fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
+
+ if(buildindex(mainindex, part, base, clumps, zero) < 0)
+ sysfatal("can't build new index: %r");
+
+ threadexitsall(0);
+}