/* Use mmap() to read the word database very efficiently.
 *
 * We can then do various things with it --
 *	- print it out
 *	- check for consistency against the word->wid map
 *	- reorder it by word frequency
 * We could also use it as a starting point to rewrite the data overflow
 * file in order to reduce fragmentation.
 *
 * This program currently does only the first of these: it prints every
 * word stored in the file "WIDIndex" (in the current directory) to
 * stdout, one per line, and reports the total number of word bytes on
 * stderr.
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

/* The file is walked in fixed steps of this many bytes.  The first such
 * block (offset 0) is skipped -- presumably a file header; confirm
 * against the code that writes WIDIndex.
 */
#define WID_BLOCK_SIZE 32

/* Map WIDIndex into memory and print the word stored at the start of
 * each 32-byte record.  A record starts with a one-byte length followed
 * by that many bytes of word text.
 *
 * Returns 0 on success; exits with status 1 if the file cannot be
 * opened, examined or mapped.
 */
int
main(void)
{
    const char *IndexName = "WIDIndex";
    struct stat statbuf;
    const unsigned char *base = NULL;
    void *where = NULL;
    size_t len;
    size_t off;
    long TotalWordBytes = 0;
    int fd;

    fd = open(IndexName, O_RDONLY, 0);
    if (fd < 0) {
	perror("open");
	exit(1);
    }

    /* fstat() the descriptor we actually opened, so that the size is
     * that of the file we are about to map even if the name is
     * re-pointed at another file in the meantime.
     */
    if (fstat(fd, &statbuf) < 0) {
	perror("stat");
	exit(1);
    }

    len = (size_t) statbuf.st_size;
    if (statbuf.st_size < 0 || (off_t) len != statbuf.st_size) {
	fprintf(stderr, "%s: file too large to map\n", IndexName);
	exit(1);
    }

    /* mmap() rejects a zero-length mapping; an empty file simply
     * contains no words, so skip the mapping altogether.
     */
    if (len > 0) {
	where = mmap(
	    (void *) 0,	/* where to put it (0 = don't care, you choose) */
	    len,	/* how many bytes from the file to map (all of it) */
	    PROT_READ|PROT_WRITE, /* let me change my copy */
	    MAP_PRIVATE, /* don't write back my changes! */
	    fd,		/* file to map */
	    (off_t) 0	/* start offset in file */
	);
	if (where == MAP_FAILED) {
	    perror("mmap");
	    exit(1);
	}
	base = (const unsigned char *) where;
    }

#ifdef DEBUG
    printf("where %p len %lu\n", where, (unsigned long) len);
#endif

    for (off = WID_BLOCK_SIZE; off < len; off += WID_BLOCK_SIZE) {
	const unsigned char *rec = base + off;
	/* The length byte is read as unsigned so that values above 127
	 * do not turn negative on platforms where char is signed.
	 */
	size_t wordlen = rec[0];
	/* bytes that follow the length byte inside the mapping */
	size_t avail = len - off - 1;

	TotalWordBytes += (long) wordlen + 1;

	/* Never read past the end of the mapping, even if the final
	 * record is truncated or the length byte is corrupt.
	 */
	if (wordlen > avail) {
	    wordlen = avail;
	}
	if (wordlen > 0) {
	    (void) fwrite(rec + 1, 1, wordlen, stdout);
	}
	putchar('\n');
    }

    /* statistics: */
    fprintf(stderr, "Total Word Bytes: %ld\n", TotalWordBytes);

    if (where != NULL) {
	(void) munmap(where, len);
    }
    (void) close(fd);

    return 0;
}