author | H. Peter Anvin <hpa@zytor.com> | 2001-07-08 21:51:31 +0000
---|---|---
committer | H. Peter Anvin <hpa@zytor.com> | 2001-07-08 21:51:31 +0000
commit | 7be4b5ec6f20e3ffe13e2a574549e8028faba526 (patch) |
tree | 8e14b9522e61a08a1c09c69639c0797c52b4368b |
Initial version under CVS control
-rw-r--r-- | alloc.c | 126
-rw-r--r-- | arena.c | 642
-rw-r--r-- | ftrunctest.c | 19
-rw-r--r-- | lpsm.h | 71
-rw-r--r-- | system.h | 29
-rw-r--r-- | test_mmap.c | 21
-rw-r--r-- | teststore.c | 47
7 files changed, 955 insertions, 0 deletions
@@ -0,0 +1,126 @@
+#ident "$Id$"
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * alloc.c
+ *
+ * Provide persistent storage versions of malloc(), realloc() and free().
+ *
+ * This code uses a modified buddy system allocator.  It's probably broken
+ * if your byte size isn't at least a power of 2.
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#define OBJSTORE_INTERNALS 1
+#include "objstore.h"
+
+#define OBJSTORE_ARENA_MAGIC 0xd8319f45
+
+/* This is the minimal order worth allocating.  This must be able
+   to hold two pointers plus an integer.  2^4 = 16 bytes. */
+#define ORDER_MIN 4             /* This must be >= 1. */
+
+/* This is the size of the alloc bitmask.  It will usually be sparse. */
+/* This is two bits per unit of the lowest order */
+#define ALLOC_BITMAP_SIZE ((ARENA_LIMIT >> ORDER_MIN)*2/CHAR_BIT)
+
+/* This bit is set in the order marker to indicate an occupied cell. */
+#define OCCUPIED ((unsigned char)(1 << (CHAR_BIT-1)))
+
+/* How many address bits? */
+static const int orders = sizeof(void *) * CHAR_BIT;
+
+/*
+ * Initialize the object store arena allocator.  Note the argument
+ * that allows a fixed-offset structure at the beginning.  This is
+ * essential, since otherwise we wouldn't be able to begin to
+ * pick apart the object hierarchy.
+ */
+void *objstore_arena_init(size_t leadin_size)
+{
+  struct ObjStore *os = objstore_os_struct;
+  void **order_list, **order_ptr;
+  uintptr_t begin_data, end_data, order_size;
+  int i;
+
+  leadin_size = (leadin_size + os->pagesize - 1) & ~(os->pagesize - 1);
+  order_list  = (void **)((char *)os->arena + leadin_size);
+  begin_data  = (uintptr_t)(order_list + orders + 1);
+  end_data    = (uintptr_t)os->arena + os->arena_len;
+
+  if ( (uintptr_t)order_list[0] != (uintptr_t)OBJSTORE_ARENA_MAGIC ) {
+    /* The arena is uninitialized. */
+
+    /* Begin by initializing all the pointers to null */
+    for ( i = 0 ; i < orders ; i++ )
+      order_list[i] = NULL;
+
+    /* Align the beginning and end to the lowest-supported order.
+       Note that begin_data is adjusted +1 above, to handle the
+       order/alloc marker of the first unit. */
+    begin_data = (begin_data + ((uintptr_t)1 << ORDER_MIN) - 1) &
+                 ~(((uintptr_t)1 << ORDER_MIN) - 1);
+    end_data   = end_data & ~(((uintptr_t)1 << ORDER_MIN)-1);
+
+    for ( i = ORDER_MIN ; i < orders ; i++ ) {
+      order_size = (uintptr_t)1 << i;
+      order_ptr  = &order_list[i];
+      if ( (begin_data & order_size) &&
+           (end_data-begin_data) >= order_size ) {
+        ((unsigned char *)begin_data)[-1] = i;  /* Free object of order i */
+        *order_ptr = (void *)begin_data;
+        order_ptr  = (void **)begin_data;
+        *order_ptr = NULL;
+        begin_data += order_size;
+      }
+      if ( (end_data & order_size) &&
+           (end_data-begin_data) >= order_size ) {
+        end_data -= order_size;
+        ((unsigned char *)begin_data)[-1] = i;  /* Free object of order i */
+        *order_ptr = (void *)end_data;
+        order_ptr  = (void **)end_data;
+        *order_ptr = NULL;
+      }
+    }
+
+    /* Arena now initialized */
+    order_list[0] = (void *)(uintptr_t)OBJSTORE_ARENA_MAGIC;
+  }
+}
+
+void *objstore_malloc(size_t size)
+{
+  int order_needed, order, i;
+
+  size += 1;                    /* Add 1 for bookkeeping byte */
+
+  /* Find the order needed */
+  for ( order_needed = ORDER_MIN ;
+        (size_t)(1UL << order_needed) < size ;
+        order_needed++ );
+
+  /* Find the order available */
+  for ( order = order_needed ; order < orders ; order++ ) {
+    if ( order_list[order] )
+      break;
+  }
+
+  if ( order >= orders )
+    return NULL;                /* Nothing available */
+
+  /* Split orders down to the needed one */
+  for ( i = order ; i < order_needed ; i++ ) {
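alloc.c describes a modified buddy-system allocator, but its hunk is cut off above before the interesting arithmetic. As a rough, self-contained illustration of the underlying scheme (the helper names below are hypothetical and not part of this commit): a request is padded by one bookkeeping byte and rounded up to the smallest power of two of order at least ORDER_MIN, and the buddy of a block is found by flipping the bit of its arena offset that corresponds to that order.

```c
/* Illustrative sketch only -- not the allocator from alloc.c.
   Shows how a byte count maps to a buddy "order" and how the
   buddy of a block at a given arena offset is found. */
#include <stdio.h>
#include <stddef.h>

#define ORDER_MIN 4                     /* smallest block: 2^4 = 16 bytes */

static int order_for_size(size_t size)
{
  int order = ORDER_MIN;
  size += 1;                            /* one bookkeeping byte, as in alloc.c */
  while ( ((size_t)1 << order) < size )
    order++;
  return order;
}

/* The buddy of the block at 'offset' (relative to the arena start)
   of order 'order' differs only in bit 'order' of the offset. */
static size_t buddy_of(size_t offset, int order)
{
  return offset ^ ((size_t)1 << order);
}

int main(void)
{
  printf("24 bytes -> order %d\n", order_for_size(24));   /* order 5 = 32 bytes */
  printf("buddy of offset 0x40 at order 5: 0x%zx\n", buddy_of(0x40, 5));
  return 0;
}
```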
@@ -0,0 +1,642 @@
+#ident "$Id$"
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * objstore.c
+ *
+ * Persistent object store implemented using memory-mapping tricks
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>             /* memset() */
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <math.h>               /* HUGE_VAL */
+#define __USE_MISC 1            /* Needed to support mremap() */
+#define __USE_GNU 1             /* Needed to support mremap() */
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sched.h>
+
+#define OBJSTORE_INTERNALS 1
+#include "objstore.h"
+
+enum page_status {
+  page_unread = 0,
+  page_clean  = 1,
+  page_dirty  = 2,
+};
+
+/*
+ * This is the data structure for the object store.  Note that only
+ * one active object store is supported, due to the need to trap
+ * SIGSEGV.
+ */
+struct ObjStore *objstore_os_struct;
+
+/* Wrappers for read() and write() which retry if incomplete */
+static ssize_t objstore_read(int fd, void *buf, size_t count)
+{
+  char *bufp = buf;
+  ssize_t total = 0;
+  ssize_t rv;
+
+  while ( count ) {
+    rv = read(fd, bufp, count);
+    if ( rv == -1 ) {
+      if ( errno == EINTR || errno == EAGAIN )
+        continue;
+      else
+        return total ? total : -1;
+    } else if ( rv == 0 ) {
+      return total;
+    }
+    bufp  += rv;
+    count -= rv;
+    total += rv;
+  }
+
+  return total;
+}
+
+static ssize_t objstore_write(int fd, void *buf, size_t count)
+{
+  char *bufp = buf;
+  ssize_t total = 0;
+  ssize_t rv;
+
+  while ( count ) {
+    rv = write(fd, bufp, count);
+    if ( rv == -1 ) {
+      if ( errno == EINTR || errno == EAGAIN )
+        continue;
+      else
+        return total ? total : -1;
+    } else if ( rv == 0 ) {
+      return total;
+    }
+    bufp  += rv;
+    count -= rv;
+    total += rv;
+  }
+
+  return total;
+}
+
+/*
+ * SIGSEGV handler for persistent object store
+ */
+static void objstore_sigsegv(int signal, siginfo_t *siginfo, void *ptr)
+{
+  struct ObjStore *os = objstore_os_struct;
+  void *page;
+  off_t offset;
+  char *pageinfo;
+  struct flock lock;
+  int old_errno = errno;
+#ifdef __linux__
+  struct sigcontext *ctxt;
+
+# ifdef __i386__                /* This is so specific to Linux/i386 */
+  if ( siginfo->si_code == 0 ) {
+    /* Old kernel.  Fill in data to the best of our knowledge. */
+    /* Don't even begin to ask me where the 0x14 comes from */
+    ctxt = (struct sigcontext *)((char *)ptr + 0x14);
+    if ( ctxt->trapno == 14 ) {
+      /* Linux/i386 uses unmapped pages to mimic PROT_NONE, so we can't
+         tell ACCERR and MAPERR apart from the register state */
+      siginfo->si_code = SEGV_ACCERR;
+      siginfo->si_addr = (void *)ctxt->cr2;
+    }
+  }
+# endif /* __i386__ */
+#endif /* __linux__ */
+
+  if ( signal != SIGSEGV || siginfo->si_code != SEGV_ACCERR ||
+       ((uintptr_t)siginfo->si_addr - (uintptr_t)os->arena) >= os->arena_len ) {
+    struct sigaction dfl;
+
+    dfl.sa_handler = SIG_DFL;
+    sigemptyset(&dfl.sa_mask);
+    dfl.sa_flags = SA_ONESHOT;
+    sigaction(SIGSEGV, &dfl, NULL);
+
+    errno = old_errno;
+    return;                     /* Re-take fault */
+  }
+
+  page     = (void *)((uintptr_t)siginfo->si_addr & ~(os->pagesize-1));
+  offset   = (uintptr_t)page - (uintptr_t)os->arena;
+  pageinfo = os->pageinfo + (offset >> os->pageshift);
+
+  mprotect(page, os->pagesize, PROT_READ|PROT_WRITE);
+
+  switch ( (enum page_status) *pageinfo ) {
+  case page_unread:
+    lseek(os->main_fd, offset, SEEK_SET);
+
+    lock.l_type   = F_RDLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_start  = offset;
+    lock.l_len    = os->pagesize;
+    while ( fcntl(os->main_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+    if ( objstore_read(os->main_fd, page, os->pagesize) < os->pagesize )
+      abort();                  /* Uh-oh... */
+
+    lock.l_type = F_UNLCK;
+    while ( fcntl(os->main_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+    mprotect(page, os->pagesize, PROT_READ);  /* Make page readonly */
+    *pageinfo = page_clean;     /* Page read and clean */
+    os->loaded_count++;         /* For accounting purposes */
+    break;
+
+  case page_clean:
+    *pageinfo = page_dirty;     /* Page now dirty */
+    os->dirty_count++;          /* For accounting purposes */
+    /* Leave page r/w */
+    break;
+
+  default:
+    abort();                    /* This shouldn't happen */
+  }
+
+  errno = old_errno;
+}
+
+/*
+ * Routine to do log writeback.  Used by the initial log recovery routine
+ * as well as the during-execution garbage collect.
+ * THIS ROUTINE SHOULD BE INVOKED WITH LOCK HELD ON THE LOG FILE.
+ */
+static int objstore_log_writeback(void)
+{
+  struct ObjStore *os = objstore_os_struct;
+  struct ObjStore_LogRecord record;
+  off_t position, last_commit;
+  struct flock lockmain;
+
+  last_commit = 0;              /* Last COMMIT record found */
+  position = lseek(os->log_fd, 0, SEEK_SET);
+
+  while ( objstore_read(os->log_fd, &record, sizeof(record)) == sizeof(record) ) {
+    if ( record.magic != LOGRECORD_MAGIC )
+      break;                    /* Bad magic, assume rest of log corrupt */
+    if ( record.record_type == osrec_commit ) {
+      /* NOTE: last_commit points to the final byte to examine, thus
+         at the *end* of the final commit record. */
+      position += sizeof(record);
+      last_commit = position;   /* Found a commit record */
+    } else if ( record.record_type == osrec_page ) {
+      /* Advance past current page cluster */
+      position = lseek(os->log_fd, record.size, SEEK_CUR);
+    } else {
+      return -1;                /* Unknown record - unsafe to process */
+    }
+  }
+
+  /* Now we know where the last commit was.  Now we can process
+     everything up to that point. */
+
+  position = lseek(os->log_fd, 0, SEEK_SET);
+
+  while ( objstore_read(os->log_fd, &record, sizeof(record))
+          == sizeof(record) && position < last_commit ) {
+    if ( record.magic != LOGRECORD_MAGIC )
+      break;                    /* Bad magic, assume rest of log corrupt */
+    if ( record.record_type == osrec_commit ) {
+      /* Found a commit record, do nothing */
+      position += sizeof(record);
+    } else if ( record.record_type == osrec_page ) {
+      /* Write back data to file */
+      char *data;
+
+      position += sizeof(record);
+
+      lockmain.l_type   = F_WRLCK;
+      lockmain.l_whence = SEEK_SET;
+      lockmain.l_start  = record.offset;
+      lockmain.l_len    = record.size;
+      while ( fcntl(os->main_fd, F_SETLKW, &lockmain) == -1 && errno == EINTR );
+      data = mmap(NULL, record.size, PROT_WRITE, MAP_SHARED,
+                  os->main_fd, record.offset);
+      if ( data == MAP_FAILED )
+        return -1;              /* Badness... */
+      if ( objstore_read(os->log_fd, data, record.size) != record.size )
+        return -1;              /* Badness */
+      if ( munmap(data, record.size) )
+        return -1;
+
+      lockmain.l_type = F_UNLCK;
+      while ( fcntl(os->main_fd, F_SETLKW, &lockmain) == -1 && errno == EINTR );
+      position += record.size;
+    } else {
+      return -1;                /* Unknown record - unsafe to process */
+    }
+  }
+
+  /* Log successfully recovered.  Truncate. */
+  fsync(os->main_fd);
+  ftruncate(os->log_fd, 0);
+  /* Write initial commit record, for sequence number recovery */
+  record.magic       = LOGRECORD_MAGIC;
+  record.record_type = osrec_commit;
+  record.size        = os->fork_seq;
+  record.offset      = 0x54494d43;  /* For debugging */
+  if ( objstore_write(os->log_fd, &record, sizeof(record)) < sizeof(record) )
+    return -1;
+
+  fsync(os->log_fd);            /* Indicate log recovery complete */
+
+  return 0;
+}
+
+/*
+ * Routine to do log recovery
+ */
+static int objstore_recover_log(void)
+{
+  struct ObjStore *os = objstore_os_struct;
+  struct flock lock;
+  int rv = 0;
+
+  /* First, lock the log file */
+  lock.l_type   = F_WRLCK;
+  lock.l_whence = SEEK_SET;
+  lock.l_start  = 0;
+  lock.l_len    = 0;
+  while ( fcntl(os->log_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+  /* Do log recovery, and write initial commit record. */
+  rv = objstore_log_writeback();
+
+  /* Increase the sequence number, since we just wrote a commit. */
+  os->fork_seq++;
+
+  /* Unlock file and run. */
+  lock.l_type = F_UNLCK;
+  while ( fcntl(os->log_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+  return rv;
+}
+
+/*
+ * Opens the object store.  This includes log
+ * playback (crash recovery) if the log file exists
+ * and is nonempty.
+ */
+void *objstore_init(char *main_file, char *log_file, size_t *arena_len)
+{
+  struct ObjStore *os;
+  void *arena_ptr;
+  struct sigaction sigact;
+  struct flock lock;
+  off_t file_len, len = arena_len ? *arena_len : 0;
+  size_t file_pages, len_pages;
+
+  arena_ptr = ARENA_ADDRESS;
+
+  objstore_os_struct = os = malloc(sizeof(struct ObjStore));
+  if ( !os )
+    goto errx0;
+
+  os->fork_seq = 0;             /* Initialize sequence counter */
+
+  os->main_fd = open(main_file, O_RDWR|O_CREAT, 0666);
+  if ( os->main_fd < 0 )
+    goto errx1;
+
+  os->pagesize = getpagesize();
+  if ( os->pagesize & (os->pagesize - 1) )
+    goto errx2;                 /* WTF -- pagesize not a power of 2? */
+
+  /* Compute log2(os->pagesize) */
+  os->pageshift = 0;
+  while ( (1 << os->pageshift) < os->pagesize )
+    os->pageshift++;
+
+  /*
+   * Open log file
+   */
+  os->log_fd = open(log_file, O_RDWR|O_APPEND|O_CREAT, 0666);
+  if ( os->log_fd < 0 )
+    goto errx3;
+
+  /* Now, do log recovery if needed */
+  if ( objstore_recover_log() )
+    goto errx3;
+
+  /* Allocate arena memory space */
+  lock.l_type   = F_WRLCK;
+  lock.l_whence = SEEK_SET;
+  lock.l_start  = 0;
+  lock.l_len    = 0;
+  while ( fcntl(os->main_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+  file_len = lseek(os->main_fd, 0, SEEK_END);
+  if ( len == 0 ) {
+    len = file_len;
+  }
+  len = (len + os->pagesize - 1) & ~(os->pagesize - 1);
+  if ( len > file_len ) {
+    ftruncate(os->main_fd, len);  /* Extend file */
+  }
+  lock.l_type = F_UNLCK;
+  while ( fcntl(os->main_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+  os->arena = mmap(arena_ptr, len, PROT_NONE,
+                   MAP_ANON|MAP_PRIVATE|MAP_FIXED, 0, 0);
+  if ( os->arena == MAP_FAILED )
+    goto errx3;
+
+  os->arena_len = len;
+  if ( arena_len )
+    *arena_len = len;
+
+  os->pageinfo = malloc(len >> os->pageshift);
+  if ( !os->pageinfo )
+    goto errx4;
+
+  /* The pageinfo up to and including file_len is "unread"; beyond
+     file_len we know it must be zero and thus it can be marked "clean" */
+  file_len   = (file_len + os->pagesize - 1) & ~(os->pagesize-1);
+  file_pages = file_len >> os->pageshift;
+  len_pages  = len >> os->pageshift;
+
+  memset(os->pageinfo, page_unread, file_pages);
+
+  if ( len_pages > file_pages ) {
+    mprotect((char *)os->arena + file_len, len - file_len, PROT_READ);
+    memset(os->pageinfo + file_pages, page_clean, len_pages-file_pages);
+  }
+
+  sigact.sa_sigaction = objstore_sigsegv;
+  sigemptyset(&sigact.sa_mask);
+  sigact.sa_flags = SA_RESTART|SA_SIGINFO;
+  if ( sigaction(SIGSEGV, &sigact, &os->oldact) )
+    goto errx5;
+
+  return os->arena;
+
+ errx5:
+  free(os->pageinfo);
+ errx4:
+  munmap(arena_ptr, len);
+ errx3:
+  if ( os->log_fd >= 0 ) close(os->log_fd);
+ errx2:
+  close(os->main_fd);
+ errx1:
+  free(os);
+ errx0:
+
+  return NULL;
+}
+
+/*
+ * Object store checkpoint.  Writes entries to the log file.
+ * The "gc_factor" is the maximum log file size relative to the
+ * arena size.  For example, if gc_factor == 0.5 and the log size
+ * is more than 50% of the arena file size, a writeback cycle
+ * will take place after the log has been written.  This means other
+ * checkpoints will have to wait!
+ *
+ * Set gc_factor to 0.0 to force a gc, and to HUGE_VAL to inhibit gc.
+ */
+int objstore_checkpoint(double gc_factor)
+{
+  struct ObjStore *os = objstore_os_struct;
+  int f;
+  char *pi, *epi;
+  void *page;
+
+  pi  = os->pageinfo;
+  epi = os->pageinfo + (os->arena_len >> os->pageshift);
+
+  f = fork();
+  if ( f < 0 )
+    return 1;                   /* Checkpoint failed! */
+  else if ( f > 0 ) {
+    /* Parent process -- just mark all dirty pages clean */
+
+    size_t size, count;
+    char *opi;
+    int found_dirty;
+
+    /* Aggregate both clean and dirty pages; this should allow the OS
+       to avoid keeping track of quite as many memory protect regions */
+    for ( pi = os->pageinfo ; pi < epi ; pi++ ) {
+      if ( *pi == page_dirty || *pi == page_clean ) {
+        found_dirty = (*pi == page_dirty);
+        page = (char *)os->arena +
+               ((uintptr_t)(pi - os->pageinfo) << os->pageshift);
+
+        opi   = pi;
+        size  = os->pagesize;
+        count = 1;
+        while ( pi+1 < epi &&
+                (pi[1] == page_dirty || pi[1] == page_clean) ) {
+          pi++;
+          found_dirty = found_dirty || (*pi == page_dirty);
+          count++;
+          size += os->pagesize;
+        }
+        if ( found_dirty ) {
+          mprotect(page, size, PROT_READ);
+          memset(opi, page_clean, count);
+        }
+      }
+    }
+    os->dirty_count = 0;        /* No pages dirty */
+    os->fork_seq++;             /* Increase next sequence number */
+    return 0;
+  } else {
+    /* Child process -- do the actual work of writing back dirty pages */
+
+    struct ObjStore_LogRecord record, last_rec;
+    struct flock lock;
+    off_t logsize;
+
+    record.magic       = LOGRECORD_MAGIC;
+    record.record_type = osrec_page;
+
+    lock.l_whence = SEEK_SET;
+    lock.l_start  = 0;
+    lock.l_len    = 0;
+    for (;;) {
+      /* First, lock the entire log file */
+      lock.l_type = F_WRLCK;
+      while ( fcntl(os->log_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+      /* Make sure we were indeed next in turn */
+      lseek(os->log_fd, -(off_t)sizeof(last_rec), SEEK_END);
+      if ( objstore_read(os->log_fd, &last_rec, sizeof(last_rec)) < sizeof(last_rec) ) {
+        kill(getppid(), SIGABRT);  /* Kill main process */
+        _exit(99);
+      }
+      if ( last_rec.size+1 == os->fork_seq )
+        break;                  /* It's for us... */
+
+      /* Someone else is ahead of us in line.  Yield to them. */
+      lock.l_type = F_UNLCK;
+      while ( fcntl(os->log_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+      sched_yield();            /* Snore... */
+    }
+
+    /* Write dirty pages to log file */
+    for ( pi = os->pageinfo ; pi < epi ; pi++ ) {
+      if ( *pi == page_dirty ) {
+        page = (char *)os->arena +
+               ((uintptr_t)(pi - os->pageinfo) << os->pageshift);
+        record.offset = (char *)page - (char *)os->arena;
+
+        /* Aggregate contiguous pages into a single record */
+        record.size = os->pagesize;
+        while ( pi+1 < epi && pi[1] == page_dirty ) {
+          pi++;
+          record.size += os->pagesize;
+        }
+
+        if ( objstore_write(os->log_fd, &record, sizeof(record))
+             < sizeof(record) ||
+             objstore_write(os->log_fd, page, record.size) < record.size ) {
+          kill(getppid(), SIGABRT);  /* Kill main process */
+          _exit(99);
+        }
+      }
+    }
+
+    /* This might be more efficiently done with fdatasync() */
+    fsync(os->log_fd);          /* Make sure we have written everything */
+
+    /* Write commit record */
+    record.record_type = osrec_commit;
+    record.size        = os->fork_seq;
+    record.offset      = (off_t)0x54494d43;
+    if ( objstore_write(os->log_fd, &record, sizeof(record)) < sizeof(record) ) {
+      kill(getppid(), SIGABRT);
+      _exit(99);
+    }
+    fsync(os->log_fd);
+
+    /* Check to see if it's time for garbage collect */
+    logsize = lseek(os->log_fd, 0, SEEK_END);
+    if ( gc_factor < HUGE_VAL && (double)logsize >= gc_factor*os->arena_len ) {
+      /* Replaying the log isn't the most efficient way to do this.
+         We could also keep a status bit per page around, and flush
+         them out of the shadow array.  The biggest problem with that
+         is that it probably can't be done in the background, unlike
+         this method.  Leave this as-is for now. */
+      if ( objstore_log_writeback() ) {
+        kill(getppid(), SIGABRT);
+        _exit(99);
+      }
+    }
+
+    /* Drop lock on log file */
+    lock.l_type = F_UNLCK;
+    while ( fcntl(os->log_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+    _exit(0);                   /* Done! */
+  }
+}
+
+/*
+ * Extend the size of the object store.
+ *
+ * This currently relies on several Linux-specific features,
+ * specifically mremap() and knowing that we probably can extend
+ * it without changing the virtual address.
+ */
+int objstore_extend(size_t new_size)
+{
+  struct ObjStore *os = objstore_os_struct;
+  struct flock lock;
+  void *newp, *infop;
+  off_t file_size;
+  int ft;
+  size_t add_size, old_size;
+  size_t add_pages, old_pages, new_pages, file_pages;
+
+  old_size = os->arena_len;
+
+  if ( new_size <= old_size )
+    return 0;                   /* No action */
+
+  new_size = (new_size + os->pagesize - 1) & ~(os->pagesize - 1);
+  add_size = new_size - old_size;
+
+  lock.l_type   = F_WRLCK;
+  lock.l_whence = SEEK_SET;
+  lock.l_start  = 0;
+  lock.l_len    = 0;
+  while ( fcntl(os->main_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+
+  lock.l_type = F_UNLCK;
+  file_size = lseek(os->main_fd, 0, SEEK_END);
+  if ( file_size < new_size )
+    ft = ftruncate(os->main_fd, new_size);
+  else
+    ft = 0;
+
+  while ( fcntl(os->main_fd, F_SETLKW, &lock) == -1 && errno == EINTR );
+  if ( ft )
+    return -1;                  /* Failure */
+
+  newp = mmap((char *)os->arena + old_size,
+              add_size,
+              PROT_NONE,
+              MAP_PRIVATE|MAP_ANON|MAP_FIXED, 0, 0);
+
+  if ( newp == MAP_FAILED )
+    return -1;                  /* Failure */
+
+  /* Since we specified MAP_FIXED, this should be guaranteed */
+  assert( newp == (char *)os->arena + old_size );
+
+  /* Convert sizes to pages */
+  file_size  = (file_size + os->pagesize - 1) & ~(os->pagesize-1);
+  new_pages  = new_size >> os->pageshift;
+  old_pages  = old_size >> os->pageshift;
+  file_pages = file_size >> os->pageshift;
+  add_pages  = new_pages - old_pages;
+
+  infop = realloc(os->pageinfo, new_pages);
+  if ( !infop ) {
+    munmap(newp, add_size);
+    return -1;                  /* Failure */
+  }
+
+  os->arena_len = new_size;
+  os->pageinfo  = infop;
+
+  /* If we extended the file, the new area is known to contain
+     zero, and can thus be considered "clean"; otherwise we have
+     to consider it "unread". */
+  if ( file_pages > old_pages ) {
+    memset(os->pageinfo + old_pages, page_unread, file_pages-old_pages);
+  }
+  if ( file_pages < new_pages ) {
+    memset(os->pageinfo + file_pages, page_clean, new_pages-file_pages);
+    mprotect((char *)os->arena + file_size, new_size-file_size, PROT_READ);
+  }
+
+  return 0;
+}
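The core trick in the file above is to keep arena pages mapped read-only (or PROT_NONE), let the first write to a page fault, and have the SIGSEGV handler record the page as dirty before re-enabling writes; checkpointing then only has to ship dirty pages to the log. The following standalone sketch (Linux-specific, illustration only, with none of the file backing, locking, or logging of the real code) shows just that fault-and-mark mechanism:

```c
/* Minimal sketch of mprotect()+SIGSEGV dirty-page tracking.
   Not the library's code -- just the mechanism it relies on. */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>

static char *arena;
static size_t arena_len;
static long pagesize;
static volatile sig_atomic_t dirty_pages;

static void on_segv(int sig, siginfo_t *si, void *ctx)
{
  uintptr_t addr = (uintptr_t)si->si_addr;

  (void)sig; (void)ctx;
  if ( addr - (uintptr_t)arena >= arena_len )
    _exit(1);                   /* a genuine crash, not one of our pages */

  /* Mark the page dirty and make it writable; the faulting store
     is retried when the handler returns. */
  mprotect((void *)(addr & ~((uintptr_t)pagesize - 1)), pagesize,
           PROT_READ|PROT_WRITE);
  dirty_pages++;
}

int main(void)
{
  struct sigaction sa;

  pagesize  = sysconf(_SC_PAGESIZE);
  arena_len = 4 * pagesize;
  arena = mmap(NULL, arena_len, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

  memset(&sa, 0, sizeof sa);
  sa.sa_sigaction = on_segv;
  sa.sa_flags = SA_SIGINFO|SA_RESTART;
  sigaction(SIGSEGV, &sa, NULL);

  arena[0] = 1;                 /* faults once, page 0 becomes dirty */
  arena[2 * pagesize] = 1;      /* page 2 becomes dirty */

  printf("dirty pages: %d\n", (int)dirty_pages);
  return 0;
}
```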
diff --git a/ftrunctest.c b/ftrunctest.c
new file mode 100644
index 0000000..dfa58b3
--- /dev/null
+++ b/ftrunctest.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <string.h>
+
+int main(int argc, char *argv[])
+{
+  int fd;
+  char *mapping;
+
+  fd = open("ftrunc.dat", O_RDWR|O_CREAT, 0666);
+  ftruncate(fd, 1024*1024*1024);
+  close(fd);
+
+  return 0;
+}
+
@@ -0,0 +1,71 @@
+#ident "$Id$"
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * objstore.h
+ *
+ * Header file for the persistent object store
+ */
+
+#ifndef OBJSTORE_H
+#define OBJSTORE_H
+
+#include <stdlib.h>
+#include <signal.h>
+#include <inttypes.h>
+
+#ifdef OBJSTORE_INTERNALS
+
+#include "system.h"             /* System-specific constants */
+
+struct ObjStore {
+  int main_fd;                  /* Primary file descriptor */
+  int log_fd;                   /* Log file descriptor */
+  int pagesize;                 /* Page size */
+  int pageshift;                /* log2(pagesize) */
+  void *arena;                  /* Mapped memory zone */
+  size_t arena_len;             /* Length of arena */
+  char *pageinfo;               /* Page info pointer */
+  size_t loaded_count;          /* Loaded pages count (accounting) */
+  size_t dirty_count;           /* Dirty pages count (accounting) */
+  struct sigaction oldact;      /* Previous signal action */
+  size_t fork_seq;              /* Sequence number of forked processes */
+};
+
+enum ObjStore_RecordType {
+  osrec_page,                   /* Page data */
+  osrec_commit,                 /* Commit record */
+};
+
+#define LOGRECORD_MAGIC 0x9247746e
+
+struct ObjStore_LogRecord {
+  unsigned int magic;           /* Magic number; for verification */
+  unsigned int record_type;     /* Record */
+  size_t size;                  /* Data byte count (sequence # for commit) */
+  off_t offset;                 /* Offset of data */
+};
+
+extern struct ObjStore *objstore_os_struct;
+
+#else /* not OBJSTORE_INTERNALS */
+
+struct Objstore;
+
+#endif /* not OBJSTORE_INTERNALS */
+
+void *objstore_init(char *main_file, char *log_file, size_t *arena_len);
+int objstore_checkpoint(double gc_factor);
+int objstore_extend(size_t new_size);
+
+#endif
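The log declared above is a flat stream of ObjStore_LogRecord headers, each page record followed by record.size bytes of payload, and recovery trusts nothing beyond the last commit record. A small offline scanner built against this header could locate that point as sketched below (an illustration, not part of this commit; it assumes it is compiled in-tree so that objstore.h and system.h are on the include path):

```c
/* Sketch: walk a checkpoint log and report where the last commit ends.
   Mirrors the first pass of objstore_log_writeback(). */
#include <stdio.h>

#define OBJSTORE_INTERNALS 1
#include "objstore.h"

int main(int argc, char *argv[])
{
  FILE *f;
  struct ObjStore_LogRecord rec;
  long position = 0, last_commit = 0;

  if ( argc != 2 || !(f = fopen(argv[1], "rb")) ) {
    fprintf(stderr, "usage: logscan <logfile>\n");
    return 1;
  }

  while ( fread(&rec, sizeof rec, 1, f) == 1 ) {
    if ( rec.magic != LOGRECORD_MAGIC )
      break;                            /* assume the rest is garbage */
    position += sizeof rec;
    if ( rec.record_type == osrec_commit ) {
      last_commit = position;           /* everything up to here is safe */
    } else if ( rec.record_type == osrec_page ) {
      position += (long)rec.size;       /* skip the page payload */
      if ( fseek(f, (long)rec.size, SEEK_CUR) )
        break;
    } else {
      break;                            /* unknown record type */
    }
  }

  printf("last commit ends at byte %ld\n", last_commit);
  fclose(f);
  return 0;
}
```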
diff --git a/system.h b/system.h
new file mode 100644
index 0000000..9d74409
--- /dev/null
+++ b/system.h
@@ -0,0 +1,29 @@
+#ident "$Id$"
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * system.h
+ *
+ * System-specific constants.
+ */
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+/* These constants are appropriate for Linux/i386 */
+
+/* This is where we map the database file - must be constant */
+#define ARENA_ADDRESS ((void *)0x60000000)
+
+#endif /* SYSTEM_H */
+
diff --git a/test_mmap.c b/test_mmap.c
new file mode 100644
index 0000000..445870f
--- /dev/null
+++ b/test_mmap.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <errno.h>
+
+int main(int argc, char *argv[])
+{
+  char *mapping;
+
+  mapping = mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, 0, 0);
+  mmap(mapping+8192, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON|MAP_FIXED, 0, 0);
+  strcpy(mapping+0,    "Foo mani padme hum");
+  strcpy(mapping+4096, "Foo mani padme hum");
+  errno = 0;
+  mmap(mapping+4096, 4096, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE|MAP_ANON, 0, 0);
+  munmap(mapping, 16384);
+}
+
diff --git a/teststore.c b/teststore.c
new file mode 100644
index 0000000..14c40fc
--- /dev/null
+++ b/teststore.c
@@ -0,0 +1,47 @@
+#ident "$Id$"
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2000 Transmeta Corporation - All Rights Reserved
+ *
+ *   This source module contains confidential and proprietary information
+ *   of Transmeta Corporation.  It is not to be disclosed or used except
+ *   in accordance with applicable agreements.  This copyright notice does
+ *   not evidence any actual or intended publication of such source code.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "objstore.h"
+
+int main(int argc, char *argv[])
+{
+  void *buf;
+  size_t arena_len = 16384;
+
+  buf = objstore_init("test.dat", "test.log", &arena_len);
+
+  printf("Read from first page: %s\n", (char *)buf);
+
+  strcpy((char *)buf + 4096, "This is the second page!");
+  strcpy((char *)buf + 8192, "This is the third page!");
+
+  printf("Read from third page: %s\n", (char *)buf + 8192);
+  objstore_checkpoint(0.5);
+
+  strcpy((char *)buf + 8192, "This is also the third page!");
+
+  printf("Read from third page: %s\n", (char *)buf + 8192);
+  objstore_checkpoint(0.0);
+
+  objstore_extend(65536);
+  strcpy((char *)buf + 32768, "This is the ninth page!");
+  objstore_checkpoint(0.0);
+
+  sleep(5);
+
+  return 0;
+}
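teststore.c above ignores every return value. A slightly more defensive caller, sketched here using only the three public entry points declared in objstore.h (this program is not part of the commit), would check that initialization succeeded and that the checkpoint fork worked; the 0.5 passed to objstore_checkpoint() means the background child also replays the log into the main file once the log grows past half the arena size.

```c
/* Sketch of error-checked use of the public API from objstore.h. */
#include <stdio.h>
#include <string.h>
#include "objstore.h"

int main(void)
{
  size_t arena_len = 65536;
  char *buf = objstore_init("test.dat", "test.log", &arena_len);

  if ( !buf ) {
    fprintf(stderr, "objstore_init failed\n");
    return 1;
  }

  strcpy(buf, "hello, persistent world");

  /* Checkpoint; writeback runs once the log exceeds half the arena size. */
  if ( objstore_checkpoint(0.5) ) {
    fprintf(stderr, "checkpoint failed (fork error?)\n");
    return 1;
  }

  if ( objstore_extend(2 * arena_len) )
    fprintf(stderr, "could not extend the arena\n");

  return 0;
}
```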