commit 1f81d56b897746fc8af241f0df38495718b798e5
Author: JeffBezanson <jeff.bezanson@gmail.com>
Date:   Tue Jul 1 01:53:51 2008 +0000

    import of llt library source

diff --git a/llt/Makefile b/llt/Makefile
new file mode 100644
index 0000000..71c1f36
--- /dev/null
+++ b/llt/Makefile
@@ -0,0 +1,44 @@
+CC = gcc
+
+SRCS = bitvector.c hashing.c socket.c timefuncs.c utils.c dblprint.c ptrhash.c \
+	utf8.c ios.c operators.c cplxprint.c dirpath.c
+OBJS = $(SRCS:%.c=%.o)
+DOBJS = $(SRCS:%.c=%.do)
+TARGET = libllt.a
+TESTSRC = unittest.c
+TESTER = llttest
+
+FLAGS = -Wall -Wextra -Wno-strict-aliasing $(CFLAGS)
+LIBS =
+
+DEBUGFLAGS = -g -DDEBUG $(FLAGS)
+SHIPFLAGS = -O2 -DNDEBUG $(FLAGS)
+
+default: release
+
+%.o: %.c
+	$(CC) $(SHIPFLAGS) -c $< -o $@
+%.do: %.c
+	$(CC) $(DEBUGFLAGS) -c $< -o $@
+
+debug: $(DOBJS)
+	rm -rf $(TARGET)
+	ar rs $(TARGET) $(DOBJS)
+
+release: $(OBJS)
+	rm -rf $(TARGET)
+	ar rs $(TARGET) $(OBJS)
+
+test:
+	make clean
+	make release CFLAGS=-DENABLE_LLT_TEST
+	gcc $(TESTSRC) $(TARGET) -o $(TESTER) -lm
+	./$(TESTER)
+
+clean:
+	rm -f *.o
+	rm -f *.do
+	rm -f *~
+	rm -f core*
+	rm -f $(TARGET)
+	rm -f $(TESTER)
diff --git a/llt/attic/ios.c.old b/llt/attic/ios.c.old
new file mode 100644
index 0000000..5c71a15
--- /dev/null
+++ b/llt/attic/ios.c.old
@@ -0,0 +1,396 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+#include <errno.h>
+
+#ifdef WIN32
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+#define fileno _fileno
+#else
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/select.h>
+#endif
+
+#include "dtypes.h"
+#include "utils.h"
+#include "utf8.h"
+#include "ios.h"
+#include "socket.h"
+
+/* OS-level primitive wrappers */
+
+// return error code, #bytes read in *nread
+static int _os_read(long fd, void *buf, size_t n, size_t *nread)
+{
+    ssize_t r = read((int)fd, buf, n);
+    if (r == -1) {
+        *nread = 0;
+        return errno;
+    }
+    *nread = (size_t)r;
+    return 0;
+}
+
+static int _os_write(long fd, void *buf, size_t n, size_t *nwritten)
+{
+    ssize_t r = write((int)fd, buf, n);
+    if (r == -1) {
+        *nwritten = 0;
+        return errno;
+    }
+    *nread = (size_t)r;
+    return 0;
+}
+
+static int _fd_available(long fd)
+{
+#ifndef WIN32
+    fd_set set;
+    struct timeval tv = {0, 0};
+
+    FD_ZERO(&set);
+    FD_SET(fd, &set);
+    return (select(fd+1, &set, NULL, NULL, &tv)!=0);
+#else
+    return 0;
+#endif
+}
+
+
+/* internal utility functions */
+
+static char *_buf_realloc(ios_t *s, size_t sz)
+{
+    char *temp;
+
+    if (sz <= s->maxsize)
+        return s->buf;
+
+    if ((s->buf==NULL || s->buf==&s->local[0]) && (sz <= IOS_INLSIZE)) {
+        /* TODO: if we want to allow shrinking, see if the buffer shrank
+           down to this size, in which case we need to copy. */
+        s->buf = &s->local[0];
+        s->maxsize = IOS_INLSIZE;
+        s->ownbuf = 1;
+        return s->buf;
+    }
+    else if (s->ownbuf && s->buf != &s->local[0]) {
+        // if we own the buffer we're free to resize it
+        // always allocate 1 bigger in case user wants to add a NUL
+        // terminator after taking over the buffer
+        temp = realloc(s->buf, sz+1);
+        if (temp == NULL)
+            return NULL;
+    }
+    else {
+        temp = malloc(sz+1);
+        s->ownbuf = 1;
+        if (temp == NULL)
+            return NULL;
+    }
+
+    if (s->buf != temp && s->size > 0)
+        memcpy(temp, s->buf, s->size);
+    s->buf = temp;
+    s->maxsize = sz;
+    return s->buf;
+}
+
+// write a block of data into the buffer at the current position, resizing
+// if necessary. returns # written.
+static size_t _writebuf_force(ios_t *s, char *data, size_t n)
+{
+    size_t amt;
+    size_t newsize;
+
+    if (n == 0)
+        return 0;
+
+    if (s->bpos + n > s->size) {
+        if (s->bpos + n > s->maxsize) {
+            /* TO DO: here you might want to add a mechanism for limiting
+               the growth of the stream. */
+            newsize = s->maxsize * 2;
+            while (s->bpos + n > newsize)
+                newsize *= 2;
+            if (_buf_realloc(s, newsize) == NULL) {
+                /* no more space; write as much as we can */
+                amt = s->maxsize - s->bpos;
+                if (amt > 0) {
+                    memcpy(&s->buf[s->bpos], data, amt);
+                }
+                s->bpos += amt;
+                s->size = s->maxsize;
+                return amt;
+            }
+        }
+        s->size = s->bpos + n;
+    }
+    memcpy(&s->buf[s->bpos], data, n);
+    s->bpos += n;
+
+    return n;
+}
+
+
+/* interface functions, low level */
+
+size_t ios_read(ios_t *s, char *dest, size_t n)
+{
+}
+
+size_t ios_write(ios_t *s, char *data, size_t n)
+{
+}
+
+off_t ios_seek(ios_t *s, off_t pos)
+{
+}
+
+off_t ios_seek_end(ios_t *s)
+{
+}
+
+off_t ios_skip(ios_t *s, off_t offs)
+{
+}
+
+off_t ios_pos(ios_t *s)
+{
+    if (s->bm == bm_mem)
+        return (off_t)s->bpos;
+
+    off_t fdpos = lseek(s->fd, 0, SEEK_CUR);
+    if (fdpos == (off_t)-1)
+        return fdpos;
+
+    if (s->state == iost_wr)
+        fdpos += s->bpos;
+    else if (s->state == iost_rd)
+        fdpos -= (s->size - s->bpos);
+    return fdpos;
+}
+
+size_t ios_trunc(ios_t *s, size_t size)
+{
+}
+
+int ios_eof(ios_t *s)
+{
+    if (s->bm == bm_mem)
+        return (s->bpos >= s->size);
+    if (s->fd == -1)
+        return 1;
+    // todo
+}
+
+static void _discard_partial_buffer(ios_t *s)
+{
+    // this function preserves the invariant that data to write
+    // begins at the beginning of the buffer, and s->size refers
+    // to how much valid file data is stored in the buffer.
+
+    // this needs to be called when normal operation is interrupted in
+    // the middle of the buffer. "normal operation" is reading or
+    // writing to the end of the buffer. this happens e.g. when flushing.
+    if (s->bpos && s->size > s->bpos) {
+        memmove(s->buf, s->buf + s->bpos, s->size - s->bpos);
+    }
+    s->size -= s->bpos;
+    s->bpos = 0;
+}
+
+int ios_flush(ios_t *s)
+{
+    if (ndirty == 0 || s->bm == bm_mem || s->buf == NULL)
+        return 0;
+    if (s->fd == -1)
+        return -1;
+
+    int partialb=0;
+    if (s->bitpos > 0 && s->ndirty==s->bpos+1) {
+        // flushing partial byte
+        partialb=1;
+    }
+
+    size_t nw, ntowrite=s->ndirty;
+    int err = _os_write(s->fd, s->buf, ntowrite, &nw);
+    // todo: try recovering from some kinds of errors (e.g. retry)
+    if (partialb) {
+        // skip back 1, since we might have to write this byte again
+        // if more bits in it are changed
+        if (lseek(s->fd, -1, SEEK_CUR) == (off_t)-1) {
+            // uhoh. can't write the "rest" of this byte. move to next
+            // byte instead.
+            s->bpos++;
+            s->bitpos = 0;
+        }
+    }
+
+    _discard_partial_buffer(s);
+    s->ndirty = 0;
+
+    if (err)
+        return err;
+    if (nw < ntowrite)
+        return -1;
+    return 0;
+}
+
+void ios_close(ios_t *s)
+{
+    ios_flush(s);
+    if (s->fd != -1 && s->ownfd)
+        close(s->fd);
+    s->fd = -1;
+}
+
+char *ios_takebuf(ios_t *s, size_t *psize)
+{
+    char *buf;
+
+    ios_flush(s);
+
+    if (s->buf == &s->local[0]) {
+        buf = malloc(s->size+1);
+        if (buf == NULL)
+            return NULL;
+        if (s->size)
+            memcpy(buf, s->buf, s->size);
+        buf[s->size] = '\0';
+    }
+    else {
+        buf = s->buf;
+    }
+
+    *psize = s->size+1;  // buffer is actually 1 bigger for terminating NUL
+
+    /* empty stream and reinitialize */
+    if (s->bm == bm_mem || s->bm == bm_none) {
+        s->buf = &s->local[0];
+        s->maxsize = IOS_INLSIZE;
+    }
+    else {
+        s->buf = NULL;
+        _buf_realloc(s, IOS_BUFSIZE);
+    }
+    s->size = s->bpos = 0;
+    s->bitpos = 0;
+
+    return buf;
+}
+
+int ios_setbuf(ios_t *s, char *buf, size_t size, int own)
+{
+    ios_flush(s);
+    size_t nvalid=0;
+
+    nvalid = s->size;
+    if (s->size == s->bpos && s->bitpos>0)
+        nvalid++;
+    if (size < nvalid)
+        nvalid = size;
+    memcpy(buf, s->buf, nvalid);
+    if (s->bpos > nvalid) {
+        // truncated
+        s->bpos = nvalid;
+        s->bitpos = 0;
+    }
+    s->size = nvalid;
+
+    if (s->buf!=NULL && s->ownbuf && s->buf!=&s->local[0])
+        free(s->buf);
+    s->buf = buf;
+    s->maxsize = size;
+    s->ownbuf = own;
+    return 0;
+}
+
+int ios_bufmode(ios_t *s, bufmode_t mode)
+{
+    // no fd; can only do mem-only buffering
+    if (s->fd == -1 && mode != bm_mem)
+        return -1;
+    s->bm = mode;
+    return 0;
+}
+
+void ios_bswap(ios_t *s, int bswap)
+{
+    s->byteswap = !!bswap;
+}
+
+int ios_copy(ios_t *to, ios_t *from, size_t nbytes, bool_t all)
+{
+}
+
+
+/* stream object initializers. we do no allocation. */
+
+ios_t *ios_file(ios_t *s, char *fname, int create, int rewrite)
+{
+}
+
+ios_t *ios_mem(ios_t *s, size_t initsize)
+{
+}
+
+ios_t *ios_fd(ios_t *s, long fd)
+{
+}
+
+
+/* higher level interface */
+
+int ios_putbit(ios_t *s, int bit)
+{
+    byte_t mask = 1<<s->bitpos;
+
+    if (_ios_setstate(s, iost_wr))
+        return 0;
+
+    if (!s->dirty) {
+        // haven't written any bits yet
+        // if stenciling is turned on, the buffer is already full and
+        // we don't need to do anything
+        if (s->rereadable && !s->stenciled) {
+            // fill buffer if we can
+        }
+    }
+
+    if (bit)
+        s->buf[s->bpos] |= mask;
+    else
+        s->buf[s->bpos] &= ~mask;
+    s->dirty = 1;
+
+    s->bitpos++;
+    if (s->bitpos > 7) {
+        s->bitpos = 0;
+        s->bpos++;
+        s->tally++;
+    }
+    return 1;
+}
+
+int ios_getbit(ios_t *s, int *pbit)
+{
+    byte_t mask = 1<<s->bitpos;
+
+    if (_ios_setstate(s, iost_rd))
+        return 0;
+
+    *pbit = (s->buf[s->bpos] & mask) ? 1 : 0;
+    s->bitpos++;
+    if (s->bitpos > 7) {
+        s->bitpos = 0;
+        s->bpos++;
+        s->tally++;
+    }
+    return 1;
+}
diff --git a/llt/attic/ios.h.old b/llt/attic/ios.h.old
new file mode 100644
index 0000000..257b350
--- /dev/null
+++ b/llt/attic/ios.h.old
@@ -0,0 +1,198 @@
+#ifndef __IOS_H_
+#define __IOS_H_
+
+// this flag controls when data actually moves out to the underlying I/O
+// channel. memory streams are a special case of this where the data
+// never moves out.
+typedef enum { bm_none, bm_line, bm_block, bm_mem } bufmode_t;
+
+typedef enum { iost_none, iost_rd, iost_wr } iostate_t;
+
+#define IOS_INLSIZE 54
+#define IOS_BUFSIZE 4095
+
+typedef struct {
+    bufmode_t bm;
+
+    // the state only indicates where the underlying file position is relative
+    // to the buffer. reading: at the end. writing: at the beginning.
+    // in general, you can do any operation in any state.
+    iostate_t state;
+
+    char *buf;        // start of buffer
+    size_t maxsize;   // space allocated to buffer
+    size_t size;      // length of valid data in buf, >=ndirty
+    size_t bpos;      // current position in buffer
+    size_t ndirty;    // # bytes at &buf[0] that need to be written
+
+    // this is a public field that keeps a running count of bytes
+    // read or written. you can freely use and change it. this is
+    // intended for keeping track of relative positions in streams
+    // that don't have absolute positions (like sockets).
+    size_t tally;
+
+    // pointer-size integer to support platforms where it might have
+    // to be a pointer
+    long fd;
+
+    byte_t bitpos;
+    //unsigned char bitdirty:1;      // bit buffer needs to be written
+    unsigned char byteswap:1;
+    //unsigned char readonly:1;
+    unsigned char ownbuf:1;
+    unsigned char ownfd:1;
+
+    // this means you can read, seek back, then read the same data
+    // again any number of times. usually only true for files and strings.
+    unsigned char rereadable:1;
+
+    // this enables "stenciled writes". you can alternately write and
+    // seek without flushing in between. this performs read-before-write
+    // to populate the buffer, so "rereadable" capability is required.
+    // this is off by default, except for bit I/O if rereadable is true.
+    unsigned char stenciled:1;
+
+    // request durable writes (fsync)
+    // unsigned char durable:1;
+
+    // todo: mutex
+    char local[IOS_INLSIZE];
+} ios_t;
+
+/* low-level interface functions */
+size_t ios_read(ios_t *s, char *dest, size_t n);
+size_t ios_write(ios_t *s, char *data, size_t n);
+off_t ios_seek(ios_t *s, off_t pos);   // absolute seek
+off_t ios_seek_end(ios_t *s);
+off_t ios_skip(ios_t *s, off_t offs);  // relative seek
+off_t ios_pos(ios_t *s);  // get current position
+size_t ios_trunc(ios_t *s, size_t size);
+int ios_eof(ios_t *s);
+int ios_flush(ios_t *s);
+void ios_close(ios_t *s);
+char *ios_takebuf(ios_t *s, size_t *psize);  // release buffer to caller
+// set buffer space to use
+int ios_setbuf(ios_t *s, char *buf, size_t size, int own);
+int ios_bufmode(ios_t *s, bufmode_t mode);
+void ios_bswap(ios_t *s, int bswap);
+int ios_copy(ios_t *to, ios_t *from, size_t nbytes, bool_t all);
+//void ios_lock(ios_t *s);
+//int ios_trylock(ios_t *s);
+//int ios_unlock(ios_t *s);
+
+/* stream creation */
+ios_t *ios_file(ios_t *s, char *fname, int create, int rewrite);
+ios_t *ios_mem(ios_t *s, size_t initsize);
+ios_t *ios_fd(ios_t *s, long fd);
+// todo: ios_socket
+
+/* high-level functions - output */
+int ios_putnum(ios_t *s, char *data, uint32_t type);
+int ios_putint(ios_t *s, int n);
+int ios_pututf8(ios_t *s, uint32_t wc);
+int ios_putstringz(ios_t *s, char *str, bool_t do_write_nulterm);
+/* single-bit I/O - even works for mixed reads and writes.
+   mixing bit-level I/O with normal byte stream I/O has undefined effects and
+   will almost certainly destroy your file. */
+int ios_putbit(ios_t *s, int bit);
+int ios_printf(ios_t *s, char *format, ...);
+
+/* high-level stream functions - input */
+int ios_getnum(ios_t *s, char *data, uint32_t type);
+int ios_getutf8(ios_t *s, uint32_t *pwc);
+int ios_ungetutf8(ios_t *s, uint32_t wc);
+int ios_getstringz(ios_t *dest, ios_t *src);
+int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars);
+int ios_readline(ios_t *dest, ios_t *s, char delim);
+int ios_getline(ios_t *s, char **pbuf, size_t *psz);
+int ios_getbit(ios_t *s, int *pbit);  // returns # of bits read (0 or 1)
+
+// seek by utf8 sequence increments
+int ios_nextutf8(ios_t *s);
+int ios_prevutf8(ios_t *s);
+
+/* stdio-style functions */
+#define IOS_EOF (-1)
+int ios_putc(ios_t *s, int c);
+wint_t ios_putwc(ios_t *s, wchar_t wc);
+int ios_getc(ios_t *s);
+wint_t ios_getwc(ios_t *s);
+int ios_ungetc(ios_t *s, int c);
+wint_t ios_ungetwc(ios_t *s, wint_t wc);
+#define ios_puts(s, str) ios_write(s, str, strlen(str))
+
+/*
+  With memory streams, mixed reads and writes are equivalent to performing
+  sequences of *p++, as either an lvalue or rvalue. File streams behave
+  similarly, but other streams might not support this. Using unbuffered
+  mode makes this more predictable.
+
+  Note on "unget" functions:
+  There are two kinds of functions here: those that operate on sized
+  blocks of bytes and those that operate on logical units like "character"
+  or "integer". The "unget" functions only work on logical units. There
+  is no "unget n bytes". You can only do an unget after a matching get.
+  However, data pushed back by an unget is available to all read operations.
+  The reason for this is that unget is defined in terms of its effect on
+  the underlying buffer (namely, it rebuffers data as if it had been
+  buffered but not read yet). IOS reserves the right to perform large block
+  operations directly, bypassing the buffer. In such a case data was
+  never buffered, so "rebuffering" has no meaning (i.e. there is no
+  correspondence between the buffer and the physical stream).
+
+  Single-bit I/O is able to write partial bytes ONLY IF the stream supports
+  seeking. Also, line buffering is not well-defined in the context of
+  single-bit I/O, so it might not do what you expect.
+
+  implementation notes:
+  in order to know where we are in a file, we must ensure the buffer
+  is only populated from the underlying stream starting with p==buf.
+
+  to switch from writing to reading: flush, set p=buf, cnt=0
+  to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0
+
+  when writing: buf starts at curr. physical stream pos, p - buf is how
+  many bytes we've written logically. cnt==0
+
+  dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from
+  reading to writing, where we might be in the middle of a byte without
+  having changed it.
+
+  to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
+  seek back by the same amount (undo it). write onto those bits. now set
+  the dirty bit. in this state, we can bit-read up to the end of the byte,
+  then formally switch to the read state using flush.
+
+  design points:
+  - data-source independence, including memory streams
+  - support 64-bit and large files
+  - efficient, low-latency buffering
+  - unget
+  - expose buffer to user, allow user-owned buffers
+  - allow direct I/O, don't always go through buffer
+  - buffer-internal seeking. makes seeking back 1-2 bytes very fast,
+    and makes it possible for sockets where it otherwise wouldn't be
+  - special support for utf8
+  - single-bit I/O
+  - tries to allow switching between reading and writing
+  - type-aware functions with byte-order swapping service
+  - position counter for meaningful data offsets with sockets
+
+  note:
+  the current code needs to be mostly rewritten. the design should be
+  as follows:
+
+  the buffer is a view of part of a file/stream. you can seek, read, and
+  write around in it as much as you like, as if it were just a string.
+
+  we keep track of the part of the buffer that's invalid (written to).
+  we remember whether the position of the underlying stream is aligned
+  with the end of the buffer (reading mode) or the beginning (writing mode).
+
+  based on this info, we might have to seek back before doing a flush.
+
+  as optimizations, we do no writing if the buffer isn't "dirty", and we
+  do no reading if the data will only be overwritten.
+*/
+
+#endif
diff --git a/llt/attic/streams.c b/llt/attic/streams.c
new file mode 100644
index 0000000..20a7752
--- /dev/null
+++ b/llt/attic/streams.c
@@ -0,0 +1,1107 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <limits.h>
+
+#ifdef WIN32
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+#include <errno.h>
+#define fileno _fileno
+#else
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/select.h>
+#endif
+
+#include "dtypes.h"
+#include "utils.h"
+#include "utf8.h"
+#include "streams.h"
+
+unsigned int type_sizes[] = {
+    sizeof(int32_t), sizeof(int8_t), sizeof(int16_t),
+    sizeof(int32_t), sizeof(float),  sizeof(double), sizeof(int64_t), 0,
+
+    /* unsigned */
+    0, sizeof(u_int8_t), sizeof(u_int16_t),
+    sizeof(u_int32_t), 0, 0, sizeof(u_int64_t), 0,
+
+    /* complex */
+    2*sizeof(int32_t), 2*sizeof(int8_t), 2*sizeof(int16_t),
+    2*sizeof(int32_t), 2*sizeof(float),  2*sizeof(double), 2*sizeof(int64_t),0,
+
+    /* complex unsigned */
+    0, 2*sizeof(u_int8_t), 2*sizeof(u_int16_t),
+    2*sizeof(u_int32_t), 0, 0, 2*sizeof(u_int64_t), 0
+};
+
+bool_t valid_type(u_int32_t type)
+{
+    u_int32_t sz;
+
+    /* irrelevant bits set */
+    if (type & ~T_TYPEMASK)
+        return false;
+    sz = type & T_TYPESIZE;
+    if (sz > T_INT64)
+        return false;
+    if (type == T_CPLX|T_BOOL)
+        return false;
+    /* no unsigned float or complex unsigned */
+    if (type & T_UNSIGNED) {
+        if ((sz > T_INT && sz != T_INT64) || sz == T_BOOL || type&T_CPLX)
+            return false;
+    }
+    return true;
+}
+
+/* an important function: some stream routines need to call this at
+   various points to ensure proper coexistence with bitwise I/O */
+static void stream_bit_flush(stream_t *s)
+{
+    if (s->bitpos > 0) {
+        stream_write(s, &s->bitbuf, 1);
+        s->bitpos = 0;
+    }
+}
+
+void stream_flush(stream_t *s)
+{
+    stream_bit_flush(s);
+    s->funcs->flush(s);
+}
+
+void stream_free(stream_t *s)
+{
+    stream_flush(s);
+    s->funcs->free_func(s);
+}
+
+int stream_readfd(stream_t *s)
+{
+    if (is_filestream(s)) {
+        return fileno(s->fptr);
+    }
+    else if (is_pipestream(s)) {
+        return s->rd;
+    }
+    return -1;
+}
+
+int stream_writefd(stream_t *s)
+{
+    if (is_filestream(s)) {
+        return fileno(s->fptr);
+    }
+    else if (is_pipestream(s)) {
+        return s->wd;
+    }
+    return -1;
+}
+
+
+/* file stream */
+
+off_t fs_seek(struct _stream *s, off_t where)
+{
+    FILE *f = s->fptr;
+
+    stream_bit_flush(s);
+
+    if (fseek(f, where, SEEK_SET) == -1)
+        return -1;
+    s->pos = ftell(f);
+    return s->pos;
+}
+
+off_t fs_skip(struct _stream *s, off_t offs)
+{
+    FILE *f = s->fptr;
+
+    stream_bit_flush(s);
+
+    // a successful fseek always resets the end-of-file condition, even if
+    // the offset is zero. we change this so that moving by 0 bytes when you're
+    // at eof means you stay at eof.
+    if (offs == 0)
+        return s->pos;
+
+    if (fseek(f, offs, SEEK_CUR) == -1)
+        return -1;
+    s->pos += offs;
+    return s->pos;
+}
+
+off_t fs_seek_end(struct _stream *s)
+{
+    FILE *f = s->fptr;
+
+    stream_bit_flush(s);
+
+    if (fseek(f, 0, SEEK_END) == -1)
+        return -1;
+    s->pos = ftell(f);
+    return s->pos;
+}
+
+size_t fs_read(struct _stream *s, char *dest, size_t size)
+{
+    FILE *f = s->fptr;
+    size_t c;
+
+    c = fread(dest, 1, size, f); /* read single-byte entities */
+    s->pos += c;
+    return c;
+}
+
+size_t fs_write(struct _stream *s, char *data, size_t size)
+{
+    FILE *f = s->fptr;
+    size_t c;
+
+    c = fwrite(data, 1, size, f);
+    s->pos += c;
+    return c;
+}
+
+size_t fs_trunc(struct _stream *s, size_t size)
+{
+    FILE *f = s->fptr;
+
+#ifndef WIN32
+    if (ftruncate(fileno(f), s->pos) == -1)
+        return -1; // TODO this should be unsigned!
+#else
+    if (_chsize(_fileno(f), s->pos) == -1)
+        return -1;
+#endif
+    return s->pos;
+}
+
+void fs_flush(struct _stream *s)
+{
+    FILE *f = s->fptr;
+
+    (void)fflush(f);
+}
+
+bool_t fs_eof(struct _stream *s)
+{
+    return (bool_t)feof(s->fptr);
+}
+
+void free_fs_stream(stream_t *s)
+{
+    fclose(s->fptr);
+    free(s->name);
+}
+
+void stream_close(stream_t *s)
+{
+    /*
+    if (!is_filestream(s))
+        return;
+    */
+    stream_flush(s);
+    // so far file streams are not designed to exist in the closed state
+    //    fclose(s->fptr);
+}
+
+stream_interface_t fs_funcs =
+    {free_fs_stream, fs_seek, fs_seek_end, fs_skip,
+     fs_read, fs_write, fs_trunc, fs_eof, fs_flush};
+
+stream_t *filestream_new(stream_t *s,
+                         char *fName, bool_t create, bool_t rewrite)
+{
+    FILE *f;
+    size_t sz;
+
+    if (create) {
+        if (rewrite) {
+            f = fopen(fName, "w+b");
+        }
+        else {
+            f = fopen(fName, "a+b");
+            if (f)
+                fseek(f, 0, SEEK_SET);
+        }
+    }
+    else {
+        f = fopen(fName, "r+b");
+    }
+    if (f == NULL) {
+        /* try readonly */
+        f = fopen(fName, "rb");
+        if (f == NULL)
+            return NULL;
+    }
+
+    s->fptr = f;
+    s->name = strdup(fName);
+
+    s->funcs = &fs_funcs;
+
+    s->pos = 0;
+    s->bitpos = s->bitbuf = 0;
+    s->byteswap = false;
+    return s;
+}
+
+stream_t *stream_fromfile(stream_t *s, FILE *f, char *name)
+{
+    s->fptr = f;
+    s->name = strdup(name);
+
+    s->funcs = &fs_funcs;
+
+    s->pos = 0;
+    s->bitpos = s->bitbuf = 0;
+    s->byteswap = false;
+    return s;
+}
+
+/* memory stream */
+
+off_t ms_seek(struct _stream *s, off_t where)
+{
+    if ((size_t)where > s->size)
+        return -1;
+
+    stream_bit_flush(s);
+    s->pos = where;
+
+    return s->pos;
+}
+
+off_t ms_skip(struct _stream *s, off_t offs)
+{
+    if (s->pos+offs < 0 || s->pos+offs > s->size)
+        return -1;
+
+    stream_bit_flush(s);
+
+    s->pos += offs;
+    return s->pos;
+}
+
+off_t ms_seek_end(struct _stream *s)
+{
+    stream_bit_flush(s);
+    s->pos = s->size;
+    return s->pos;
+}
+
+size_t ms_read(struct _stream *s, char *dest, size_t size)
+{
+    size_t amt = size;
+
+    assert(s->pos <= s->size);
+
+    if (size == 0)
+        return 0;
+
+    if (s->size - s->pos < size)
+        amt = s->size - s->pos;
+
+    if (amt > 0) {
+        memcpy(dest, &s->data[s->pos], amt);
+    }
+
+    s->pos += amt;
+    return amt;
+}
+
+static char *ms_realloc(stream_t *s, size_t size)
+{
+    char *temp;
+
+    if (size <= s->maxsize)
+        return s->data;
+
+    /* UNOFFICIALLY, we put a '\0' at the end of every memory stream for
+       compatability with C library string functions, which are convenient.
+       You are not allowed to depend on this behavior; it may eventually
+       change.
+
+       We implement this by telling everybody that maxsize is one less than
+       the actual size of the buffer. They think the last byte is at index
+       maxsize-1, but it is actually at index maxsize.
+
+       data[s->size] and data[s->maxsize] are kept at '\0'. */
+
+    if (size <= N_STREAM_LOCAL-1) {
+        /* TO DO: if we want to allow shrinking, see if the buffer shrank
+           down to this size, in which case we need to copy. */
+        s->data = &s->local[0];
+        s->maxsize = N_STREAM_LOCAL-1;
+        s->data[s->maxsize] = '\0';
+        return s->data;
+    }
+    if (s->data == &s->local[0]) {
+        temp = malloc(size+1);
+        // TODO nullcheck
+        memcpy(temp, s->data, s->size);
+    }
+    else {
+        /* here we rely on the realloc() behavior of
+           doing a malloc() when passed a NULL pointer. */
+        temp = realloc(s->data, size+1);
+        if (temp == NULL)
+            return NULL;
+    }
+    s->data = temp;
+    s->maxsize = size;
+    s->data[s->maxsize] = '\0';
+    return s->data;
+}
+
+size_t ms_trunc(struct _stream *s, size_t size)
+{
+    if (size == s->size)
+        return size;
+
+    if (size < s->size) {
+        // TODO: if big shrink, release space
+        if (s->pos > size)
+            s->pos = size;
+    }
+    else {
+        if (ms_realloc(s, size) == NULL)
+            return s->size;
+    }
+
+    s->size = size;
+    s->data[size] = '\0';
+    return size;
+}
+
+size_t ms_write(struct _stream *s, char *data, size_t size)
+{
+    size_t amt;
+    size_t newsize;
+
+    if (size == 0)
+        return 0;
+
+    if (s->pos + size > s->size) {
+        if (s->pos + size > s->maxsize) {
+            /* TO DO: here you might want to add a mechanism for limiting
+               the growth of the stream. */
+            newsize = s->maxsize * 2;
+            while (s->pos + size > newsize)
+                newsize *= 2;
+            if (ms_realloc(s, newsize) == NULL) {
+                /* no more space; write as much as we can */
+                amt = s->maxsize - s->pos;
+                if (amt > 0) {
+                    memcpy(&s->data[s->pos], data, amt);
+                }
+                s->pos += amt;
+                s->size = s->maxsize;
+                /* in this case we've written up to the end of the buffer,
+                   so we know the next char is \0 since ms_realloc sets that
+                   up every time it allocates. */
+                return amt;
+            }
+        }
+        s->size = s->pos + size;
+
+        /* always valid since secretly we know the buffer is 1 bigger than
+           maxsize */
+        s->data[s->size] = '\0';
+    }
+    memcpy(&s->data[s->pos], data, size);
+    s->pos += size;
+
+    return size;
+}
+
+void ms_flush(struct _stream *s)
+{
+}
+
+bool_t ms_eof(struct _stream *s)
+{
+    assert(s->pos <= s->size);
+    return (bool_t)(s->pos == s->size);
+}
+
+void free_ms_stream(stream_t *s)
+{
+    if (s->data != NULL && s->data != &s->local[0])
+        free(s->data);
+}
+/*
+stream_t *memstream_copy(stream_t *s)
+{
+    stream_t *ns = memstream_new(s->size);
+
+    ms_write(ns, s->s, s->size);
+    stream_seek(ns, 0);
+    return ns;
+}
+*/
+stream_interface_t ms_funcs =
+    {free_ms_stream, ms_seek, ms_seek_end, ms_skip,
+     ms_read, ms_write, ms_trunc, ms_eof, ms_flush};
+
+stream_t *memstream_new(stream_t *s, size_t initsize)
+{
+    s->pos = 0;
+    s->size = 0;
+    s->data = &s->local[0];
+    s->maxsize = N_STREAM_LOCAL-1;
+    s->data[s->maxsize] = '\0';
+
+    if (ms_realloc(s, initsize) == NULL)
+        julia_outofmemory();
+
+    s->data[initsize] = '\0';
+    s->data[s->pos] = '\0';
+    s->size = initsize;
+
+    s->funcs = &ms_funcs;
+
+    s->bitpos = s->bitbuf = 0;
+    s->byteswap = false;
+
+    return s;
+}
+
+#if 0
+string_t *string_new(int len)
+{
+    string_t *str;
+
+    str = memstream_new(len);
+    str->len = len;
+
+    return str;
+}
+
+/* this function makes string streams from C strings (NUL-term) */
+string_t *string_fromc(char *s)
+{
+    string_t *str;
+    int len = strlen(s);
+
+    str = string_new(len);
+    stream_write(str, s, len);
+    stream_seek(str, 0);
+
+    return str;
+}
+#endif
+
+/* pipe */
+off_t ps_seek(struct _stream *s, off_t where)
+{
+    return -1;
+}
+
+size_t ps_read(struct _stream *s, char *dest, size_t size);
+
+off_t ps_skip(struct _stream *s, off_t offs)
+{
+    char buf[CHUNK_SIZE];
+    int rd = s->rd;
+    size_t c, amt;
+
+    if (offs < 0)
+        return -1;
+    while (offs > 0) {
+        amt = offs > CHUNK_SIZE ? CHUNK_SIZE : offs;
+        c = ps_read(s, buf, amt);
+        if (c < amt)
+            return 0;
+        offs -= c;
+    }
+    return 0;
+}
+
+off_t ps_seek_end(struct _stream *s)
+{
+    return -1;
+}
+
+size_t ps_read(struct _stream *s, char *dest, size_t size)
+{
+    if (ps_eof(s))
+        return 0;
+#ifdef WIN32
+    int c = _read(s->rd, dest, size);
+#else
+    ssize_t c = read(s->rd, dest, size);
+#endif
+    if (c < 0)
+        return 0;
+    return (size_t)c;
+}
+
+size_t ps_write(struct _stream *s, char *data, size_t size)
+{
+#ifdef WIN32
+    int c = _write(s->wd, data, size);
+#else
+    ssize_t c = write(s->wd, data, size);
+#endif
+    if (c < 0)
+        return 0;
+    return c;
+}
+
+size_t ps_trunc(struct _stream *s, size_t size)
+{
+    return 0;
+}
+
+void ps_flush(struct _stream *s)
+{
+}
+
+bool_t ps_eof(struct _stream *s)
+{
+#ifndef WIN32
+    fd_set set;
+    struct timeval tv = {0, 0};
+
+    FD_ZERO(&set);
+    FD_SET(s->rd, &set);
+    return (select(s->rd+1, &set, NULL, NULL, &tv)==0);
+#else
+    return 0;
+#endif
+}
+
+void free_ps_stream(stream_t *s)
+{
+    close(s->rd);
+    close(s->wd);
+}
+
+stream_interface_t ps_funcs =
+    {free_ps_stream, ps_seek, ps_seek_end, ps_skip,
+     ps_read, ps_write, ps_trunc, ps_eof, ps_flush};
+
+stream_t *pipestream_new(stream_t *s, int flags)
+{
+    int fds[2];
+
+    s->funcs = &ps_funcs;
+
+#ifdef WIN32
+    _pipe(&fds[0], 32768, _O_BINARY | flags);
+#else
+    pipe(&fds[0]);
+#endif
+    s->rd = fds[0]; s->wd = fds[1];
+
+    s->byteswap = false;
+    s->bitpos = s->bitbuf = 0;
+    s->pos = 0;
+
+    return s;
+}
+
+
+/* high-level stream functions */
+
+/* type is a "T_" code as defined in streams.h */
+int stream_put_num(stream_t *stream, char *d, u_int32_t type)
+{
+    int c, sz, i;
+
+    assert(valid_type(type));
+    sz = type_sizes[type];
+
+    if (!stream->byteswap) {
+        c = stream_write(stream, d, sz);
+    }
+    else if (sz >= 4) {
+        char temp[32];
+        if (type & T_CPLX) {
+            bswap_to(temp,      (byte_t*)d,      sz/2);
+            bswap_to(temp+sz/2, (byte_t*)d+sz/2, sz/2);
+        }
+        else {
+            bswap_to(temp, (byte_t*)d, sz);
+        }
+        c = stream_write(stream, temp, sz);
+    }
+    else {
+        assert(sz == 2 || sz == 1);
+        if (type & T_CPLX) {
+            c = stream_write(stream, &d[0], 2);
+            /*
+            for(i=sz/2-1; i >= 0; i--) {
+                c += stream_write(stream, &d[i], 1);
+            }
+            for(i=sz-1; i >= sz/2; i--) {
+                c += stream_write(stream, &d[i], 1);
+            }
+            */
+        }
+        else {
+            c = 0;
+            if (sz == 2)
+                c += stream_write(stream, &d[1], 1);
+            c += stream_write(stream, &d[0], 1);
+        }
+    }
+
+    return c;
+}
+
+int stream_get_num(stream_t *s, char *data, u_int32_t type)
+{
+    int c, sz;
+
+    assert(valid_type(type));
+    sz = type_sizes[type];
+
+    c = stream_read(s, data, sz);
+
+    if (s->byteswap && c == sz) {
+        if (type & T_CPLX) {
+            bswap((byte_t*)data, sz/2);
+            bswap((byte_t*)data+sz/2, sz/2);
+        }
+        else {
+            bswap((byte_t*)data, sz);
+        }
+    }
+    if (c < sz)
+        return -1;
+    return c;
+}
+
+int stream_put_int(stream_t *s, int n)
+{
+    return stream_put_num(s, (char*)&n, T_INT);
+}
+
+int stream_put_char(stream_t *s, u_int32_t wc)
+{
+    char buf[8];
+    int amt;
+
+    amt = u8_wc_toutf8(buf, wc);
+    return stream_write(s, buf, amt);
+}
+
+int stream_put_stringz(stream_t *s, char *str, bool_t nulterm)
+{
+    int c, l = strlen(str);
+
+    c = stream_write(s, str, l);
+    if (nulterm)
+        c += stream_write(s, &str[l], 1);
+    return c;
+}
+
+int stream_get_char(stream_t *s, u_int32_t *pwc)
+{
+    char buf[8];
+    int amt;
+    unsigned int i;
+
+    amt = stream_read(s, buf, 1);
+    if (amt == 0)
+        return 0;
+    amt = u8_seqlen(buf) - 1;  // find out how many more bytes in this seq
+    if (amt) {
+        stream_read(s, &buf[1], amt);
+    }
+    amt++;
+    buf[amt] = '\0';
+    i=0;
+    *pwc = u8_nextmemchar(buf, &i);
+    return i;
+}
+
+int stream_nextchar(stream_t *s)
+{
+    u_int32_t wc;
+
+    if (is_memstream(s)) {
+        if (stream_eof(s))
+            return -1;
+        stream_bit_flush(s);
+        s->pos++;
+        while (!stream_eof(s) && !isutf(s->s[s->pos]))
+            s->pos++;
+        return s->pos;
+    }
+
+    if (stream_get_char(s, &wc) == 0)
+        return -1;
+    return s->pos;
+}
+
+int stream_prevchar(stream_t *s)
+{
+    char c;
+
+    if (is_memstream(s)) {
+        if (s->pos == 0)
+            return -1;
+        stream_bit_flush(s);
+        s->pos--;
+        while (s->pos > 0 && !isutf(s->s[s->pos]))
+            s->pos--;
+        return s->pos;
+    }
+
+    do {
+        if (stream_skip(s, -1) == -1)
+            return -1;
+        stream_read(s, &c, 1);
+        stream_skip(s, -1);
+    } while(!isutf(c));
+    return s->pos;
+}
+
+void stream_put_bit(stream_t *s, int bit)
+{
+    byte_t mask = 0x1;
+
+    if (s->bitpos == 0) {
+        if (!stream_read(s, &s->bitbuf, 1)) {
+            s->bitbuf = 0;
+        }
+        else {
+            stream_skip(s, -1);
+        }
+    }
+
+    mask <<= s->bitpos;
+    if (bit)
+        s->bitbuf |= mask;
+    else
+        s->bitbuf &= ~mask;
+    s->dirty = 1;
+
+    s->bitpos++;
+    if (s->bitpos > 7) {
+        s->bitpos = 0;
+        stream_write(s, &s->bitbuf, 1);
+    }
+}
+
+int stream_get_bit(stream_t *s, int *pbit)
+{
+    byte_t mask = 0x1;
+
+    if (s->bitpos == 0) {
+        if (!stream_read(s, &s->bitbuf, 1)) {
+            return 0;
+        }
+        else {
+            stream_skip(s, -1);
+        }
+        s->dirty = 0;
+    }
+
+    mask <<= s->bitpos;
+    *pbit = (s->bitbuf & mask) ? 1 : 0;
+
+    s->bitpos++;
+
+    if (s->bitpos > 7) {
+        s->bitpos = 0;
+        if (s->dirty) {
+            stream_write(s, &s->bitbuf, 1);
+        }
+        else {
+            stream_skip(s, 1);
+        }
+    }
+    return 1;
+}
+
+/* warning: DO NOT write a trivial wrapper for this function; it allows
+   easily crashing the interpreter using unfriendly format strings.
+
+   also, this function is designed to print small things like messages. it
+   cannot print arbitrarily large data. to do that, use stream_write,
+   or repeated calls to this function.
+
+   TODO: this doesn't handle UTF-8 properly:
+
+printf("%*s|\n%*s|\n", -4, "X", -4, "a")
+printf("%-4s|\n%-4s|\n", "X", "a")
+X  |
+a   |
+
+Where X is a 2-byte character.
+*/
+int stream_printf(stream_t *s, char *format, ...)
+{
+    int c;
+    va_list args;
+    char buf[512];
+
+    va_start(args, format);
+    c = vsnprintf(buf, sizeof(buf), format, args);
+    va_end(args);
+
+    if (c < 0)
+        return 0;
+
+    if ((unsigned)c > sizeof(buf))
+        c = sizeof(buf);
+
+    return stream_write(s, buf, c);
+}
+
+char *stream_take_buffer(stream_t *s, size_t *size)
+{
+    char *buf;
+
+    if (!is_memstream(s) || s->data == NULL)
+        return NULL;
+
+    stream_flush(s);
+
+    if (s->data == &s->local[0]) {
+        buf = malloc(s->size+1);
+        if (buf == NULL)
+            return NULL;
+        memcpy(buf, s->data, s->size+1);
+        buf[s->size] = '\0';
+    }
+    else {
+        buf = s->data;
+    }
+
+    *size = s->size+1; /* buffer is actually 1 bigger for terminating NUL */
+
+    /* empty stream and reinitialize */
+    s->data = &s->local[0];
+    s->maxsize = N_STREAM_LOCAL-1;
+    s->data[s->maxsize] = '\0';
+    stream_trunc(s, 0);
+
+    return buf;
+}
+
+/* Chunk size for reading lines: if too small, then we make too many low-level
+   calls. if too large, then we waste time reading bytes beyond the end of the
+   current line. this is effectively a guess as to how big a line is.
+   this value should be < N_STREAM_LOCAL, allowing at least some lines to
+   fit without extra allocation. */
+#define LINE_CHUNK_SIZE (N_STREAM_LOCAL-1)
+
+int stream_readline(stream_t *dest, stream_t *s, char delim)
+{
+    char chunk[LINE_CHUNK_SIZE];
+    int i, cnt, total=0;
+
+    if (stream_eof(s))
+        return 0;
+
+    do {
+        cnt = stream_read(s, chunk, LINE_CHUNK_SIZE);
+        for(i=0; i < cnt; i++) {
+            if (chunk[i] == delim)
+                break;
+        }
+        if (i < cnt) {
+            total += stream_write(dest, chunk, i+1);
+            stream_skip(s, -(cnt-(i+1)));
+            break;
+        }
+        total += stream_write(dest, chunk, cnt);
+    } while (!stream_eof(s));
+
+    return total;
+}
+
+/* extract one line of text from a stream, into a memory buffer.
+   a pointer to the buffer is returned in *pbuf, the size of the buffer
+   in *psz, and the number of characters in the line (including newline) is
+   returned. the line may contain NULs, but there will also be a NUL
+   terminator as the last character in the buffer.
+
+   This routine's behavior is not exactly the same as GNU getline. In
+   particular, it always allocates a buffer.
+*/
+int stream_getline(stream_t *s, char **pbuf, size_t *psz)
+{
+    stream_t buf;
+
+    memstream_new(&buf, 0);
+
+    stream_readline(&buf, s, '\n');
+
+    *pbuf = stream_take_buffer(&buf, psz);
+
+    return *psz-1;
+}
+
+int stream_get_stringz(stream_t *dest, stream_t *src)
+{
+    return stream_readline(dest, src, '\0');
+}
+
+/* get n UTF-8 characters */
+int stream_get_stringn(stream_t *dest, stream_t *src, size_t c)
+{
+    u_int32_t wc;
+    size_t cnt=0;
+
+    while (c > 0) {
+        if (stream_get_char(src, &wc) == 0 ||
+            stream_put_char(dest, wc) == 0)
+            break;
+        c--;
+        cnt++;
+    }
+    return cnt;
+}
+
+/* TODO: change API to allow passing a heap-allocated page-aligned
+   chunk to reuse on each copy. if it's NULL we alloc our own. */
+int stream_copy(stream_t *to, stream_t *from, size_t nbytes, bool_t all)
+{
+    int c=0, cnt, wc, rdc;
+    char buf[CHUNK_SIZE];
+    int remain = all ? CHUNK_SIZE : nbytes;
+
+    if (is_memstream(to)) {
+        // avoid extra copy, read directly into memstream
+        while (!stream_eof(from)) {
+            if (all) {
+                rdc = CHUNK_SIZE;
+            }
+            else {
+                /* if !all, only 1 call to stream_read needed */
+                rdc = nbytes;
+            }
+
+            if (ms_realloc(to, to->pos + rdc) == NULL) {
+                rdc = to->maxsize - to->pos;
+                if (rdc == 0)
+                    break;
+            }
+            cnt = stream_read(from, &to->s[to->pos], rdc);
+            wc = cnt;
+            to->pos += wc;
+            if (to->pos > to->size) {
+                to->size = to->pos;
+                to->s[to->size] = '\0';
+            }
+            c += wc;
+
+            if (!all)
+                remain -= wc;
+            if (wc < rdc || remain == 0)
+                break;
+        }
+    }
+    else if (is_memstream(from)) {
+        while (1) {
+            if (all) {
+                rdc = CHUNK_SIZE;
+            }
+            else {
+                /* if !all, only 1 call to stream_read needed */
+                rdc = nbytes;
+            }
+            // check for source out of data
+            if (from->size - from->pos < rdc) {
+                remain = rdc = from->size - from->pos;
+            }
+            cnt = stream_write(to, &from->s[from->pos], rdc);
+            wc = cnt;
+            from->pos += wc;
+            c += wc;
+            if (!all)
+                remain -= wc;
+            if (wc < rdc || remain == 0)
+                break;
+        }
+    }
+    else {
+        while (!stream_eof(from)) {
+            rdc = remain>CHUNK_SIZE ? CHUNK_SIZE : remain;
+
+            cnt = stream_read(from, buf, rdc);
+            wc = stream_write(to, buf, cnt);
+            c += wc;
+
+            if (!all)
+                remain -= wc;
+            if (wc < rdc || remain == 0)
+                break;
+        }
+    }
+    return c;
+}
+
+
+/* serialization functions */
+
+/* nbytes is either 4 or 8 */
+size_t stream_get_offset(stream_t *s, off_t *po, int nbytes)
+{
+    size_t c;
+    int64_t off64;
+    int32_t off32;
+
+    if (nbytes == 4) {
+        c = stream_read(s, (char*)&off32, nbytes);
+        if (c < nbytes)
+            return c;
+        if (s->byteswap)
+            off32 = bswap_32(off32);
+        /* OK on either system since off_t is >= off32 in size */
+        *po = (off_t)off32;
+    }
+    else {
+        c = stream_read(s, (char*)&off64, nbytes);
+        if (c < nbytes)
+            return c;
+        if (s->byteswap)
+            off64 = bswap_64(off64);
+        if (sizeof(off_t) == 8) {
+            *po = (off_t)off64;
+        }
+        else {
+            if (off64 <= INT_MAX && off64 >= INT_MIN) {
+                // downcast safe
+                *po = (off_t)off64;
+            }
+            else {
+                cerror("I/O error: 64-bit offset truncated on input.\n");
+            }
+        }
+    }
+
+    return c;
+}
+
+size_t stream_put_offset(stream_t *s, off_t o, int nbytes)
+{
+    int64_t off64;
+    int32_t off32;
+
+    if (nbytes == 4) {
+        if (sizeof(off_t) == 8 && (o > INT_MAX || o < INT_MIN)) {
+            cerror("I/O error: 64-bit offset truncated on output.\n");
+        }
+        off32 = (int32_t)o;
+        if (s->byteswap)
+            off32 = bswap_32(off32);
+        return stream_write(s, (char*)&off32, 4);
+    }
+    off64 = (int64_t)o;
+    if (s->byteswap)
+        off64 = bswap_64(off64);
+    return stream_write(s, (char*)&off64, 8);
+}
diff --git a/llt/attic/streams.h b/llt/attic/streams.h
new file mode 100644
index 0000000..1f25f3b
--- /dev/null
+++ b/llt/attic/streams.h
@@ -0,0 +1,213 @@
+#ifndef __STREAMS_H_
+#define __STREAMS_H_
+
+struct _stream;
+
+// numeric type codes
+#define T_BOOL   0x000
+#define T_BYTE   0x001
+#define T_SHORT  0x002
+#define T_INT    0x003
+#define T_FLOAT  0x004
+#define T_DOUBLE 0x005
+#define T_INT64   0x006
+//#define T_LDOUBLE 0x007
+#define T_UNSIGNED 0x008
+#define T_CPLX     0x010
+
+#define T_TYPEMASK 0x1f  /* bits related to numeric type */
+#define T_TYPESIZE 0x07  /* bits related to type size */
+
+#define is_type(a, t) (((a) & T_TYPESIZE) == (t))
+// type_bitseq tells whether 2 types are bit-representation compatible
+#define type_bitseq(a, b) (((a) & ~T_UNSIGNED) == ((b) & ~T_UNSIGNED))
+
+extern unsigned int type_sizes[];
+
+typedef struct {
+    void (*free_func)(struct _stream *s);
+    /* these return -1 on error, or new position if successful */
+    /* set absolute position */
+    off_t (*seek)(struct _stream *s, off_t where);
+    /* seek to end (past last byte) */
+    off_t (*seek_end)(struct _stream *s);
+    /* move relative to current position */
+    off_t (*skip)(struct _stream *s, off_t offs);
+
+    /* these return # of bytes read/written, 0 on error */
+    size_t (*read)(struct _stream *s, char *dest, size_t size);
+    size_t (*write)(struct _stream *s, char *data, size_t size);
+
+    /* truncate a stream to the given length */
+    size_t (*trunc)(struct _stream *s, size_t size);
+
+    /* no data left? */
+    bool_t (*eof)(struct _stream *s);
+
+    /* sync bit buffer, and sync to hardware if applicable */
+    void (*flush)(struct _stream *s);
+    /* could add fsync() call for durable writes */
+} stream_interface_t;
+
+extern stream_interface_t fs_funcs;
+extern stream_interface_t ms_funcs;
+extern stream_interface_t ps_funcs;
+
+#define is_memstream(s)  (((stream_t*)s)->funcs==&ms_funcs)
+#define is_filestream(s) (((stream_t*)s)->funcs==&fs_funcs)
+#define is_pipestream(s) (((stream_t*)s)->funcs==&ps_funcs)
+
+/* general i/o chunk size */
+#define CHUNK_SIZE 4096
+
+/* this should be a multiple of 4; otherwise the struct gets padded to
+   the next multiple of 4 anyway, wasting space */
+#define N_STREAM_LOCAL 40
+
+typedef struct _stream {
+    /* unfortunately, it seems that pos needs to be a signed type to be
+       compatible with OS functions. */
+    off_t pos;
+    byte_t bitpos;     /* offset within a byte, for writing individual bits */
+    byte_t bitbuf;     /* a copy of the byte at the current stream position */
+    char ungotc;
+    struct {
+        /* does bit buffer need to be written? */
+        unsigned char dirty:1;
+        unsigned char byteswap:1;
+        unsigned char readonly:1;
+        unsigned char ungot:1;
+    };
+
+    stream_interface_t *funcs;
+
+    /* stream-specific data */
+    union {
+        char *name;
+        size_t maxsize;
+        int rd; // pipe read descriptor
+    };
+    union {
+        FILE *fptr;
+        char *data;
+        char *s;
+        int wd; // pipe write descriptor
+    };
+    union {
+        /* this is always a size in BYTES */
+        size_t size;
+        size_t len;
+    };
+    char local[N_STREAM_LOCAL];
+} stream_t;
+
+#include <stdio.h>
+
+stream_t *filestream_new(stream_t *s, char *fName,
+                         bool_t create, bool_t rewrite);
+stream_t *memstream_new(stream_t *s, size_t initsize);
+stream_t *pipestream_new(stream_t *s, int flags);
+stream_t *stream_fromfile(stream_t *s, FILE *f, char *name);
+//string_t *string_new(int len);
+//string_t *string_fromc(char *s);
+stream_t *memstream_copy(stream_t *s);
+void stream_free(stream_t *s);
+void stream_flush(stream_t *s);
+
+
+/* high level stream functions */
+
+/* 'all' means copy to end of stream */
+int stream_copy(stream_t *to, stream_t *from, size_t nbytes, bool_t all);
+
+int stream_put_num(stream_t *s, char *data, u_int32_t type);
+int stream_put_int(stream_t *s, int n);
+int stream_put_char(stream_t *s, u_int32_t wc);
+int stream_put_stringz(stream_t *s, char *str, bool_t nulterm);
+
+/* single-bit I/O - even works for mixed reads and writes.
+   mixing bit-level I/O with normal byte stream I/O has undefined effects and
+   will almost certainly destroy your file. however, it is safe to switch
+   between bit and byte I/O if you call stream_flush in between. */
+void stream_put_bit(stream_t *s, int bit);
+
+/* warning: this uses a fixed-size buffer. it is intended only for printing
+   normal things like "a= %d". if you might be printing a large buffer, you
+   should use stream i/o functions directly. */
+int stream_printf(stream_t *s, char *format, ...);
+
+
+/* high level stream functions - input */
+
+int stream_get_num(stream_t *s, char *data, u_int32_t type);
+int stream_get_char(stream_t *s, u_int32_t *pwc);
+int stream_get_stringz(stream_t *dest, stream_t *src);
+int stream_get_stringn(stream_t *dest, stream_t *src, size_t c);
+int stream_readline(stream_t *dest, stream_t *s, char delim);
+int stream_getline(stream_t *s, char **pbuf, size_t *psz);
+/* returns # of bits read (0 or 1) */
+int stream_get_bit(stream_t *s, int *pbit);
+
+int stream_nextchar(stream_t *s);
+int stream_prevchar(stream_t *s);
+
+void stream_close(stream_t *s);
+
+/* TODO */
+// stream_fgetc
+// stream_ungetc
+
+/* get underlying file descriptors, -1 if none */
+int stream_readfd(stream_t *s);
+int stream_writefd(stream_t *s);
+
+/*
+  low level stream functions
+
+  Streams are intended to provide a uniform function interface to various
+  kinds of byte streams.
+
+  The eight low-level stream functions (below) are intended to be light weight.
+  It must be easy to implement reasonably efficient higher-level stream
+  functions, therefore any complexity required to make (for example) single
+  byte reads and writes efficient must be implemented by the stream.
+
+  Note that you can implement file streams using fread(), fwrite(), etc.
+  because buffering is already implemented in every standard C library. These
+  calls do not make system calls in general, and are perfectly fine to use.
+*/
+
+#define stream_seek(s, w)      (s)->funcs->seek(s, w)
+#define stream_seek_end(s)     (s)->funcs->seek_end(s)
+#define stream_skip(s, o)      (s)->funcs->skip(s, o)
+#define stream_read(s, d, sz)  (s)->funcs->read(s, (char*)d, sz)
+#define stream_write(s, d, sz) (s)->funcs->write(s, (char*)d, sz)
+#define stream_trunc(s, sz)    (s)->funcs->trunc(s, sz)
+#define stream_eof(s)          (s)->funcs->eof(s)
+
+
+STATIC_INLINE size_t stream_put_byte(stream_t *s, byte_t b)
+{
+    return stream_write(s, (char*)&b, 1);
+}
+
+STATIC_INLINE size_t stream_get_byte(stream_t *s, byte_t *pb)
+{
+    return stream_read(s, (char*)pb, 1);
+}
+
+#define stream_puts(s, str) stream_write(s, str, strlen(str))
+
+/*
+  stream_take_buffer
+
+  This lets you get the data of a stream without having to copy it. In order
+  not to either lose the buffer or free it twice, this operation effectively
+  empties the stream. In other words, size goes to 0, data becomes NULL.
+  Whoever called the function takes full responsibility for the buffer.
+  You must free it eventually.
+  "size" gets set to the size of the data in the buffer.
+*/
+char *stream_take_buffer(stream_t *s, size_t *size);
+
+#endif
diff --git a/llt/attic/trash.c b/llt/attic/trash.c
new file mode 100644
index 0000000..0046840
--- /dev/null
+++ b/llt/attic/trash.c
@@ -0,0 +1,68 @@
+/* moving data in small power-of-2 sized units */
+void copy_el(char *dest, char *src, size_t sz)
+{
+    switch (sz) {
+    case 16:
+        *(int64_t*)&dest[0] = *(int64_t*)&src[0];
+        *(int64_t*)&dest[8] = *(int64_t*)&src[8];
+        break;
+    case 8: *(int64_t*)dest = *(int64_t*)src; break;
+    case 4: *(int32_t*)dest = *(int32_t*)src; break;
+    case 2: *(int16_t*)dest = *(int16_t*)src; break;
+    case 1: *dest = *src; break;
+    }
+}
+
+void swap_el(char *a, char *b, size_t sz)
+{
+    int64_t i64;
+    int32_t i32;
+    int16_t i16;
+    int8_t i8;
+    switch (sz) {
+    case 16:
+        i64 = *(int64_t*)&a[0];
+        *(int64_t*)&a[0] = *(int64_t*)&b[0];
+        *(int64_t*)&b[0] = i64;
+        i64 = *(int64_t*)&a[8];
+        *(int64_t*)&a[8] = *(int64_t*)&b[8];
+        *(int64_t*)&b[8] = i64;
+        break;
+    case 8:
+        i64 = *(int64_t*)a;
+        *(int64_t*)a = *(int64_t*)b;
+        *(int64_t*)b = i64;
+        break;
+    case 4:
+        i32 = *(int32_t*)a;
+        *(int32_t*)a = *(int32_t*)b;
+        *(int32_t*)b = i32;
+        break;
+    case 2:
+        i16 = *(int16_t*)a;
+        *(int16_t*)a = *(int16_t*)b;
+        *(int16_t*)b = i16;
+        break;
+    case 1:
+        i8 = *a;
+        *a = *b;
+        *b = i8;
+        break;
+    }
+}
+
+void neg_any(void *dest, void *a, numerictype_t tag)
+{
+    switch (tag) {
+    case T_INT8:   *(int8_t  *)dest = -*(int8_t  *)a; break;
+    case T_UINT8:  *(uint8_t *)dest = -*(uint8_t *)a; break;
+    case T_INT16:  *(int16_t *)dest = -*(int16_t *)a; break;
+    case T_UINT16: *(uint16_t*)dest = -*(uint16_t*)a; break;
+    case T_INT32:  *(int32_t *)dest = -*(int32_t *)a; break;
+    case T_UINT32: *(uint32_t*)dest = -*(uint32_t*)a; break;
+    case T_INT64:  *(int64_t *)dest = -*(int64_t *)a; break;
+    case T_UINT64: *(uint64_t*)dest = -*(uint64_t*)a; break;
+    case T_FLOAT:  *(float   *)dest = -*(float   *)a; break;
+    case T_DOUBLE: *(double  *)dest = -*(double  *)a; break;
+    }
+}
diff --git a/llt/bitvector.c b/llt/bitvector.c
new file mode 100644
index 0000000..40aeed6
--- /dev/null
+++ b/llt/bitvector.c
@@ -0,0 +1,544 @@
+/*
+  bit vector primitives
+
+  todo:
+  * reverse
+  * nreverse
+ (- rotate left/right)
+  * shl_to
+  * not
+  - shr_row, shl_row
+
+  These routines are the back end supporting bit matrices. Many operations
+  on bit matrices are slow (such as accessing or setting a single element!)
+  but certain operations are privileged and lend themselves to extremely
+  efficient implementation due to the bit-vector nature of machine integers.
+  These are:
+  done:
+    &  |  $  ~  copy  reverse  fill  sum  prod
+  todo:
+    shift  trans  rowswap
+  would be nice:
+    channel  interleave
+
+  Important note:
+  Out-of-place functions always assume dest and source have the same amount
+  of space available.
+
+  shr_to, shl_to, not_to, and reverse_to assume source and dest don't overlap
+  and_to, or_to, and xor_to allow overlap.
+*/
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+#include "dtypes.h"
+#include "bitvector.h"
+
+#ifdef WIN32
+#include <malloc.h>
+#define alloca _alloca
+#endif
+
+// greater than this # of words we use malloc instead of alloca
+#define MALLOC_CUTOFF 2000
+
+u_int32_t *bitvector_resize(u_int32_t *b, size_t n, int initzero)
+{
+    u_int32_t *p;
+    size_t sz = ((n+31)>>5) * 4;
+    p = realloc(b, sz);
+    if (p == NULL) return NULL;
+    if (initzero) memset(p, 0, sz);
+    return p;
+}
+
+u_int32_t *bitvector_new(size_t n, int initzero)
+{
+    return bitvector_resize(NULL, n, initzero);
+}
+
+void bitvector_set(u_int32_t *b, u_int32_t n, u_int32_t c)
+{
+    if (c)
+        b[n>>5] |= (1<<(n&31));
+    else
+        b[n>>5] &= ~(1<<(n&31));
+}
+
+u_int32_t bitvector_get(u_int32_t *b, u_int32_t n)
+{
+    return b[n>>5] & (1<<(n&31));
+}
+
+u_int32_t bitreverse(u_int32_t x)
+{
+    u_int32_t m;
+
+#ifdef __INTEL_COMPILER
+    x = _bswap(x);
+#else
+    x = (x >> 16)      | (x << 16);        m = 0xff00ff00;
+    x = ((x & m) >> 8) | ((x & ~m) << 8);
+#endif
+    m = 0xf0f0f0f0;
+    x = ((x & m) >> 4) | ((x & ~m) << 4);  m = 0xcccccccc;
+    x = ((x & m) >> 2) | ((x & ~m) << 2);  m = 0xaaaaaaaa;
+    x = ((x & m) >> 1) | ((x & ~m) << 1);
+
+    return x;
+}
+
+// shift all bits in a long bit vector
+// n is # of int32s to consider, s is shift distance
+// lowest bit-index is bit 0 of word 0
+// TODO: handle boundary case of shift distance >= data size?
+void bitvector_shr(u_int32_t *b, size_t n, u_int32_t s)
+{
+    u_int32_t i;
+    if (s == 0 || n == 0) return;
+    i = (s>>5);
+    if (i) {
+        n -= i;
+        memmove(b, &b[i], n*4);
+        memset(&b[n], 0, i*4);
+        s &= 31;
+    }
+    for(i=0; i < n-1; i++) {
+        b[i] = (b[i]>>s) | (b[i+1]<<(32-s));
+    }
+    b[i]>>=s;
+}
+
+// out-of-place version, good for re-aligning a strided submatrix to
+// linear representation when a copy is needed
+// assumes that dest has the same amount of space as source, even if it
+// wouldn't have been necessary to hold the shifted bits
+void bitvector_shr_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s)
+{
+    u_int32_t i, j;
+    if (n == 0) return;
+    if (s == 0) {
+        memcpy(dest, b, n*4);
+        return;
+    }
+    j = (s>>5);
+    if (j) {
+        n -= j;
+        memset(&dest[n], 0, j*4);
+        s &= 31;
+        b = &b[j];
+    }
+    for(i=0; i < n-1; i++) {
+        dest[i] = (b[i]>>s) | (b[i+1]<<(32-s));
+    }
+    dest[i] = b[i]>>s;
+}
+
+void bitvector_shl(u_int32_t *b, size_t n, u_int32_t s)
+{
+    u_int32_t i, scrap=0, temp;
+    if (s == 0 || n == 0) return;
+    i = (s>>5);
+    if (i) {
+        n -= i;
+        memmove(&b[i], b, n*4);
+        memset(b, 0, i*4);
+        s &= 31;
+        b = &b[i];
+    }
+    for(i=0; i < n; i++) {
+        temp = (b[i]<<s) | scrap;
+        scrap = b[i]>>(32-s);
+        b[i] = temp;
+    }
+}
+
+// if dest has more space than source, set scrap to true to keep the
+// top bits that would otherwise be shifted out
+void bitvector_shl_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s,
+                      bool_t scrap)
+{
+    u_int32_t i, j, sc=0;
+    if (n == 0) return;
+    if (s == 0) {
+        memcpy(dest, b, n*4);
+        return;
+    }
+    j = (s>>5);
+    if (j) {
+        n -= j;
+        memset(dest, 0, j*4);
+        s &= 31;
+        dest = &dest[j];
+    }
+    for(i=0; i < n; i++) {
+        dest[i] = (b[i]<<s) | sc;
+        sc = b[i]>>(32-s);
+    }
+    if (scrap)
+        dest[i] = sc;
+}
+
+// set nbits to c, starting at given bit offset
+// assumes offs < 32
+void bitvector_fill(u_int32_t *b, u_int32_t offs, u_int32_t c, u_int32_t nbits)
+{
+    index_t i;
+    u_int32_t nw, tail;
+    u_int32_t mask;
+
+    if (nbits == 0) return;
+    nw = (offs+nbits+31)>>5;
+
+    if (nw == 1) {
+        mask = (lomask(nbits)<<offs);
+        if (c) b[0]|=mask; else b[0]&=(~mask);
+        return;
+    }
+
+    mask = lomask(offs);
+    if (c) b[0]|=(~mask); else b[0]&=mask;
+
+    if (c) mask=ONES32; else mask = 0;
+    for(i=1; i < nw-1; i++)
+        b[i] = mask;
+
+    tail = (offs+nbits)&31;
+    if (tail==0) {
+        b[i] = mask;
+    }
+    else {
+        mask = lomask(tail);
+        if (c) b[i]|=mask; else b[i]&=(~mask);
+    }
+}
+
+void bitvector_not(u_int32_t *b, u_int32_t offs, u_int32_t nbits)
+{
+    index_t i;
+    u_int32_t nw, tail;
+    u_int32_t mask;
+
+    if (nbits == 0) return;
+    nw = (offs+nbits+31)>>5;
+
+    if (nw == 1) {
+        mask = (lomask(nbits)<<offs);
+        b[0] ^= mask;
+        return;
+    }
+
+    mask = ~lomask(offs);
+    b[0]^=mask;
+
+    for(i=1; i < nw-1; i++)
+        b[i] = ~b[i];
+
+    tail = (offs+nbits)&31;
+    if (tail==0) {
+        b[i] = ~b[i];
+    }
+    else {
+        mask = lomask(tail);
+        b[i]^=mask;
+    }
+}
+
+// constant-space bit vector copy in a single pass, with arbitrary
+// offsets and lengths. to get this right, there are 16 cases to handle!
+#define BITVECTOR_COPY_OP(name, OP)                                          \
+void bitvector_##name(u_int32_t *dest, u_int32_t doffs,                      \
+                      u_int32_t *src, u_int32_t soffs, u_int32_t nbits)      \
+{                                                                            \
+    index_t i;                                                               \
+    u_int32_t s, nw, tail, snw;                                              \
+    u_int32_t mask, scrap;                                                   \
+                                                                             \
+    if (nbits == 0) return;                                                  \
+    nw = (doffs+nbits+31)>>5;                                                \
+                                                                             \
+    if (soffs == doffs) {                                                    \
+        if (nw == 1) {                                                       \
+            mask = (lomask(nbits)<<doffs);                                   \
+            dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask);               \
+            return;                                                          \
+        }                                                                    \
+        mask = ~lomask(doffs);                                               \
+        dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask);                   \
+        for(i=1; i < nw-1; i++)                                              \
+            dest[i] = OP(src[i]);                                            \
+        tail = (doffs+nbits)&31;                                             \
+        if (tail==0) { dest[i]=src[i]; } else {                              \
+            mask = lomask(tail);                                             \
+            dest[i] = (dest[i] & ~mask) | (OP(src[i]) & mask); }             \
+        return;                                                              \
+    }                                                                        \
+    snw = (soffs+nbits+31)>>5;                                               \
+    if (soffs < doffs) {                                                     \
+        s = doffs-soffs;                                                     \
+        if (nw == 1) {                                                       \
+            mask = (lomask(nbits)<<doffs);                                   \
+            dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask);          \
+            return;                                                          \
+        }                                                                    \
+        mask = ~lomask(doffs);                                               \
+        dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask);              \
+        scrap = OP(src[0])>>(32-s);                                          \
+        for(i=1; i < snw-1; i++) {                                           \
+            dest[i] = (OP(src[i])<<s) | scrap;                               \
+            scrap = OP(src[i])>>(32-s);                                      \
+        }                                                                    \
+        tail = (doffs+nbits)&31;                                             \
+        if (tail==0) { mask=ONES32; } else { mask = lomask(tail); }          \
+        if (snw == nw) {                                                     \
+            dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s)|scrap) & mask);  \
+        }                                                                    \
+        else /* snw < nw */ {                                                \
+            if (snw == 1) {                                                  \
+                dest[i] = (dest[i] & ~mask) |                                \
+                    (((OP(src[i])<<s) | scrap) & mask);                      \
+            }                                                                \
+            else {                                                           \
+                dest[i] = (OP(src[i])<<s) | scrap;                           \
+                scrap = OP(src[i])>>(32-s);                                  \
+                i++;                                                         \
+                dest[i] = (dest[i] & ~mask) | (scrap & mask);                \
+            }                                                                \
+        }                                                                    \
+    }                                                                        \
+    else {                                                                   \
+        s = soffs-doffs;                                                     \
+        if (snw == 1) {                                                      \
+            mask = (lomask(nbits)<<doffs);                                   \
+            dest[0] = (dest[0] & ~mask) | ((OP(src[0])>>s) & mask);          \
+            return;                                                          \
+        }                                                                    \
+        if (nw == 1) {                                                       \
+            mask = (lomask(nbits)<<doffs);                                   \
+            dest[0] = (dest[0] & ~mask) |                                    \
+                (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask);             \
+            return;                                                          \
+        }                                                                    \
+        mask = ~lomask(doffs);                                               \
+        dest[0] = (dest[0] & ~mask) |                                        \
+            (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask);                 \
+        for(i=1; i < nw-1; i++) {                                            \
+            dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s));              \
+        }                                                                    \
+        tail = (doffs+nbits)&31;                                             \
+        if (tail==0) { mask=ONES32; } else { mask = lomask(tail); }          \
+        if (snw == nw) {                                                     \
+            dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask);          \
+        }                                                                    \
+        else /* snw > nw */ {                                                \
+            dest[i] = (dest[i] & ~mask) |                                    \
+                (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask);           \
+        }                                                                    \
+    }                                                                        \
+}
+
+#define BV_COPY(a) (a)
+#define BV_NOT(a) (~(a))
+BITVECTOR_COPY_OP(copy, BV_COPY)
+BITVECTOR_COPY_OP(not_to, BV_NOT)
+
+// right-shift the bits in one logical "row" of a long 2d bit vector
+/*
+void bitvector_shr_row(u_int32_t *b, u_int32_t offs, size_t nbits, u_int32_t s)
+{
+}
+*/
+
+// copy from source to dest while reversing bit-order
+// assumes dest offset == 0
+// assumes source and dest don't overlap
+// assumes offset < 32
+void bitvector_reverse_to(u_int32_t *dest, u_int32_t *src, u_int32_t soffs,
+                          u_int32_t nbits)
+{
+    index_t i;
+    u_int32_t nw, tail;
+
+    if (nbits == 0) return;
+
+    nw = (soffs+nbits+31)>>5;
+    // first, reverse the words while reversing bit order within each word
+    for(i=0; i < nw/2; i++) {
+        dest[i]      = bitreverse(src[nw-i-1]);
+        dest[nw-i-1] = bitreverse(src[i]);
+    }
+    if (nw&0x1)
+        dest[i] = bitreverse(src[i]);
+
+    tail = (soffs+nbits)&31;
+    if (tail)
+        bitvector_shr(dest, nw, 32-tail);
+}
+
+void bitvector_reverse(u_int32_t *b, u_int32_t offs, u_int32_t nbits)
+{
+    index_t i;
+    u_int32_t nw, tail;
+    u_int32_t *temp;
+
+    if (nbits == 0) return;
+
+    nw = (offs+nbits+31)>>5;
+    temp = (nw > MALLOC_CUTOFF) ? malloc(nw*4) : alloca(nw*4);
+    for(i=0; i < nw/2; i++) {
+        temp[i]      = bitreverse(b[nw-i-1]);
+        temp[nw-i-1] = bitreverse(b[i]);
+    }
+    if (nw&0x1)
+        temp[i] = bitreverse(b[i]);
+
+    tail = (offs+nbits)&31;
+    bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
+    if (nw > MALLOC_CUTOFF) free(temp);
+}
+
+u_int32_t bitvector_count(u_int32_t *b, u_int32_t offs, u_int32_t nbits)
+{
+    index_t i;
+    u_int32_t nw, tail;
+    u_int32_t ans;
+
+    if (nbits == 0) return 0;
+    nw = (offs+nbits+31)>>5;
+
+    if (nw == 1) {
+        return count_bits(b[0] & (lomask(nbits)<<offs));
+    }
+
+    ans = count_bits(b[0]>>offs);  // first end cap
+
+    for(i=1; i < nw-1; i++) {
+        /* popcnt can be computed branch-free, so these special cases
+           probably don't help much */
+        /*
+        v = b[i];
+        if (v == 0)
+            continue;
+        if (v == ONES32)
+            ans += 32;
+        else
+        */
+        ans += count_bits(b[i]);
+    }
+
+    tail = (offs+nbits)&31;
+    ans += count_bits(b[i]&(tail>0?lomask(tail):ONES32));  // last end cap
+
+    return ans;
+}
+
+u_int32_t bitvector_any0(u_int32_t *b, u_int32_t offs, u_int32_t nbits)
+{
+    index_t i;
+    u_int32_t nw, tail;
+    u_int32_t mask;
+
+    if (nbits == 0) return 0;
+    nw = (offs+nbits+31)>>5;
+
+    if (nw == 1) {
+        mask = (lomask(nbits)<<offs);
+        if ((b[0] & mask) != mask) return 1;
+        return 0;
+    }
+
+    mask = ~lomask(offs);
+    if ((b[0] & mask) != mask) return 1;
+
+    for(i=1; i < nw-1; i++) {
+        if (b[i] != ONES32) return 1;
+    }
+
+    tail = (offs+nbits)&31;
+    if (tail==0) {
+        if (b[i] != ONES32) return 1;
+    }
+    else {
+        mask = lomask(tail);
+        if ((b[i] & mask) != mask) return 1;
+    }
+    return 0;
+}
+
+u_int32_t bitvector_any1(u_int32_t *b, u_int32_t offs, u_int32_t nbits)
+{
+    index_t i;
+    u_int32_t nw, tail;
+    u_int32_t mask;
+
+    if (nbits == 0) return 0;
+    nw = (offs+nbits+31)>>5;
+
+    if (nw == 1) {
+        mask = (lomask(nbits)<<offs);
+        if ((b[0] & mask) != 0) return 1;
+        return 0;
+    }
+
+    mask = ~lomask(offs);
+    if ((b[0] & mask) != 0) return 1;
+
+    for(i=1; i < nw-1; i++) {
+        if (b[i] != 0) return 1;
+    }
+
+    tail = (offs+nbits)&31;
+    if (tail==0) {
+        if (b[i] != 0) return 1;
+    }
+    else {
+        mask = lomask(tail);
+        if ((b[i] & mask) != 0) return 1;
+    }
+    return 0;
+}
+
+static void adjust_offset_to(u_int32_t *dest, u_int32_t *src, u_int32_t nw,
+                             u_int32_t soffs, u_int32_t newoffs)
+{
+    if (newoffs > soffs)
+        bitvector_shl_to(dest, src, nw, newoffs-soffs, true);
+    else
+        bitvector_shr_to(dest, src, nw, soffs-newoffs);
+}
+
+#define BITVECTOR_BINARY_OP_TO(opname, OP)                                   \
+void bitvector_##opname##_to(u_int32_t *dest, u_int32_t doffs,               \
+                             u_int32_t *a, u_int32_t aoffs,                  \
+                             u_int32_t *b, u_int32_t boffs, u_int32_t nbits) \
+{                                                                            \
+    u_int32_t nw = (doffs+nbits+31)>>5;                                      \
+    u_int32_t *temp = nw>MALLOC_CUTOFF ? malloc((nw+1)*4) : alloca((nw+1)*4);\
+    u_int32_t i, anw, bnw;                                                   \
+    if (aoffs == boffs) {                                                    \
+        anw = (aoffs+nbits+31)>>5;                                           \
+    }                                                                        \
+    else if (aoffs == doffs) {                                               \
+        bnw = (boffs+nbits+31)>>5;                                           \
+        adjust_offset_to(temp, b, bnw, boffs, aoffs);                        \
+        b = temp; anw = nw;                                                  \
+    }                                                                        \
+    else {                                                                   \
+        anw = (aoffs+nbits+31)>>5;                                           \
+        bnw = (boffs+nbits+31)>>5;                                           \
+        adjust_offset_to(temp, a, anw, aoffs, boffs);                        \
+        a = temp; aoffs = boffs; anw = bnw;                                  \
+    }                                                                        \
+    for(i=0; i < anw; i++) temp[i] = OP(a[i], b[i]);                         \
+    bitvector_copy(dest, doffs, temp, aoffs, nbits);                         \
+    if (nw>MALLOC_CUTOFF) free(temp);                                        \
+}
+
+#define BV_AND(a,b) ((a)&(b))
+#define BV_OR(a,b)  ((a)|(b))
+#define BV_XOR(a,b) ((a)^(b))
+BITVECTOR_BINARY_OP_TO(and, BV_AND)
+BITVECTOR_BINARY_OP_TO(or,  BV_OR)
+BITVECTOR_BINARY_OP_TO(xor, BV_XOR)
diff --git a/llt/bitvector.h b/llt/bitvector.h
new file mode 100644
index 0000000..be5ca9a
--- /dev/null
+++ b/llt/bitvector.h
@@ -0,0 +1,66 @@
+#ifndef __BITVECTOR_H_
+#define __BITVECTOR_H_
+
+// a mask with n set lo or hi bits
+#define lomask(n) (u_int32_t)((((u_int32_t)1)<<(n))-1)
+#define himask(n) (~lomask(32-n))
+#define ONES32 ((u_int32_t)0xffffffff)
+
+#ifdef __INTEL_COMPILER
+#define count_bits(b) _popcnt32(b)
+#else
+static inline u_int32_t count_bits(u_int32_t b)
+{
+    b = b - ((b>>1)&0x55555555);
+    b = ((b>>2)&0x33333333) + (b&0x33333333);
+    b = ((b>>4)+b)&0x0f0f0f0f;
+    b += (b>>8);
+    b += (b>>16);
+    return b & 0x3f;
+    // here is the non-optimized version, for clarity:
+    /*
+    b = ((b>> 1)&0x55555555) + (b&0x55555555);
+    b = ((b>> 2)&0x33333333) + (b&0x33333333);
+    b = ((b>> 4)&0x0f0f0f0f) + (b&0x0f0f0f0f);
+    b = ((b>> 8)&0x00ff00ff) + (b&0x00ff00ff);
+    b = ((b>>16)&0x0000ffff) + (b&0x0000ffff);
+    return b & 0x3f;
+    */
+}
+#endif
+
+u_int32_t bitreverse(u_int32_t x);
+
+u_int32_t *bitvector_new(size_t n, int initzero);
+u_int32_t *bitvector_resize(u_int32_t *b, size_t n, int initzero);
+void bitvector_set(u_int32_t *b, u_int32_t n, u_int32_t c);
+u_int32_t bitvector_get(u_int32_t *b, u_int32_t n);
+
+void bitvector_shr(u_int32_t *b, size_t n, u_int32_t s);
+void bitvector_shr_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s);
+void bitvector_shl(u_int32_t *b, size_t n, u_int32_t s);
+void bitvector_shl_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s,
+                      bool_t scrap);
+void bitvector_fill(u_int32_t *b,u_int32_t offs, u_int32_t c, u_int32_t nbits);
+void bitvector_copy(u_int32_t *dest, u_int32_t doffs,
+                    u_int32_t *a, u_int32_t aoffs, u_int32_t nbits);
+void bitvector_not(u_int32_t *b, u_int32_t offs, u_int32_t nbits);
+void bitvector_not_to(u_int32_t *dest, u_int32_t doffs,
+                      u_int32_t *a, u_int32_t aoffs, u_int32_t nbits);
+void bitvector_reverse(u_int32_t *b, u_int32_t offs, u_int32_t nbits);
+void bitvector_reverse_to(u_int32_t *dest, u_int32_t *src, u_int32_t soffs,
+                          u_int32_t nbits);
+void bitvector_and_to(u_int32_t *dest, u_int32_t doffs,
+                      u_int32_t *a, u_int32_t aoffs,
+                      u_int32_t *b, u_int32_t boffs, u_int32_t nbits);
+void bitvector_or_to(u_int32_t *dest, u_int32_t doffs,
+                     u_int32_t *a, u_int32_t aoffs,
+                     u_int32_t *b, u_int32_t boffs, u_int32_t nbits);
+void bitvector_xor_to(u_int32_t *dest, u_int32_t doffs,
+                      u_int32_t *a, u_int32_t aoffs,
+                      u_int32_t *b, u_int32_t boffs, u_int32_t nbits);
+u_int32_t bitvector_count(u_int32_t *b, u_int32_t offs, u_int32_t nbits);
+u_int32_t bitvector_any0(u_int32_t *b, u_int32_t offs, u_int32_t nbits);
+u_int32_t bitvector_any1(u_int32_t *b, u_int32_t offs, u_int32_t nbits);
+
+#endif
diff --git a/llt/config.h b/llt/config.h
new file mode 100644
index 0000000..e10681c
--- /dev/null
+++ b/llt/config.h
@@ -0,0 +1,15 @@
+#ifndef __CONFIG_H_
+#define __CONFIG_H_
+
+#define LINUX
+#undef WIN32
+
+#undef BITS64
+
+#define ARCH_X86
+#undef ARCH_X86_64
+
+#define __CPU__ 586
+
+
+#endif
diff --git a/llt/cplxprint.c b/llt/cplxprint.c
new file mode 100644
index 0000000..9c11883
--- /dev/null
+++ b/llt/cplxprint.c
@@ -0,0 +1,67 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "ieee754.h"
+#include "dtypes.h"
+#include "utils.h"
+
+void snprint_cplx(char *s, size_t cnt, double re, double im,
+                  // args to pass on to snprint_real
+                  int width, int dec,
+                  int max_digs_rt, int max_digs_lf,
+                  // print spaces around sign in a+bi
+                  int spflag)
+{
+    int fzr = (re==0) || rel_zero(re,im);
+    int fzi = (im==0) || rel_zero(im,re);
+    size_t len, sl;
+    size_t space = cnt;
+
+    s[0] = '\0';
+    if (isnan(im) && fzr) {
+        if (space < 2) return;
+        snprint_real(s, space-2, im, width, dec, max_digs_rt, max_digs_lf);
+        strcat(s, "i");
+        return;
+    }
+    if (!fzr || (fzr && fzi)) {
+        if (space < 4) return;
+        snprint_real(s, space-4, re, width, dec, max_digs_rt, max_digs_lf);
+        if ((im >= 0 || (isnan(im)&&!sign_bit(im))) && !fzi) {
+            if (spflag) {
+                strcat(s, " + ");
+            }
+            else {
+                strcat(s, "+");
+            }
+        }
+        else if (!fzi) {
+            im = -im;
+            if (spflag)
+                strcat(s, " - ");
+            else
+                strcat(s, "-");
+        }
+    }
+    if (!fzi) {
+        len = sl = strlen(s);
+        if (dbl_equals(im, -1)) {
+            while (len-sl < (size_t)width-2 && len < (space-3))
+                s[len++] = ' ';
+            s[len] =   '-';
+            s[len+1] = 'i';
+            s[len+2] = '\0';
+        }
+        else if (dbl_equals(im, 1)) {
+            while (len-sl < (size_t)width-1 && len < (space-2))
+                s[len++] = ' ';
+            s[len] =   'i';
+            s[len+1] = '\0';
+        }
+        else {
+            snprint_real(s+len, space-len-2, im, width, dec,
+                         max_digs_rt, max_digs_lf);
+            strcat(s, "i");
+        }
+    }
+}
diff --git a/llt/dblprint.c b/llt/dblprint.c
new file mode 100644
index 0000000..7f6631a
--- /dev/null
+++ b/llt/dblprint.c
@@ -0,0 +1,202 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "ieee754.h"
+#include "dtypes.h"
+
+static uint64_t max_ulps;
+static uint32_t flt_max_ulps;
+
+static uint64_t nexti64pow2(uint64_t i)
+{
+    if (i==0) return 1;
+    if ((i&(i-1))==0) return i;
+    if (i&BIT63) return BIT63;
+    // repeatedly clear bottom bit
+    while (i&(i-1))
+        i = i&(i-1);
+    return i<<1;
+}
+
+static uint32_t nexti32pow2(uint32_t i)
+{
+    if (i==0) return 1;
+    if ((i&(i-1))==0) return i;
+    if (i&BIT31) return BIT31;
+    // repeatedly clear bottom bit
+    while (i&(i-1))
+        i = i&(i-1);
+    return i<<1;
+}
+
+void dbl_tolerance(double tol)
+{
+    max_ulps = nexti64pow2((uint64_t)(tol/DBL_EPSILON));
+}
+
+void flt_tolerance(float tol)
+{
+    flt_max_ulps = nexti32pow2((uint32_t)(tol/FLT_EPSILON));
+}
+
+#ifdef __INTEL_COMPILER
+static inline int64_t llabs(int64_t j)
+{
+    return NBABS(j, 64);
+}
+#else
+extern int64_t llabs(int64_t j);
+#endif
+
+int dbl_equals(double a, double b)
+{
+    int64_t aint, bint;
+
+    if (a == b)
+        return 1;
+    aint = *(int64_t*)&a;
+    bint = *(int64_t*)&b;
+    if (aint < 0)
+        aint = BIT63 - aint;
+    if (bint < 0)
+        bint = BIT63 - bint;
+    /* you'd think it makes no difference whether the result of llabs is
+       signed or unsigned, but if it's signed then the case of
+       0x8000000000000000 blows up, making 4 == -1 :) */
+    if ((uint64_t)llabs(aint-bint) <= max_ulps)
+        return 1;
+    return 0;
+}
+
+int flt_equals(float a, float b)
+{
+    int32_t aint, bint;
+
+    if (a == b)
+        return 1;
+    aint = *(int32_t*)&a;
+    bint = *(int32_t*)&b;
+    if (aint < 0)
+        aint = BIT31 - aint;
+    if (bint < 0)
+        bint = BIT31 - bint;
+    if ((uint32_t)abs(aint-bint) <= flt_max_ulps)
+        return 1;
+    return 0;
+}
+
+int double_exponent(double d)
+{
+    union ieee754_double dl;
+
+    dl.d = d;
+    return dl.ieee.exponent - IEEE754_DOUBLE_BIAS;
+}
+
+double double_mantissa(double d)
+{
+    union ieee754_double dl;
+
+    dl.d = d;
+    dl.ieee.exponent = IEEE754_DOUBLE_BIAS;
+    dl.ieee.negative = 0;
+    return dl.d;
+}
+
+int float_exponent(float f)
+{
+    union ieee754_float fl;
+
+    fl.f = f;
+    return fl.ieee.exponent - IEEE754_FLOAT_BIAS;
+}
+
+float float_mantissa(float f)
+{
+    union ieee754_float fl;
+
+    fl.f = f;
+    fl.ieee.exponent = IEEE754_FLOAT_BIAS;
+    fl.ieee.negative = 0;
+    return fl.f;
+}
+
+void snprint_real(char *s, size_t cnt, double r,
+                  int width,    // printf field width, or 0
+                  int dec,      // # decimal digits desired, recommend 16
+                  // # of zeros in .00...0x before using scientific notation
+                  // recommend 3-4 or so
+                  int max_digs_rt,
+                  // # of digits left of decimal before scientific notation
+                  // recommend 10
+                  int max_digs_lf)
+{
+    int mag;
+    double fpart, temp;
+    char format[8];
+    char num_format[3];
+    int sz, keepz=0;
+
+    s[0] = '\0';
+    if (width == -1) {
+        width = 0;
+        keepz=1;
+    }
+    if (isnan(r)) {
+        if (sign_bit(r))
+            strncpy(s, "-nan", cnt);
+        else
+            strncpy(s, "nan", cnt);
+        return;
+    }
+    if (r == 0) {
+        strncpy(s, "0", cnt);
+        return;
+    }
+
+    num_format[0] = 'l';
+    num_format[2] = '\0';
+
+    mag = double_exponent(r);
+
+    mag = (int)(((double)mag)/LOG2_10 + 0.5);
+    if (r == 0)
+        mag = 0;
+    if ((mag > max_digs_lf-1) || (mag < -max_digs_rt)) {
+        num_format[1] = 'e';
+        temp = r/pow(10, mag);      /* see if number will have a decimal */
+        fpart = temp - floor(temp); /* when written in scientific notation */
+    }
+    else {
+        num_format[1] = 'f';
+        fpart = r - floor(r);
+    }
+    if (fpart == 0)
+        dec = 0;
+    if (width == 0) {
+        snprintf(format, 8, "%%.%d%s", dec, num_format);
+    }
+    else {
+        snprintf(format, 8, "%%%d.%d%s", width, dec, num_format);
+    }
+    sz = snprintf(s, cnt, format, r);
+    /* trim trailing zeros from fractions. not when using scientific
+       notation, since we might have e.g. 1.2000e+100. also not when we
+       need a specific output width */
+    if (width == 0 && !keepz) {
+        if (sz > 2 && fpart && num_format[1]!='e') {
+            while (s[sz-1] == '0') {
+                s[sz-1]='\0';
+                sz--;
+            }
+            // don't need trailing .
+            if (s[sz-1] == '.') {
+                s[sz-1] = '\0';
+                sz--;
+            }
+        }
+    }
+    // TODO. currently 1.1e20 prints as 1.1000000000000000e+20; be able to
+    // get rid of all those zeros.
+}
diff --git a/llt/dirpath.c b/llt/dirpath.c
new file mode 100644
index 0000000..4e76b5e
--- /dev/null
+++ b/llt/dirpath.c
@@ -0,0 +1,101 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#ifdef WIN32
+#include <malloc.h>
+#include <sys/timeb.h>
+#include <windows.h>
+#undef NO_ERROR
+#undef MOD_SHIFT
+#undef TRUE
+#undef FALSE
+#undef VOID
+#else
+#include <sys/time.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#endif
+
+#include "dtypes.h"
+
+void get_cwd(char *buf, size_t size)
+{
+#ifndef WIN32
+    getcwd(buf, size);
+#else
+    GetCurrentDirectory(size, buf);
+#endif
+}
+
+int set_cwd(char *buf)
+{
+#ifndef WIN32
+    if (chdir(buf) == -1)
+        return 1;
+#else
+    if (SetCurrentDirectory(buf) == 0)
+        return 1;
+#endif
+    return 0;
+}
+
+#ifdef LINUX
+char *get_exename(char *buf, size_t size)
+{
+    char linkname[64]; /* /proc/<pid>/exe */
+    pid_t pid;
+    ssize_t ret;
+
+    /* Get our PID and build the name of the link in /proc */
+    pid = getpid();
+
+    if (snprintf(linkname, sizeof(linkname), "/proc/%i/exe", pid) < 0)
+        return NULL;
+
+    /* Now read the symbolic link */
+    ret = readlink(linkname, buf, size);
+
+    /* In case of an error, leave the handling up to the caller */
+    if (ret == -1)
+        return NULL;
+
+    /* Report insufficient buffer size */
+    if ((size_t)ret >= size)
+        return NULL;
+
+    /* Ensure proper NUL termination */
+    buf[ret] = 0;
+
+    return buf;
+}
+#elif defined(WIN32)
+char *get_exename(char *buf, size_t size)
+{
+    if (GetModuleFileName(NULL, buf, size) == 0)
+        return NULL;
+
+    return buf;
+}
+#elif defined(MACOSX) || defined(MACINTEL)
+#include "/Developer/Headers/FlatCarbon/Processes.h"
+#include "/Developer/Headers/FlatCarbon/Files.h"
+char *get_exename(char *buf, size_t size)
+{
+    ProcessSerialNumber PSN;
+    FSRef ref;
+
+    if (GetCurrentProcess(&PSN) < 0 ||
+        GetProcessBundleLocation(&PSN, &ref) < 0 ||
+        FSRefMakePath(&ref, buf, size) < 0)
+        return NULL;
+
+    return buf;
+}
+#endif
diff --git a/llt/dirpath.h b/llt/dirpath.h
new file mode 100644
index 0000000..adb3c3a
--- /dev/null
+++ b/llt/dirpath.h
@@ -0,0 +1,23 @@
+#ifndef __DIRPATH_H_
+#define __DIRPATH_H_
+
+#ifdef WIN32
+#define PATHSEP '\\'
+#define PATHSEPSTRING "\\"
+#define PATHLISTSEP ';'
+#define PATHLISTSEPSTRING ";"
+#define ISPATHSEP(c) ((c)=='/' || (c)=='\\')
+#define MAXPATHLEN 1024
+#else
+#define PATHSEP '/'
+#define PATHSEPSTRING "/"
+#define PATHLISTSEP ':'
+#define PATHLISTSEPSTRING ":"
+#define ISPATHSEP(c) ((c)=='/')
+#endif
+
+void get_cwd(char *buf, size_t size);
+int set_cwd(char *buf);
+char *get_exename(char *buf, size_t size);
+
+#endif
diff --git a/llt/dtypes.h b/llt/dtypes.h
new file mode 100644
index 0000000..ffc840d
--- /dev/null
+++ b/llt/dtypes.h
@@ -0,0 +1,124 @@
+#ifndef __DTYPES_H_
+#define __DTYPES_H_
+
+/*
+  This file defines sane integer types for our target platforms. This
+  library only runs on machines with the following characteristics:
+
+  - supports integer word sizes of 8, 16, 32, and 64 bits
+  - uses unsigned and signed 2's complement representations
+  - all pointer types are the same size
+  - there is an integer type with the same size as a pointer
+
+  Some features require:
+  - IEEE 754 single- and double-precision floating point
+
+  We assume the LP64 convention for 64-bit platforms.
+*/
+
+#include "config.h"
+
+typedef int bool_t;
+/* unfortunately we can't make this an enum, since false is an invalid
+   enum label in C++ (since it's a keyword) */
+#define false (0)
+#define true (1)
+
+#if defined(__INTEL_COMPILER) && defined(WIN32)
+# define STATIC_INLINE static
+# define INLINE
+# ifdef BITS64
+typedef unsigned long size_t;
+# else
+typedef unsigned int size_t;
+# endif
+#else
+# define STATIC_INLINE static inline
+# define INLINE inline
+#endif
+
+typedef unsigned char  byte_t;   /* 1 byte */
+#if defined(WIN32)
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef unsigned char u_int8_t;
+typedef unsigned short u_int16_t;
+typedef unsigned int u_int32_t;
+#ifdef BITS64
+typedef unsigned long u_int64_t;
+#else
+typedef unsigned long long u_int64_t;
+#endif
+#ifdef __INTEL_COMPILER
+typedef signed char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+#endif
+#else
+#include <sys/types.h>
+#endif
+
+#ifdef BITS64
+#define TOP_BIT 0x8000000000000000
+#define NBITS 64
+typedef unsigned long uint_t;  // preferred int type on platform
+typedef long int_t;
+typedef int64_t offset_t;
+typedef u_int64_t index_t;
+typedef int64_t ptrint_t; // pointer-size int
+typedef u_int64_t u_ptrint_t
+#else
+#define TOP_BIT 0x80000000
+#define NBITS 32
+typedef unsigned long uint_t;
+typedef long int_t;
+typedef int32_t offset_t;
+typedef u_int32_t index_t;
+typedef int32_t ptrint_t;
+typedef u_int32_t u_ptrint_t;
+#endif
+
+typedef u_int8_t  uint8_t;
+typedef u_int16_t uint16_t;
+typedef u_int32_t uint32_t;
+typedef u_int64_t uint64_t;
+typedef u_ptrint_t uptrint_t;
+
+#define ALIGN(x, sz) (((x) + (sz-1)) & (-sz))
+
+#define DBL_MAXINT 9007199254740992LL
+#define FLT_MAXINT 16777216
+#define U64_MAX    18446744073709551615ULL
+#define S64_MAX    9223372036854775807LL
+#define S64_MIN    (-S64_MAX - 1LL)
+#define BIT63      0x8000000000000000LL
+#define BIT31      0x80000000
+
+#define DBL_EPSILON      2.2204460492503131e-16
+#define FLT_EPSILON      1.1920928e-7
+#define DBL_MAX          1.7976931348623157e+308
+#define DBL_MIN          2.2250738585072014e-308
+#define FLT_MAX          3.402823466e+38
+#define FLT_MIN          1.175494351e-38
+#define LOG2_10          3.3219280948873626
+#define rel_zero(a, b) (fabs((a)/(b)) < DBL_EPSILON)
+#define sign_bit(r) ((*(int64_t*)&(r)) & BIT63)
+#define LABS(n) (((n)^((n)>>(NBITS-1))) - ((n)>>(NBITS-1)))
+#define NBABS(n,nb) (((n)^((n)>>((nb)-1))) - ((n)>>((nb)-1)))
+#define DFINITE(d) (((*(int64_t*)&(d))&0x7ff0000000000000LL)!=0x7ff0000000000000LL)
+
+typedef enum { T_INT8, T_UINT8, T_INT16, T_UINT16, T_INT32, T_UINT32,
+               T_INT64, T_UINT64, T_FLOAT, T_DOUBLE } numerictype_t;
+
+#define N_NUMTYPES ((int)T_DOUBLE+1)
+
+#ifdef BITS64
+# define T_LONG T_INT64
+# define T_ULONG T_UINT64
+#else
+# define T_LONG T_INT32
+# define T_ULONG T_UINT32
+#endif
+
+#endif
diff --git a/llt/hashing.c b/llt/hashing.c
new file mode 100644
index 0000000..1bcccdf
--- /dev/null
+++ b/llt/hashing.c
@@ -0,0 +1,125 @@
+/*
+  Hashing and random numbers
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "ieee754.h"
+#include "dtypes.h"
+#include "utils.h"
+#include "hashing.h"
+#include "timefuncs.h"
+
+uint_t nextipow2(uint_t i)
+{
+    if (i==0) return 1;
+    if ((i&(i-1))==0) return i;
+    if (i&TOP_BIT) return TOP_BIT;
+
+    // repeatedly clear bottom bit
+    while (i&(i-1))
+        i = i&(i-1);
+
+    return i<<1;
+}
+
+u_int32_t int32hash(u_int32_t a)
+{
+    a = (a+0x7ed55d16) + (a<<12);
+    a = (a^0xc761c23c) ^ (a>>19);
+    a = (a+0x165667b1) + (a<<5);
+    a = (a+0xd3a2646c) ^ (a<<9);
+    a = (a+0xfd7046c5) + (a<<3);
+    a = (a^0xb55a4f09) ^ (a>>16);
+    return a;
+}
+
+u_int64_t int64hash(u_int64_t key)
+{
+    key = (~key) + (key << 21);            // key = (key << 21) - key - 1;
+    key =   key  ^ (key >> 24);
+    key = (key + (key << 3)) + (key << 8); // key * 265
+    key =  key ^ (key >> 14);
+    key = (key + (key << 2)) + (key << 4); // key * 21
+    key =  key ^ (key >> 28);
+    key =  key + (key << 31);
+    return key;
+}
+
+u_int32_t int64to32hash(u_int64_t key)
+{
+    key = (~key) + (key << 18); // key = (key << 18) - key - 1;
+    key =   key  ^ (key >> 31);
+    key = key * 21;             // key = (key + (key << 2)) + (key << 4);
+    key = key ^ (key >> 11);
+    key = key + (key << 6);
+    key = key ^ (key >> 22);
+    return (u_int32_t)key;
+}
+
+#include "lookup3.c"
+
+u_int64_t memhash(char* buf, size_t n)
+{
+    u_int32_t c=0xcafe8881, b=0x4d6a087c;
+
+    hashlittle2(buf, n, &c, &b);
+    return (u_int64_t)c | (((u_int64_t)b)<<32);
+}
+
+#include "mt19937ar.c"
+
+double rand_double()
+{
+    union ieee754_double d;
+
+    d.ieee.mantissa0 = random();
+    d.ieee.mantissa1 = random();
+    d.ieee.negative = 0;
+    d.ieee.exponent = IEEE754_DOUBLE_BIAS + 0;    /* 2^0 */
+    return d.d - 1.0;
+}
+
+float rand_float()
+{
+    union ieee754_float f;
+
+    f.ieee.mantissa = random();
+    f.ieee.negative = 0;
+    f.ieee.exponent = IEEE754_FLOAT_BIAS + 0;     /* 2^0 */
+    return f.f - 1.0;
+}
+
+void randn(double *pre, double *pim)
+{
+    double s, vre, vim, ure, uim;
+
+    do {
+        ure = rand_double();
+        uim = rand_double();
+        vre = 2*ure - 1;
+        vim = 2*uim - 1;
+        s = vre*vre + vim*vim;
+    } while (s >= 1);
+    s = sqrt(-2*log(s)/s);
+    *pre = s * vre;
+    *pim = s * vim;
+}
+
+void randomize()
+{
+    u_int64_t tm = i64time();
+    init_by_array((unsigned long*)&tm, 2);
+}
+
+void llt_init()
+{
+    /*
+      I used this function to guess good values based on epsilon:
+      tol(eps) = exp(ln(eps)*-.2334012088721472)*eps
+    */
+    dbl_tolerance(1e-12);
+    flt_tolerance(5e-6);
+
+    randomize();
+}
diff --git a/llt/hashing.h b/llt/hashing.h
new file mode 100644
index 0000000..acd1adc
--- /dev/null
+++ b/llt/hashing.h
@@ -0,0 +1,24 @@
+#ifndef __HASHING_H_
+#define __HASHING_H_
+
+uint_t nextipow2(uint_t i);
+u_int32_t int32hash(u_int32_t a);
+u_int64_t int64hash(u_int64_t key);
+u_int32_t int64to32hash(u_int64_t key);
+#ifdef BITS64
+#define inthash int64hash
+#else
+#define inthash int32hash
+#endif
+u_int64_t memhash(char* buf, size_t n);
+#define random() genrand_int32()
+#define srandom(n) init_genrand(n)
+double rand_double();
+float rand_float();
+void randn(double *pre, double *pim);
+u_int64_t i64time();
+void randomize();
+unsigned long genrand_int32();
+void init_genrand(unsigned long s);
+
+#endif
diff --git a/llt/ieee754.h b/llt/ieee754.h
new file mode 100644
index 0000000..e1c6704
--- /dev/null
+++ b/llt/ieee754.h
@@ -0,0 +1,222 @@
+/* Copyright (C) 1992, 1995, 1996, 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _IEEE754_H
+
+#define _IEEE754_H 1
+#ifdef LINUX
+#include <features.h>
+
+#include <endian.h>
+
+__BEGIN_DECLS
+#else
+#define __LITTLE_ENDIAN	1234
+#define __BIG_ENDIAN	4321
+#define __PDP_ENDIAN	3412
+#endif
+
+#ifdef MACOSX
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __FLOAT_WORD_ORDER __BIG_ENDIAN
+#endif
+
+#ifdef MACINTEL
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __FLOAT_WORD_ORDER __LITTLE_ENDIAN
+#endif
+
+#ifdef WIN32
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __FLOAT_WORD_ORDER __LITTLE_ENDIAN
+#endif
+
+union ieee754_float
+  {
+    float f;
+
+    /* This is the IEEE 754 single-precision format.  */
+    struct
+      {
+#if	__BYTE_ORDER == __BIG_ENDIAN
+	unsigned int negative:1;
+	unsigned int exponent:8;
+	unsigned int mantissa:23;
+#endif				/* Big endian.  */
+#if	__BYTE_ORDER == __LITTLE_ENDIAN
+	unsigned int mantissa:23;
+	unsigned int exponent:8;
+	unsigned int negative:1;
+#endif				/* Little endian.  */
+      } ieee;
+
+    /* This format makes it easier to see if a NaN is a signalling NaN.  */
+    struct
+      {
+#if	__BYTE_ORDER == __BIG_ENDIAN
+	unsigned int negative:1;
+	unsigned int exponent:8;
+	unsigned int quiet_nan:1;
+	unsigned int mantissa:22;
+#endif				/* Big endian.  */
+#if	__BYTE_ORDER == __LITTLE_ENDIAN
+	unsigned int mantissa:22;
+	unsigned int quiet_nan:1;
+	unsigned int exponent:8;
+	unsigned int negative:1;
+#endif				/* Little endian.  */
+      } ieee_nan;
+  };
+
+#define IEEE754_FLOAT_BIAS	0x7f /* Added to exponent.  */
+
+
+union ieee754_double
+  {
+    double d;
+
+    /* This is the IEEE 754 double-precision format.  */
+    struct
+      {
+#if	__BYTE_ORDER == __BIG_ENDIAN
+	unsigned int negative:1;
+	unsigned int exponent:11;
+	/* Together these comprise the mantissa.  */
+	unsigned int mantissa0:20;
+	unsigned int mantissa1:32;
+#endif				/* Big endian.  */
+#if	__BYTE_ORDER == __LITTLE_ENDIAN
+# if	__FLOAT_WORD_ORDER == BIG_ENDIAN
+	unsigned int mantissa0:20;
+	unsigned int exponent:11;
+	unsigned int negative:1;
+	unsigned int mantissa1:32;
+# else
+	/* Together these comprise the mantissa.  */
+	unsigned int mantissa1:32;
+	unsigned int mantissa0:20;
+	unsigned int exponent:11;
+	unsigned int negative:1;
+# endif
+#endif				/* Little endian.  */
+      } ieee;
+
+    /* This format makes it easier to see if a NaN is a signalling NaN.  */
+    struct
+      {
+#if	__BYTE_ORDER == __BIG_ENDIAN
+	unsigned int negative:1;
+	unsigned int exponent:11;
+	unsigned int quiet_nan:1;
+	/* Together these comprise the mantissa.  */
+	unsigned int mantissa0:19;
+	unsigned int mantissa1:32;
+#else
+# if	__FLOAT_WORD_ORDER == BIG_ENDIAN
+	unsigned int mantissa0:19;
+	unsigned int quiet_nan:1;
+	unsigned int exponent:11;
+	unsigned int negative:1;
+	unsigned int mantissa1:32;
+# else
+	/* Together these comprise the mantissa.  */
+	unsigned int mantissa1:32;
+	unsigned int mantissa0:19;
+	unsigned int quiet_nan:1;
+	unsigned int exponent:11;
+	unsigned int negative:1;
+# endif
+#endif
+      } ieee_nan;
+  };
+
+#define IEEE754_DOUBLE_BIAS	0x3ff /* Added to exponent.  */
+
+
+union ieee854_long_double
+  {
+    long double d;
+
+    /* This is the IEEE 854 double-extended-precision format.  */
+    struct
+      {
+#if	__BYTE_ORDER == __BIG_ENDIAN
+	unsigned int negative:1;
+	unsigned int exponent:15;
+	unsigned int empty:16;
+	unsigned int mantissa0:32;
+	unsigned int mantissa1:32;
+#endif
+#if	__BYTE_ORDER == __LITTLE_ENDIAN
+# if	__FLOAT_WORD_ORDER == BIG_ENDIAN
+	unsigned int exponent:15;
+	unsigned int negative:1;
+	unsigned int empty:16;
+	unsigned int mantissa0:32;
+	unsigned int mantissa1:32;
+# else
+	unsigned int mantissa1:32;
+	unsigned int mantissa0:32;
+	unsigned int exponent:15;
+	unsigned int negative:1;
+	unsigned int empty:16;
+# endif
+#endif
+      } ieee;
+
+    /* This is for NaNs in the IEEE 854 double-extended-precision format.  */
+    struct
+      {
+#if	__BYTE_ORDER == __BIG_ENDIAN
+	unsigned int negative:1;
+	unsigned int exponent:15;
+	unsigned int empty:16;
+	unsigned int one:1;
+	unsigned int quiet_nan:1;
+	unsigned int mantissa0:30;
+	unsigned int mantissa1:32;
+#endif
+#if	__BYTE_ORDER == __LITTLE_ENDIAN
+# if	__FLOAT_WORD_ORDER == BIG_ENDIAN
+	unsigned int exponent:15;
+	unsigned int negative:1;
+	unsigned int empty:16;
+	unsigned int mantissa0:30;
+	unsigned int quiet_nan:1;
+	unsigned int one:1;
+	unsigned int mantissa1:32;
+# else
+	unsigned int mantissa1:32;
+	unsigned int mantissa0:30;
+	unsigned int quiet_nan:1;
+	unsigned int one:1;
+	unsigned int exponent:15;
+	unsigned int negative:1;
+	unsigned int empty:16;
+# endif
+#endif
+      } ieee_nan;
+  };
+
+#define IEEE854_LONG_DOUBLE_BIAS 0x3fff
+
+#ifdef LINUX
+__END_DECLS
+#endif
+
+#endif /* ieee754.h */
diff --git a/llt/ios.c b/llt/ios.c
new file mode 100644
index 0000000..ec24835
--- /dev/null
+++ b/llt/ios.c
@@ -0,0 +1,479 @@
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+#include <errno.h>
+#include <wchar.h>
+
+#ifdef WIN32
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+#define fileno _fileno
+#else
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/select.h>
+#endif
+
+#include "dtypes.h"
+#include "utils.h"
+#include "utf8.h"
+#include "ios.h"
+#include "socket.h"
+#include "timefuncs.h"
+
+/* OS-level primitive wrappers */
+
+static int _fd_available(long fd)
+{
+#ifndef WIN32
+    fd_set set;
+    struct timeval tv = {0, 0};
+
+    FD_ZERO(&set);
+    FD_SET(fd, &set);
+    return (select(fd+1, &set, NULL, NULL, &tv)!=0);
+#else
+    return 0;
+#endif
+}
+
+// poll for read, unless forwrite!=0
+static void _fd_poll(long fd, int forwrite)
+{
+#ifndef WIN32
+    fd_set set;
+
+    FD_ZERO(&set);
+    FD_SET(fd, &set);
+    if (forwrite)
+        select(fd+1, NULL, &set, NULL, NULL);
+    else
+        select(fd+1, &set, NULL, NULL, NULL);
+#else
+#endif
+}
+
+static int _enonfatal(int err)
+{
+    return (err == EAGAIN || err == EINPROGRESS || err == EINTR ||
+            err == EWOULDBLOCK);
+}
+
+#define SLEEP_TIME 5//ms
+
+// return error code, #bytes read in *nread
+// these wrappers retry operations until success or a fatal error
+static int _os_read(long fd, void *buf, size_t n, size_t *nread)
+{
+    ssize_t r;
+
+    while (1) {
+        r = read((int)fd, buf, n);
+        if (r > -1) {
+            *nread = (size_t)r;
+            return 0;
+        }
+        if (!_enonfatal(errno)) {
+            *nread = 0;
+            return errno;
+        }
+        sleep_ms(SLEEP_TIME);
+    }
+    return 0;
+}
+
+static int _os_read_all(long fd, void *buf, size_t n, size_t *nread)
+{
+    size_t got;
+
+    *nread = 0;
+
+    while (n>0) {
+        int err = _os_read(fd, buf, n, &got);
+        n -= got;
+        *nread += got;
+        buf += got;
+        if (err)
+            return err;
+        if (got == 0)
+            _fd_poll(fd, 0);
+    }
+    return 0;
+}
+
+static int _os_write(long fd, void *buf, size_t n, size_t *nwritten)
+{
+    ssize_t r;
+
+    while (1) {
+        r = write((int)fd, buf, n);
+        if (r > -1) {
+            *nwritten = (size_t)r;
+            return 0;
+        }
+        if (!_enonfatal(errno)) {
+            *nwritten = 0;
+            return errno;
+        }
+        sleep_ms(5);
+    }
+    return 0;
+}
+
+static int _os_write_all(long fd, void *buf, size_t n, size_t *nwritten)
+{
+    size_t wrote;
+
+    *nwritten = 0;
+
+    while (n>0) {
+        int err = _os_write(fd, buf, n, &wrote);
+        n -= wrote;
+        *nwritten += wrote;
+        buf += wrote;
+        if (err)
+            return err;
+        if (wrote == 0)
+            _fd_poll(fd, 1);
+    }
+    return 0;
+}
+
+
+/* internal utility functions */
+
+static char *_buf_realloc(ios_t *s, size_t sz)
+{
+    char *temp;
+
+    if (sz <= s->maxsize)
+        return s->buf;
+
+    if ((s->buf==NULL || s->buf==&s->local[0]) && (sz <= IOS_INLSIZE)) {
+        /* TODO: if we want to allow shrinking, see if the buffer shrank
+           down to this size, in which case we need to copy. */
+        s->buf = &s->local[0];
+        s->maxsize = IOS_INLSIZE;
+        s->ownbuf = 1;
+        return s->buf;
+    }
+    else if (s->ownbuf && s->buf != &s->local[0]) {
+        // if we own the buffer we're free to resize it
+        // always allocate 1 bigger in case user wants to add a NUL
+        // terminator after taking over the buffer
+        temp = realloc(s->buf, sz+1);
+        if (temp == NULL)
+            return NULL;
+    }
+    else {
+        temp = malloc(sz+1);
+        if (temp == NULL)
+            return NULL;
+        s->ownbuf = 1;
+        if (s->size > 0)
+            memcpy(temp, s->buf, s->size);
+    }
+
+    s->buf = temp;
+    s->maxsize = sz;
+    return s->buf;
+}
+
+// write a block of data into the buffer at the current position, resizing
+// if necessary. returns # written.
+static size_t _writebuf_force(ios_t *s, char *data, size_t n)
+{
+    size_t amt;
+    size_t newsize;
+
+    if (n == 0)
+        return 0;
+
+    if (s->bpos + n > s->size) {
+        if (s->bpos + n > s->maxsize) {
+            /* TODO: here you might want to add a mechanism for limiting
+               the growth of the stream. */
+            newsize = s->maxsize * 2;
+            while (s->bpos + n > newsize)
+                newsize *= 2;
+            if (_buf_realloc(s, newsize) == NULL) {
+                /* no more space; write as much as we can */
+                amt = s->maxsize - s->bpos;
+                if (amt > 0) {
+                    memcpy(&s->buf[s->bpos], data, amt);
+                }
+                s->bpos += amt;
+                s->size = s->maxsize;
+                return amt;
+            }
+        }
+        s->size = s->bpos + n;
+    }
+    memcpy(&s->buf[s->bpos], data, n);
+    s->bpos += n;
+
+    return n;
+}
+
+
+/* interface functions, low level */
+
+size_t ios_read(ios_t *s, char *dest, size_t n, int all)
+{
+    size_t tot = 0;
+    size_t got, avail;
+    int result;
+
+    while (n > 0) {
+        avail = s->size - s->bpos;
+        
+        if (avail >= n) {
+            memcpy(dest, s->buf + s->bpos, n);
+            s->bpos += n;
+            return tot+n;
+        }
+        
+        if (avail > 0) {
+            memcpy(dest, s->buf + s->bpos, avail);
+        }
+        if (s->bm == bm_mem || s->fd == -1) {
+            // can't get any more data
+            s->bpos += avail;
+            return avail;
+        }
+        else {
+            dest += avail;
+            n -= avail;
+            tot += avail;
+
+            ios_flush(s);
+            s->bpos = s->size = 0;
+            s->state = bst_rd;
+        }
+        
+        if (n > (s->maxsize - (s->maxsize>>4))) {
+            // doesn't fit comfortably in buffer; go direct
+            if (all)
+                result = _os_read_all(s->fd, dest, n, &got);
+            else
+                result = _os_read(s->fd, dest, n, &got);
+            return tot+got;
+        }
+        else {
+            // refill buffer
+            if (_os_read(s->fd, s->buf, s->maxsize, &got)) {
+                return tot;
+            }
+            if (got == 0) {
+                if (all)
+                    _fd_poll(s->fd, 0);
+                else
+                    return tot;
+            }
+            s->size = got;
+        }
+    }
+
+    return tot;
+}
+
+size_t ios_write(ios_t *s, char *data, size_t n)
+{
+}
+
+off_t ios_seek(ios_t *s, off_t pos)
+{
+}
+
+off_t ios_seek_end(ios_t *s)
+{
+}
+
+off_t ios_skip(ios_t *s, off_t offs)
+{
+}
+
+off_t ios_pos(ios_t *s)
+{
+    if (s->bm == bm_mem)
+        return (off_t)s->bpos;
+
+    off_t fdpos = lseek(s->fd, 0, SEEK_CUR);
+    if (fdpos == (off_t)-1)
+        return fdpos;
+
+    if (s->state == bst_wr)
+        fdpos += s->bpos;
+    else if (s->state == bst_rd)
+        fdpos -= (s->size - s->bpos);
+    return fdpos;
+}
+
+size_t ios_trunc(ios_t *s, size_t size)
+{
+}
+
+int ios_eof(ios_t *s)
+{
+    if (s->bm == bm_mem)
+        return (s->bpos >= s->size);
+    if (s->fd == -1)
+        return 1;
+    // todo
+}
+
+static void _discard_partial_buffer(ios_t *s)
+{
+    // this function preserves the invariant that data to write
+    // begins at the beginning of the buffer, and s->size refers
+    // to how much valid file data is stored in the buffer.
+
+    // this needs to be called when normal operation is interrupted in
+    // the middle of the buffer. "normal operation" is reading or
+    // writing to the end of the buffer. this happens e.g. when flushing.
+    size_t delta = 0;
+    if (s->ndirty && s->size > s->ndirty) {
+        delta = s->size - s->ndirty;
+        memmove(s->buf, s->buf + s->ndirty, delta);
+    }
+    s->size -= s->ndirty;
+    s->bpos -= s->ndirty;
+}
+
+int ios_flush(ios_t *s)
+{
+    if (s->ndirty == 0 || s->bm == bm_mem || s->buf == NULL)
+        return 0;
+    if (s->fd == -1)
+        return -1;
+
+    if (s->state == bst_rd) {
+        if (lseek(s->fd, -(off_t)s->size, SEEK_CUR) == (off_t)-1) {
+        }
+    }
+
+    size_t nw, ntowrite=s->ndirty;
+    // todo: this should use sendall
+    int err = _os_write(s->fd, s->buf, ntowrite, &nw);
+    // todo: try recovering from some kinds of errors (e.g. retry)
+
+    if (s->state == bst_rd) {
+        if (lseek(s->fd, s->size - nw, SEEK_CUR) == (off_t)-1) {
+        }
+    }
+
+    if (s->ndirty <= s->bpos) {
+        // in this case assume we're done with the first part of the buffer
+        _discard_partial_buffer(s);
+    }
+    s->ndirty = 0;
+
+    if (err)
+        return err;
+    if (nw < ntowrite)
+        return -1;
+    return 0;
+}
+
+void ios_close(ios_t *s)
+{
+    ios_flush(s);
+    if (s->fd != -1 && s->ownfd)
+        close(s->fd);
+    s->fd = -1;
+}
+
+char *ios_takebuf(ios_t *s, size_t *psize)
+{
+    char *buf;
+
+    ios_flush(s);
+
+    if (s->buf == &s->local[0]) {
+        buf = malloc(s->size+1);
+        if (buf == NULL)
+            return NULL;
+        if (s->size)
+            memcpy(buf, s->buf, s->size);
+        buf[s->size] = '\0';
+    }
+    else {
+        buf = s->buf;
+    }
+
+    *psize = s->size+1;  // buffer is actually 1 bigger for terminating NUL
+
+    /* empty stream and reinitialize */
+    if (s->bm == bm_mem || s->bm == bm_none) {
+        s->buf = &s->local[0];
+        s->maxsize = IOS_INLSIZE;
+    }
+    else {
+        s->buf = NULL;
+        _buf_realloc(s, IOS_BUFSIZE);
+    }
+    s->size = s->bpos = 0;
+
+    return buf;
+}
+
+int ios_setbuf(ios_t *s, char *buf, size_t size, int own)
+{
+    ios_flush(s);
+    size_t nvalid=0;
+
+    nvalid = (size < s->size) ? size : s->size;
+    memcpy(buf, s->buf, nvalid);
+    if (s->bpos > nvalid) {
+        // truncated
+        s->bpos = nvalid;
+    }
+    s->size = nvalid;
+
+    if (s->buf!=NULL && s->ownbuf && s->buf!=&s->local[0])
+        free(s->buf);
+    s->buf = buf;
+    s->maxsize = size;
+    s->ownbuf = own;
+    return 0;
+}
+
+int ios_bufmode(ios_t *s, bufmode_t mode)
+{
+    // no fd; can only do mem-only buffering
+    if (s->fd == -1 && mode != bm_mem)
+        return -1;
+    s->bm = mode;
+    return 0;
+}
+
+void ios_bswap(ios_t *s, int bswap)
+{
+    s->byteswap = !!bswap;
+}
+
+int ios_copy(ios_t *to, ios_t *from, size_t nbytes, bool_t all)
+{
+}
+
+
+/* stream object initializers. we do no allocation. */
+
+ios_t *ios_file(ios_t *s, char *fname, int create, int rewrite)
+{
+}
+
+ios_t *ios_mem(ios_t *s, size_t initsize)
+{
+}
+
+ios_t *ios_fd(ios_t *s, long fd)
+{
+}
+
+
+/* higher level interface */
+
diff --git a/llt/ios.h b/llt/ios.h
new file mode 100644
index 0000000..371f11c
--- /dev/null
+++ b/llt/ios.h
@@ -0,0 +1,192 @@
+#ifndef __IOS_H_
+#define __IOS_H_
+
+// this flag controls when data actually moves out to the underlying I/O
+// channel. memory streams are a special case of this where the data
+// never moves out.
+typedef enum { bm_none, bm_line, bm_block, bm_mem } bufmode_t;
+
+typedef enum { bst_none, bst_rd, bst_wr } bufstate_t;
+
+#define IOS_INLSIZE 54
+#define IOS_BUFSIZE 4095
+
+typedef struct {
+    bufmode_t bm;
+
+    // the state only indicates where the underlying file position is relative
+    // to the buffer. reading: at the end. writing: at the beginning.
+    // in general, you can do any operation in any state.
+    bufstate_t state;
+
+    int errcode;
+
+    char *buf;        // start of buffer
+    size_t maxsize;   // space allocated to buffer
+    size_t size;      // length of valid data in buf, >=ndirty
+    size_t bpos;      // current position in buffer
+    size_t ndirty;    // # bytes at &buf[0] that need to be written
+
+    // this is a public field that keeps a running count of bytes
+    // read or written. you can freely use and change it. this is
+    // intended for keeping track of relative positions in streams
+    // that don't have absolute positions (like sockets).
+    size_t tally;
+
+    // pointer-size integer to support platforms where it might have
+    // to be a pointer
+    long fd;
+
+    unsigned char byteswap:1;
+    //unsigned char readonly:1;
+    unsigned char ownbuf:1;
+    unsigned char ownfd:1;
+
+    // this means you can read, seek back, then read the same data
+    // again any number of times. usually only true for files and strings.
+    unsigned char rereadable:1;
+
+    // this enables "stenciled writes". you can alternately write and
+    // seek without flushing in between. this performs read-before-write
+    // to populate the buffer, so "rereadable" capability is required.
+    // this is off by default.
+    unsigned char stenciled:1;
+
+    // request durable writes (fsync)
+    // unsigned char durable:1;
+
+    // todo: mutex
+    char local[IOS_INLSIZE];
+} ios_t;
+
+/* low-level interface functions */
+size_t ios_read(ios_t *s, char *dest, size_t n, int all);
+size_t ios_write(ios_t *s, char *data, size_t n);
+off_t ios_seek(ios_t *s, off_t pos);   // absolute seek
+off_t ios_seek_end(ios_t *s);
+off_t ios_skip(ios_t *s, off_t offs);  // relative seek
+off_t ios_pos(ios_t *s);  // get current position
+size_t ios_trunc(ios_t *s, size_t size);
+int ios_eof(ios_t *s);
+int ios_flush(ios_t *s);
+void ios_close(ios_t *s);
+char *ios_takebuf(ios_t *s, size_t *psize);  // release buffer to caller
+// set buffer space to use
+int ios_setbuf(ios_t *s, char *buf, size_t size, int own);
+int ios_bufmode(ios_t *s, bufmode_t mode);
+void ios_bswap(ios_t *s, int bswap);
+int ios_copy(ios_t *to, ios_t *from, size_t nbytes, bool_t all);
+//void ios_lock(ios_t *s);
+//int ios_trylock(ios_t *s);
+//int ios_unlock(ios_t *s);
+
+/* stream creation */
+ios_t *ios_file(ios_t *s, char *fname, int create, int rewrite);
+ios_t *ios_mem(ios_t *s, size_t initsize);
+ios_t *ios_fd(ios_t *s, long fd);
+// todo: ios_socket
+
+/* high-level functions - output */
+int ios_putnum(ios_t *s, char *data, uint32_t type);
+int ios_putint(ios_t *s, int n);
+int ios_pututf8(ios_t *s, uint32_t wc);
+int ios_putstringz(ios_t *s, char *str, bool_t do_write_nulterm);
+int ios_printf(ios_t *s, char *format, ...);
+
+/* high-level stream functions - input */
+int ios_getnum(ios_t *s, char *data, uint32_t type);
+int ios_getutf8(ios_t *s, uint32_t *pwc);
+int ios_ungetutf8(ios_t *s, uint32_t wc);
+int ios_getstringz(ios_t *dest, ios_t *src);
+int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars);
+int ios_readline(ios_t *dest, ios_t *s, char delim);
+int ios_getline(ios_t *s, char **pbuf, size_t *psz);
+
+// seek by utf8 sequence increments
+int ios_nextutf8(ios_t *s);
+int ios_prevutf8(ios_t *s);
+
+/* stdio-style functions */
+#define IOS_EOF (-1)
+int ios_putc(ios_t *s, int c);
+wint_t ios_putwc(ios_t *s, wchar_t wc);
+int ios_getc(ios_t *s);
+wint_t ios_getwc(ios_t *s);
+int ios_ungetc(ios_t *s, int c);
+wint_t ios_ungetwc(ios_t *s, wint_t wc);
+#define ios_puts(s, str) ios_write(s, str, strlen(str))
+
+/*
+  With memory streams, mixed reads and writes are equivalent to performing
+  sequences of *p++, as either an lvalue or rvalue. File streams behave
+  similarly, but other streams might not support this. Using unbuffered
+  mode makes this more predictable.
+
+  Note on "unget" functions:
+  There are two kinds of functions here: those that operate on sized
+  blocks of bytes and those that operate on logical units like "character"
+  or "integer". The "unget" functions only work on logical units. There
+  is no "unget n bytes". You can only do an unget after a matching get.
+  However, data pushed back by an unget is available to all read operations.
+  The reason for this is that unget is defined in terms of its effect on
+  the underlying buffer (namely, it rebuffers data as if it had been
+  buffered but not read yet). IOS reserves the right to perform large block
+  operations directly, bypassing the buffer. In such a case data was
+  never buffered, so "rebuffering" has no meaning (i.e. there is no
+  correspondence between the buffer and the physical stream).
+
+  Single-bit I/O is able to write partial bytes ONLY IF the stream supports
+  seeking. Also, line buffering is not well-defined in the context of
+  single-bit I/O, so it might not do what you expect.
+
+  implementation notes:
+  in order to know where we are in a file, we must ensure the buffer
+  is only populated from the underlying stream starting with p==buf.
+
+  to switch from writing to reading: flush, set p=buf, cnt=0
+  to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0
+
+  when writing: buf starts at curr. physical stream pos, p - buf is how
+  many bytes we've written logically. cnt==0
+
+  dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from
+  reading to writing, where we might be in the middle of a byte without
+  having changed it.
+
+  to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
+  seek back by the same amount (undo it). write onto those bits. now set
+  the dirty bit. in this state, we can bit-read up to the end of the byte,
+  then formally switch to the read state using flush.
+
+  design points:
+  - data-source independence, including memory streams
+  - support 64-bit and large files
+  - efficient, low-latency buffering
+  - unget
+  - expose buffer to user, allow user-owned buffers
+  - allow direct I/O, don't always go through buffer
+  - buffer-internal seeking. makes seeking back 1-2 bytes very fast,
+    and makes it possible for sockets where it otherwise wouldn't be
+  - special support for utf8
+  - tries to allow switching between reading and writing
+  - type-aware functions with byte-order swapping service
+  - position counter for meaningful data offsets with sockets
+
+  note:
+  the current code needs to be mostly rewritten. the design should be
+  as follows:
+
+  the buffer is a view of part of a file/stream. you can seek, read, and
+  write around in it as much as you like, as if it were just a string.
+
+  we keep track of the part of the buffer that's invalid (written to).
+  we remember whether the position of the underlying stream is aligned
+  with the end of the buffer (reading mode) or the beginning (writing mode).
+
+  based on this info, we might have to seek back before doing a flush.
+
+  as optimizations, we do no writing if the buffer isn't "dirty", and we
+  do no reading if the data will only be overwritten.
+*/
+
+#endif
diff --git a/llt/llt.h b/llt/llt.h
new file mode 100644
index 0000000..4a789e8
--- /dev/null
+++ b/llt/llt.h
@@ -0,0 +1,17 @@
+#ifndef __LLT_H_
+#define __LLT_H_
+
+#include "dtypes.h"
+#include "utils.h"
+#include "utf8.h"
+#include "ios.h"
+#include "socket.h"
+#include "timefuncs.h"
+#include "hashing.h"
+#include "ptrhash.h"
+#include "bitvector.h"
+#include "dirpath.h"
+
+void llt_init();
+
+#endif
diff --git a/llt/lookup3.c b/llt/lookup3.c
new file mode 100644
index 0000000..32c3876
--- /dev/null
+++ b/llt/lookup3.c
@@ -0,0 +1,984 @@
+/*
+-------------------------------------------------------------------------------
+lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+
+These are functions for producing 32-bit hashes for hash table lookup.
+hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 
+are externally useful functions.  Routines to test the hash are included 
+if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+the public domain.  It has no warranty.
+
+You probably want to use hashlittle().  hashlittle() and hashbig()
+hash byte arrays.  hashlittle() is is faster than hashbig() on
+little-endian machines.  Intel and AMD are little-endian machines.
+On second thought, you probably want hashlittle2(), which is identical to
+hashlittle() except it returns two 32-bit hashes for the price of one.  
+You could implement hashbig2() if you wanted but I haven't bothered here.
+
+If you want to find a hash of, say, exactly 7 integers, do
+  a = i1;  b = i2;  c = i3;
+  mix(a,b,c);
+  a += i4; b += i5; c += i6;
+  mix(a,b,c);
+  a += i7;
+  final(a,b,c);
+then use c as the hash value.  If you have a variable length array of
+4-byte integers to hash, use hashword().  If you have a byte array (like
+a character string), use hashlittle().  If you have several byte arrays, or
+a mix of things, see the comments above hashlittle().  
+
+Why is this so big?  I read 12 bytes at a time into 3 4-byte integers, 
+then mix those integers.  This is fast (you can do a lot more thorough
+mixing with 12*3 instructions on 3 integers than you can with 3 instructions
+on 1 byte), but shoehorning those bytes into integers efficiently is messy.
+-------------------------------------------------------------------------------
+*/
+//#define SELF_TEST 1
+
+#include <stdio.h>      /* defines printf for tests */
+#include <time.h>       /* defines time_t for timings in the test */
+#ifndef WIN32
+#include <stdint.h>     /* defines uint32_t etc */
+#include <sys/param.h>  /* attempt to define endianness */
+#else
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+#endif
+#ifdef LINUX
+# include <endian.h>    /* attempt to define endianness */
+#endif
+
+/*
+ * My best guess at if you are big-endian or little-endian.  This may
+ * need adjustment.
+ */
+#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
+     __BYTE_ORDER == __LITTLE_ENDIAN) || \
+    (defined(i386) || defined(__i386__) || defined(__i486__) || \
+     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
+# define HASH_LITTLE_ENDIAN 1
+# define HASH_BIG_ENDIAN 0
+#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
+       __BYTE_ORDER == __BIG_ENDIAN) || \
+      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 1
+#else
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 0
+#endif
+
+#define hashsize(n) ((uint32_t)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*
+-------------------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+  of top bits of (a,b,c), or in any combination of bottom bits of
+  (a,b,c).
+* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+  is commonly produced by subtraction) look like a single 1-bit
+  difference.
+* the base values were pseudorandom, all zero but one bit set, or 
+  all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+    4  6  8 16 19  4
+    9 15  3 18 27 15
+   14  9  3  7 17  3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta.  I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose 
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche.  There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a.  The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism.  Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism.  I did what I could.  Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different.  This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+  of top bits of (a,b,c), or in any combination of bottom bits of
+  (a,b,c).
+* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+  is commonly produced by subtraction) look like a single 1-bit
+  difference.
+* the base values were pseudorandom, all zero but one bit set, or 
+  all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+  4  8 15 26 3 22 24
+ 10  8 15 26 3 22 24
+ 11  8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c,4);  \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
+}
+
+/*
+--------------------------------------------------------------------
+ This works on all machines.  To be useful, it requires
+ -- that the key be an array of uint32_t's, and
+ -- that the length be the number of uint32_t's in the key
+
+ The function hashword() is identical to hashlittle() on little-endian
+ machines, and identical to hashbig() on big-endian machines,
+ except that the length has to be measured in uint32_ts rather than in
+ bytes.  hashlittle() is more complicated than hashword() only because
+ hashlittle() has to dance around fitting the key bytes into registers.
+--------------------------------------------------------------------
+*/
+uint32_t hashword(
+const uint32_t *k,                   /* the key, an array of uint32_t values */
+size_t          length,               /* the length of the key, in uint32_ts */
+uint32_t        initval)         /* the previous hash, or an arbitrary value */
+{
+  uint32_t a,b,c;
+
+  /* Set up the internal state */
+  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+
+  /*------------------------------------------------- handle most of the key */
+  while (length > 3)
+  {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 3;
+    k += 3;
+  }
+
+  /*------------------------------------------- handle the last 3 uint32_t's */
+  switch(length)                     /* all the case statements fall through */
+  { 
+  case 3 : c+=k[2];
+  case 2 : b+=k[1];
+  case 1 : a+=k[0];
+    final(a,b,c);
+  case 0:     /* case 0: nothing left to add */
+    break;
+  }
+  /*------------------------------------------------------ report the result */
+  return c;
+}
+
+/*
+--------------------------------------------------------------------
+hashword2() -- same as hashword(), but take two seeds and return two
+32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
+both be initialized with seeds.  If you pass in (*pb)==0, the output 
+(*pc) will be the same as the return value from hashword().
+--------------------------------------------------------------------
+*/
+void hashword2 (
+const uint32_t *k,                   /* the key, an array of uint32_t values */
+size_t          length,               /* the length of the key, in uint32_ts */
+uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
+uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
+{
+  uint32_t a,b,c;
+
+  /* Set up the internal state */
+  a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
+  c += *pb;
+
+  /*------------------------------------------------- handle most of the key */
+  while (length > 3)
+  {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 3;
+    k += 3;
+  }
+
+  /*------------------------------------------- handle the last 3 uint32_t's */
+  switch(length)                     /* all the case statements fall through */
+  { 
+  case 3 : c+=k[2];
+  case 2 : b+=k[1];
+  case 1 : a+=k[0];
+    final(a,b,c);
+  case 0:     /* case 0: nothing left to add */
+    break;
+  }
+  /*------------------------------------------------------ report the result */
+  *pc=c; *pb=b;
+}
+
+#if 0
+/*
+-------------------------------------------------------------------------------
+hashlittle() -- hash a variable-length key into a 32-bit value
+  k       : the key (the unaligned variable-length array of bytes)
+  length  : the length of the key, counting by bytes
+  initval : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
+
+By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
+code any way you wish, private, educational, or commercial.  It's free.
+
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable.  Do NOT use for cryptographic purposes.
+-------------------------------------------------------------------------------
+*/
+
+uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
+{
+  uint32_t a,b,c;                                          /* internal state */
+  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
+
+  /* Set up the internal state */
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+  u.ptr = key;
+  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+    const uint8_t  *k8;
+
+    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += k[0];
+      b += k[1];
+      c += k[2];
+      mix(a,b,c);
+      length -= 12;
+      k += 3;
+    }
+
+    /*----------------------------- handle the last (probably partial) block */
+    /* 
+     * "k[2]&0xffffff" actually reads beyond the end of the string, but
+     * then masks off the part it's not allowed to read.  Because the
+     * string is aligned, the masked-off tail is in the same word as the
+     * rest of the string.  Every machine with memory protection I've seen
+     * does it on word boundaries, so is OK with this.  But VALGRIND will
+     * still catch it and complain.  The masking trick does make the hash
+     * noticably faster for short strings (like English words).
+     */
+#ifndef VALGRIND
+
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+    case 5 : b+=k[1]&0xff; a+=k[0]; break;
+    case 4 : a+=k[0]; break;
+    case 3 : a+=k[0]&0xffffff; break;
+    case 2 : a+=k[0]&0xffff; break;
+    case 1 : a+=k[0]&0xff; break;
+    case 0 : return c;              /* zero length strings require no mixing */
+    }
+
+#else /* make valgrind happy */
+
+    k8 = (const uint8_t *)k;
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
+    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
+    case 9 : c+=k8[8];                   /* fall through */
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
+    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
+    case 5 : b+=k8[4];                   /* fall through */
+    case 4 : a+=k[0]; break;
+    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
+    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
+    case 1 : a+=k8[0]; break;
+    case 0 : return c;
+    }
+
+#endif /* !valgrind */
+
+  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
+    const uint8_t  *k8;
+
+    /*--------------- all but last block: aligned reads and different mixing */
+    while (length > 12)
+    {
+      a += k[0] + (((uint32_t)k[1])<<16);
+      b += k[2] + (((uint32_t)k[3])<<16);
+      c += k[4] + (((uint32_t)k[5])<<16);
+      mix(a,b,c);
+      length -= 12;
+      k += 6;
+    }
+
+    /*----------------------------- handle the last (probably partial) block */
+    k8 = (const uint8_t *)k;
+    switch(length)
+    {
+    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+             b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
+    case 10: c+=k[4];
+             b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 9 : c+=k8[8];                      /* fall through */
+    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
+    case 6 : b+=k[2];
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 5 : b+=k8[4];                      /* fall through */
+    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
+    case 2 : a+=k[0];
+             break;
+    case 1 : a+=k8[0];
+             break;
+    case 0 : return c;                     /* zero length requires no mixing */
+    }
+
+  } else {                        /* need to read the key one byte at a time */
+    const uint8_t *k = (const uint8_t *)key;
+
+    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += k[0];
+      a += ((uint32_t)k[1])<<8;
+      a += ((uint32_t)k[2])<<16;
+      a += ((uint32_t)k[3])<<24;
+      b += k[4];
+      b += ((uint32_t)k[5])<<8;
+      b += ((uint32_t)k[6])<<16;
+      b += ((uint32_t)k[7])<<24;
+      c += k[8];
+      c += ((uint32_t)k[9])<<8;
+      c += ((uint32_t)k[10])<<16;
+      c += ((uint32_t)k[11])<<24;
+      mix(a,b,c);
+      length -= 12;
+      k += 12;
+    }
+
+    /*-------------------------------- last block: affect all 32 bits of (c) */
+    switch(length)                   /* all the case statements fall through */
+    {
+    case 12: c+=((uint32_t)k[11])<<24;
+    case 11: c+=((uint32_t)k[10])<<16;
+    case 10: c+=((uint32_t)k[9])<<8;
+    case 9 : c+=k[8];
+    case 8 : b+=((uint32_t)k[7])<<24;
+    case 7 : b+=((uint32_t)k[6])<<16;
+    case 6 : b+=((uint32_t)k[5])<<8;
+    case 5 : b+=k[4];
+    case 4 : a+=((uint32_t)k[3])<<24;
+    case 3 : a+=((uint32_t)k[2])<<16;
+    case 2 : a+=((uint32_t)k[1])<<8;
+    case 1 : a+=k[0];
+             break;
+    case 0 : return c;
+    }
+  }
+
+  final(a,b,c);
+  return c;
+}
+#endif
+
+/*
+ * hashlittle2: return 2 32-bit hash values
+ *
+ * This is identical to hashlittle(), except it returns two 32-bit hash
+ * values instead of just one.  This is good enough for hash table
+ * lookup with 2^^64 buckets, or if you want a second hash if you're not
+ * happy with the first, or if you want a probably-unique 64-bit ID for
+ * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
+ * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
+ */
+void hashlittle2( 
+  const void *key,       /* the key to hash */
+  size_t      length,    /* length of the key */
+  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
+  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
+{
+  uint32_t a,b,c;                                          /* internal state */
+  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
+
+  /* Set up the internal state */
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
+  c += *pb;
+
+  u.ptr = key;
+  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+    const uint8_t  *k8;
+
+    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += k[0];
+      b += k[1];
+      c += k[2];
+      mix(a,b,c);
+      length -= 12;
+      k += 3;
+    }
+
+    /*----------------------------- handle the last (probably partial) block */
+    /* 
+     * "k[2]&0xffffff" actually reads beyond the end of the string, but
+     * then masks off the part it's not allowed to read.  Because the
+     * string is aligned, the masked-off tail is in the same word as the
+     * rest of the string.  Every machine with memory protection I've seen
+     * does it on word boundaries, so is OK with this.  But VALGRIND will
+     * still catch it and complain.  The masking trick does make the hash
+     * noticably faster for short strings (like English words).
+     */
+#ifndef VALGRIND
+    (void)k8;
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+    case 5 : b+=k[1]&0xff; a+=k[0]; break;
+    case 4 : a+=k[0]; break;
+    case 3 : a+=k[0]&0xffffff; break;
+    case 2 : a+=k[0]&0xffff; break;
+    case 1 : a+=k[0]&0xff; break;
+    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
+    }
+
+#else /* make valgrind happy */
+
+    k8 = (const uint8_t *)k;
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
+    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
+    case 9 : c+=k8[8];                   /* fall through */
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
+    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
+    case 5 : b+=k8[4];                   /* fall through */
+    case 4 : a+=k[0]; break;
+    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
+    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
+    case 1 : a+=k8[0]; break;
+    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
+    }
+
+#endif /* !valgrind */
+
+  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
+    const uint8_t  *k8;
+
+    /*--------------- all but last block: aligned reads and different mixing */
+    while (length > 12)
+    {
+      a += k[0] + (((uint32_t)k[1])<<16);
+      b += k[2] + (((uint32_t)k[3])<<16);
+      c += k[4] + (((uint32_t)k[5])<<16);
+      mix(a,b,c);
+      length -= 12;
+      k += 6;
+    }
+
+    /*----------------------------- handle the last (probably partial) block */
+    k8 = (const uint8_t *)k;
+    switch(length)
+    {
+    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+             b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
+    case 10: c+=k[4];
+             b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 9 : c+=k8[8];                      /* fall through */
+    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
+    case 6 : b+=k[2];
+             a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 5 : b+=k8[4];                      /* fall through */
+    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+             break;
+    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
+    case 2 : a+=k[0];
+             break;
+    case 1 : a+=k8[0];
+             break;
+    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
+    }
+
+  } else {                        /* need to read the key one byte at a time */
+    const uint8_t *k = (const uint8_t *)key;
+
+    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += k[0];
+      a += ((uint32_t)k[1])<<8;
+      a += ((uint32_t)k[2])<<16;
+      a += ((uint32_t)k[3])<<24;
+      b += k[4];
+      b += ((uint32_t)k[5])<<8;
+      b += ((uint32_t)k[6])<<16;
+      b += ((uint32_t)k[7])<<24;
+      c += k[8];
+      c += ((uint32_t)k[9])<<8;
+      c += ((uint32_t)k[10])<<16;
+      c += ((uint32_t)k[11])<<24;
+      mix(a,b,c);
+      length -= 12;
+      k += 12;
+    }
+
+    /*-------------------------------- last block: affect all 32 bits of (c) */
+    switch(length)                   /* all the case statements fall through */
+    {
+    case 12: c+=((uint32_t)k[11])<<24;
+    case 11: c+=((uint32_t)k[10])<<16;
+    case 10: c+=((uint32_t)k[9])<<8;
+    case 9 : c+=k[8];
+    case 8 : b+=((uint32_t)k[7])<<24;
+    case 7 : b+=((uint32_t)k[6])<<16;
+    case 6 : b+=((uint32_t)k[5])<<8;
+    case 5 : b+=k[4];
+    case 4 : a+=((uint32_t)k[3])<<24;
+    case 3 : a+=((uint32_t)k[2])<<16;
+    case 2 : a+=((uint32_t)k[1])<<8;
+    case 1 : a+=k[0];
+             break;
+    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
+    }
+  }
+
+  final(a,b,c);
+  *pc=c; *pb=b;
+}
+
+
+#if 0
+/*
+ * hashbig():
+ * This is the same as hashword() on big-endian machines.  It is different
+ * from hashlittle() on all machines.  hashbig() takes advantage of
+ * big-endian byte ordering. 
+ */
+uint32_t hashbig( const void *key, size_t length, uint32_t initval)
+{
+  uint32_t a,b,c;
+  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
+
+  /* Set up the internal state */
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+  u.ptr = key;
+  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
+    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+    const uint8_t  *k8;
+
+    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += k[0];
+      b += k[1];
+      c += k[2];
+      mix(a,b,c);
+      length -= 12;
+      k += 3;
+    }
+
+    /*----------------------------- handle the last (probably partial) block */
+    /* 
+     * "k[2]<<8" actually reads beyond the end of the string, but
+     * then shifts out the part it's not allowed to read.  Because the
+     * string is aligned, the illegal read is in the same word as the
+     * rest of the string.  Every machine with memory protection I've seen
+     * does it on word boundaries, so is OK with this.  But VALGRIND will
+     * still catch it and complain.  The masking trick does make the hash
+     * noticably faster for short strings (like English words).
+     */
+#ifndef VALGRIND
+
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
+    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
+    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
+    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
+    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
+    case 4 : a+=k[0]; break;
+    case 3 : a+=k[0]&0xffffff00; break;
+    case 2 : a+=k[0]&0xffff0000; break;
+    case 1 : a+=k[0]&0xff000000; break;
+    case 0 : return c;              /* zero length strings require no mixing */
+    }
+
+#else  /* make valgrind happy */
+
+    k8 = (const uint8_t *)k;
+    switch(length)                   /* all the case statements fall through */
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
+    case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
+    case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
+    case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
+    case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
+    case 4 : a+=k[0]; break;
+    case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
+    case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
+    case 1 : a+=((uint32_t)k8[0])<<24; break;
+    case 0 : return c;
+    }
+
+#endif /* !VALGRIND */
+
+  } else {                        /* need to read the key one byte at a time */
+    const uint8_t *k = (const uint8_t *)key;
+
+    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+    while (length > 12)
+    {
+      a += ((uint32_t)k[0])<<24;
+      a += ((uint32_t)k[1])<<16;
+      a += ((uint32_t)k[2])<<8;
+      a += ((uint32_t)k[3]);
+      b += ((uint32_t)k[4])<<24;
+      b += ((uint32_t)k[5])<<16;
+      b += ((uint32_t)k[6])<<8;
+      b += ((uint32_t)k[7]);
+      c += ((uint32_t)k[8])<<24;
+      c += ((uint32_t)k[9])<<16;
+      c += ((uint32_t)k[10])<<8;
+      c += ((uint32_t)k[11]);
+      mix(a,b,c);
+      length -= 12;
+      k += 12;
+    }
+
+    /*-------------------------------- last block: affect all 32 bits of (c) */
+    switch(length)                   /* all the case statements fall through */
+    {
+    case 12: c+=k[11];
+    case 11: c+=((uint32_t)k[10])<<8;
+    case 10: c+=((uint32_t)k[9])<<16;
+    case 9 : c+=((uint32_t)k[8])<<24;
+    case 8 : b+=k[7];
+    case 7 : b+=((uint32_t)k[6])<<8;
+    case 6 : b+=((uint32_t)k[5])<<16;
+    case 5 : b+=((uint32_t)k[4])<<24;
+    case 4 : a+=k[3];
+    case 3 : a+=((uint32_t)k[2])<<8;
+    case 2 : a+=((uint32_t)k[1])<<16;
+    case 1 : a+=((uint32_t)k[0])<<24;
+             break;
+    case 0 : return c;
+    }
+  }
+
+  final(a,b,c);
+  return c;
+}
+#endif
+
+#ifdef SELF_TEST
+
+/* used for timings */
+void driver1()
+{
+  uint8_t buf[256];
+  uint32_t i;
+  uint32_t h=0;
+  time_t a,z;
+
+  time(&a);
+  for (i=0; i<256; ++i) buf[i] = 'x';
+  for (i=0; i<1; ++i) 
+  {
+    h = hashlittle(&buf[0],1,h);
+  }
+  time(&z);
+  if (z-a > 0) printf("time %d %.8x\n", z-a, h);
+}
+
+/* check that every input bit changes every output bit half the time */
+#define HASHSTATE 1
+#define HASHLEN   1
+#define MAXPAIR 60
+#define MAXLEN  70
+void driver2()
+{
+  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
+  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
+  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
+  uint32_t x[HASHSTATE],y[HASHSTATE];
+  uint32_t hlen;
+
+  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
+  for (hlen=0; hlen < MAXLEN; ++hlen)
+  {
+    z=0;
+    for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
+    {
+      for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
+      {
+	for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
+	{
+	  for (l=0; l<HASHSTATE; ++l)
+	    e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
+
+      	  /*---- check that every output bit is affected by that input bit */
+	  for (k=0; k<MAXPAIR; k+=2)
+	  { 
+	    uint32_t finished=1;
+	    /* keys have one bit different */
+	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
+	    /* have a and b be two keys differing in only one bit */
+	    a[i] ^= (k<<j);
+	    a[i] ^= (k>>(8-j));
+	     c[0] = hashlittle(a, hlen, m);
+	    b[i] ^= ((k+1)<<j);
+	    b[i] ^= ((k+1)>>(8-j));
+	     d[0] = hashlittle(b, hlen, m);
+	    /* check every bit is 1, 0, set, and not set at least once */
+	    for (l=0; l<HASHSTATE; ++l)
+	    {
+	      e[l] &= (c[l]^d[l]);
+	      f[l] &= ~(c[l]^d[l]);
+	      g[l] &= c[l];
+	      h[l] &= ~c[l];
+	      x[l] &= d[l];
+	      y[l] &= ~d[l];
+	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
+	    }
+	    if (finished) break;
+	  }
+	  if (k>z) z=k;
+	  if (k==MAXPAIR) 
+	  {
+	     printf("Some bit didn't change: ");
+	     printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
+	            e[0],f[0],g[0],h[0],x[0],y[0]);
+	     printf("i %d j %d m %d len %d\n", i, j, m, hlen);
+	  }
+	  if (z==MAXPAIR) goto done;
+	}
+      }
+    }
+   done:
+    if (z < MAXPAIR)
+    {
+      printf("Mix success  %2d bytes  %2d initvals  ",i,m);
+      printf("required  %d  trials\n", z/2);
+    }
+  }
+  printf("\n");
+}
+
+/* Check for reading beyond the end of the buffer and alignment problems */
+void driver3()
+{
+  uint8_t buf[MAXLEN+20], *b;
+  uint32_t len;
+  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
+  uint32_t h;
+  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
+  uint32_t i;
+  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
+  uint32_t j;
+  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
+  uint32_t ref,x,y;
+  uint8_t *p;
+
+  printf("Endianness.  These lines should all be the same (for values filled in):\n");
+  printf("%.8x                            %.8x                            %.8x\n",
+         hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
+         hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
+         hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
+  p = q;
+  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+  p = &qq[1];
+  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+  p = &qqq[2];
+  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+  p = &qqqq[3];
+  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+  printf("\n");
+
+  /* check that hashlittle2 and hashlittle produce the same results */
+  i=47; j=0;
+  hashlittle2(q, sizeof(q), &i, &j);
+  if (hashlittle(q, sizeof(q), 47) != i)
+    printf("hashlittle2 and hashlittle mismatch\n");
+
+  /* check that hashword2 and hashword produce the same results */
+  len = 0xdeadbeef;
+  i=47, j=0;
+  hashword2(&len, 1, &i, &j);
+  if (hashword(&len, 1, 47) != i)
+    printf("hashword2 and hashword mismatch %x %x\n", 
+	   i, hashword(&len, 1, 47));
+
+  /* check hashlittle doesn't read before or after the ends of the string */
+  for (h=0, b=buf+1; h<8; ++h, ++b)
+  {
+    for (i=0; i<MAXLEN; ++i)
+    {
+      len = i;
+      for (j=0; j<i; ++j) *(b+j)=0;
+
+      /* these should all be equal */
+      ref = hashlittle(b, len, (uint32_t)1);
+      *(b+i)=(uint8_t)~0;
+      *(b-1)=(uint8_t)~0;
+      x = hashlittle(b, len, (uint32_t)1);
+      y = hashlittle(b, len, (uint32_t)1);
+      if ((ref != x) || (ref != y)) 
+      {
+	printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
+               h, i);
+      }
+    }
+  }
+}
+
+/* check for problems with nulls */
+ void driver4()
+{
+  uint8_t buf[1];
+  uint32_t h,i,state[HASHSTATE];
+
+
+  buf[0] = ~0;
+  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
+  printf("These should all be different\n");
+  for (i=0, h=0; i<8; ++i)
+  {
+    h = hashlittle(buf, 0, h);
+    printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
+  }
+}
+
+
+int main()
+{
+  driver1();   /* test that the key is hashed: used for timings */
+  driver2();   /* test that whole key is hashed thoroughly */
+  driver3();   /* test that nothing but the key is hashed */
+  driver4();   /* test hashing multiple buffers (all buffers are null) */
+  return 1;
+}
+
+#endif  /* SELF_TEST */
diff --git a/llt/mt19937ar.c b/llt/mt19937ar.c
new file mode 100644
index 0000000..63b8107
--- /dev/null
+++ b/llt/mt19937ar.c
@@ -0,0 +1,193 @@
+/* 
+   A C-program for MT19937, with initialization improved 2002/1/26.
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   Before using, initialize the state by using init_genrand(seed)  
+   or init_by_array(init_key, key_length).
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.                          
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote 
+        products derived from this software without specific prior written 
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+   Any feedback is very welcome.
+   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+*/
+
+#include <stdio.h>
+
+/* Period parameters */  
+#define mtN 624
+#define mtM 397
+#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
+#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
+#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
+
+static unsigned long mt[mtN]; /* the array for the state vector  */
+static int mti=mtN+1; /* mti==mtN+1 means mt[mtN] is not initialized */
+
+/* initializes mt[mtN] with a seed */
+void init_genrand(unsigned long s)
+{
+    mt[0]= s & 0xffffffffUL;
+    for (mti=1; mti<mtN; mti++) {
+        mt[mti] = 
+	    (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti); 
+        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+        /* In the previous versions, MSBs of the seed affect   */
+        /* only MSBs of the array mt[].                        */
+        /* 2002/01/09 modified by Makoto Matsumoto             */
+        mt[mti] &= 0xffffffffUL;
+        /* for >32 bit machines */
+    }
+}
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+/* slight change for C++, 2004/2/26 */
+void init_by_array(unsigned long init_key[], int key_length)
+{
+    int i, j, k;
+    init_genrand(19650218UL);
+    i=1; j=0;
+    k = (mtN>key_length ? mtN : key_length);
+    for (; k; k--) {
+        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
+          + init_key[j] + j; /* non linear */
+        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+        i++; j++;
+        if (i>=mtN) { mt[0] = mt[mtN-1]; i=1; }
+        if (j>=key_length) j=0;
+    }
+    for (k=mtN-1; k; k--) {
+        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
+          - i; /* non linear */
+        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+        i++;
+        if (i>=mtN) { mt[0] = mt[mtN-1]; i=1; }
+    }
+
+    mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ 
+}
+
+/* generates a random number on [0,0xffffffff]-interval */
+unsigned long genrand_int32(void)
+{
+    unsigned long y;
+    static unsigned long mag01[2]={0x0UL, MATRIX_A};
+    /* mag01[x] = x * MATRIX_A  for x=0,1 */
+
+    if (mti >= mtN) { /* generate mtN words at one time */
+        int kk;
+
+        if (mti == mtN+1)   /* if init_genrand() has not been called, */
+            init_genrand(5489UL); /* a default initial seed is used */
+
+        for (kk=0;kk<mtN-mtM;kk++) {
+            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
+            mt[kk] = mt[kk+mtM] ^ (y >> 1) ^ mag01[y & 0x1UL];
+        }
+        for (;kk<mtN-1;kk++) {
+            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
+            mt[kk] = mt[kk+(mtM-mtN)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+        }
+        y = (mt[mtN-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
+        mt[mtN-1] = mt[mtM-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+        mti = 0;
+    }
+  
+    y = mt[mti++];
+
+    /* Tempering */
+    y ^= (y >> 11);
+    y ^= (y << 7) & 0x9d2c5680UL;
+    y ^= (y << 15) & 0xefc60000UL;
+    y ^= (y >> 18);
+
+    return y;
+}
+
+#if 0
+/* generates a random number on [0,0x7fffffff]-interval */
+long genrand_int31(void)
+{
+    return (long)(genrand_int32()>>1);
+}
+
+/* generates a random number on [0,1]-real-interval */
+double genrand_real1(void)
+{
+    return genrand_int32()*(1.0/4294967295.0); 
+    /* divided by 2^32-1 */ 
+}
+
+/* generates a random number on [0,1)-real-interval */
+double genrand_real2(void)
+{
+    return genrand_int32()*(1.0/4294967296.0); 
+    /* divided by 2^32 */
+}
+
+/* generates a random number on (0,1)-real-interval */
+double genrand_real3(void)
+{
+    return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); 
+    /* divided by 2^32 */
+}
+
+/* generates a random number on [0,1) with 53-bit resolution*/
+double genrand_res53(void) 
+{ 
+    unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; 
+    return(a*67108864.0+b)*(1.0/9007199254740992.0); 
+} 
+#endif
+/* These real versions are due to Isaku Wada, 2002/01/09 added */
+#if 0
+int main(void)
+{
+    int i;
+    unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;
+    init_by_array(init, length);
+    printf("1000 outputs of genrand_int32()\n");
+    for (i=0; i<1000; i++) {
+      printf("%10lu ", genrand_int32());
+      if (i%5==4) printf("\n");
+    }
+    printf("\n1000 outputs of genrand_real2()\n");
+    for (i=0; i<1000; i++) {
+      printf("%10.8f ", genrand_real2());
+      if (i%5==4) printf("\n");
+    }
+    return 0;
+}
+#endif
diff --git a/llt/notes b/llt/notes
new file mode 100644
index 0000000..73cb47e
--- /dev/null
+++ b/llt/notes
@@ -0,0 +1,44 @@
+my c library (jlibc)
+------------
+
+* bytevector utilities: memswap, memreverse, swap_el, etc.
+* hashing, random#s: int32hash, int64hash, int64to32hash, lookup3, ptrhash
+* utf8
+* bitvector
+- iostream, socket, asynch io
+- cross-platform pathnames, cwd, exename, date/time, etc.
+* floating point number utils: comparison, print_real, print_cplx
+- strtab (with prefix searching)
+
+(- pool allocator with hooks for gc (sweep function))
+(- list (dequeue))
+(- sort: msort list, qsort numbers) not too important since stdlib has qsort
+
+- use non-allocating APIs. this means the interface never allocates or
+  frees memory for you. you have to manage space for objects yourself.
+
+- separate math library. includes numal, cephes, my complex number routines,
+  more special functions
+
+
+stream redesign:
+
+memstream, single-descriptor-backed, pipe (read/write on separate descriptors)
+
+do our own buffering, so we can implement getline without seek/skip
+
+all provided functions must be in terms of read,write,poll,flush only
+seek/skip will be available, but only works on files and strings
+change semantics of bit i/o so it doesn't require skip(-1)
+
+compare our implementation to somebody else's fread,fwrite,etc.
+
+
+cool trick for faking string streams with stdio:
+
+    char buf[256];
+    v = list2(number(6), number(4));
+    FILE *f = fopen("/dev/null", "a");
+    setbuffer(f, buf, sizeof(buf));
+    print(f, v, 0);
+    printf("got '%s'\n", buf);
diff --git a/llt/operators.c b/llt/operators.c
new file mode 100644
index 0000000..1f6d462
--- /dev/null
+++ b/llt/operators.c
@@ -0,0 +1,316 @@
+#include <limits.h>
+#include <assert.h>
+#include "dtypes.h"
+#include "utils.h"
+#include "ieee754.h"
+
+// given a number, determine an appropriate type for storing it
+#if 0
+numerictype_t effective_numerictype(double r)
+{
+    double fp;
+
+    fp = fpart(r);
+    if (fp != 0 || r > U64_MAX || r < S64_MIN) {
+        if (r > FLT_MAX || r < -FLT_MAX || (fabs(r) < FLT_MIN)) {
+            return T_DOUBLE;
+        }
+        else {
+            return T_FLOAT;
+        }
+    }
+    else if (r >= SCHAR_MIN && r <= SCHAR_MAX) {
+        return T_INT8;
+    }
+    else if (r >= SHRT_MIN && r <= SHRT_MAX) {
+        return T_INT16;
+    }
+    else if (r >= INT_MIN && r <= INT_MAX) {
+        return T_INT32;
+    }
+    else if (r <= S64_MAX) {
+        return T_INT64;
+    }
+    return T_UINT64;
+}
+#else
+// simpler version implementing a smaller preferred type repertoire
+numerictype_t effective_numerictype(double r)
+{
+    double fp;
+
+    fp = fpart(r);
+    if (fp != 0 || r > U64_MAX || r < S64_MIN) {
+        return T_DOUBLE;
+    }
+    else if (r >= INT_MIN && r <= INT_MAX) {
+        return T_INT32;
+    }
+    else if (r <= S64_MAX) {
+        return T_INT64;
+    }
+    return T_UINT64;
+}
+#endif
+
+double conv_to_double(void *data, numerictype_t tag)
+{
+    double d=0;
+    switch (tag) {
+    case T_INT8:   d = (double)*(int8_t*)data; break;
+    case T_UINT8:  d = (double)*(uint8_t*)data; break;
+    case T_INT16:  d = (double)*(int16_t*)data; break;
+    case T_UINT16: d = (double)*(uint16_t*)data; break;
+    case T_INT32:  d = (double)*(int32_t*)data; break;
+    case T_UINT32: d = (double)*(uint32_t*)data; break;
+    case T_INT64:
+        d = (double)*(int64_t*)data;
+        if (d > 0 && *(int64_t*)data < 0)  // can happen!
+            d = -d;
+        break;
+    case T_UINT64: d = (double)*(uint64_t*)data; break;
+    case T_FLOAT:  d = (double)*(float*)data; break;
+    case T_DOUBLE: return *(double*)data;
+    }
+    return d;
+}
+
+void conv_from_double(void *dest, double d, numerictype_t tag)
+{
+    switch (tag) {
+    case T_INT8:   *(int8_t*)dest = d; break;
+    case T_UINT8:  *(uint8_t*)dest = d; break;
+    case T_INT16:  *(int16_t*)dest = d; break;
+    case T_UINT16: *(uint16_t*)dest = d; break;
+    case T_INT32:  *(int32_t*)dest = d; break;
+    case T_UINT32: *(uint32_t*)dest = d; break;
+    case T_INT64:
+        *(int64_t*)dest = d;
+        if (d > 0 && *(int64_t*)dest < 0)  // 0x8000000000000000 is a bitch
+            *(int64_t*)dest = S64_MAX;
+        break;
+    case T_UINT64: *(uint64_t*)dest = d; break;
+    case T_FLOAT:  *(float*)dest = d; break;
+    case T_DOUBLE: *(double*)dest = d; break;
+    }
+}
+
+#define CONV_TO_INTTYPE(type)                               \
+type##_t conv_to_##type(void *data, numerictype_t tag)      \
+{                                                           \
+    type##_t i=0;                                           \
+    switch (tag) {                                          \
+    case T_INT8:   i = (type##_t)*(int8_t*)data; break;     \
+    case T_UINT8:  i = (type##_t)*(uint8_t*)data; break;    \
+    case T_INT16:  i = (type##_t)*(int16_t*)data; break;    \
+    case T_UINT16: i = (type##_t)*(uint16_t*)data; break;   \
+    case T_INT32:  i = (type##_t)*(int32_t*)data; break;    \
+    case T_UINT32: i = (type##_t)*(uint32_t*)data; break;   \
+    case T_INT64:  i = (type##_t)*(int64_t*)data; break;    \
+    case T_UINT64: i = (type##_t)*(uint64_t*)data; break;   \
+    case T_FLOAT:  i = (type##_t)*(float*)data; break;      \
+    case T_DOUBLE: i = (type##_t)*(double*)data; break;     \
+    }                                                       \
+    return i;                                               \
+}
+
+CONV_TO_INTTYPE(int64)
+CONV_TO_INTTYPE(uint64)
+CONV_TO_INTTYPE(int32)
+CONV_TO_INTTYPE(uint32)
+
+int cmp_same_lt(void *a, void *b, numerictype_t tag)
+{
+    switch (tag) {
+    case T_INT8:   return *(int8_t*)a < *(int8_t*)b;
+    case T_UINT8:  return *(uint8_t*)a < *(uint8_t*)b;
+    case T_INT16:  return *(int16_t*)a < *(int16_t*)b;
+    case T_UINT16: return *(uint16_t*)a < *(uint16_t*)b;
+    case T_INT32:  return *(int32_t*)a < *(int32_t*)b;
+    case T_UINT32: return *(uint32_t*)a < *(uint32_t*)b;
+    case T_INT64:  return *(int64_t*)a < *(int64_t*)b;
+    case T_UINT64: return *(uint64_t*)a < *(uint64_t*)b;
+    case T_FLOAT:  return *(float*)a < *(float*)b;
+    case T_DOUBLE: return *(double*)a < *(double*)b;
+    }
+    return 0;
+}
+
+int cmp_same_eq(void *a, void *b, numerictype_t tag)
+{
+    switch (tag) {
+    case T_INT8:   return *(int8_t*)a == *(int8_t*)b;
+    case T_UINT8:  return *(uint8_t*)a == *(uint8_t*)b;
+    case T_INT16:  return *(int16_t*)a == *(int16_t*)b;
+    case T_UINT16: return *(uint16_t*)a == *(uint16_t*)b;
+    case T_INT32:  return *(int32_t*)a == *(int32_t*)b;
+    case T_UINT32: return *(uint32_t*)a == *(uint32_t*)b;
+    case T_INT64:  return *(int64_t*)a == *(int64_t*)b;
+    case T_UINT64: return *(uint64_t*)a == *(uint64_t*)b;
+    case T_FLOAT:  return flt_equals(*(float*)a, *(float*)b);
+    case T_DOUBLE: return dbl_equals(*(double*)a, *(double*)b);
+    }
+    return 0;
+}
+
+int cmp_lt(void *a, numerictype_t atag, void *b, numerictype_t btag)
+{
+    if (atag==btag)
+        return cmp_same_lt(a, b, atag);
+
+    double da = conv_to_double(a, atag);
+    double db = conv_to_double(b, btag);
+
+    // casting to double will only get the wrong answer for big int64s
+    // that differ in low bits
+    if (da < db)
+        return 1;
+    if (db < da)
+        return 0;
+
+    if (atag == T_UINT64) {
+        // this is safe because if a had been bigger than S64_MAX,
+        // we would already have concluded that it's bigger than b.
+        if (btag == T_INT64) {
+            return ((int64_t)*(uint64_t*)a < *(int64_t*)b);
+        }
+        else if (btag == T_DOUBLE) {
+            return (*(uint64_t*)a < (uint64_t)*(double*)b);
+        }
+    }
+    else if (atag == T_INT64) {
+        if (btag == T_UINT64) {
+            return (*(int64_t*)a < (int64_t)*(uint64_t*)b);
+        }
+        else if (btag == T_DOUBLE) {
+            return (*(int64_t*)a < (int64_t)*(double*)b);
+        }
+    }
+    else if (btag == T_UINT64) {
+        if (atag == T_INT64) {
+            return ((int64_t)*(uint64_t*)b > *(int64_t*)a);
+        }
+        else if (atag == T_DOUBLE) {
+            return (*(uint64_t*)b > (uint64_t)*(double*)a);
+        }
+    }
+    else if (btag == T_INT64) {
+        if (atag == T_UINT64) {
+            return (*(int64_t*)b > (int64_t)*(uint64_t*)a);
+        }
+        else if (atag == T_DOUBLE) {
+            return (*(int64_t*)b > (int64_t)*(double*)a);
+        }
+    }
+    return 0;
+}
+
+int cmp_eq(void *a, numerictype_t atag, void *b, numerictype_t btag)
+{
+    if (atag==btag)
+        return cmp_same_eq(a, b, atag);
+
+    double da = conv_to_double(a, atag);
+    double db = conv_to_double(b, btag);
+
+    if ((int)atag >= T_FLOAT && (int)btag >= T_FLOAT)
+        return dbl_equals(da, db);
+
+    if (da != db)
+        return 0;
+
+    if (atag == T_UINT64) {
+        // this is safe because if a had been bigger than S64_MAX,
+        // we would already have concluded that it's bigger than b.
+        if (btag == T_INT64) {
+            return ((int64_t)*(uint64_t*)a == *(int64_t*)b);
+        }
+        else if (btag == T_DOUBLE) {
+            return (*(uint64_t*)a == (uint64_t)*(double*)b);
+        }
+    }
+    else if (atag == T_INT64) {
+        if (btag == T_UINT64) {
+            return (*(int64_t*)a == (int64_t)*(uint64_t*)b);
+        }
+        else if (btag == T_DOUBLE) {
+            return (*(int64_t*)a == (int64_t)*(double*)b);
+        }
+    }
+    else if (btag == T_UINT64) {
+        if (atag == T_INT64) {
+            return ((int64_t)*(uint64_t*)b == *(int64_t*)a);
+        }
+        else if (atag == T_DOUBLE) {
+            return (*(uint64_t*)b == (uint64_t)*(double*)a);
+        }
+    }
+    else if (btag == T_INT64) {
+        if (atag == T_UINT64) {
+            return (*(int64_t*)b == (int64_t)*(uint64_t*)a);
+        }
+        else if (atag == T_DOUBLE) {
+            return (*(int64_t*)b == (int64_t)*(double*)a);
+        }
+    }
+    return 1;
+}
+
+#ifdef ENABLE_LLT_TEST
+void test_operators()
+{
+    int8_t i8, i8b;
+    uint8_t ui8, ui8b;
+    int16_t i16, i16b;
+    uint16_t ui16, ui16b;
+    int32_t i32, i32b;
+    uint32_t ui32, ui32b;
+    int64_t i64, i64b;
+    uint64_t ui64, ui64b;
+    float f, fb;
+    double d, db;
+
+    ui64 = U64_MAX;
+    ui64b = U64_MAX-1;
+    i64 = S64_MIN;
+    i64b = i64+1;
+    d = (double)ui64;
+    db = (double)i64b;
+
+    assert(cmp_lt(&i64, T_INT64, &ui64, T_UINT64));
+    assert(!cmp_lt(&ui64, T_UINT64, &i64, T_INT64));
+    assert(cmp_lt(&i64, T_INT64, &ui64b, T_UINT64));
+    assert(!cmp_lt(&ui64b, T_UINT64, &i64, T_INT64));
+    assert(cmp_lt(&i64, T_INT64, &i64b, T_INT64));
+    assert(!cmp_lt(&i64b, T_INT64, &i64, T_INT64));
+
+    // try to compare a double too big to fit in an int64 with an
+    // int64 requiring too much precision to fit in a double...
+    // this case fails but it's very difficult/expensive to support
+    //assert(cmp_lt(&ui64b, T_UINT64, &d, T_DOUBLE));
+
+    i64 = S64_MAX;
+    ui64 = S64_MAX-1;
+    assert(cmp_lt(&ui64, T_UINT64, &i64, T_INT64));
+    assert(!cmp_lt(&i64, T_INT64, &ui64, T_UINT64));
+    i64 = S64_MAX-1;
+    ui64 = S64_MAX;
+    assert(cmp_lt(&i64, T_INT64, &ui64, T_UINT64));
+    assert(!cmp_lt(&ui64, T_UINT64, &i64, T_INT64));
+
+    d = DBL_MAXINT;
+    i64 = DBL_MAXINT+100;
+    assert(cmp_lt(&d, T_DOUBLE, &i64, T_INT64));
+    assert(!cmp_lt(&i64, T_INT64, &d, T_DOUBLE));
+    i64 = DBL_MAXINT+10;
+    assert(cmp_lt(&d, T_DOUBLE, &i64, T_INT64));
+    assert(!cmp_lt(&i64, T_INT64, &d, T_DOUBLE));
+    i64 = DBL_MAXINT+1;
+    assert(cmp_lt(&d, T_DOUBLE, &i64, T_INT64));
+    assert(!cmp_lt(&i64, T_INT64, &d, T_DOUBLE));
+
+    assert(!cmp_eq(&d, T_DOUBLE, &i64, T_INT64));
+    i64 = DBL_MAXINT;
+    assert(cmp_eq(&d, T_DOUBLE, &i64, T_INT64));
+}
+#endif
diff --git a/llt/ptrhash.c b/llt/ptrhash.c
new file mode 100644
index 0000000..84b294a
--- /dev/null
+++ b/llt/ptrhash.c
@@ -0,0 +1,195 @@
+/*
+  pointer hash table
+  optimized for storing info about particular values
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "dtypes.h"
+#include "ptrhash.h"
+#include "hashing.h"
+
+#define ptrhash_size(h) ((h)->size/2)
+
+ptrhash_t *ptrhash_new(ptrhash_t *h, size_t size)
+{
+    size = nextipow2(size);
+    size *= 2;  // 2 pointers per key/value pair
+    size *= 2;  // aim for 50% occupancy
+    h->size = size;
+    h->table = (void**)malloc(size*sizeof(void*));
+    if (h->table == NULL) return NULL;
+    size_t i;
+    for(i=0; i < size; i++)
+        h->table[i] = PH_NOTFOUND;
+    return h;
+}
+
+void ptrhash_free(ptrhash_t *h)
+{
+    free(h->table);
+}
+
+// empty and reduce size
+void ptrhash_reset(ptrhash_t *h, size_t sz)
+{
+    if (h->size > sz*4) {
+        size_t newsz = sz*4;
+        void **newtab = (void**)realloc(h->table, newsz*sizeof(void*));
+        if (newtab == NULL)
+            return;
+        h->size = newsz;
+        h->table = newtab;
+    }
+    size_t i, hsz=h->size;
+    for(i=0; i < hsz; i++)
+        h->table[i] = PH_NOTFOUND;
+}
+
+// compute empirical max-probe for a given size
+#define ph_max_probe(size) ((size)>>5)
+
+static void **ptrhash_lookup_bp(ptrhash_t *h, void *key)
+{
+    uint_t hv;
+    size_t i, orig, index, iter;
+    size_t newsz, sz = ptrhash_size(h);
+    size_t maxprobe = ph_max_probe(sz);
+    void **tab = h->table;
+    void **ol;
+
+    hv = inthash((uptrint_t)key);
+ retry_bp:
+    iter = 0;
+    index = (index_t)(hv & (sz-1)) * 2;
+    sz *= 2;
+    orig = index;
+
+    do {
+        if (tab[index] == PH_NOTFOUND) {
+            tab[index] = key;
+            return &tab[index+1];
+        }
+
+        if (key == tab[index])
+            return &tab[index+1];
+
+        index = (index+2) & (sz-1);
+        iter++;
+        if (iter > maxprobe)
+            break;
+    } while (index != orig);
+
+    // table full
+    // quadruple size, rehash, retry the insert
+    // it's important to grow the table really fast; otherwise we waste
+    // lots of time rehashing all the keys over and over.
+    sz = h->size;
+    ol = h->table;
+    if (sz >= (1<<19))
+        newsz = sz<<1;
+    else
+        newsz = sz<<2;
+    //printf("trying to allocate %d words.\n", newsz); fflush(stdout);
+    tab = (void**)malloc(newsz*sizeof(void*));
+    if (tab == NULL)
+        return NULL;
+    for(i=0; i < newsz; i++)
+        tab[i] = PH_NOTFOUND;
+    h->table = tab;
+    h->size = newsz;
+    for(i=0; i < sz; i+=2) {
+        if (ol[i] != PH_NOTFOUND && ol[i+1] != PH_NOTFOUND) {
+            (*ptrhash_lookup_bp(h, ol[i])) = ol[i+1];
+            /*
+            // this condition is not really possible
+            if (bp == NULL) {
+                free(h->table);
+                h->table = ol;
+                h->size = sz;
+                // another thing we could do in this situation
+                // is newsz<<=1 and go back to the malloc, retrying with
+                // a bigger buffer on this level of recursion.
+                return NULL;
+            }
+            */
+        }
+    }
+    free(ol);
+
+    sz = ptrhash_size(h);
+    maxprobe = ph_max_probe(sz);
+
+    goto retry_bp;
+
+    return NULL;
+}
+
+void ptrhash_put(ptrhash_t *h, void *key, void *val)
+{
+    void **bp = ptrhash_lookup_bp(h, key);
+
+    *bp = val;
+}
+
+void **ptrhash_bp(ptrhash_t *h, void *key)
+{
+    return ptrhash_lookup_bp(h, key);
+}
+
+// returns bp if key is in hash, otherwise NULL
+static void **ptrhash_peek_bp(ptrhash_t *h, void *key)
+{
+    size_t sz = ptrhash_size(h);
+    size_t maxprobe = ph_max_probe(sz);
+    void **tab = h->table;
+    size_t index = (index_t)(inthash((uptrint_t)key) & (sz-1)) * 2;
+    sz *= 2;
+    size_t orig = index;
+    size_t iter = 0;
+
+    do {
+        if (tab[index] == PH_NOTFOUND)
+            return NULL;
+        if (key == tab[index] && tab[index+1] != PH_NOTFOUND)
+            return &tab[index+1];
+
+        index = (index+2) & (sz-1);
+        iter++;
+        if (iter > maxprobe)
+            break;
+    } while (index != orig);
+
+    return NULL;
+}
+
+void *ptrhash_get(ptrhash_t *h, void *key)
+{
+    void **bp = ptrhash_peek_bp(h, key);
+    if (bp == NULL)
+        return PH_NOTFOUND;
+    return *bp;
+}
+
+int ptrhash_has(ptrhash_t *h, void *key)
+{
+    return (ptrhash_get(h,key) != PH_NOTFOUND);
+}
+
+void ptrhash_remove(ptrhash_t *h, void *key)
+{
+    void **bp = ptrhash_peek_bp(h, key);
+    if (bp != NULL)
+        *bp = PH_NOTFOUND;
+}
+
+void ptrhash_adjoin(ptrhash_t *h, void *key, void *val)
+{
+    void **bp = ptrhash_lookup_bp(h, key);
+    if (*bp == PH_NOTFOUND)
+        *bp = val;
+}
diff --git a/llt/ptrhash.h b/llt/ptrhash.h
new file mode 100644
index 0000000..0b6423f
--- /dev/null
+++ b/llt/ptrhash.h
@@ -0,0 +1,44 @@
+#ifndef __PTRHASH_H_
+#define __PTRHASH_H_
+
+typedef struct _ptrhash_t {
+    size_t size;
+    void **table;
+} ptrhash_t;
+
+// define this to be an invalid key/value
+#define PH_NOTFOUND ((void*)2)
+
+// initialize and free
+ptrhash_t *ptrhash_new(ptrhash_t *h, size_t size);
+void ptrhash_free(ptrhash_t *h);
+
+// clear and (possibly) change size
+void ptrhash_reset(ptrhash_t *h, size_t sz);
+
+// return value, or PH_NOTFOUND if key not found
+void *ptrhash_get(ptrhash_t *h, void *key);
+
+// add key/value binding
+void ptrhash_put(ptrhash_t *h, void *key, void *val);
+
+// add binding iff key is unbound
+void ptrhash_adjoin(ptrhash_t *h, void *key, void *val);
+
+// does key exist?
+int ptrhash_has(ptrhash_t *h, void *key);
+
+// logically remove key
+void ptrhash_remove(ptrhash_t *h, void *key);
+
+// get a pointer to the location of the value for the given key.
+// creates the location if it doesn't exist. only returns NULL
+// if memory allocation fails.
+// this should be used for updates, for example:
+//     void **bp = ptrhash_bp(h, key);
+//     *bp = f(*bp);
+// do not reuse bp if there might be intervening calls to ptrhash_put,
+// ptrhash_bp, ptrhash_reset, or ptrhash_free.
+void **ptrhash_bp(ptrhash_t *h, void *key);
+
+#endif
diff --git a/llt/scrap b/llt/scrap
new file mode 100644
index 0000000..ad1f63f
--- /dev/null
+++ b/llt/scrap
@@ -0,0 +1,176 @@
+/* null stream */
+off_t null_seek(struct _stream *s, off_t where)
+{
+    return -1;
+}
+
+off_t null_skip(struct _stream *s, off_t offs)
+{
+    return 0;
+}
+
+off_t null_seek_end(struct _stream *s)
+{
+    return -1;
+}
+
+size_t null_read(struct _stream *s, char *dest, size_t size)
+{
+    return 0;
+}
+
+size_t null_write(struct _stream *s, char *data, size_t size)
+{
+    return 0;
+}
+
+size_t null_trunc(struct _stream *s, size_t size)
+{
+    return 0;
+}
+
+void null_flush(struct _stream *s)
+{
+}
+
+bool_t null_eof(struct _stream *s)
+{
+    return true;
+}
+
+void free_null_stream(stream_t *s)
+{
+}
+
+DLLEXPORT stream_interface_t null_funcs =
+    {free_null_stream, null_seek, null_seek_end, null_skip,
+     null_read, null_write, null_trunc, null_eof, null_flush};
+
+stream_t *nullstream_new()
+{
+    stream_t *s;
+
+    s = (stream_t*)obj_alloc(stream_pool);
+    s->funcs = &null_funcs;
+    s->byteswap = false;
+    s->bitpos = s->bitbuf = 0;
+    s->pos = 0;
+
+    return s;
+}
+
+void free_roms_stream(stream_t *s)
+{
+    (void)s;
+}
+
+size_t roms_write(struct _stream *s, char *data, size_t size)
+{
+    (void)s;
+    (void)data;
+    (void)size;
+    return 0;
+}
+
+size_t roms_trunc(struct _stream *s, size_t size)
+{
+    (void)size;
+    return s->size;
+}
+
+void roms_flush(struct _stream *s)
+{
+    s->bitpos = 0;
+}
+
+stream_interface_t roms_funcs =
+    {free_roms_stream, ms_seek, ms_seek_end, ms_skip,
+     ms_read, roms_write, roms_trunc, ms_eof, roms_flush};
+
+/* read-only memory stream */
+stream_t *romemstream_new(stream_t *s, char *data, size_t len)
+{
+    memstream_new(s, 0);
+
+    s->funcs = &roms_funcs;
+
+    s->data = data;
+    s->size = len;
+    s->maxsize = len+1;
+    s->pos = 0;
+    s->bitpos = s->bitbuf = 0;
+    s->byteswap = false;
+
+    return s;
+}
+
+int stream_vput_int(stream_t *s, int nargs, ...)
+{
+    u_int32_t val, i, c=0;
+    va_list ap;
+
+    va_start(ap, nargs);
+    for(i=0; i < (unsigned)nargs; i++) {
+        val = va_arg(ap, int);
+        c += stream_put_int(s, val);
+    }
+    va_end(ap);
+
+    return c;
+}
+
+// after this function you are guaranteed to be able to perform
+// operations of the kind requested.
+static int _ios_setstate(ios_t *s, iostate_t newstate)
+{
+    if (s->state != newstate) {
+        if (s->state == iost_none || s->bm == bm_mem || s->bm == bm_none) {
+        }
+        else if (s->state == iost_rd) {
+            // reading -> writing; try to put back unused buffer data
+            // todo: another possibility here is to seek back s->size bytes,
+            // not move any data, and retain the ability to seek backwards
+            // within the buffer. the downside to that would be redundant
+            // writes of stuff that was already in the file.
+            ios_skip(s, -(off_t)(s->size - s->bpos));
+            // todo: if the seek fails...?
+            _discard_partial_buffer(s);
+            // note: bitpos is left alone, so if all goes well we pick up
+            // writing exactly where we stopped reading.
+        }
+        else if (s->state == iost_wr) {
+            ios_flush(s);
+        }
+        s->state = newstate;
+    }
+
+    // now make sure buffer is set up for the state we're in
+    if (s->state == iost_wr) {
+        // TODO: fill buffer if stenciling is requested
+    }
+    else if (s->state == iost_rd) {
+        // TODO: fill buffer if needed
+    }
+
+    return 0;
+}
+
+
+/* convert double to int64 in software */
+int64_t double_to_int64(double d)
+{
+    int64_t i;
+    int ex;
+    union ieee754_double dl;
+
+    if (fabs(d) < 1) return 0;
+
+    dl.d = d;
+    ex = dl.ieee.exponent - IEEE754_DOUBLE_BIAS;
+    // fill mantissa into bits 0 to 51
+    i = ((((int64_t)dl.mantissa0)<<32) | ((int64_t)dl.mantissa1));
+    if (ex < 52)
+        i >>= (52-ex);
+    else if (ex > 52)
+        i <<= (ex-52);
+}
diff --git a/llt/socket.c b/llt/socket.c
new file mode 100644
index 0000000..2921d13
--- /dev/null
+++ b/llt/socket.c
@@ -0,0 +1,212 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+
+#include "dtypes.h"
+#include "socket.h"
+
+
+int mysocket(int domain, int type, int protocol)
+{
+    int val;
+    int s = socket(domain, type, protocol);
+    if (s < 0)
+        return s;
+    val = 4096;
+    setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char*)&val, sizeof(int));
+    val = 4096;
+    setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char*)&val, sizeof(int));
+    return s;
+}
+
+#ifdef WIN32
+void bzero(void *s, size_t n)
+{
+    memset(s, 0, n);
+}
+#endif
+
+/* returns a socket on which to accept() connections */
+int open_tcp_port(short portno)
+{
+    int sockfd;
+    struct sockaddr_in serv_addr;
+
+    sockfd = mysocket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+    if (sockfd < 0)
+        return -1;
+    bzero(&serv_addr, sizeof(serv_addr));
+    serv_addr.sin_family = AF_INET;
+    serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    serv_addr.sin_port = htons(portno);
+    if (bind(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) {
+        fprintf(stderr, "could not bind to port %d.\n",
+                portno);
+        return -1;
+    }
+
+    listen(sockfd, 4);
+    return sockfd;
+}
+
+/* returns a socket on which to accept() connections, finding some
+   available port (portno is value-return) */
+int open_any_tcp_port(short *portno)
+
+{
+    int sockfd;
+    struct sockaddr_in serv_addr;
+
+    sockfd = mysocket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+    if (sockfd < 0)
+        return -1;
+    bzero(&serv_addr, sizeof(serv_addr));
+    serv_addr.sin_family = AF_INET;
+    serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    serv_addr.sin_port = htons(*portno);
+    while (bind(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) {
+        (*portno)++;
+        serv_addr.sin_port = htons(*portno);
+    }
+
+    listen(sockfd, 4);
+    return sockfd;
+}
+
+/* returns a socket on which to accept() connections, finding some
+   available port (portno is value-return) */
+int open_any_udp_port(short *portno)
+{
+    int sockfd;
+    struct sockaddr_in serv_addr;
+
+    sockfd = mysocket(PF_INET, SOCK_DGRAM, IPPROTO_TCP);
+    if (sockfd < 0)
+        return -1;
+    bzero(&serv_addr, sizeof(serv_addr));
+    serv_addr.sin_family = AF_INET;
+    serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    serv_addr.sin_port = htons(*portno);
+    while (bind(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) {
+        (*portno)++;
+        serv_addr.sin_port = htons(*portno);
+    }
+
+    return sockfd;
+}
+
+#ifndef WIN32
+void closesocket(int fd)
+{
+    close(fd);
+}
+#endif
+
+/* returns a socket to use to send data to the given address */
+int connect_to_host(char *hostname, short portno)
+{
+    struct hostent *host_info;
+    int sockfd, yes=1;
+    struct sockaddr_in host_addr;
+
+    host_info = gethostbyname(hostname);
+    if (host_info == NULL) {
+        return -1;
+    }
+
+    sockfd = mysocket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+    if (sockfd < 0) {
+        return -1;
+    }
+    (void)setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int));
+    memset((char*)&host_addr, 0, sizeof(host_addr));
+    host_addr.sin_family = host_info->h_addrtype;
+    memcpy((char*)&host_addr.sin_addr, host_info->h_addr,
+           host_info->h_length);
+
+    host_addr.sin_port = htons(portno);
+
+    if (connect(sockfd, (struct sockaddr*)&host_addr,
+                sizeof(struct sockaddr_in)) != 0) {
+        closesocket(sockfd);
+        return -1;
+    }
+
+    return sockfd;
+}
+
+int connect_to_addr(struct sockaddr_in *host_addr)
+{
+    int sockfd, yes=1;
+
+    sockfd = mysocket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+    if (sockfd < 0) {
+        return -1;
+    }
+    (void)setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int));
+
+    if (connect(sockfd, (struct sockaddr*)host_addr,
+                sizeof(struct sockaddr_in)) != 0) {
+        closesocket(sockfd);
+        return -1;
+    }
+
+    return sockfd;
+}
+
+/* repeated send until all of buffer is sent */
+int sendall(int sockfd, char *buffer, int bufLen, int flags)
+{
+    int numBytesToSend=bufLen, length;
+
+    while (numBytesToSend>0) {
+        length = send(sockfd, (void *) buffer, numBytesToSend, flags);
+        if (length < 0) {
+            return(-1);
+        }
+        numBytesToSend -= length ;
+        buffer += length ;
+    }
+    return(bufLen);
+}
+
+/* repeated read until all of buffer is read */
+int readall(int sockfd, char *buffer, int bufLen, int flags)
+{
+    int numBytesToRead=bufLen, length;
+
+    while (numBytesToRead>0) {
+        length = recv(sockfd, buffer, numBytesToRead, flags);
+        if (length <= 0) {
+            return(length);
+        }
+        numBytesToRead -= length;
+        buffer += length;
+    }
+    return(bufLen);
+}
+
+int addr_eq(struct sockaddr_in *a, struct sockaddr_in *b)
+{
+    if (a->sin_port == b->sin_port &&
+        a->sin_addr.s_addr == b->sin_addr.s_addr)
+        return 1;
+    return 0;
+}
+
+int socket_ready(int sock)
+{
+    fd_set fds;
+    struct timeval timeout;
+
+    timeout.tv_sec = 0;
+    timeout.tv_usec = 1000;
+
+    FD_ZERO(&fds);
+    FD_SET(sock, &fds);
+    select(sock+1, &fds, NULL, NULL, &timeout);
+    return FD_ISSET(sock, &fds);
+}
diff --git a/llt/socket.h b/llt/socket.h
new file mode 100644
index 0000000..d85284a
--- /dev/null
+++ b/llt/socket.h
@@ -0,0 +1,30 @@
+#ifndef __JCSOCKET_H_
+#define __JCSOCKET_H_
+
+#ifdef WIN32
+#include <winsock2.h>
+#else
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#endif
+
+int open_tcp_port(short portno);
+int open_any_tcp_port(short *portno);
+int open_any_udp_port(short *portno);
+int connect_to_host(char *hostname, short portno);
+int connect_to_addr(struct sockaddr_in *host_addr);
+int sendall(int sockfd, char *buffer, int bufLen, int flags);
+int readall(int sockfd, char *buffer, int bufLen, int flags);
+int addr_eq(struct sockaddr_in *a, struct sockaddr_in *b);
+int socket_ready(int sock);
+
+#ifdef WIN32
+void bzero(void *s, size_t n);
+#endif
+#ifndef WIN32
+void closesocket(int fd);
+#endif
+
+#endif
diff --git a/llt/textread.m b/llt/textread.m
new file mode 100755
index 0000000..66c8789
--- /dev/null
+++ b/llt/textread.m
@@ -0,0 +1,601 @@
+function varargout = textread(filename, format, varargin)
+% textread Formatted file read to one or more variables
+%
+% Syntax:
+% [<variable-1> <<,variable-2>> <<,variable-N>> ] = ...
+%                               textread( <input-filename>, <format-specifiers> <<,number-of-lines-to-read>> )
+%
+% This function is available for task parallel processing.
+%
+% ************
+%
+% The Star-P M implementation of this function exhibits the same signature and output characteristics as the Matlab 
+% function of the same name.
+% 
+% For details, please see matlab:textread. 
+%
+% ************************
+%
+%
+
+% DOC % A
+	if nargin < 2, error('Not enough input arguments.'); end
+
+	if ~ischar(filename), error('Filename must be a string.'); end
+
+	ifExist = exist(filename, 'file');
+	if ifExist ~= 2 && ifExist ~= 4, error('File not found'); end
+
+	fid = fopen(filename, 'r');
+	if fid == -1, error(lasterror()); end;
+   
+    formatin = formatread(format);
+    argin = readvarargin(varargin{:});
+
+    
+    % Проверка количества исходящих аргументов
+    count = 0;
+    for k = 1:length(formatin),
+        if ~isequal(formatin(k).symbol, '*'), count = count + 1; end;
+    end
+    if count ~= nargout, error('widthber of outputs must match the widthber of unskipped input fields.');end
+       
+    % Флаг flag_N - опредиляет сколько раз использовать строку формата 
+    % (N или пока не считаем весь файл)
+    flag_N = 1; 
+    if ~isempty(argin.N)
+        N = argin.N;
+    else
+        N = 1; flag_N = 0;
+    end
+    
+    % Пропустить первые N == headerlines линий
+    for i = 1:argin.headerlines
+        text = fgets(fid);
+    end
+    
+    % Если строка пустая считать следующую
+    text = fgets(fid);
+        
+    t = 1;
+    k = 1;
+    
+    maxlen = 1;
+    vararginEmpty = 1;
+    
+    while N
+        
+        t = 1;
+        if ~isempty(format)
+            if passLine(text, argin)
+                for j = 1:length(formatin)
+                    s = formatin(j);
+                    
+                    if s.type == 'c' && isempty(text)
+                        while 1
+                            text = fgets(fid);
+                            if ~ischar(text)
+                                fclose(fid);
+                                return;
+                            else
+                                if ~(text(1) == 13)
+                                    break;
+                                end
+                            end
+                        end
+                    end
+                        
+
+                    % Удалить первые лишние пробелы
+                    text = removeAllFirstSpaces(text, argin.delimiter);
+                    % Считать следующее слово указанного типа
+                    [out, text] =  switchType(text, s, argin, fid);
+                    % Пропустить слово если установлен параметр *
+
+                    if ~isequal(s.symbol, '*')
+                        if ~isempty(text) || ~(isempty(out) || isequal(out, {''}))
+                            out = setEmptyValue(out, s, argin);                        
+                            if vararginEmpty
+                                varargout{t}(1, :) = out;
+                            else
+                                varargout{t}(end + 1, :) = out;                            
+                            end
+                        end
+                        t = t + 1; 
+                    end;
+
+                    % Убрать первый символ если он равен delimiter 
+                    if ~isempty(argin.delimiter) && ~isempty(text) && isa(text, 'char')
+                        if find(argin.delimiter == text(1))
+                            text = text(2:end);
+                        end
+                    end;
+                end
+                vararginEmpty = 0;
+            end
+        else % Если строка формата не задана читать как double
+            
+            if passLine(text, argin)
+                [out, text] = readDoubleArray(text, argin);
+                curmaxlen = maxlen;
+                if length(out) > maxlen, maxlen = length(out); end;
+                for z = 1:k 
+                    for q = curmaxlen+1:maxlen
+                        varargout{1}(z, q) = argin.emptyvalue;
+                    end
+                end
+                for q = length(out)+1:maxlen
+                    out(q) = argin.emptyvalue;
+                end
+
+                varargout{1}(k, :) = out;
+                k = k + 1;
+            end
+            
+            
+        end
+        
+        text = removeAllFirstSpaces(text, argin.delimiter);
+        % Если строка пустая считать следующую
+        if isempty(text)
+            text = fgets(fid);
+        elseif find(text(1) == [10 13])
+            text = fgets(fid);
+        end
+        % Выйти если не смогли считать строку
+        if ~ischar(text), break; end;
+        
+        if flag_N, N = N - 1; end;
+    end
+   
+    fclose(fid);
+    
+end
+
+% -------- Работа с текстом ---------------------------
+
+% Удаляет все первые разделители
+function text = removeAllFirstSpaces(text, delimiter)
+    %if ~isempty(delimiter), return; end;
+    idx = [];
+    for k = 1:length(text)
+        idx = find(text(k) ~= ' ', 1);
+        if ~isempty(idx), break; end;
+    end
+	if ~isempty(idx)
+        text = text(k:end);
+    else
+        text = '';
+	end
+end
+
+% Читает первые n - символов
+function [word, text] = readCharacters(text, n, fid)
+    word = '';
+    while n
+        if n > length(text)
+            word = [word text(1:end)];
+            n = n - length(text);
+            text = fgets(fid);
+            if ~ischar(text), error(sprintf('Trouble reading characters from file: %s', text)); end
+        else
+            word = [word text(1:n)];
+            text = text(n+1:end);
+            n = 0;
+        end 
+    end
+end
+
+% Читает первое слово до разделитель или первые n - символов
+function [word, text] = readString(text, n, delimiter)
+    if isempty(delimiter), delimiter = [13, 10, 32]; 
+    else
+        delimiter = [delimiter, 13, 10];
+    end
+    
+    word = '';
+    if isempty(n) || n > length(text) , n = length(text); end;
+    for k = 1:n
+        if find(delimiter == text(k))
+            word = text(1:k-1);
+            text = text(k:end);
+            return;
+        end
+    end
+    word = text(1:k);
+    text = text(k+1:end);
+    
+end
+
+% Читает первые числа до разделителяили или первые n - символов
+function [word, text] = readNumber(text, n)
+
+    if isempty(text), word = ''; end;
+    
+    word = [];
+    if isempty(n) ||  length(text) < n, n = length(text); end;
+    
+    for k = 1:n
+       if text(k) < 48 || text(k) > 57
+           word = text(1:k-1);
+           text = text(k:end);
+           return;
+       end
+    end
+    word = text(1:k);
+    text = text(k+1:end);
+    
+end
+
+% Читает число с точкой до разделителяили или первые n - символов
+function [word, text] = readFloat(text, s)
+    
+    if isempty(text), word = ''; return; end;
+    
+    if isempty(s), s.width = []; s.precision = []; end;
+    
+    if isempty(s.width) ||  length(text) < s.width
+        n = length(text); 
+    else
+        n = s.width;
+    end;
+    
+    if isempty(s.precision), s.precision = n; end;
+    
+    % Чтение знака
+    [sign, text] = getSign(text);
+    if ~isempty(sign), n = n - 1; end;
+
+    point = 0;
+    npoint = 0;
+    word = sign;
+    for k = 1:n
+        if point
+            npoint = npoint + 1;
+        end
+        if text(k) == '.' && ~point
+            point = 1;
+            continue;
+        end
+        if text(k) < 48 || text(k) > 57 || npoint > s.precision
+            word = [word text(1:k-1)];
+            text = text(k:end);
+            return;
+        end
+    end
+    word = [word text(1:k)];
+    text = text(k+1:end);
+    
+end
+
+% Определяет знак
+function [sign, text] = getSign(text)
+    if isempty(text), sign = ''; return; end;
+    if text(1) == '+' || text(1) == '-'
+        sign = text(1);
+        text = text(2:end);
+        if isempty(text) || text(1) < 48 || text(1) > 57, error(sprintf('Trouble reading double from file: %s', text)); end;
+    else
+        sign = [];
+    end
+end
+
+% 0 - пропустить строку, 1 - обрабатывать
+function out = passLine(text, argin)
+
+    isdelimiter = 0;
+    if argin.delimiter
+        if ~isempty(find(text == argin.delimiter, 1))
+            isdelimiter = 1;
+        end
+    end
+    
+    isnewline = 0;
+    if ~isempty(find(text(1) == [10 13], 1))
+        isnewline = 1;
+    end 
+    if ~isnewline || isdelimiter
+        out = 1;
+    else
+        out = 0;
+    end
+    
+end
+
+
+% -------- Парс входящих параметров ---------------------------
+
+% Читает входящие параметры в структуру
+function argin = readvarargin(varargin)
+
+    
+    argin = struct();
+    argin(1).N = [];
+    argin(1).bufsize = 4095;
+    argin(1).commentstyle = [];
+    argin(1).delimiter = '';
+    argin(1).emptyvalue = 0;
+    argin(1).endofline = [];
+    argin(1).expchars = [];
+    argin(1).headerlines = 0;
+    argin(1).whitespace = [];
+    
+    if nargin == 0, return; end;
+
+    k = 1;
+    if isnumeric(varargin{1})
+        argin.N = varargin{1};
+        k = k + 1;
+    end
+   
+
+    count = (length(varargin(k:end)) / 2);
+    if floor(count) - count ~= 0, error('Param/value pairs must come in pairs'); end;
+    
+    while k < nargin
+        switch varargin{k}
+            
+            case 'bufsize'
+                k = k + 1;
+                if isinteger(varargin{k}) && isscalar(varargin{k})
+                    argin(1).bufsize = str2double(varargin{k});
+                else
+                    error('Buffer size must be a scalar integer.');
+                end
+                
+            case 'commentstyle'
+                k = k + 1;
+                switch varargin{k}
+                    case 'matlab'
+                        argin(1).commentstyle = '%';
+                    case 'shell'
+                        argin(1).commentstyle = '#';
+                    case 'c++'
+                        argin(1).commentstyle = '//';
+                    otherwise
+                        error('Invalid comment style.');
+                end
+                
+            case 'delimiter'
+                k = k + 1;
+                switch varargin{k}
+                    case '\n'
+                        num = 10;
+                    case '\r'
+                        num = 13;
+                    otherwise
+                        num = double(varargin{k});
+                end
+                argin(1).delimiter = num;
+                
+            case 'emptyvalue'
+                k = k + 1;
+                if isnumeric(varargin{k}) && isscalar(varargin{k})
+                    argin(1).emptyvalue = varargin{k};
+                else
+                    error('Emptyvalue must be a scalar double.');
+                end              
+                 
+            case 'endofline'
+                k = k + 1;
+                if ischar(varargin{k})
+                    argin(1).endofline = varargin{k};
+                else
+                    error('endofline must be a scalar double.');
+                end   
+                
+            case 'expchars'
+                
+            case 'headerlines'
+                k = k + 1;
+                if isnumeric(varargin{k}) && isscalar(varargin{k})
+                    argin(1).headerlines = varargin{k};
+                else
+                    error('Headerlines must be a scalar integer.');
+                end              
+                
+            case 'whitespace'  
+                
+            otherwise
+                error('Unknown option');
+        end
+        
+        k = k + 1;
+        
+    end
+    
+end
+
+% Читает строку формата в структуру
+function R = formatread(format)
+
+    formatType = ['d', 'u', 'f', 's', 'q', 'c'];
+    k = 1;
+    t = 1;
+    s = struct();
+    s(t).type = [];
+    s(t).width = [];
+    s(t).precision = [];
+    s(t).symbol = [];
+    s(t).text = [];
+    
+	while ~isempty(format) 
+        
+        type = [];
+        width = [];
+        precision = [];
+        symbol = [];
+        text = [];
+        
+        format = removeAllFirstSpaces(format, '');
+        if format(1) == '%'
+            format = format(2:end);
+            
+            
+            if format(1) == '*'
+                symbol = '*'; 
+                format = format(2:end);
+            end;
+            
+            [width, format] = readNumber(format, []);
+            if format(1) == '.'
+                format = format(2:end);
+                [precision, format] = readNumber(format, []);
+            end
+            
+            type = format(1);
+            format = format(2:end);
+            
+            % Check and save correct format
+            idx = find( formatType == type );
+            if isempty(idx)
+                error('Incorrect format'); 
+            end;
+            
+            % Save width
+            if ~isempty(width), width = str2double(width);end;
+            % Save precision
+            if ~isempty(precision), precision = str2double(precision);end;
+            
+        else
+            
+            [text, format] = readString(format, [], [' ', '%']);
+            symbol = '*';
+            type = 'r';
+        end
+        
+        s(t).type = type;
+        s(t).width = width;
+        s(t).precision = precision;
+        s(t).symbol = symbol;
+        s(t).text = text;
+        
+        t = t + 1;
+        
+	end
+    
+    R = s;
+    
+end
+
+% ------------- Вспомагательные функции --------------------
+
+function [out, text] = switchType(text, s, argin, fid)
+
+    switch s.type
+
+        case 'd'
+            width = s.width;
+            % Чтение знака числа
+            [sign, text] = getSign(text);
+            if ~isempty(sign), width = width - 1; end;
+            % Чиение числа
+            [word, text] = readNumber(text, width);
+            % Обьеденить знак и число
+            out = [sign word];
+            % Если опция emptyvalue установлена и число пустое то заменить на заданное
+            if ~isempty(out)
+                out = str2double(out);
+                if isequalwithequalnans(out, NaN), error(sprintf('Trouble reading double from file: %s', text)); end;
+            else
+                if ~isempty(text) && isempty(find(text(1) == [13, 10], 1))
+                    error(sprintf('Trouble reading integer from file: %s', text));
+                end
+            end
+
+        case 'u'
+            if isempty(text) || ~isempty(find(text(1) == [13, 10], 1))
+                out = []; return;
+            end            
+            [out, text] = readNumber(text, s.width);
+            % Если опция emptyvalue установлена и число пустое то заменить на заданное
+            if ~isempty(out)
+                out = str2double(out);
+                if isequalwithequalnans(out, NaN), error(sprintf('Trouble reading integer from file: %s', text)); end;
+            else
+                if ~isempty(text) && isempty(find(text(1) == [13, 10], 1))
+                    error(sprintf('Trouble reading integer from file: %s', text));
+                end
+            end
+            
+        case 'f'
+            % Чтение числа
+            [out, text] = readFloat(text, s);
+            % Если опция emptyvalue установлена и число пустое то заменить на заданное
+            if ~isempty(out)
+                out = str2double(out);
+                if isequalwithequalnans(out, NaN), error(sprintf('Trouble reading double from file: %s', text)); end;
+            else
+                if ~isempty(text) && isempty(find(text(1) == [13, 10], 1))
+                    error(sprintf('Trouble reading integer from file: %s', text));
+                end
+            end
+
+        case 's'
+            [word, text] = readString(text, s.width, argin.delimiter);
+            if isempty(word)
+                out = {''};
+            else
+                out = {word};
+            end
+
+        case 'q'
+
+        case 'c'
+            n = 1;
+            if ~isempty(s.width), n = s.width; end;
+            [word, text] = readCharacters(text, n, fid);
+            out = word(:);
+
+        case 'r'
+            [out, text] = readCharacters(text, length(s.text));
+            if ~isequal(out, s.text), error('Trouble reading characters from file'); end;
+
+        otherwise
+            error('Error');
+    end
+
+end
+
+function out = setEmptyValue(text, s, argin)
+    out = text;
+    if isempty(text)
+        if find(['d', 'u', 'f'] == s.type)
+            out = argin.emptyvalue;
+        end
+    end
+end
+
+function [out, text] = readDoubleArray(text, argin)
+
+    if isempty(text); out = []; return; end;
+    t = 1;
+    while isempty(find(text(1) == [13 10], 1))
+        % Чтение знака
+        [sign, text] = getSign(text);
+        % Чтение числа
+        [word, text] = readFloat(text, []);
+        % Обьеденить знак и число
+        word = [sign word];
+        % Если опция emptyvalue установлена и число пустое то заменить на заданное
+        if ~isempty(argin.emptyvalue) && isempty(word)
+            out(t) = argin.emptyvalue;
+        else
+            out(t) = str2double(word);
+            if isequalwithequalnans(out(t), NaN), error('Trouble reading integer from file'); end;
+        end
+        
+        % Убрать первый символ если он равен delimiter 
+        if ~isempty(argin.delimiter) && ~isempty(text)
+            if find(argin.delimiter == text(1))
+                text = text(2:end);
+            end
+        end;
+                
+        t = t + 1;
+        if isempty(text); break; end;
+    end
+    
+end
+
+
diff --git a/llt/timefuncs.c b/llt/timefuncs.c
new file mode 100644
index 0000000..ed1b823
--- /dev/null
+++ b/llt/timefuncs.c
@@ -0,0 +1,164 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#ifdef WIN32
+#include <malloc.h>
+#include <sys/timeb.h>
+#include <windows.h>
+#else
+#include <sys/time.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#endif
+
+#include "dtypes.h"
+#include "timefuncs.h"
+
+#ifdef WIN32
+double tvals2float(struct tm *t, struct timeb *tstruct)
+{
+	return (double)t->tm_hour * 3600 + (double)t->tm_min * 60 +
+        (double)t->tm_sec + (double)tstruct->millitm/1.0e3;
+}
+
+double floattime()
+{
+    struct timeb tstruct;
+
+	ftime(&tstruct);
+    return (double)tstruct.time + (double)tstruct.millitm/1.0e3;
+}
+#else
+double tv2float(struct timeval *tv)
+{
+    return (double)tv->tv_sec + (double)tv->tv_usec/1.0e6;
+}
+
+double diff_time(struct timeval *tv1, struct timeval *tv2)
+{
+    return tv2float(tv1) - tv2float(tv2);
+}
+#endif
+
+// return as many bits of system randomness as we can get our hands on
+u_int64_t i64time()
+{
+    u_int64_t a;
+#ifdef WIN32
+    struct timeb tstruct;
+	ftime(&tstruct);
+    a = (((u_int64_t)tstruct.time)<<32) + (u_int64_t)tstruct.millitm;
+#else
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    a = (((u_int64_t)now.tv_sec)<<32) + (u_int64_t)now.tv_usec;
+#endif
+
+    return a;
+}
+
+double clock_now()
+{
+#ifdef WIN32
+    return floattime();
+#else
+    struct timeval now;
+
+    gettimeofday(&now, NULL);
+    return tv2float(&now);
+#endif
+}
+
+#ifndef LINUX
+static char *wdaystr[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
+static char *monthstr[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug",
+                           "Sep","Oct","Nov","Dec"};
+#endif
+
+void timestring(double seconds, char *buffer, size_t len)
+{
+    time_t tme = (time_t)seconds;
+    char *fmt = "%c"; /* needed to suppress GCC warning */
+
+#ifdef LINUX
+    struct tm tm;
+
+    localtime_r(&tme, &tm);
+    strftime(buffer, len, fmt, &tm);
+#else
+    struct tm *tm;
+    int hr;
+
+    tm = localtime(&tme);
+    hr = tm->tm_hour;
+    if (hr > 12) hr -= 12;
+    if (hr == 0) hr = 12;
+    snprintf(buffer, len, "%s %02d %s %d %02d:%02d:%02d %s %s",
+             wdaystr[tm->tm_wday], tm->tm_mday, monthstr[tm->tm_mon],
+             tm->tm_year+1900, hr, tm->tm_min, tm->tm_sec,
+             tm->tm_hour>11 ? "PM" : "AM", "");
+#endif
+}
+
+#ifdef LINUX
+extern char *strptime(const char *s, const char *format, struct tm *tm);
+double parsetime(char *str)
+{
+    char *fmt = "%c"; /* needed to suppress GCC warning */
+    char *res;
+    time_t t;
+    struct tm tm;
+
+    res = strptime(str, fmt, &tm);
+    if (res != NULL) {
+        t = mktime(&tm);
+        if (t == ((time_t)-1))
+            return -1;
+        return (double)t;
+    }
+    return -1;
+}
+#else
+// TODO
+#endif
+
+void sleep_ms(int ms)
+{
+    if (ms == 0)
+        return;
+
+#ifdef WIN32
+    Sleep(ms);
+#else
+    struct timeval timeout;
+
+    timeout.tv_sec = ms/1000;
+    timeout.tv_usec = (ms % 1000) * 1000;
+    select(0, NULL, NULL, NULL, &timeout);
+#endif
+}
+
+void timeparts(int32_t *buf, double t)
+{
+    time_t tme = (time_t)t;
+
+#ifndef WIN32
+    struct tm tm;
+    localtime_r(&tme, &tm);
+    tm.tm_year += 1900;
+    memcpy(buf, (char*)&tm, sizeof(struct tm));
+#else
+    struct tm *tm;
+
+    tm = localtime(&tme);
+    tm->tm_year += 1900;
+    memcpy(buf, (char*)tm, sizeof(struct tm));
+#endif
+}
diff --git a/llt/timefuncs.h b/llt/timefuncs.h
new file mode 100644
index 0000000..1465333
--- /dev/null
+++ b/llt/timefuncs.h
@@ -0,0 +1,11 @@
+#ifndef __TIMEFUNCS_H_
+#define __TIMEFUNCS_H_
+
+u_int64_t i64time();
+double clock_now();
+void timestring(double seconds, char *buffer, size_t len);
+double parsetime(char *str);
+void sleep_ms(int ms);
+void timeparts(int32_t *buf, double t);
+
+#endif
diff --git a/llt/u8.txt b/llt/u8.txt
new file mode 100644
index 0000000..09c3c39
--- /dev/null
+++ b/llt/u8.txt
@@ -0,0 +1,19 @@
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов
+% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка количества исходящих аргументов% Проверка к       
+
diff --git a/llt/unittest.c b/llt/unittest.c
new file mode 100644
index 0000000..f12800f
--- /dev/null
+++ b/llt/unittest.c
@@ -0,0 +1,113 @@
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <wchar.h>
+#include "llt.h"
+
+int main()
+{
+    llt_init();
+
+    test_dblprint();
+    test_operators();
+
+    /*
+    char *buf = malloc(20000);
+    char *buf2 = malloc(20000);
+    FILE *f = fopen("textread.m","rb");
+    int i=0;
+    while (!feof(f))
+      buf[i++] = fgetc(f);
+    buf[i-1] = '\0';
+    int len = i-1;
+    double t0 = clock_now();
+    int j=0;
+    for(i=0; i < 20000; i++) {
+        //j+=u8_charnum(buf,len);
+        u8_reverse(buf2, buf, len);
+    }
+    printf("textread took %.4f sec (%d)\n", clock_now()-t0, j);
+
+    FILE *f2 = fopen("u8.txt","rb");
+    i=0;
+    while (!feof(f2))
+      buf[i++] = fgetc(f2);
+    buf[i-1] = '\0';
+    len = i-1;
+    t0 = clock_now();
+    j=0;
+    for(i=0; i < 20000; i++) {
+        //j+=u8_charnum(buf,len);
+        u8_reverse(buf2, buf, len);
+    }
+    printf("u8 took %.4f sec (%d)\n\n", clock_now()-t0, j);
+    */
+
+    return 0;
+}
+
+static void prettycplx(double r, double i)
+{
+    char str[64];
+    snprint_cplx(str, sizeof(str), r, i, 0, 16, 3, 10, 1);
+    fputs(str, stdout);
+    fputc('\n', stdout);
+}
+
+static void prettyreal(double r)
+{
+    char str[64];
+    snprint_real(str, sizeof(str), r, 0, 16, 3, 10);
+    fputs(str, stdout);
+    fputc('\n', stdout);
+}
+
+void test_dblprint()
+{
+    char str[64];
+
+    dbl_tolerance(1e-12);
+
+    prettycplx(0,0);
+    prettycplx(1,0);
+    prettycplx(0,1);
+    prettycplx(1,1);
+    prettycplx(-1,0);
+    prettycplx(0,-1);
+    prettycplx(1,-1);
+    prettycplx(-1,1);
+    prettycplx(-1,-1);
+    prettycplx(2,0);
+    prettycplx(0,2);
+    prettycplx(2,2);
+    prettycplx(-2,0);
+    prettycplx(0,-2);
+    prettycplx(2,-2);
+    prettycplx(-2,2);
+    prettycplx(-2,-2);
+
+    prettyreal(1.5);
+    prettyreal(1.1);
+    prettyreal(1.1e-100);
+    prettyreal(1.1e20);
+    prettyreal(123456789);
+    prettyreal(1234567890);
+    prettyreal(12345678901);
+    prettyreal(-12345678901);
+    prettyreal(12345678901223);
+    prettyreal(-12345678901223);
+    prettyreal(.02);
+    prettyreal(.002);
+    prettyreal(.0002);
+    prettyreal(-.0002);
+    prettyreal(.00002);
+    prettyreal(-.00002);
+
+    prettyreal(1.0/0);
+    prettyreal(-1.0/0);
+    prettyreal(strtod("nan",NULL));
+    prettyreal(0.0/0);
+    prettyreal(-0.0/0);
+
+    prettyreal(DBL_EPSILON);
+}
diff --git a/llt/unittest.c.1 b/llt/unittest.c.1
new file mode 100644
index 0000000..3470a16
--- /dev/null
+++ b/llt/unittest.c.1
@@ -0,0 +1,81 @@
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <wchar.h>
+#include "llt.h"
+
+int main()
+{
+    llt_init();
+
+    test_dblprint();
+    test_operators();
+
+    return 0;
+}
+
+static void prettycplx(double r, double i)
+{
+    char str[64];
+    snprint_cplx(str, sizeof(str), r, i, 0, 16, 3, 10, 1);
+    fputs(str, stdout);
+    fputc('\n', stdout);
+}
+
+static void prettyreal(double r)
+{
+    char str[64];
+    snprint_real(str, sizeof(str), r, 0, 16, 3, 10);
+    fputs(str, stdout);
+    fputc('\n', stdout);
+}
+
+void test_dblprint()
+{
+    char str[64];
+
+    dbl_tolerance(1e-12);
+
+    prettycplx(0,0);
+    prettycplx(1,0);
+    prettycplx(0,1);
+    prettycplx(1,1);
+    prettycplx(-1,0);
+    prettycplx(0,-1);
+    prettycplx(1,-1);
+    prettycplx(-1,1);
+    prettycplx(-1,-1);
+    prettycplx(2,0);
+    prettycplx(0,2);
+    prettycplx(2,2);
+    prettycplx(-2,0);
+    prettycplx(0,-2);
+    prettycplx(2,-2);
+    prettycplx(-2,2);
+    prettycplx(-2,-2);
+
+    prettyreal(1.5);
+    prettyreal(1.1);
+    prettyreal(1.1e-100);
+    prettyreal(1.1e20);
+    prettyreal(123456789);
+    prettyreal(1234567890);
+    prettyreal(12345678901);
+    prettyreal(-12345678901);
+    prettyreal(12345678901223);
+    prettyreal(-12345678901223);
+    prettyreal(.02);
+    prettyreal(.002);
+    prettyreal(.0002);
+    prettyreal(-.0002);
+    prettyreal(.00002);
+    prettyreal(-.00002);
+
+    prettyreal(1.0/0);
+    prettyreal(-1.0/0);
+    prettyreal(strtod("nan",NULL));
+    prettyreal(0.0/0);
+    prettyreal(-0.0/0);
+
+    prettyreal(DBL_EPSILON);
+}
diff --git a/llt/utf8.c b/llt/utf8.c
new file mode 100644
index 0000000..78c1d99
--- /dev/null
+++ b/llt/utf8.c
@@ -0,0 +1,728 @@
+/*
+  Basic UTF-8 manipulation routines
+  by Jeff Bezanson
+  placed in the public domain Fall 2005
+
+  This code is designed to provide the utilities you need to manipulate
+  UTF-8 as an internal string encoding. These functions do not perform the
+  error checking normally needed when handling UTF-8 data, so if you happen
+  to be from the Unicode Consortium you will want to flay me alive.
+  I do this because error checking can be performed at the boundaries (I/O),
+  with these routines reserved for higher performance on data known to be
+  valid.
+  A UTF-8 validation routine is included.
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <wchar.h>
+#include <wctype.h>
+#ifdef WIN32
+#include <malloc.h>
+#define snprintf _snprintf
+#else
+#include <alloca.h>
+#endif
+#include <assert.h>
+
+#include "utf8.h"
+
+static const u_int32_t offsetsFromUTF8[6] = {
+    0x00000000UL, 0x00003080UL, 0x000E2080UL,
+    0x03C82080UL, 0xFA082080UL, 0x82082080UL
+};
+
+static const char trailingBytesForUTF8[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/* returns length of next utf-8 sequence */
+size_t u8_seqlen(const char *s)
+{
+    return trailingBytesForUTF8[(unsigned int)(unsigned char)s[0]] + 1;
+}
+
+/* returns the # of bytes needed to encode a certain character
+   0 means the character cannot (or should not) be encoded. */
+size_t u8_charlen(u_int32_t ch)
+{
+    if (ch < 0x80)
+        return 1;
+    else if (ch < 0x800)
+        return 2;
+    else if (ch < 0x10000)
+        return 3;
+    else if (ch < 0x110000)
+        return 4;
+    return 0;
+}
+
+size_t u8_codingsize(u_int32_t *wcstr, size_t n)
+{
+    size_t i, c=0;
+
+    for(i=0; i < n; i++)
+        c += u8_charlen(wcstr[i]);
+    return c;
+}
+
+/* conversions without error checking
+   only works for valid UTF-8, i.e. no 5- or 6-byte sequences
+   srcsz = source size in bytes
+   sz = dest size in # of wide characters
+
+   returns # characters converted
+   if sz = srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
+*/
+size_t u8_toucs(u_int32_t *dest, size_t sz, const char *src, size_t srcsz)
+{
+    u_int32_t ch;
+    const char *src_end = src + srcsz;
+    size_t nb;
+    size_t i=0;
+
+    if (sz == 0 || srcsz == 0)
+        return 0;
+
+    while (i < sz) {
+        nb = trailingBytesForUTF8[(unsigned char)*src];
+        if (src + nb >= src_end)
+            break;
+        ch = 0;
+        switch (nb) {
+            /* these fall through deliberately */
+        case 3: ch += (unsigned char)*src++; ch <<= 6;
+        case 2: ch += (unsigned char)*src++; ch <<= 6;
+        case 1: ch += (unsigned char)*src++; ch <<= 6;
+        case 0: ch += (unsigned char)*src++;
+        }
+        ch -= offsetsFromUTF8[nb];
+        dest[i++] = ch;
+    }
+    return i;
+}
+
+/* srcsz = number of source characters
+   sz = size of dest buffer in bytes
+
+   returns # bytes stored in dest
+   the destination string will never be bigger than the source string.
+*/
+size_t u8_toutf8(char *dest, size_t sz, const u_int32_t *src, size_t srcsz)
+{
+    u_int32_t ch;
+    size_t i = 0;
+    char *dest0 = dest;
+    char *dest_end = dest + sz;
+
+    while (i < srcsz) {
+        ch = src[i];
+        if (ch < 0x80) {
+            if (dest >= dest_end)
+                break;
+            *dest++ = (char)ch;
+        }
+        else if (ch < 0x800) {
+            if (dest >= dest_end-1)
+                break;
+            *dest++ = (ch>>6) | 0xC0;
+            *dest++ = (ch & 0x3F) | 0x80;
+        }
+        else if (ch < 0x10000) {
+            if (dest >= dest_end-2)
+                break;
+            *dest++ = (ch>>12) | 0xE0;
+            *dest++ = ((ch>>6) & 0x3F) | 0x80;
+            *dest++ = (ch & 0x3F) | 0x80;
+        }
+        else if (ch < 0x110000) {
+            if (dest >= dest_end-3)
+                break;
+            *dest++ = (ch>>18) | 0xF0;
+            *dest++ = ((ch>>12) & 0x3F) | 0x80;
+            *dest++ = ((ch>>6) & 0x3F) | 0x80;
+            *dest++ = (ch & 0x3F) | 0x80;
+        }
+        i++;
+    }
+    return (dest-dest0);
+}
+
+size_t u8_wc_toutf8(char *dest, u_int32_t ch)
+{
+    if (ch < 0x80) {
+        dest[0] = (char)ch;
+        return 1;
+    }
+    if (ch < 0x800) {
+        dest[0] = (ch>>6) | 0xC0;
+        dest[1] = (ch & 0x3F) | 0x80;
+        return 2;
+    }
+    if (ch < 0x10000) {
+        dest[0] = (ch>>12) | 0xE0;
+        dest[1] = ((ch>>6) & 0x3F) | 0x80;
+        dest[2] = (ch & 0x3F) | 0x80;
+        return 3;
+    }
+    if (ch < 0x110000) {
+        dest[0] = (ch>>18) | 0xF0;
+        dest[1] = ((ch>>12) & 0x3F) | 0x80;
+        dest[2] = ((ch>>6) & 0x3F) | 0x80;
+        dest[3] = (ch & 0x3F) | 0x80;
+        return 4;
+    }
+    return 0;
+}
+
+/* charnum => byte offset */
+size_t u8_offset(const char *s, size_t charnum)
+{
+    size_t i=0;
+
+    while (charnum > 0) {
+        if (s[i++] & 0x80) {
+            (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
+        }
+        charnum--;
+    }
+    return i;
+}
+
+/* byte offset => charnum */
+size_t u8_charnum(const char *s, size_t offset)
+{
+    size_t charnum = 0, i=0;
+
+    while (i < offset) {
+        if (s[i++] & 0x80) {
+            (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
+        }
+        charnum++;
+    }
+    return charnum;
+}
+
+/* number of characters in NUL-terminated string */
+size_t u8_strlen(const char *s)
+{
+    size_t count = 0;
+    size_t i = 0, lasti;
+
+    while (1) {
+        lasti = i;
+        while (s[i] > 0)
+            i++;
+        count += (i-lasti);
+        if (s[i++]==0) break;
+        (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
+        count++;
+    }
+    return count;
+}
+
+size_t u8_strwidth(const char *s)
+{
+    u_int32_t ch;
+    size_t nb, tot=0;
+    int w;
+    signed char sc;
+
+    while ((sc = (signed char)*s) != 0) {
+        if (sc >= 0) {
+            s++;
+            if (sc) tot++;
+        }
+        else {
+            nb = trailingBytesForUTF8[(unsigned char)sc];
+            ch = 0;
+            switch (nb) {
+                /* these fall through deliberately */
+            case 3: ch += (unsigned char)*s++; ch <<= 6;
+            case 2: ch += (unsigned char)*s++; ch <<= 6;
+            case 1: ch += (unsigned char)*s++; ch <<= 6;
+            case 0: ch += (unsigned char)*s++;
+            }
+            ch -= offsetsFromUTF8[nb];
+            w = wcwidth(ch);
+            if (w > 0) tot += w;
+        }
+    }
+    return tot;
+}
+
+/* reads the next utf-8 sequence out of a string, updating an index */
+u_int32_t u8_nextchar(const char *s, size_t *i)
+{
+    u_int32_t ch = 0;
+    size_t sz = 0;
+
+    do {
+        ch <<= 6;
+        ch += (unsigned char)s[(*i)];
+        sz++;
+    } while (s[*i] && (++(*i)) && !isutf(s[*i]));
+    ch -= offsetsFromUTF8[sz-1];
+
+    return ch;
+}
+
+/* next character without NUL character terminator */
+u_int32_t u8_nextmemchar(const char *s, size_t *i)
+{
+    u_int32_t ch = 0;
+    size_t sz = 0;
+
+    do {
+        ch <<= 6;
+        ch += (unsigned char)s[(*i)++];
+        sz++;
+    } while (!isutf(s[*i]));
+    ch -= offsetsFromUTF8[sz-1];
+
+    return ch;
+}
+
+void u8_inc(const char *s, size_t *i)
+{
+    (void)(isutf(s[++(*i)]) || isutf(s[++(*i)]) || isutf(s[++(*i)]) || ++(*i));
+}
+
+void u8_dec(const char *s, size_t *i)
+{
+    (void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) || isutf(s[--(*i)]) || --(*i));
+}
+
+int octal_digit(char c)
+{
+    return (c >= '0' && c <= '7');
+}
+
+int hex_digit(char c)
+{
+    return ((c >= '0' && c <= '9') ||
+            (c >= 'A' && c <= 'F') ||
+            (c >= 'a' && c <= 'f'));
+}
+
+/* assumes that src points to the character after a backslash
+   returns number of input characters processed */
+int u8_read_escape_sequence(const char *str, u_int32_t *dest)
+{
+    u_int32_t ch;
+    char digs[9]="\0\0\0\0\0\0\0\0\0";
+    int dno=0, i=1;
+
+    ch = (u_int32_t)str[0];    /* take literal character */
+    if (str[0] == 'n')
+        ch = L'\n';
+    else if (str[0] == 't')
+        ch = L'\t';
+    else if (str[0] == 'r')
+        ch = L'\r';
+    else if (str[0] == 'b')
+        ch = L'\b';
+    else if (str[0] == 'f')
+        ch = L'\f';
+    else if (str[0] == 'v')
+        ch = L'\v';
+    else if (str[0] == 'a')
+        ch = L'\a';
+    else if (octal_digit(str[0])) {
+        i = 0;
+        do {
+            digs[dno++] = str[i++];
+        } while (octal_digit(str[i]) && dno < 3);
+        ch = strtol(digs, NULL, 8);
+    }
+    else if (str[0] == 'x') {
+        while (hex_digit(str[i]) && dno < 2) {
+            digs[dno++] = str[i++];
+        }
+        if (dno > 0)
+            ch = strtol(digs, NULL, 16);
+    }
+    else if (str[0] == 'u') {
+        while (hex_digit(str[i]) && dno < 4) {
+            digs[dno++] = str[i++];
+        }
+        if (dno > 0)
+            ch = strtol(digs, NULL, 16);
+    }
+    else if (str[0] == 'U') {
+        while (hex_digit(str[i]) && dno < 8) {
+            digs[dno++] = str[i++];
+        }
+        if (dno > 0)
+            ch = strtol(digs, NULL, 16);
+    }
+    *dest = ch;
+
+    return i;
+}
+
+/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
+   example: u8_unescape(mybuf, 256, "hello\\u220e")
+   note the double backslash is needed if called on a C string literal */
+size_t u8_unescape(char *buf, size_t sz, const char *src)
+{
+    size_t c=0, amt;
+    u_int32_t ch;
+    char temp[4];
+
+    while (*src && c < sz) {
+        if (*src == '\\') {
+            src++;
+            amt = u8_read_escape_sequence(src, &ch);
+        }
+        else {
+            ch = (u_int32_t)*src;
+            amt = 1;
+        }
+        src += amt;
+        amt = u8_wc_toutf8(temp, ch);
+        if (amt > sz-c)
+            break;
+        memcpy(&buf[c], temp, amt);
+        c += amt;
+    }
+    if (c < sz)
+        buf[c] = '\0';
+    return c;
+}
+
+static inline int buf_put2c(char *buf, const char *src)
+{
+    buf[0] = src[0];
+    buf[1] = src[1];
+    buf[2] = '\0';
+    return 2;
+}
+
+int u8_escape_wchar(char *buf, size_t sz, u_int32_t ch)
+{
+    assert(sz > 2);
+    if (ch == L'\n')
+        return buf_put2c(buf, "\\n");
+    else if (ch == L'\t')
+        return buf_put2c(buf, "\\t");
+    else if (ch == L'\r')
+        return buf_put2c(buf, "\\r");
+    else if (ch == L'\b')
+        return buf_put2c(buf, "\\b");
+    else if (ch == L'\f')
+        return buf_put2c(buf, "\\f");
+    else if (ch == L'\v')
+        return buf_put2c(buf, "\\v");
+    else if (ch == L'\a')
+        return buf_put2c(buf, "\\a");
+    else if (ch == L'\\')
+        return buf_put2c(buf, "\\\\");
+    else if (ch < 32 || ch == 0x7f)
+        return snprintf(buf, sz, "\\x%.2hhX", (unsigned char)ch);
+    else if (ch > 0xFFFF)
+        return snprintf(buf, sz, "\\U%.8X", (u_int32_t)ch);
+    else if (ch >= 0x80)
+        return snprintf(buf, sz, "\\u%.4hX", (unsigned short)ch);
+
+    buf[0] = (char)ch;
+    buf[1] = '\0';
+    return 1;
+}
+
+size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
+                 int escape_quotes, int ascii)
+{
+    size_t i = *pi, i0;
+    u_int32_t ch;
+    char *start = buf;
+    char *blim = start + sz-11;
+    assert(sz > 11);
+
+    while (i<end && buf<blim) {
+        // sz-11: leaves room for longest escape sequence
+        if (escape_quotes && src[i] == '"') {
+            buf += buf_put2c(buf, "\\\"");
+            i++;
+        }
+        else if (src[i] == '\\') {
+            buf += buf_put2c(buf, "\\\\");
+            i++;
+        }
+        else {
+            i0 = i;
+            ch = u8_nextmemchar(src, &i);
+            if (ascii || !iswprint((wint_t)ch)) {
+                buf += u8_escape_wchar(buf, sz - (buf-start), ch);
+            }
+            else {
+                i = i0;
+                do {
+                    *buf++ = src[i++];
+                } while (!isutf(src[i]));
+            }
+        }
+    }
+    *buf++ = '\0';
+    *pi = i;
+    return (buf-start);
+}
+
+char *u8_strchr(const char *s, u_int32_t ch, size_t *charn)
+{
+    size_t i = 0, lasti=0;
+    u_int32_t c;
+
+    *charn = 0;
+    while (s[i]) {
+        c = u8_nextchar(s, &i);
+        if (c == ch) {
+            /* it's const for us, but not necessarily the caller */
+            return (char*)&s[lasti];
+        }
+        lasti = i;
+        (*charn)++;
+    }
+    return NULL;
+}
+
+char *u8_memchr(const char *s, u_int32_t ch, size_t sz, size_t *charn)
+{
+    size_t i = 0, lasti=0;
+    u_int32_t c;
+    int csz;
+
+    *charn = 0;
+    while (i < sz) {
+        c = csz = 0;
+        do {
+            c <<= 6;
+            c += (unsigned char)s[i++];
+            csz++;
+        } while (i < sz && !isutf(s[i]));
+        c -= offsetsFromUTF8[csz-1];
+
+        if (c == ch) {
+            return (char*)&s[lasti];
+        }
+        lasti = i;
+        (*charn)++;
+    }
+    return NULL;
+}
+
+char *u8_memrchr(const char *s, u_int32_t ch, size_t sz)
+{
+    size_t i = sz-1, tempi=0;
+    u_int32_t c;
+
+    if (sz == 0) return NULL;
+
+    while (i && !isutf(s[i])) i--;
+
+    while (1) {
+        tempi = i;
+        c = u8_nextmemchar(s, &tempi);
+        if (c == ch) {
+            return (char*)&s[i];
+        }
+        if (i == 0)
+            break;
+        tempi = i;
+        u8_dec(s, &i);
+        if (i > tempi)
+            break;
+    }
+    return NULL;
+}
+
+int u8_is_locale_utf8(const char *locale)
+{
+    /* this code based on libutf8 */
+    const char* cp = locale;
+
+    for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++) {
+        if (*cp == '.') {
+            const char* encoding = ++cp;
+            for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++)
+                ;
+            if ((cp-encoding == 5 && !strncmp(encoding, "UTF-8", 5))
+                || (cp-encoding == 4 && !strncmp(encoding, "utf8", 4)))
+                return 1; /* it's UTF-8 */
+            break;
+        }
+    }
+    return 0;
+}
+
+size_t u8_vprintf(const char *fmt, va_list ap)
+{
+    size_t cnt, sz=0, nc;
+    char *buf;
+    u_int32_t *wcs;
+
+    sz = 512;
+    buf = (char*)alloca(sz);
+ try_print:
+    cnt = vsnprintf(buf, sz, fmt, ap);
+    if (cnt >= sz) {
+        buf = (char*)alloca(cnt - sz + 1);
+        sz = cnt + 1;
+        goto try_print;
+    }
+    wcs = (u_int32_t*)alloca((cnt+1) * sizeof(u_int32_t));
+    nc = u8_toucs(wcs, cnt+1, buf, cnt);
+    wcs[nc] = 0;
+    printf("%ls", (wchar_t*)wcs);
+    return nc;
+}
+
+size_t u8_printf(const char *fmt, ...)
+{
+    size_t cnt;
+    va_list args;
+
+    va_start(args, fmt);
+
+    cnt = u8_vprintf(fmt, args);
+
+    va_end(args);
+    return cnt;
+}
+
+/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
+
+   length is in bytes, since without knowing whether the string is valid
+   it's hard to know how many characters there are! */
+int u8_isvalid(const char *str, int length)
+{
+    const unsigned char *p, *pend = (unsigned char*)str + length;
+    unsigned char c;
+    int ab;
+
+    for (p = (unsigned char*)str; p < pend; p++) {
+        c = *p;
+        if (c < 128)
+            continue;
+        if ((c & 0xc0) != 0xc0)
+            return 0;
+        ab = trailingBytesForUTF8[c];
+        if (length < ab)
+            return 0;
+        length -= ab;
+
+        p++;
+        /* Check top bits in the second byte */
+        if ((*p & 0xc0) != 0x80)
+            return 0;
+
+        /* Check for overlong sequences for each different length */
+        switch (ab) {
+            /* Check for xx00 000x */
+        case 1:
+            if ((c & 0x3e) == 0) return 0;
+            continue;   /* We know there aren't any more bytes to check */
+
+            /* Check for 1110 0000, xx0x xxxx */
+        case 2:
+            if (c == 0xe0 && (*p & 0x20) == 0) return 0;
+            break;
+
+            /* Check for 1111 0000, xx00 xxxx */
+        case 3:
+            if (c == 0xf0 && (*p & 0x30) == 0) return 0;
+            break;
+
+            /* Check for 1111 1000, xx00 0xxx */
+        case 4:
+            if (c == 0xf8 && (*p & 0x38) == 0) return 0;
+            break;
+
+            /* Check for leading 0xfe or 0xff,
+               and then for 1111 1100, xx00 00xx */
+        case 5:
+            if (c == 0xfe || c == 0xff ||
+                (c == 0xfc && (*p & 0x3c) == 0)) return 0;
+            break;
+        }
+
+        /* Check for valid bytes after the 2nd, if any; all must start 10 */
+        while (--ab > 0) {
+            if ((*(++p) & 0xc0) != 0x80) return 0;
+        }
+    }
+
+    return 1;
+}
+
+int u8_reverse(char *dest, char * src, size_t len)
+{
+    size_t si=0, di=len;
+    unsigned char c;
+
+    dest[di] = '\0';
+    while (si < len) {
+        c = (unsigned char)src[si];
+        if ((~c) & 0x80) {
+            di--;
+            dest[di] = c;
+            si++;
+        }
+        else {
+            switch (c>>4) {
+            case 0xC:
+            case 0xD:
+                di -= 2;
+                *((int16_t*)&dest[di]) = *((int16_t*)&src[si]);
+                si += 2;
+                break;
+            case 0xE:
+                di -= 3;
+                dest[di] = src[si];
+                *((int16_t*)&dest[di+1]) = *((int16_t*)&src[si+1]);
+                si += 3;
+                break;
+            case 0xF:
+                di -= 4;
+                *((int32_t*)&dest[di]) = *((int32_t*)&src[si]);
+                si += 4;
+                break;
+            default:
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
+u_int32_t u8_fgetc(FILE *f)
+{
+    int amt=0, sz, c;
+    u_int32_t ch=0;
+    char c0;
+
+    c = fgetc(f);
+    if (c == EOF)
+        return UEOF;
+    ch = (u_int32_t)c;
+    c0 = (char)ch;
+    amt = sz = u8_seqlen(&c0);
+    while (--amt) {
+        ch <<= 6;
+        c = fgetc(f);
+        if (c == EOF)
+            return UEOF;
+        ch += (u_int32_t)c;
+    }
+    ch -= offsetsFromUTF8[sz-1];
+
+    return ch;
+}
diff --git a/llt/utf8.h b/llt/utf8.h
new file mode 100644
index 0000000..656483c
--- /dev/null
+++ b/llt/utf8.h
@@ -0,0 +1,128 @@
+#ifndef __UTF8_H_
+#define __UTF8_H_
+
+#ifndef MACOSX
+#if !defined(__DTYPES_H_) && !defined(_SYS_TYPES_H)
+typedef char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef unsigned char u_int8_t;
+typedef unsigned short u_int16_t;
+typedef unsigned int u_int32_t;
+typedef unsigned long long u_int64_t;
+#endif
+#endif
+
+/* is c the start of a utf8 sequence? */
+#define isutf(c) (((c)&0xC0)!=0x80)
+
+#define UEOF ((u_int32_t)-1)
+
+/* convert UTF-8 data to wide character */
+size_t u8_toucs(u_int32_t *dest, size_t sz, const char *src, size_t srcsz);
+
+/* the opposite conversion */
+size_t u8_toutf8(char *dest, size_t sz, const u_int32_t *src, size_t srcsz);
+
+/* single character to UTF-8, returns # bytes written */
+size_t u8_wc_toutf8(char *dest, u_int32_t ch);
+
+/* character number to byte offset */
+size_t u8_offset(const char *str, size_t charnum);
+
+/* byte offset to character number */
+size_t u8_charnum(const char *s, size_t offset);
+
+/* return next character, updating an index variable */
+u_int32_t u8_nextchar(const char *s, size_t *i);
+
+/* next character without NUL character terminator */
+u_int32_t u8_nextmemchar(const char *s, size_t *i);
+
+/* move to next character */
+void u8_inc(const char *s, size_t *i);
+
+/* move to previous character */
+void u8_dec(const char *s, size_t *i);
+
+/* returns length of next utf-8 sequence */
+size_t u8_seqlen(const char *s);
+
+/* returns the # of bytes needed to encode a certain character */
+size_t u8_charlen(u_int32_t ch);
+
+/* computes the # of bytes needed to encode a WC string as UTF-8 */
+size_t u8_codingsize(u_int32_t *wcstr, size_t n);
+
+/* assuming src points to the character after a backslash, read an
+   escape sequence, storing the result in dest and returning the number of
+   input characters processed */
+int u8_read_escape_sequence(const char *src, u_int32_t *dest);
+
+/* given a wide character, convert it to an ASCII escape sequence stored in
+   buf, where buf is "sz" bytes. returns the number of characters output.
+   sz must be at least 3. */
+int u8_escape_wchar(char *buf, size_t sz, u_int32_t ch);
+
+/* convert a string "src" containing escape sequences to UTF-8 */
+size_t u8_unescape(char *buf, size_t sz, const char *src);
+
+/* convert UTF-8 "src" to escape sequences.
+
+   sz is buf size in bytes. must be at least 12.
+
+   if escape_quotes is nonzero, quote characters will be escaped.
+
+   if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
+
+   starts at src[*pi], updates *pi to point to the first unprocessed
+   byte of the input.
+
+   end is one more than the last allowable value of *pi.
+
+   returns number of bytes placed in buf, including a NUL terminator.
+*/
+size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
+                 int escape_quotes, int ascii);
+
+/* utility predicates used by the above */
+int octal_digit(char c);
+int hex_digit(char c);
+
+/* return a pointer to the first occurrence of ch in s, or NULL if not
+   found. character index of found character returned in *charn. */
+char *u8_strchr(const char *s, u_int32_t ch, size_t *charn);
+
+/* same as the above, but searches a buffer of a given size instead of
+   a NUL-terminated string. */
+char *u8_memchr(const char *s, u_int32_t ch, size_t sz, size_t *charn);
+
+char *u8_memrchr(const char *s, u_int32_t ch, size_t sz);
+
+/* count the number of characters in a UTF-8 string */
+size_t u8_strlen(const char *s);
+
+/* number of columns occupied by a string */
+size_t u8_strwidth(const char *s);
+
+int u8_is_locale_utf8(const char *locale);
+
+/* printf where the format string and arguments may be in UTF-8.
+   you can avoid this function and just use ordinary printf() if the current
+   locale is UTF-8. */
+size_t u8_vprintf(const char *fmt, va_list ap);
+size_t u8_printf(const char *fmt, ...);
+
+/* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
+int u8_isvalid(const char *str, int length);
+
+/* reverse a UTF-8 string. len is length in bytes. dest and src must both
+   be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */
+int u8_reverse(char *dest, char *src, size_t len);
+
+#include <stdio.h>  // temporary, until u8_fgetc is gone
+/* read a UTF-8 sequence from a stream and return a wide character or UEOF */
+u_int32_t u8_fgetc(FILE *f);
+
+#endif
diff --git a/llt/utils.c b/llt/utils.c
new file mode 100644
index 0000000..e6b1847
--- /dev/null
+++ b/llt/utils.c
@@ -0,0 +1,311 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <alloca.h>
+#include "dtypes.h"
+#include "utils.h"
+
+void memswap(char *a, char *b, size_t sz)
+{
+    int8_t i8;
+    int32_t i32;
+    int32_t *a4, *b4;
+
+    if (sz < 4) {
+        while (sz--) {
+            i8 = *a;
+            *a++ = *b;
+            *b++ = i8;
+        }
+    }
+    else {
+        while (sz & 0x3) {
+            i8 = *a;
+            *a++ = *b;
+            *b++ = i8;
+            sz--;
+        }
+        a4 = (int32_t*)a;
+        b4 = (int32_t*)b;
+        sz >>= 2;
+        while (sz--) {
+            i32 = *a4;
+            *a4++ = *b4;
+            *b4++ = i32;
+        }
+    }
+}
+
+void memreverse(char *a, size_t n, size_t elsz)
+{
+    int64_t i64, *pi64;
+    int32_t i32, *pi32;
+    int16_t i16, *pi16;
+    int8_t i8;
+    size_t i;
+    char *temp;
+    size_t eli, tot;
+
+    if (n==0 || elsz==0) return;
+    switch(elsz) {
+    case 16:
+        pi64 = (int64_t*)a;
+        for(i=0; i < n/2; i++) {
+            i64 = pi64[2*i];
+            pi64[2*i] = pi64[2*(n-i-1)];
+            pi64[2*(n-i-1)] = i64;
+
+            i64 = pi64[2*i+1];
+            pi64[2*i+1] = pi64[2*(n-i-1)+1];
+            pi64[2*(n-i-1)+1] = i64;
+        }
+        break;
+    case 8:
+        pi64 = (int64_t*)a;
+        for(i=0; i < n/2; i++) {
+            i64 = pi64[i];
+            pi64[i] = pi64[n-i-1];
+            pi64[n-i-1] = i64;
+        }
+        break;
+    case 4:
+        pi32 = (int32_t*)a;
+        for(i=0; i < n/2; i++) {
+            i32 = pi32[i];
+            pi32[i] = pi32[n-i-1];
+            pi32[n-i-1] = i32;
+        }
+        break;
+    case 2:
+        pi16 = (int16_t*)a;
+        for(i=0; i < n/2; i++) {
+            i16 = pi16[i];
+            pi16[i] = pi16[n-i-1];
+            pi16[n-i-1] = i16;
+        }
+        break;
+    case 1:
+        for(i=0; i < n/2; i++) {
+            i8 = a[i];
+            a[i] = a[n-i-1];
+            a[n-i-1] = i8;
+        }
+        break;
+    default:
+        tot = n*elsz;
+        if (elsz < 4097)
+            temp = alloca(elsz);
+        else
+            temp = malloc(elsz);
+
+        if (temp != NULL) {
+            for(i=0, eli=0; i < n/2; i++, eli+=elsz) {
+                memcpy(temp, &a[eli], elsz);
+                memcpy(&a[eli], &a[tot-eli-elsz], elsz);
+                memcpy(&a[tot-eli-elsz], temp, elsz);
+            }
+
+            if (elsz >= 4097)
+                free(temp);
+        }
+        break;
+    }
+}
+
+void memreverse_to(char *dest, char *a, size_t n, size_t elsz)
+{
+    int64_t *pi64, *di64;
+    int32_t *pi32, *di32;
+    int16_t *pi16, *di16;
+    size_t i;
+    size_t eli, tot;
+    if (n==0 || elsz==0) return;
+    switch(elsz) {
+    case 16:
+        pi64 = (int64_t*)a;
+        di64 = (int64_t*)dest;
+        for(i=0; i < n/2; i++) {
+            di64[2*i] = pi64[2*(n-i-1)];
+            di64[2*(n-i-1)] = pi64[2*i];
+
+            di64[2*i+1] = pi64[2*(n-i-1)+1];
+            di64[2*(n-i-1)+1] = pi64[2*i+1];
+        }
+        if (n&0x1) {
+            di64[2*i] = pi64[2*i];
+            di64[2*i+1] = pi64[2*i+1];
+        }
+        break;
+    case 8:
+        pi64 = (int64_t*)a;
+        di64 = (int64_t*)dest;
+        for(i=0; i < n/2; i++) {
+            di64[i] = pi64[n-i-1];
+            di64[n-i-1] = pi64[i];
+        }
+        if (n&0x1)
+            di64[i] = pi64[i];
+        break;
+    case 4:
+        pi32 = (int32_t*)a;
+        di32 = (int32_t*)dest;
+        for(i=0; i < n/2; i++) {
+            di32[i] = pi32[n-i-1];
+            di32[n-i-1] = pi32[i];
+        }
+        if (n&0x1)
+            di32[i] = pi32[i];
+        break;
+    case 2:
+        pi16 = (int16_t*)a;
+        di16 = (int16_t*)dest;
+        for(i=0; i < n/2; i++) {
+            di16[i] = pi16[n-i-1];
+            di16[n-i-1] = pi16[i];
+        }
+        if (n&0x1)
+            di16[i] = pi16[i];
+        break;
+    case 1:
+        for(i=0; i < n/2; i++) {
+            dest[i] = a[n-i-1];
+            dest[n-i-1] = a[i];
+        }
+        if (n&0x1)
+            dest[i] = a[i];
+        break;
+    default:
+        tot = n*elsz;
+        for(i=0, eli=0; i < n/2; i++, eli+=elsz) {
+            memcpy(&dest[eli], &a[tot - eli - elsz], elsz);
+            memcpy(&dest[tot - eli - elsz], &a[eli], elsz);
+        }
+        if (n&0x1)
+            memcpy(&dest[eli], &a[eli], elsz);
+        break;
+    }
+}
+
+void bswap_buffer(byte_t *data, size_t sz, size_t npts)
+{
+    size_t i, b;
+    byte_t *el;
+    byte_t temp;
+
+    if (sz <= 1)
+        return;
+
+    switch (sz) {
+    case 8:
+        for(i=0; i < npts; i++) {
+            ((u_int64_t*)data)[i] = bswap_64(((u_int64_t*)data)[i]);
+        }
+        break;
+    case 4:
+        for(i=0; i < npts; i++) {
+            ((u_int32_t*)data)[i] = bswap_32(((u_int32_t*)data)[i]);
+        }
+        break;
+    case 2:
+        for(i=0; i < npts; i++) {
+            ((u_int16_t*)data)[i] = bswap_16(((u_int16_t*)data)[i]);
+        }
+        break;
+    default:
+        for(i=0; i < sz * npts; i += sz) {
+            el = data + i;
+            for(b=0; b < sz/2; b++) {
+                temp = el[b];
+                el[b] = el[sz-b-1];
+                el[sz-b-1] = temp;
+            }
+        }
+    }
+}
+
+void bswap(byte_t *s, size_t n)
+{
+    unsigned int i;
+    char temp;
+
+    switch (n) {
+    case 8:
+        *(u_int64_t*)s = bswap_64(*(u_int64_t*)s); break;
+    case 4:
+        *(u_int32_t*)s = bswap_32(*(u_int32_t*)s); break;
+    case 2:
+        *(u_int16_t*)s = bswap_16(*(u_int16_t*)s); break;
+    case 1:
+        break;
+    default:
+        for(i=0; i < n/2; i++) {
+            temp = s[i];
+            s[i] = s[n-i-1];
+            s[n-i-1] = temp;
+        }
+    }
+}
+
+void bswap_to(byte_t *dest, byte_t *src, size_t n)
+{
+    unsigned int i;
+
+    switch (n) {
+    case 8:
+        *(u_int64_t*)dest = bswap_64(*(u_int64_t*)src); break;
+    case 4:
+        *(u_int32_t*)dest = bswap_32(*(u_int32_t*)src); break;
+    case 2:
+        *(u_int16_t*)dest = bswap_16(*(u_int16_t*)src); break;
+    case 1:
+        break;
+    default:
+        for(i=0; i < n; i++) {
+            dest[i] = src[n-i-1];
+        }
+    }
+}
+
+#define ALIGNED_TO_ACTUAL(p) (((char*)p) - ((long*)p)[-1])
+
+static void *aligned_ptr(char *ptr, size_t align_size)
+{
+    char *ptr2, *aligned_ptr;
+
+    ptr2 = ptr + sizeof(long);
+    aligned_ptr = (char*)ALIGN(((uptrint_t)ptr2), align_size);
+
+    ((long*)aligned_ptr)[-1] = (long)(aligned_ptr - ptr);
+
+    return aligned_ptr;
+}
+
+/* align_size has to be a power of two */
+void *malloc_aligned(size_t size, size_t align_size)
+{
+    char *ptr;
+
+    ptr = (char*)malloc(size + align_size-1 + sizeof(long));
+    if (ptr == NULL)
+        return NULL;
+
+    return aligned_ptr(ptr, align_size);
+}
+
+void free_aligned(void *ptr)
+{
+    free(ALIGNED_TO_ACTUAL(ptr));
+}
+
+void *realloc_aligned(void *ptr, size_t size, size_t align_size)
+{
+    char *pnew;
+
+    if (ptr != NULL)
+        ptr = ALIGNED_TO_ACTUAL(ptr);
+    pnew = realloc(ptr, size + align_size-1 + sizeof(long));
+    if (pnew == NULL)
+        return NULL;
+
+    return aligned_ptr(pnew, align_size);
+}
diff --git a/llt/utils.h b/llt/utils.h
new file mode 100644
index 0000000..26f8f53
--- /dev/null
+++ b/llt/utils.h
@@ -0,0 +1,154 @@
+#ifndef __UTILS_H_
+#define __UTILS_H_
+
+/* these functions byteswap any-size units --------------------- */
+void bswap(byte_t *s, size_t n);
+void bswap_to(byte_t *dest, byte_t *src, size_t n);
+void bswap_buffer(byte_t *data, size_t sz, size_t npts);
+/* ------------------------------------------------------------- */
+
+/* reverse the order of elements of any size, in place or out of place */
+/* n is the number of elements */
+void memreverse(char *a, size_t n, size_t elsz);
+void memreverse_to(char *dest, char *a, size_t n, size_t elsz);
+
+/* swap the contents of two buffers */
+void memswap(char *a, char *b, size_t sz);
+
+/* allocating aligned blocks ----------------------------------- */
+void *malloc_aligned(size_t size, size_t align_size);
+void free_aligned(void *ptr);
+void *realloc_aligned(void *ptr, size_t size, size_t align_size);
+/* ------------------------------------------------------------- */
+
+int dbl_equals(double a, double b);
+int flt_equals(float a, float b);
+void dbl_tolerance(double tol);
+void flt_tolerance(float tol);
+int double_exponent(double d);
+double double_mantissa(double d);
+int float_exponent(float f);
+float float_mantissa(float f);
+void snprint_real(char *s, size_t cnt, double r,
+                  int width,    // printf field width, or 0
+                  int dec,      // # decimal digits desired, recommend 16
+                  // # of zeros in .00...0x before using scientific notation
+                  // recommend 3-4 or so
+                  int max_digs_rt,
+                  // # of digits left of decimal before scientific notation
+                  // recommend 10
+                  int max_digs_lf);
+void snprint_cplx(char *s, size_t cnt, double re, double im,
+                  // args to pass on to snprint_real
+                  int width, int dec,
+                  int max_digs_rt, int max_digs_lf,
+                  // print spaces around sign in a+bi
+                  int spflag);
+
+extern double trunc(double x);
+
+STATIC_INLINE double fpart(double arg)
+{
+    return arg - trunc(arg);
+}
+
+#define ipart(x) trunc(x)
+
+numerictype_t effective_numerictype(double r);
+double conv_to_double(void *data, numerictype_t tag);
+void conv_from_double(void *data, double d, numerictype_t tag);
+int64_t conv_to_int64(void *data, numerictype_t tag);
+uint64_t conv_to_uint64(void *data, numerictype_t tag);
+int32_t conv_to_int32(void *data, numerictype_t tag);
+uint32_t conv_to_uint32(void *data, numerictype_t tag);
+#ifdef BITS64
+#define conv_to_long conv_to_int64
+#define conv_to_ulong conv_to_uint64
+#else
+#define conv_to_long conv_to_int32
+#define conv_to_ulong conv_to_uint32
+#endif
+int cmp_same_lt(void *a, void *b, numerictype_t tag);
+int cmp_same_eq(void *a, void *b, numerictype_t tag);
+int cmp_lt(void *a, numerictype_t atag, void *b, numerictype_t btag);
+int cmp_eq(void *a, numerictype_t atag, void *b, numerictype_t btag);
+
+#ifdef ARCH_X86_64
+#  define LEGACY_REGS "=Q"
+#else
+#  define LEGACY_REGS "=q"
+#endif
+
+#if !defined(__INTEL_COMPILER) && (defined(ARCH_X86) || defined(ARCH_X86_64))
+STATIC_INLINE u_int16_t ByteSwap16(u_int16_t x)
+{
+  __asm("xchgb %b0,%h0" :
+        LEGACY_REGS (x)	:
+        "0" (x));
+    return x;
+}
+#define bswap_16(x) ByteSwap16(x)
+
+STATIC_INLINE u_int32_t ByteSwap32(u_int32_t x)
+{
+#if __CPU__ > 386
+ __asm("bswap	%0":
+      "=r" (x)     :
+#else
+ __asm("xchgb	%b0,%h0\n"\
+      "	rorl	$16,%0\n"
+      "	xchgb	%b0,%h0":
+      LEGACY_REGS (x)		:
+#endif
+      "0" (x));
+  return x;
+}
+
+#define bswap_32(x) ByteSwap32(x)
+
+STATIC_INLINE u_int64_t ByteSwap64(u_int64_t x)
+{
+#ifdef ARCH_X86_64
+  __asm("bswap	%0":
+        "=r" (x)     :
+        "0" (x));
+  return x;
+#else
+  register union { __extension__ u_int64_t __ll;
+          u_int32_t __l[2]; } __x;
+  asm("xchgl	%0,%1":
+      "=r"(__x.__l[0]),"=r"(__x.__l[1]):
+      "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
+  return __x.__ll;
+#endif
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#else
+
+#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
+
+#ifdef __INTEL_COMPILER
+#define bswap_32(x) _bswap(x)
+#else
+#define bswap_32(x) \
+     ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) | \
+      (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
+#endif
+
+STATIC_INLINE u_int64_t ByteSwap64(u_int64_t x)
+{
+    union { 
+        u_int64_t ll;
+        u_int32_t l[2]; 
+    } w, r;
+    w.ll = x;
+    r.l[0] = bswap_32 (w.l[1]);
+    r.l[1] = bswap_32 (w.l[0]);
+    return r.ll;
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#endif
+
+#endif