/* bit vector primitives todo: * reverse * nreverse (- rotate left/right) * shl_to * not - shr_row, shl_row These routines are the back end supporting bit matrices. Many operations on bit matrices are slow (such as accessing or setting a single element!) but certain operations are privileged and lend themselves to extremely efficient implementation due to the bit-vector nature of machine integers. These are: done: & | $ ~ copy reverse fill sum prod todo: shift trans rowswap would be nice: channel interleave Important note: Out-of-place functions always assume dest and source have the same amount of space available. shr_to, shl_to, not_to, and reverse_to assume source and dest don't overlap and_to, or_to, and xor_to allow overlap. */ #include #include #include #include "dtypes.h" #include "bitvector.h" #ifdef WIN32 #include #define alloca _alloca #endif // greater than this # of words we use malloc instead of alloca #define MALLOC_CUTOFF 2000 u_int32_t *bitvector_resize(u_int32_t *b, u_int64_t n, int initzero) { u_int32_t *p; size_t sz = ((n+31)>>5) * 4; p = realloc(b, sz); if (p == NULL) return NULL; if (initzero) memset(p, 0, sz); return p; } u_int32_t *bitvector_new(u_int64_t n, int initzero) { return bitvector_resize(NULL, n, initzero); } size_t bitvector_nwords(u_int64_t nbits) { return ((nbits+31)>>5) * 4; } void bitvector_set(u_int32_t *b, u_int64_t n, u_int32_t c) { if (c) b[n>>5] |= (1<<(n&31)); else b[n>>5] &= ~(1<<(n&31)); } u_int32_t bitvector_get(u_int32_t *b, u_int64_t n) { return b[n>>5] & (1<<(n&31)); } u_int32_t bitreverse(u_int32_t x) { u_int32_t m; #ifdef __INTEL_COMPILER x = _bswap(x); #else x = (x >> 16) | (x << 16); m = 0xff00ff00; x = ((x & m) >> 8) | ((x & ~m) << 8); #endif m = 0xf0f0f0f0; x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc; x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa; x = ((x & m) >> 1) | ((x & ~m) << 1); return x; } // shift all bits in a long bit vector // n is # of int32s to consider, s is shift distance // 
// lowest bit-index is bit 0 of word 0
// TODO: handle boundary case of shift distance >= data size?
//
// In-place shift of the n-word vector b by s bits toward lower bit indices.
// Whole-word moves are done with memmove and the vacated top words are
// zeroed; the remaining 0..31 bit shift is applied word by word.
// NOTE(review): if (s>>5) >= n then "n -= i" wraps (the TODO above).
// NOTE(review): when s is a nonzero multiple of 32, s becomes 0 after
// "s &= 31" and the loop evaluates b[i+1]<<(32-s), i.e. a shift by 32,
// which C leaves undefined -- confirm and guard.
void bitvector_shr(u_int32_t *b, size_t n, u_int32_t s)
{
    u_int32_t i;
    if (s == 0 || n == 0) return;
    i = (s>>5);
    if (i) {
        n -= i;
        memmove(b, &b[i], n*4);
        memset(&b[n], 0, i*4);
        s &= 31;
    }
    for(i=0; i < n-1; i++) {
        b[i] = (b[i]>>s) | (b[i+1]<<(32-s));
    }
    b[i]>>=s;
}

// out-of-place version, good for re-aligning a strided submatrix to
// linear representation when a copy is needed
// assumes that dest has the same amount of space as source, even if it
// wouldn't have been necessary to hold the shifted bits
//
// s == 0 degenerates to a plain memcpy of n words.
// NOTE(review): same caveats as bitvector_shr for (s>>5) >= n and for s
// being a nonzero multiple of 32.
void bitvector_shr_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s)
{
    u_int32_t i, j;
    if (n == 0) return;
    if (s == 0) {
        memcpy(dest, b, n*4);
        return;
    }
    j = (s>>5);
    if (j) {
        n -= j;
        memset(&dest[n], 0, j*4);
        s &= 31;
        b = &b[j];
    }
    for(i=0; i < n-1; i++) {
        dest[i] = (b[i]>>s) | (b[i+1]<<(32-s));
    }
    dest[i] = b[i]>>s;
}

// In-place shift of the n-word vector b by s bits toward higher bit
// indices. Whole-word moves use memmove and the vacated low words are zeroed.
// NOTE(review): text is missing from the first statement of the loop body in
// this copy of the file (everything between "<" and ">" was lost). From the
// declared locals it presumably read
//     temp = (b[i]<<s) | scrap;  scrap = b[i]>>(32-s);
// -- restore from version control.
void bitvector_shl(u_int32_t *b, size_t n, u_int32_t s)
{
    u_int32_t i, scrap=0, temp;
    if (s == 0 || n == 0) return;
    i = (s>>5);
    if (i) {
        n -= i;
        memmove(&b[i], b, n*4);
        memset(b, 0, i*4);
        s &= 31;
        b = &b[i];
    }
    for(i=0; i < n; i++) {
        temp = (b[i]<>(32-s);
        b[i] = temp;
    }
}

// if dest has more space than source, set scrap to true to keep the
// top bits that would otherwise be shifted out
//
// When scrap is set, one extra word (dest[i] after the loop) is written.
// NOTE(review): text is missing from the loop body here as well; from the
// locals it presumably read
//     dest[i] = (b[i]<<s) | sc;  sc = b[i]>>(32-s);
// -- restore from version control.
void bitvector_shl_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s,
                      bool_t scrap)
{
    u_int32_t i, j, sc=0;
    if (n == 0) return;
    if (s == 0) {
        memcpy(dest, b, n*4);
        return;
    }
    j = (s>>5);
    if (j) {
        n -= j;
        memset(dest, 0, j*4);
        s &= 31;
        dest = &dest[j];
    }
    for(i=0; i < n; i++) {
        dest[i] = (b[i]<>(32-s);
    }
    if (scrap)
        dest[i] = sc;
}

// set nbits to c, starting at given bit offset
// assumes offs < 32
//
// NOTE(review): from here to the "#define BV_COPY" line this copy of the
// file is badly damaged. Every "<>" below marks a place where text was lost.
// What survives is the start of bitvector_fill, a fragment of at least one
// other routine, and the tail of the macro that is later instantiated as
// BITVECTOR_COPY_OP(name, OP); the macro's "#define" line and the function
// header inside it are gone. Nothing in this region can be compiled as is;
// restore it from version control rather than repairing it by hand.
void bitvector_fill(u_int32_t *b, u_int32_t offs, u_int32_t c, u_int32_t nbits)
{
    index_t i;
    u_int32_t nw, tail;
    u_int32_t mask;

    if (nbits == 0) return;
    nw = (offs+nbits+31)>>5;

    if (nw == 1) {
        mask = (lomask(nbits)<>5;

    if (nw == 1) {
        mask = (lomask(nbits)<>5; \
    \
    if (soffs == doffs) { \
        if (nw == 1) { \
            mask = (lomask(nbits)<>5; \
    if (soffs < doffs) { \
        s = doffs-soffs; \
        if (nw == 1) { \
            mask = (lomask(nbits)<>(32-s); \
        for(i=1; i < snw-1; i++) { \
            dest[i] = (OP(src[i])<>(32-s); \
        } \
        tail = (doffs+nbits)&31; \
        if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \
        if (snw == nw) { \
            dest[i] = (dest[i] & ~mask) | (((OP(src[i])<>(32-s); \
            i++; \
            dest[i] = (dest[i] & ~mask) | (scrap & mask); \
        } \
        } \
    } \
    else { \
        s = soffs-doffs; \
        if (snw == 1) { \
            mask = (lomask(nbits)<>s) & mask); \
            return; \
        } \
        if (nw == 1) { \
            mask = (lomask(nbits)<>s)|(OP(src[1])<<(32-s))) & mask); \
            return; \
        } \
        mask = ~lomask(doffs); \
        dest[0] = (dest[0] & ~mask) | \
            (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
        for(i=1; i < nw-1; i++) { \
            dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \
        } \
        tail = (doffs+nbits)&31; \
        if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \
        if (snw == nw) { \
            dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \
        } \
        else /* snw > nw */ { \
            dest[i] = (dest[i] & ~mask) | \
                (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \
        } \
    } \
}

// Per-word operations plugged into BITVECTOR_COPY_OP: identity and
// bitwise complement. The two instantiations below define bitvector_copy
// (called by bitvector_reverse and BITVECTOR_BINARY_OP_TO further down) and,
// presumably, bitvector_not_to.
#define BV_COPY(a) (a)
#define BV_NOT(a)   (~(a))
BITVECTOR_COPY_OP(copy, BV_COPY)
BITVECTOR_COPY_OP(not_to, BV_NOT)

// right-shift the bits in one logical "row" of a long 2d bit vector
/*
void bitvector_shr_row(u_int32_t *b, u_int32_t offs, size_t nbits, u_int32_t s)
{
}
*/

// copy from source to dest while reversing bit-order
// assumes dest offset == 0
// assumes source and dest don't overlap
// assumes offset < 32
//
// Covers the nw words that contain bits soffs..soffs+nbits-1 of src. If the
// range does not end on a word boundary, the result is shifted down with
// bitvector_shr so the reversed bits start at bit 0 of dest.
void bitvector_reverse_to(u_int32_t *dest, u_int32_t *src, u_int32_t soffs,
                          u_int32_t nbits)
{
    index_t i;
    u_int32_t nw, tail;

    if (nbits == 0) return;

    nw = (soffs+nbits+31)>>5;
    // first, reverse the words while reversing bit order within each word
    for(i=0; i < nw/2; i++) {
        dest[i]      = bitreverse(src[nw-i-1]);
        dest[nw-i-1] = bitreverse(src[i]);
    }
    // odd word count: the middle word is only bit-reversed
    if (nw&0x1)
        dest[i] = bitreverse(src[i]);

    tail = (soffs+nbits)&31;
    if (tail)
        bitvector_shr(dest, nw, 32-tail);
}

// In-place reversal of nbits bits of b starting at bit offset offs.
// The word- and bit-reversed data is built in a scratch buffer (alloca for up
// to MALLOC_CUTOFF words, malloc above that) and written back with
// bitvector_copy, using (32-tail)&31 as the source bit offset.
// NOTE(review): the malloc result is used without a NULL check.
void bitvector_reverse(u_int32_t *b, u_int32_t offs, u_int32_t nbits)
{
    index_t i;
    u_int32_t nw, tail;
    u_int32_t *temp;

    if (nbits == 0) return;

    nw = (offs+nbits+31)>>5;
    temp = (nw > MALLOC_CUTOFF) ? malloc(nw*4) : alloca(nw*4);
    for(i=0; i < nw/2; i++) {
        temp[i]      = bitreverse(b[nw-i-1]);
        temp[nw-i-1] = bitreverse(b[i]);
    }
    if (nw&0x1)
        temp[i] = bitreverse(b[i]);

    tail = (offs+nbits)&31;
    bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
    if (nw > MALLOC_CUTOFF) free(temp);
}

// Count the bits set among nbits bits of b starting at bit offset offs,
// using count_bits on each word with the first and last words masked.
// NOTE(review): text is missing inside the "nw == 1" branch (between "<" and
// ">offs)"). It presumably closed the single-word return and initialised ans
// from the first word, roughly
//     return count_bits(b[0] & (lomask(nbits)<<offs)); }
//     ans = count_bits(b[0]>>offs);
// -- restore from version control.
u_int64_t bitvector_count(u_int32_t *b, u_int32_t offs, u_int64_t nbits)
{
    size_t i, nw;
    u_int32_t ntail;
    u_int64_t ans;

    if (nbits == 0) return 0;
    nw = ((u_int64_t)offs+nbits+31)>>5;

    if (nw == 1) {
        return count_bits(b[0] & (lomask(nbits)<>offs);  // first end cap

    for(i=1; i < nw-1; i++) {
        /* popcnt can be computed branch-free, so these special cases
           probably don't help much */
        /*
        v = b[i];
        if (v == 0)
            continue;
        if (v == ONES32)
            ans += 32;
        else
        */
        ans += count_bits(b[i]);
    }

    ntail = (offs+(u_int32_t)nbits)&31;
    ans += count_bits(b[i]&(ntail>0?lomask(ntail):ONES32));  // last end cap

    return ans;
}

// NOTE(review): this region is damaged too ("<>" / "<" mark lost text). Only
// the opening of bitvector_any0 survives, followed by a fragment of another
// routine and then the tail of a helper that picks bitvector_shl_to or
// bitvector_shr_to depending on whether newoffs is above soffs. That helper
// is presumably adjust_offset_to, the name called with matching arguments by
// BITVECTOR_BINARY_OP_TO below. Restore from version control.
u_int32_t bitvector_any0(u_int32_t *b, u_int32_t offs, u_int32_t nbits)
{
    index_t i;
    u_int32_t nw, tail;
    u_int32_t mask;

    if (nbits == 0) return 0;
    nw = (offs+nbits+31)>>5;

    if (nw == 1) {
        mask = (lomask(nbits)<>5;

    if (nw == 1) {
        mask = (lomask(nbits)< soffs)
        bitvector_shl_to(dest, src, nw, newoffs-soffs, true);
    else
        bitvector_shr_to(dest, src, nw, soffs-newoffs);
}

// Generates bitvector_<opname>_to(dest, doffs, a, aoffs, b, boffs, nbits):
// applies OP word by word to nbits bits of a and b and stores the result in
// dest at bit offset doffs.
// A scratch buffer of nw+1 words (alloca, or malloc above MALLOC_CUTOFF)
// is used in three cases:
//   aoffs == boffs : the operands are combined as they are
//   aoffs == doffs : b is first re-aligned to aoffs in the scratch buffer
//   otherwise      : a is first re-aligned to boffs in the scratch buffer
// The combined words are written into the scratch buffer and then placed
// with bitvector_copy(dest, doffs, temp, aoffs, nbits).
// NOTE(review): the malloc result is used without a NULL check.
#define BITVECTOR_BINARY_OP_TO(opname, OP) \
void bitvector_##opname##_to(u_int32_t *dest, u_int32_t doffs, \
                             u_int32_t *a, u_int32_t aoffs, \
                             u_int32_t *b, u_int32_t boffs, u_int32_t nbits) \
{ \
    u_int32_t nw = (doffs+nbits+31)>>5; \
    u_int32_t *temp = nw>MALLOC_CUTOFF ? malloc((nw+1)*4) : alloca((nw+1)*4);\
    u_int32_t i, anw, bnw; \
    if (aoffs == boffs) { \
        anw = (aoffs+nbits+31)>>5; \
    } \
    else if (aoffs == doffs) { \
        bnw = (boffs+nbits+31)>>5; \
        adjust_offset_to(temp, b, bnw, boffs, aoffs); \
        b = temp; anw = nw; \
    } \
    else { \
        anw = (aoffs+nbits+31)>>5; \
        bnw = (boffs+nbits+31)>>5; \
        adjust_offset_to(temp, a, anw, aoffs, boffs); \
        a = temp; aoffs = boffs; anw = bnw; \
    } \
    for(i=0; i < anw; i++) temp[i] = OP(a[i], b[i]); \
    bitvector_copy(dest, doffs, temp, aoffs, nbits); \
    if (nw>MALLOC_CUTOFF) free(temp); \
}

// Word-level operators for the instantiations below, which define
// bitvector_and_to, bitvector_or_to and bitvector_xor_to.
#define BV_AND(a,b) ((a)&(b))
#define BV_OR(a,b)  ((a)|(b))
#define BV_XOR(a,b) ((a)^(b))
BITVECTOR_BINARY_OP_TO(and, BV_AND)
BITVECTOR_BINARY_OP_TO(or,  BV_OR)
BITVECTOR_BINARY_OP_TO(xor, BV_XOR)