summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2012-03-12 15:35:49 (GMT)
committerH. Peter Anvin <hpa@zytor.com>2012-03-12 15:37:32 (GMT)
commit488dc320879f268c810f021d5e063c60afbac0cf (patch)
treeae4b22ff47e993541481f535a62460b0a76ee811
parent1bacbd3042bc6fa665f6c35a34c738d4782c9f91 (diff)
downloadpbn-488dc320879f268c810f021d5e063c60afbac0cf.zip
pbn-488dc320879f268c810f021d5e063c60afbac0cf.tar.gz
pbn-488dc320879f268c810f021d5e063c60afbac0cf.tar.bz2
pbn-488dc320879f268c810f021d5e063c60afbac0cf.tar.xz
Do division via Knuth's Algorithm D (limb long division)
Instead of bitwise, do division via Knuth's Algorithm D, which is basically probabilistic long division.
-rw-r--r--pbn.h1
-rw-r--r--pbn_div.c141
-rw-r--r--pbn_divs.c20
-rw-r--r--pbn_init.c11
-rw-r--r--pbnint.h30
-rw-r--r--test.c68
6 files changed, 191 insertions, 80 deletions
diff --git a/pbn.h b/pbn.h
index 28cf763..86d4945 100644
--- a/pbn.h
+++ b/pbn.h
@@ -27,6 +27,7 @@
/* Configurables */
#define PBN_ATOM_BITS 32
+#define PBN_ATOM_SHIFT 5
#define PBN_ATOM_xFMT "08"PRIx32
#define PBN_ATOM_dFMT "08"PRId32
#define PBN_ATOM_uFMT "08"PRIu32
diff --git a/pbn_div.c b/pbn_div.c
index 7e5b1fa..0eaf0f7 100644
--- a/pbn_div.c
+++ b/pbn_div.c
@@ -25,20 +25,23 @@
by smarter algorithms, but this algorithm should at least be
correct, so it should be usable for now. */
-#include "pbn.h"
+#include "pbnint.h"
int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
{
- struct pbn *q, *r;
- int rv = d->bits == 0 ? -1 : 0;
- int cm;
- int shift, bits, len, i;
- pbn_atom_t *qn, qm;
-
- if (rv || (cm = pbn_abscmp(n, d)) < 0) {
+ struct pbn *q; /* Quotient accumulator */
+ pbn_atom_t *un; /* Numerator/remainder array */
+ const pbn_atom_t *vn; /* Denominator array */
+ pbn_atom_t qhat; /* Estimated quotient digit */
+ pbn_2atom_t rhat; /* Estimated remainder */
+ pbn_2satom_t p, t, k; /* Multiply-subtract loop intermediates */
+ int nl, dl; /* Length of n, d in atoms */
+ int s; /* Normalization shift */
+ int i, j;
+ int rv;
+
+ if ((rv = d->bits == 0) || pbn_abscmp(n, d) < 0) {
/* We either had division by zero, or |n| < |d| */
- int rv = d->bits == 0;
-
if (rp) {
if (d->minus && n->bits) {
n = pbn_cow(n, n->len);
@@ -53,59 +56,107 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
if (qp)
*qp = pbn_int(0);
- return rv; /* Division by zero? */
- }
+ return -rv; /* Division by zero? */
+ }
- if (cm == 0) {
- /* Not really a common case, but we got it anyway */
- int quot = (n->minus ^ d->minus) ? -1 : 1;
+ if (n->bits == 0 || d->bits == 1) {
+ /* Division by 1 or -1, or numerator is zero */
+ if (qp) {
+ if (d->minus && n->bits) {
+ n = pbn_cow(n, n->len);
+ n->minus ^= 1;
+ }
+ *qp = n;
+ } else {
+ pbn_free(n);
+ }
pbn_free(d);
- pbn_free(n);
if (rp)
*rp = pbn_int(0);
- if (qp)
- *qp = pbn_int(quot);
return 0;
}
- shift = n->bits - d->bits;
- bits = shift+1; /* Maximum possible number of quotient bits */
- len = (bits+PBN_ATOM_BITS-1)/PBN_ATOM_BITS;
-
- r = pbn_cow(n, n->len);
- q = pbn_new(len);
- d = pbn_shl(d, shift);
- q->minus = r->minus ^ d->minus;
- r->minus = d->minus; /* Makes our lives easier for now... */
-
- qm = (pbn_atom_t)1 << (shift % PBN_ATOM_BITS);
- qn = &q->num[shift/PBN_ATOM_BITS];
- for (i = shift; i >= 0; i--) {
- if (pbn_abscmp(r, d) >= 0) {
- r = pbn_sub(r, d);
- *qn |= qm;
+ q = pbn_new((((n->bits | (PBN_ATOM_BITS - 1)) -
+ (d->bits | (PBN_ATOM_BITS - 1)))
+ >> PBN_ATOM_SHIFT) + 1);
+
+ /* Normalize denominator so the MSB of the MSW is on */
+ nl = (n->bits + PBN_ATOM_BITS - 1) >> PBN_ATOM_SHIFT;
+ dl = (d->bits + PBN_ATOM_BITS - 1) >> PBN_ATOM_SHIFT;
+
+ s = -d->bits & (PBN_ATOM_BITS-1);
+ d = pbn_shl(d, s);
+ n = pbn_cow(pbn_shl(n, s), nl+1);
+ n->minus ^= d->minus; /* Set the sign */
+
+ un = n->num;
+ vn = d->num;
+
+ for (j = nl - dl; j >= 0; j--) {
+ pbn_atom_t nx = un[j+dl];
+ pbn_atom_t rhatx;
+
+ /* Compute initial estimate of the quotient atom */
+
+ rhat = 0;
+ if (nx >= vn[dl-1]) {
+ rhat = vn[dl-1];
+ nx -= vn[dl-1];
}
- d = pbn_shr(d, 1);
- qm >>= 1;
- if (!qm) {
- qm = (pbn_atom_t)1 << (PBN_ATOM_BITS-1);
- qn--;
+
+ DIVIDE(nx, un[j+dl-1], vn[dl-1], qhat, rhatx);
+ rhat += rhatx;
+
+ /*
+ * Note: this loop can only be executed at most twice;
+ * consider unrolling
+ */
+ while (rhat < ((pbn_2atom_t)1 << PBN_ATOM_BITS) &&
+ (pbn_2atom_t)qhat * vn[dl-2] >
+ (rhat << PBN_ATOM_BITS) + un[j+dl-2]) {
+ qhat--;
+ rhat += vn[dl-1];
}
- }
- r->minus = q->minus && r->bits;
+ /* Multiply-subtract loop */
+ k = 0;
+ for (i = 0; i < dl; i++) {
+ p = (pbn_2atom_t)qhat * vn[i];
+ t = un[i+j] - k - (pbn_atom_t)p;
+ un[i+j] = (pbn_atom_t)t;
+ k = (p >> PBN_ATOM_BITS) - (t >> PBN_ATOM_BITS);
+ }
+ t = un[j+dl] - k;
+ un[j+dl] = (pbn_atom_t)t;
+
+ /* Correct in case of oversubtraction */
+ if (unlikely(t < 0)) {
+ qhat--;
+ k = 0;
+ for (i = 0; i < dl; i++) {
+ t = k + un[i+j] + vn[i];
+ un[i+j] = (pbn_atom_t)t;
+ k = t >> PBN_ATOM_BITS;
+ }
+ un[j+dl] += (pbn_atom_t)k;
+ }
- if (rp)
- *rp = r; /* r already has correct bit adjustment */
- else
- pbn_free(r);
+ q->num[j] = qhat;
+ }
+ pbn_free(d);
+
if (qp)
*qp = pbn_adjust_bits(q);
else
pbn_free(q);
+ if (rp)
+ *rp = pbn_shr(pbn_adjust_bits(n), s);
+ else
+ pbn_free(n);
+
return 0;
}
diff --git a/pbn_divs.c b/pbn_divs.c
index bc8bd3c..aadad1d 100644
--- a/pbn_divs.c
+++ b/pbn_divs.c
@@ -49,24 +49,8 @@ int pbn_divs(struct pbn **qp, pbn_satom_t *rp, struct pbn *n, pbn_satom_t d)
q->minus = minus;
c = 0;
-#if defined(__GNUC__) && defined(__i386__) && PBN_ATOM_BITS == 32
-/* gcc won't generate divl for 64/32 -> 32, so help it out */
- for (i = len-1; i >= 0; i--) {
- asm("divl %4"
- : "=d" (c), "=a" (q->num[i])
- : "d" (c), "a" (n->num[i]), "rm" (d));
- }
-#else
- for (i = len-1; i >= 0; i--) {
- pbn_2atom_t cc;
- cc = ((pbn_2atom_t)c << PBN_ATOM_BITS) + n->num[i];
-
- /* N.B. the explicit casts allow some compilers to tell this is
- a 2A:A -> A division operation. */
- q->num[i] = (pbn_atom_t)(cc/d);
- c = (pbn_atom_t)(cc%d);
- }
-#endif
+ for (i = len-1; i >= 0; i--)
+ DIVIDE(c, n->num[i], d, q->num[i], c);
pbn_free(n);
diff --git a/pbn_init.c b/pbn_init.c
index 5bdebaf..a0a252b 100644
--- a/pbn_init.c
+++ b/pbn_init.c
@@ -16,6 +16,7 @@
#define PBN_INIT 1
#include <string.h>
+#include <limits.h>
#include "pbnint.h"
struct pbn *pbn_new(int len)
@@ -40,6 +41,16 @@ static int pbn_ilog2p1(pbn_atom_t v)
{
int p = 1;
+#if defined(__GNUC__) && __GNUC__ >= 4
+ if (sizeof(pbn_atom_t) == sizeof(int))
+ return (__builtin_clz(v) ^ (PBN_ATOM_BITS-1)) + 1;
+ if (sizeof(pbn_atom_t) == sizeof(long))
+ return (__builtin_clzl(v) ^ (PBN_ATOM_BITS-1)) + 1;
+ if (sizeof(pbn_atom_t) == sizeof(long long))
+ return (__builtin_clzll(v) ^ (PBN_ATOM_BITS-1)) + 1;
+ /* else fall through, wtf... */
+#endif
+
#if PBN_ATOM_BITS >= 64
if (v & 0xffffffff00000000) {
p += 32;
diff --git a/pbnint.h b/pbnint.h
index 55d63f4..2cdbee6 100644
--- a/pbnint.h
+++ b/pbnint.h
@@ -19,4 +19,34 @@
#include "pbn.h"
+/* Macro to do a 2A/A -> A division/remainder operation */
+#if defined(__GNUC__) && PBN_ATOM_BITS == 32 && \
+ (defined(__i386__) || defined(__x86_64__))
+#define DIVIDE(hn,ln,d,q,r) \
+ asm("divl %4" \
+ : "=d" (r), "=a" (q) \
+ : "d" (hn), "a" (ln), "rm" (d))
+#else
+/*
+ * Note: the explicit casts allow some compilers to tell this is a 2A:A -> A
+ * division operation.
+ */
+#define DIVIDE(hn,ln,d,q,r) \
+ do { \
+ pbn_2atom_t _n; \
+ pbn_atom_t _d = (d); \
+ _n= ((pbn_2atom_t)(hn) << PBN_ATOM_BITS) + (pbn_atom_t)(ln); \
+ (q) = (pbn_atom_t)(_n/_d); \
+ (r) = (pbn_atom_t)(_n%_d); \
+ } while(0)
+#endif
+
+#ifdef __GNUC__
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (!!(x))
+#define unlikely(x) (!!(x))
+#endif
+
#endif /* PBNINT_H */
diff --git a/test.c b/test.c
index 1ae09cb..538122d 100644
--- a/test.c
+++ b/test.c
@@ -3,10 +3,30 @@
#include <stdlib.h>
#include "pbn.h"
+static void dump_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
+{
+ int rv;
+
+ pbn_dump(stdout, n);
+ printf(" / ");
+ pbn_dump(stdout, d);
+ putchar('\n');
+
+ rv = pbn_div(qp, rp, n, d);
+
+ if (rv) {
+ printf("-> %d\n", rv);
+ } else {
+ pbn_dump(stdout, *qp);
+ printf(" : ");
+ pbn_dump(stdout, *rp);
+ putchar('\n');
+ }
+}
+
static void divide_test(void)
{
struct pbn *n, *d, *r, *q;
- int rv;
printf("\n*** Divide test ***\n\n");
@@ -20,12 +40,7 @@ static void divide_test(void)
d->num[2] = 1; /* 2^64 */
pbn_adjust_bits(d);
- rv = pbn_div(&q, &r, n, d);
-
- pbn_dump(stdout, q);
- printf(" : ");
- pbn_dump(stdout, r);
- putchar('\n');
+ dump_div(&q, &r, n, d);
/* 2^128/3 */
@@ -34,12 +49,8 @@ static void divide_test(void)
pbn_adjust_bits(n);
d = pbn_int(3);
- rv = pbn_div(&q, &r, n, d);
- pbn_dump(stdout, q);
- printf(" : ");
- pbn_dump(stdout, r);
- putchar('\n');
+ dump_div(&q, &r, n, d);
/* 2^128/(3*2^64) */
@@ -51,14 +62,37 @@ static void divide_test(void)
d->num[2] = 3; /* 3*2^64 */
pbn_adjust_bits(d);
- rv = pbn_div(&q, &r, n, d);
+ dump_div(&q, &r, n, d);
- pbn_dump(stdout, q);
- printf(" : ");
- pbn_dump(stdout, r);
- putchar('\n');
+ /* 2^128/(3*2^64+2^31+1) */
+
+ n = pbn_new(5);
+ n->num[4] = 1; /* 2^128 */
+ pbn_adjust_bits(n);
+
+ d = pbn_new(3);
+ d->num[2] = 3; /* 3*2^64 */
+ d->num[0] = 0x80000001; /* +2^31+1 */
+ pbn_adjust_bits(d);
+
+ dump_div(&q, &r, n, d);
+
+ /* Something that needs an addback... */
+
+ n = pbn_new(5);
+ n->num[4] = 0x7fffffff;
+ n->num[3] = 0x80000000;
+ pbn_adjust_bits(n);
+
+ d = pbn_new(3);
+ d->num[2] = 0x80000000;
+ d->num[0] = 0x00000001;
+ pbn_adjust_bits(d);
+
+ dump_div(&q, &r, n, d);
}
+
static void small_divide_test(void)
{
struct pbn *n, *q;