summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2012-03-13 04:59:00 (GMT)
committerH. Peter Anvin <hpa@zytor.com>2012-03-13 04:59:00 (GMT)
commitd58300585648dc798ac0a6d1d55590ea9ce26cd3 (patch)
tree30bf479fa0c6d450ec12d3342600a9df99e233f5
parent488dc320879f268c810f021d5e063c60afbac0cf (diff)
downloadpbn-d58300585648dc798ac0a6d1d55590ea9ce26cd3.zip
pbn-d58300585648dc798ac0a6d1d55590ea9ce26cd3.tar.gz
pbn-d58300585648dc798ac0a6d1d55590ea9ce26cd3.tar.bz2
pbn-d58300585648dc798ac0a6d1d55590ea9ce26cd3.tar.xz
Fix the division case where the denominator is a single limb
The code for Knuth-D assumes a denominator of >= 2 limbs; test for this and fall back to a simple divide loop for a single-limb denominator.
-rw-r--r--pbn.h1
-rw-r--r--pbn_div.c178
-rw-r--r--pbn_init.c11
3 files changed, 101 insertions, 89 deletions
diff --git a/pbn.h b/pbn.h
index 86d4945..f480c47 100644
--- a/pbn.h
+++ b/pbn.h
@@ -82,6 +82,7 @@ struct pbn *pbn_clr_bit(struct pbn *, int);
int pbn_bit(const struct pbn *, int);
struct pbn *pbn_int(pbn_satom_t);
+struct pbn *pbn_uint(pbn_atom_t);
/* Primarily for debugging; does NOT consume a reference */
void pbn_dump(FILE *, const struct pbn *);
diff --git a/pbn_div.c b/pbn_div.c
index 0eaf0f7..87665fc 100644
--- a/pbn_div.c
+++ b/pbn_div.c
@@ -59,104 +59,104 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
return -rv; /* Division by zero? */
}
- if (n->bits == 0 || d->bits == 1) {
- /* Division by 1 or -1, or numerator is zero */
- if (qp) {
- if (d->minus && n->bits) {
- n = pbn_cow(n, n->len);
- n->minus ^= 1;
- }
- *qp = n;
- } else {
- pbn_free(n);
- }
- pbn_free(d);
-
- if (rp)
- *rp = pbn_int(0);
-
- return 0;
- }
+ /* Numerator and denominator length in limbs */
+ nl = (n->bits + PBN_ATOM_BITS - 1) >> PBN_ATOM_SHIFT;
+ dl = (d->bits + PBN_ATOM_BITS - 1) >> PBN_ATOM_SHIFT;
q = pbn_new((((n->bits | (PBN_ATOM_BITS - 1)) -
(d->bits | (PBN_ATOM_BITS - 1)))
>> PBN_ATOM_SHIFT) + 1);
- /* Normalize denominator so the MSB of the MSW is on */
- nl = (n->bits + PBN_ATOM_BITS - 1) >> PBN_ATOM_SHIFT;
- dl = (d->bits + PBN_ATOM_BITS - 1) >> PBN_ATOM_SHIFT;
-
- s = -d->bits & (PBN_ATOM_BITS-1);
- d = pbn_shl(d, s);
- n = pbn_cow(pbn_shl(n, s), nl+1);
- n->minus ^= d->minus; /* Set the sign */
-
- un = n->num;
- vn = d->num;
-
- for (j = nl - dl; j >= 0; j--) {
- pbn_atom_t nx = un[j+dl];
- pbn_atom_t rhatx;
-
- /* Compute initial estimate of the quotient atom */
-
- rhat = 0;
- if (nx >= vn[dl-1]) {
- rhat = vn[dl-1];
- nx -= vn[dl-1];
- }
-
- DIVIDE(nx, un[j+dl-1], vn[dl-1], qhat, rhatx);
- rhat += rhatx;
-
- /*
- * Note: this loop can only be executed at most twice;
- * consider unrolling
- */
- while (rhat < ((pbn_2atom_t)1 << PBN_ATOM_BITS) &&
- (pbn_2atom_t)qhat * vn[dl-2] >
- (rhat << PBN_ATOM_BITS) + un[j+dl-2]) {
- qhat--;
- rhat += vn[dl-1];
- }
-
- /* Multiply-subtract loop */
- k = 0;
- for (i = 0; i < dl; i++) {
- p = (pbn_2atom_t)qhat * vn[i];
- t = un[i+j] - k - (pbn_atom_t)p;
- un[i+j] = (pbn_atom_t)t;
- k = (p >> PBN_ATOM_BITS) - (t >> PBN_ATOM_BITS);
- }
- t = un[j+dl] - k;
- un[j+dl] = (pbn_atom_t)t;
-
- /* Correct in case of oversubtraction */
- if (unlikely(t < 0)) {
- qhat--;
- k = 0;
- for (i = 0; i < dl; i++) {
- t = k + un[i+j] + vn[i];
- un[i+j] = (pbn_atom_t)t;
- k = t >> PBN_ATOM_BITS;
- }
- un[j+dl] += (pbn_atom_t)k;
- }
-
- q->num[j] = qhat;
+ q->minus = n->minus ^= d->minus; /* Set the sign */
+
+ if (dl == 1) {
+ /* Single limb denominator */
+ pbn_atom_t c;
+ const pbn_atom_t dn = d->num[0];
+
+ c = 0;
+ for (i = nl - 1; i >= 0; i--)
+ DIVIDE(c, n->num[i], dn, q->num[i], c);
+
+ pbn_free(n);
+
+ if (rp) {
+ *rp = pbn_uint(c);
+ (*rp)->minus = q->minus;
+ }
+ } else {
+ /* Normalize denominator so the MSB of the MSW is on */
+ s = -d->bits & (PBN_ATOM_BITS-1);
+ d = pbn_shl(d, s);
+ n = pbn_cow(pbn_shl(n, s), nl+1);
+
+ un = n->num;
+ vn = d->num;
+
+ for (j = nl - dl; j >= 0; j--) {
+ pbn_atom_t nx = un[j+dl];
+ pbn_atom_t rhatx;
+
+ /* Compute initial estimate of the quotient atom */
+
+ rhat = 0;
+ if (nx >= vn[dl-1]) {
+ rhat = vn[dl-1];
+ nx -= vn[dl-1];
+ }
+
+ DIVIDE(nx, un[j+dl-1], vn[dl-1], qhat, rhatx);
+ rhat += rhatx;
+
+ /*
+ * Note: this loop can only be executed at most twice;
+ * consider unrolling
+ */
+ while (rhat < ((pbn_2atom_t)1 << PBN_ATOM_BITS) &&
+ (pbn_2atom_t)qhat * vn[dl-2] >
+ (rhat << PBN_ATOM_BITS) + un[j+dl-2]) {
+ qhat--;
+ rhat += vn[dl-1];
+ }
+
+ /* Multiply-subtract loop */
+ k = 0;
+ for (i = 0; i < dl; i++) {
+ p = (pbn_2atom_t)qhat * vn[i];
+ t = un[i+j] - k - (pbn_atom_t)p;
+ un[i+j] = (pbn_atom_t)t;
+ k = (p >> PBN_ATOM_BITS) - (t >> PBN_ATOM_BITS);
+ }
+ t = un[j+dl] - k;
+ un[j+dl] = (pbn_atom_t)t;
+
+ /* Correct in case of oversubtraction */
+ if (unlikely(t < 0)) {
+ qhat--;
+ k = 0;
+ for (i = 0; i < dl; i++) {
+ t = k + un[i+j] + vn[i];
+ un[i+j] = (pbn_atom_t)t;
+ k = t >> PBN_ATOM_BITS;
+ }
+ un[j+dl] += (pbn_atom_t)k;
+ }
+
+ q->num[j] = qhat;
+ }
+
+ if (rp)
+ *rp = pbn_shr(pbn_adjust_bits(n), s);
+ else
+ pbn_free(n);
}
-
- pbn_free(d);
- if (qp)
- *qp = pbn_adjust_bits(q);
- else
- pbn_free(q);
+ pbn_free(d);
- if (rp)
- *rp = pbn_shr(pbn_adjust_bits(n), s);
+ if (qp)
+ *qp = pbn_adjust_bits(q);
else
- pbn_free(n);
+ pbn_free(q);
return 0;
}
diff --git a/pbn_init.c b/pbn_init.c
index a0a252b..b34f218 100644
--- a/pbn_init.c
+++ b/pbn_init.c
@@ -103,6 +103,17 @@ struct pbn *pbn_int(pbn_satom_t v)
return pbn;
}
+struct pbn *pbn_uint(pbn_atom_t v)
+{
+ struct pbn *pbn = pbn_new(1);
+ if (!pbn)
+ return NULL;
+
+ pbn->num[0] = v;
+ pbn->bits = v ? pbn_ilog2p1(v) : 0;
+ return pbn;
+}
+
/* Produce a duplicate */
struct pbn *pbn_dup(const struct pbn *src)
{