home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Fresh Fish 5
/
FreshFish_July-August1994.bin
/
bbs
/
gnu
/
gmp-1.3.2-src.lha
/
src
/
amiga
/
gmp-1.3.2
/
mpn_sqrt.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-05-19
|
16KB
|
480 lines
/* mpn_sqrt(root_ptr, rem_ptr, op_ptr, op_size)
Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR.
Write the remainder at REM_PTR, if REM_PTR != NULL.
Return the size of the remainder.
(The size of the root is always half of the size of the operand.)
OP_PTR and ROOT_PTR may not point to the same object.
OP_PTR and REM_PTR may point to the same object.
If REM_PTR is NULL, only the root is computed and the return value of
the function is 0 if OP is a perfect square, and *any* non-zero number
otherwise.
Copyright (C) 1991, 1993 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
The GNU MP Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the GNU MP Library; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
/* This code is just correct if "unsigned char" has at least 8 bits. It
doesn't help to use CHAR_BIT from limits.h, as the real problem is
the static arrays. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* Square root algorithm:
1. Shift OP (the input) to the left an even number of bits s.t. there
are an even number of words and either (or both) of the most
significant bits are set. This way, sqrt(OP) has exactly half as
many words as OP, and has its most significant bit set.
2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables.
This approximation is used for the first single-precision
iterations of Newton's method, yielding a full-word approximation
to sqrt(OP).
3. Perform multiple-precision Newton iteration until we have the
exact result. Only about half of the input operand is used in
this calculation, as the square root is perfectly determinable
from just the higher half of a number. */
/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */
#if defined __GNUC__
#if defined __sparc__
#define SQRT(a) \
({ \
double __sqrt_res; \
asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
__sqrt_res; \
})
#endif
#if defined __HAVE_68881__
#define SQRT(a) \
({ \
double __sqrt_res; \
asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
__sqrt_res; \
})
#endif
#if defined __hppa
#define SQRT(a) \
({ \
double __sqrt_res; \
asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a)); \
__sqrt_res; \
})
#endif
#endif
#ifndef SQRT
/* Tables for initial approximation of the square root. These are
indexed with bits 1-8 of the operand for which the square root is
calculated, where bit 0 is the most significant non-zero bit. I.e.
the most significant one-bit is not used, since that per definition
is one. Likewise, the tables don't return the highest bit of the
result. That bit must be inserted by or:ing the returned value with
0x100. This way, we get a 9-bit approximation from 8-bit tables! */
/* Table to be used for operands with an even total number of bits.
(Exactly as in the decimal system there are similarities between the
square root of numbers with the same initial digits and an even
difference in the total number of digits. Consider the square root
of 1, 10, 100, 1000, ...) */
static unsigned char even_approx_tab[256] =
{
0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79,
0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f,
0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84,
0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89,
0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f,
0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94,
0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99,
0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e,
0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3,
0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7,
0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac,
0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1,
0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6,
0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba,
0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf,
0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3,
0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8,
0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc,
0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1,
0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5,
0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda,
0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde,
0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2,
0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6,
0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb,
0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef,
0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3,
0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7,
0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb,
0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff,
};
/* Table to be used for operands with an odd total number of bits.
(Further comments before previous table.) */
static unsigned char odd_approx_tab[256] =
{
0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b,
0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f,
0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12,
0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16,
0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a,
0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d,
0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21,
0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24,
0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28,
0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b,
0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f,
0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32,
0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35,
0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39,
0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c,
0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f,
0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42,
0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45,
0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49,
0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c,
0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f,
0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52,
0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55,
0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58,
0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b,
0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e,
0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61,
0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63,
0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66,
0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69,
};
#endif
mp_size
#ifdef __STDC__
mpn_sqrt (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size op_size)
#else
mpn_sqrt (root_ptr, rem_ptr, op_ptr, op_size)
mp_ptr root_ptr;
mp_ptr rem_ptr;
mp_srcptr op_ptr;
mp_size op_size;
#endif
{
/* R (root result) */
mp_ptr rp; /* Pointer to least significant word */
mp_size rsize; /* The size in words */
/* T (OP shifted to the left a.k.a. normalized) */
mp_ptr tp; /* Pointer to least significant word */
mp_size tsize; /* The size in words */
mp_ptr t_end_ptr; /* Pointer right beyond most sign. word */
mp_limb t_high0, t_high1; /* The two most significant words */
/* TT (temporary for numerator/remainder) */
mp_ptr ttp; /* Pointer to least significant word */
/* X (temporary for quotient in main loop) */
mp_ptr xp; /* Pointer to least significant word */
mp_size xsize; /* The size in words */
unsigned cnt;
mp_limb initial