home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Kosovo Orphans' Appeal Charity CD
/
KosovoOrphansAppeal.iso
/
archimedesworld_cd2
/
acornanswers
/
_scape2
/
s
/
subrouts1
< prev
Wrap
Text File
|
1996-02-21
|
19KB
|
754 lines
max16 * &7fffffff ;max positive number in 16 bit fixed point format (2^31-1)/65536
min16 * &80000000 ;min negative number -2^31 /65536
one * 65536
; rbbcinc
; a leaf APCS function
;
; C prototype:
; int rbbcinc(int r, int k)
;
; given limits 0 <= r < 2^k, return (r ñ 1) where ñ denotes a reversed bit ordering increment,
; subject to the stated limits.
; NB in this case, for the function f: n -> {for (k=c=0; c<n; c++, k=rbbcinc(k, l); return k;},
; we have f(f(n))=n.
; NB2 algo requires 2 <= k <= 32, but doesn't check for this - BEWARE!
; (for k=1 get no action taken, while for any other bad k get code executed at unintended address,
; hence unpredictable & likely system fatal).
;
EXPORT rbbcinc
rbnsta DCB "rbbcinc", 0
ALIGN
rbnend DCD &ff000000 + rbnend - rbnsta
rbbcinc
rbbc a1, a2, a3
MOVS pc, lr
; pow16
; a leaf APCS function
;
; C prototype:
; int pow16(int a, int b)
;
; returns 65536 * (a/65536)^(b/65536)
;
; is about 5 to 40 times faster than FPEmulator working with double floats
;
; nb if a<0 require b an integer
;
; nb2 unexpected return values:
; a=b=0 returns 1 actual value ill defined
; a<0, b non integer returns 0 actual value complex
; abs(bln(a)) > max16 unknown errors likely (to avoid, restrict b to ▒(2^11)one, roughly)
; a=0, b<0 returns max16 actual value +infinity
; a & b st result out of range unknown result
;
; nb precise nature of errors for other values not yet confirmed acceptable, though should be okay except
; perhaps in extreme cases (as of 22/11/95)
;
; Few subsequent notes on error:
;
; For result much bigger than one:
; since exp16 only yields 16 significant bits, pow16 gives no more than this, hence for result much bigger
; than one get increasing error, eg for a around 64one & b=1.03125one get errors st answer (around 73one)
; is only correct to top 15 or 16 bits (ie error upto around one/1024 to one/256)
;
; For 0<=a<=one, & b eg 2one, 3one, 13.125one etc get error typically no more than in low 1 or 2 bits
;
; For a large (say a>one) & b<0 get error typically in only lowest bit or no error
;
; **** Strongly recommend this fn is only used where accuracy not crucial or with limited range ****
; **** of arguments where you can confirm yourself accuracy in that range is adequate. ****
;
EXPORT pow16
pwnsta DCB "pow16", 0
ALIGN
pwnend DCD &ff000000 + pwnend - pwnsta
pow16
CMP a2, #0
MOVEQ a1, #&10000 ;if b=0 return one directly
MOVEQS pc, lr
CMP a2, #&10000
MOVEQS pc, lr ;handle trival case b=one directly, by returning a
CMP a1, #0
MOVGT ip, #0
BGT pow16_apos
BNE pow16_aneg
CMP a2, #0
MOV a1, #0 ;if a=0 and b>0 return 0
MOVLT a1, #max16+1 ;if a=0 and b<0 return max16
SUBLT a1, a1, #1
MOVS pc, lr
pow16_aneg
MOVS ip, a2, LSL #16
MOVNE a1, #0 ;if a<0 and b not an integer return 0
MOVNES pc, lr
AND ip, a2, #&10000 ;if a<0 then go on to evaluate (65536 * (-a/65536)^(b/65536))
RSB a1, a1, #0 ;with sign subsequently forced to + or - according to whether b/one
pow16_apos ;is even or odd ( eg (-2.5)^3 is just (-1)^3 * 2.5^3 )
STMFD sp!, {v1, v2, lr}
MOV v1, ip ;now evaluate (65536 * (a/65536)^(b/65536)), for a>0
MOV v2, a2 ;via exp16( ln16(a) * b / 65536 )
BL ln16 ;nb this uses property that exp(b.log(a)) = exp(log(a^b)) = a^b
mul16 a1, v2, a1, a2, a3, a4
BL exp16
CMP v1, #0
RSBNE a1, a1, #0 ;finally switch sign on answer if originally a<0 and b/one was odd
LDMFD sp!, {v1, v2, pc}^
; ln16
; a leaf APCS function
;
; C prototype:
; int ln16(int a)
;
; returns 65536 * ln (a/65536)
;
; is about 30 times faster than FPEmulator working with double floats
;
EXPORT ln16
lnnsta DCB "ln16", 0
ALIGN
lnnend DCD &ff000000 + lnnend - lnnsta
ln16
CMP a1, #0 ;if a<=0 return min16
MOVLE a1, #min16
MOVLES pc, lr ;else most significant bit is between b30 & b0
GBLA counter
counter SETA 30 ;find msb & store (msb_index - 15) in a4
WHILE counter > 1
TST a1, #1<<counter
MOVNE a4, #counter-15
BNE ln16_gotmsb
counter SETA counter-1
WEND
TST a1, #1<<1
MOVNE a4, #1-15
MOVEQ a4, #0-15
ln16_gotmsb ;if we set z = a1/(2^a4), will have ln(a/one) = ln(z/one * 2^a4)
CMP a4, #2 ; = ln(z/one) + a4*ln2
SUBGT a2, a4, #2 ;with z in range [0.5,1)
MOVGT a1, a1, LSR a2 ;so calc a1 = 4*( a1/(2^a4) ) - 3*one
RSBLE a2, a4, #2 ;which puts a1 in range ▒one suitable for approximation by poly
MOVLE a1, a1, LSL a2 ;leaving us to calculate a4*one*ln2 + one*ln((a1+3one)/4one)
SUB a1, a1, #&30000
ADD a2, a1, a1, LSL #2 ;start polynomial approximation calculation on a1
RSB a2, a2, a1, LSL #10
MOV a2, a2, ASR #16 ;for details of how this works see comments against similar code
SUB a2, a2, #&000e00 ;in exp16 function, directly below this function
SUB a2, a2, #&000034
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&003200
ADD a2, a2, #&000031
MOV ip, a1
mul16c a2, ip, a2, a3
SUB a2, a2, #&00e200
SUB a2, a2, #&0000f2
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&050000
ADD a2, a2, #&005500
ADD a2, a2, #&000065
MOV ip, a1
mul16c a2, ip, a2, a3
SUB a2, a2, #&040000
SUB a2, a2, #&009a00 ;end of polynomial approximation calculation
SUB a1, a2, #&000059 ;a1 now holds (2^20)*(approximated value)
ADD ip, a4, a4, LSL #1
ADD a2, ip, a4, LSL #3 ;now add in the a4*one*ln2 term using an explicit binary expansion
ADD a2, a4, a2, LSL #4 ;of approximately one*ln2
RSB a3, a4, a4, LSL #3
ADD a2, a3, a2, LSL #4 ;nb we actually add approx (2^20)*ln2, & then divide by 16
ADD a2, a4, a2, LSL #3 ;to reduce truncation error
ADD a1, a1, a2, LSL #5
ADD a1, a1, ip, ASR #1
MOV a1, a1, ASR #4
MOVS pc, lr
; exp16
; a leaf APCS function
;
; C prototype:
; int exp16(int a)
;
; returns 65536 * exp (a/65536)
;
; is about 45 times faster than FPEmulator working with double floats
;
EXPORT exp16
epnsta DCB "exp16", 0
ALIGN
epnend DCD &ff000000 + epnend - epnsta
exp16 ;this fn returns a value with only about 17 significant binary
;digits - eg for 'a' small or negative, get near maximal accuracy
;but for 'a' large (upto around 10one) where exp has value
CMP a1, #12*65536 ;~20000one can get error upto roughly 0.1one
MOVGT a1, #max16+1
SUBGT a1, a1, #1
MOVGTS pc, lr
CMP a1, #-12*65536
MOVLT a1, #0
MOVLTS pc, lr ;otherwise know |a| <= 12one
RSB a2, a1, a1, LSL #3 ;now set a = a/ln2 using explicit mul by 2_1.01110001010101000111
ADD a3, a1, a1, LSL #2 ;nb only need 1st 20 digits given range on a
ADD a3, a3, a3, LSL #4 ;nb2 this calc is approximate only, though will usually yield an
ADD a1, a2, a1, LSL #4 ;answer correct to the stored 16 binary places (else error one/65536)
ADD a1, a1, a3, ASR #10
ADD a1, a1, a2, ASR #16 ;thus exp(original a) = exp(new a * ln2) = 2^(new a), eval'd below:
ADD a1, a1, #8
MOV a1, a1, ASR #4 ;calc a = a/ln2 complete
MOV a4, a1, ASR #16 ;a4 now holds integer component of a
BIC a1, a1, a4, LSL #16 ;a1 now holds fractional component in range [0,1)*one
CMP a4, #15 ;do another check on a to ensure exp(a) is in range
MOVGE a1, #max16+1 ;& if not return maximal or minimal value as appropriate
SUBGE a1, a1, #1
MOVGE pc, lr
CMP a4, #-16
MOVLT a1, #0
MOVLTS pc, lr ;else need to return value (one * 2^(a4 + a1/one))
MOV a1, a1, LSL #1 ; = (one * 2^(a1/one) * 2^a4)
SUB a1, a1, #&10000 ;convert a1 to lie in [-1,-1), then apply poly approximation:
RSB a2, a1, a1, LSL #3
ADD a2, a1, a2, LSL #6
MOV a2, a2, ASR #15 ;a2 = 0.0008561*(2^20)*(a1/one)
ADD a2, a2, #&002800
ADD a2, a2, #&00007e ;a2 = 0.0008561*(2^20)*(a1/one) + 0.0098857*(2^20)
MOV ip, a1
mul16c a2, ip, a2, a3 ;a2 = 0.0008561*(2^20)*(a1/one)^2 + 0.0098857*(2^20)*(a1/one)
ADD a2, a2, #&015000
ADD a2, a2, #&000be0 ; etc
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&070000
ADD a2, a2, #&00d700
ADD a2, a2, #&00007e
MOV ip, a1
mul16c a2, ip, a2, a3 ;until we have
ADD a2, a2, #&160000 ;a2 = (2^20) ( 0.0008561(a1/one)^4 + 0.0098857(a1/one)^3 +
ADD a2, a2, #&00a000 ; 0.0849301(a1/one)^2 + 0.4901106(a1/one) +
ADD a2, a2, #&0000a7 ; 1.14142138(a1/one) )
MOV a1, a2, ASR #4 ;now divide by 16 removing most truncation error from above calcs,
CMP a4, #0 ;leaving a2 = (2^16)*(approximated value)
RSBLT a4, a4, #0
MOVLT a1, a1, ASR a4 ;finally carry out the (a2 = a2 * 2^a4) step
MOVLTS pc, lr
MOVS a1, a1, LSL a4
MOVMI a1, #max16+1
SUBMI a1, a1, #1
MOVS pc, lr
; cos16
; a leaf APCS function
;
; C prototype:
; int cos16(int a)
;
; returns 65536 * cos ((a/65536)(pi/2))
;
; is about 44 times faster than FPEmulator working with double floats
;
EXPORT cos16
csnsta DCB "cos16", 0
ALIGN
csnend DCD &ff000000 + csnend - csnsta
cos16
EOR a4, a1, a1, LSL #1 ;since cos is periodic and cos((a/65536)(pi/2)) has period 4*one
TST a4, #&20000 ;eval is simpler than for ln or exp:
MOV a4, #0 ;we need only to truncate a to [0,4one) and then approx the function
MOVNE a4, #1 ;via a poly
TST a1, #&10000
MOV a1, a1, LSL #16 ;in fact further symmetries in cos over this range allow us to
MOV a1, a1, LSR #16 ;actually only approximate function over range [0,one)
RSBEQ a1, a1, #&10000 ;and reconstruct it over remainder of range [one,4one) from that part
MOV a1, a1, LSL #1 ;eg cos( (a/65536)(pi/2) ) for a in [2one, 3one)
SUB a1, a1, #&10000 ; = -cos( ((a-2one)/65536)(pi/2) )
RSB a2, a1, a1, LSL #3
ADD a2, a1, a2, LSL #8
MOV a2, a2, ASR #16
ADD a2, a2, #&002c00
ADD a2, a2, #&000086
MOV ip, a1
mul16c a2, ip, a2, a3
SUB a2, a2, #&00e900
SUB a2, a2, #&0000bd
MOV ip, a1
mul16c a2, ip, a2, a3
SUB a2, a2, #&030000
SUB a2, a2, #&007c00
SUB a2, a2, #&0000c6
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&080000
ADD a2, a2, #&00E200
ADD a2, a2, #&0000BD
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&0b0000
ADD a2, a2, #&005000
ADD a2, a2, #&000050
MOV a1, a2, ASR #4
TST a4, #1
RSBNE a1, a1, #0
MOVS pc, lr
; sin16
; a leaf APCS function
;
; C prototype:
; int sin16(int a)
;
; returns 65536 * sin ((a/65536)(pi/2))
;
; is about 44 times faster than FPEmulator working with double floats
;
EXPORT sin16
snnsta DCB "sin16", 0
ALIGN
snnend DCD &ff000000 + snnend - snnsta
sin16
TST a1, #&20000 ;eval of sin is almost identical to that of cos
MOV a4, #0
MOVNE a4, #1
TST a1, #&10000
MOV a1, a1, LSL #16
MOV a1, a1, LSR #16
RSBNE a1, a1, #&10000
MOV a1, a1, LSL #1
SUB a1, a1, #&10000
RSB a2, a1, a1, LSL #3
ADD a2, a1, a2, LSL #8
MOV a2, a2, ASR #16
ADD a2, a2, #&002c00
ADD a2, a2, #&000086
MOV ip, a1
mul16c a2, ip, a2, a3
SUB a2, a2, #&00e900
SUB a2, a2, #&0000bd
MOV ip, a1
mul16c a2, ip, a2, a3
SUB a2, a2, #&030000
SUB a2, a2, #&007c00
SUB a2, a2, #&0000c6
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&080000
ADD a2, a2, #&00E200
ADD a2, a2, #&0000BD
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&0b0000
ADD a2, a2, #&005000
ADD a2, a2, #&000050
MOV a1, a2, ASR #4
TST a4, #1
RSBNE a1, a1, #0
MOVS pc, lr
; acs16
; a leaf APCS function
;
; C prototype:
; int acs16(int a)
;
; returns 65536 * 2/pi * arccos (a/65536)
;
; for a out of range (ie abs a > 65536), reset a to ▒65536 as appropriate before eval
;
; is about 24 times faster than FPEmulator working with double floats
;
EXPORT acs16
acnsta DCB "acs16", 0
ALIGN
acnend DCD &ff000000 + acnend - acnsta
acs16
CMP a1, #65536
MOVGE a1, #0
MOVGES pc, lr
CMP a1, #-65536
MOVLE a1, #2*65536
MOVLES pc, lr
STMFD sp!, {lr} ;now have arg in range (-one,one)
CMP a1, #0
RSBMI a1, a1, #0
MOVMI a4, #1 ;b0 in a4 set iff final result r needs replacing by 2*one-r
MOVPL a4, #0 ;now have arg in range [0,one)
SUB a2, a1, #&b500
SUBS a2, a2, #&0005
BLT acs16_arglow ;if arg < one/sqrt2 go and apply poly else first transmute into here
ORR a4, a4, #2 ;b1 in a4 clear iff penultimate result r need be replaced by one-r
MUL a2, a1, a1
MOV a1, a2, LSR #16
RSB a1, a1, #65536
sqrt16 a1, a1, a2, a3, ip, lr ;arg transmuted (ie replaced by one-sqrt(arg^2))
acs16_arglow
ADD a3, a1, a1, ASL #2
ADD a2, a3, a1, ASL #4
ADD a2, a2, a3, ASL #5
ADD a2, a2, a3, ASR #8
ADD a2, a2, #32
MOV a1, a2, ASR #6 ;range transform applied (ie replaced by arg*2sqrt2-one)
SUB a1, a1, #&10000 ;so arg lies in range [-1,1]
ADD a2, a1, a1, LSL #2 ;now apply poly
ADD a2, a2, a2, LSL #5
MOV a2, a2, ASR #14
ADD a2, a2, #&000500
ADD a2, a2, #&0000ae
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&000800
ADD a2, a2, #&000088
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&002000
ADD a2, a2, #&00009a
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&004600
ADD a2, a2, #&00009a
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&030000
ADD a2, a2, #&00d900
ADD a2, a2, #&0000b4
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&030000
ADD a2, a2, #&00ae00
ADD a2, a2, #&000052
MOV a1, a2, ASR #4
TST a4, #2
RSBEQ a1, a1, #65536
TST a4, #1
RSBNE a1, a1, #2*65536
LDMFD sp!, {pc}^
; asn16
; a leaf APCS function
;
; C prototype:
; int asn16(int a)
;
; returns 65536 * 2/pi * arcsin (a/65536)
;
; for a out of range (ie abs a > 65536), reset a to ▒65536 as appropriate before eval
;
; is about 24 times faster than FPEmulator working with double floats
;
EXPORT asn16
asnsta DCB "asn16", 0
ALIGN
asnend DCD &ff000000 + asnend - asnsta
asn16
CMP a1, #65536
MOVGE a1, #65536
MOVGES pc, lr
ADDS a2, a1, #65536 ;equiv to CMP a1,#-65536
SUBLE a1, a1, a2 ;equiv to MOVLE a1,#-65536 (which wouldn't work due to bad constant)
MOVLES pc, lr
STMFD sp!, {lr} ;now have arg in range (-one,one)
CMP a1, #0
RSBMI a1, a1, #0
MOVMI a4, #1 ;b0 in a4 set iff final result needs negating
MOVPL a4, #0 ;now have arg in range [0,one)
SUB a2, a1, #&b500
SUBS a2, a2, #&0005
BLT asn16_arglow ;if arg < one/sqrt2 go and apply poly else first transmute into here
ORR a4, a4, #2 ;b1 in a4 set iff penultimate result r needs to be replaced by one-r
MUL a2, a1, a1
MOV a1, a2, LSR #16
RSB a1, a1, #65536
sqrt16 a1, a1, a2, a3, ip, lr ;arg transmuted (ie replaced by one-sqrt(arg^2))
asn16_arglow
ADD a3, a1, a1, ASL #2
ADD a2, a3, a1, ASL #4
ADD a2, a2, a3, ASL #5
ADD a2, a2, a3, ASR #8
ADD a2, a2, #32
MOV a1, a2, ASR #6 ;range transform applied (ie replaced by arg*2sqrt2-one)
SUB a1, a1, #&10000 ;so arg lies in range [-1,1]
ADD a2, a1, a1, LSL #2 ;now apply poly
ADD a2, a2, a2, LSL #5
MOV a2, a2, ASR #14
ADD a2, a2, #&000500
ADD a2, a2, #&0000ae
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&000800
ADD a2, a2, #&000088
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&002000
ADD a2, a2, #&00009a
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&004600
ADD a2, a2, #&00009a
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&030000
ADD a2, a2, #&00d900
ADD a2, a2, #&0000b4
MOV ip, a1
mul16c a2, ip, a2, a3
ADD a2, a2, #&030000
ADD a2, a2, #&00ae00
ADD a2, a2, #&000052
MOV a1, a2, ASR #4
TST a4, #2
RSBNE a1, a1, #65536
TST a4, #1
RSBNE a1, a1, #0
LDMFD sp!, {pc}^
; gauss16
; a leaf APCS function
;
; C prototype:
; int gauss16(void)
;
; returns 65536 * ( pseudo randon variable with approximate distribution N(0,1) )
; note, the approximate gaussian distribution is achieved via the sum of n pseudo U[0,65535]
; random variables & application of the central limit theorem
; here we use n=8, giving an actual range of ▒(sqrt24) (*65536).
;
; for general n, recall we need to return:
; (sum n U[0,65535] random variables) * 2sqrt(3n)*65536/(65535n) - sqrt(3n)*65536
;
EXPORT gauss16
gansta DCB "gauss16", 0
ALIGN
ganend DCD &ff000000 + ganend - gansta
gauss16
ADR a1, gaussseed1
LDMIA a1, {a2, a3}
MOVS a3, a3, LSR #1
MOVS a4, a2, RRX
ADC a3, a3, a3
EOR a4, a4, a2, LSL #12
EOR a2, a4, a4, LSR #20
MOV ip, a2, LSR #16
MOV a4, a2, LSL #16
ADD ip, ip, a4, LSR #16
MOVS a3, a3, LSR #1
MOVS a4, a2, RRX
ADC a3, a3, a3
EOR a4, a4, a2, LSL #12
EOR a2, a4, a4, LSR #20
ADD ip, ip, a2, LSR #16
MOV a4, a2, LSL #16
ADD ip, ip, a4, LSR #16
MOVS a3, a3, LSR #1
MOVS a4, a2, RRX
ADC a3, a3, a3
EOR a4, a4, a2, LSL #12
EOR a2, a4, a4, LSR #20
ADD ip, ip, a2, LSR #16
MOV a4, a2, LSL #16
ADD ip, ip, a4, LSR #16
MOVS a3, a3, LSR #1
MOVS a4, a2, RRX
ADC a3, a3, a3
EOR a4, a4, a2, LSL #12
EOR a2, a4, a4, LSR #20
ADD ip, ip, a2, LSR #16
MOV a4, a2, LSL #16
ADD ip, ip, a4, LSR #16 ;sum now in ip
STMIA a1, {a2, a3}
RSB a1, ip, ip, ASL #3
RSB a2, ip, ip, ASL #2
ADD a1, a2, a1, ASL #4
ADD a1, a1, ip, ASL #9
ADD a2, ip, ip, ASL #4
ADD a2, a2, ip, ASL #6
ADD a1, a1, a2, LSR #10
MOV a1, a1, LSR #9
SUB a1, a1, #&4E000
SUB a1, a1, #&00620
SUB a1, a1, #&00004
MOVS pc, lr
gaussseed1 DCD -1 ;bits b0-b31 of seed
DCD -1 ;bit b32 of seed (in lsb of word)
; sgauss16
; a leaf APCS function
;
; C prototype:
; void sgauss16(int seed)
;
; sets the seed for gauss16
;
EXPORT sgauss16
sgnsta DCB "sgauss16", 0
ALIGN
sgnend DCD &ff000000 + sgnend - sgnsta
sgauss16
ADR a2, gaussseed1
MOV a3, #1
STMIA a2, {a1, a3}
MOVS pc, lr
; div_frac16
; a leaf APCS function
;
; C prototype:
; int div_frac16(int number, int divisor)
;
; returns integer part of 65536*number/divisor
; assumes abs number < 65536 * abs divisor
; if this needs checking, must be done by caller
;
EXPORT div_frac16
dfnsta DCB "div_frac16", 0
ALIGN
dfnend DCD &ff000000 + dfnend - dfnsta
div_frac16
div16 a1, a2, a1, a3, a4
MOVS pc, lr
; mul_frac16
; a leaf APCS function
;
; C prototype:
; int mul_frac16(int x, int a)
;
; returns 32-bit signed integer x*a/65536
; assumes result fits into 32-bit signed representation
; note, no other restrictions on a - if can guarantee abs a < 65536, use mul_frac16c instead as is quicker
;
EXPORT mul_frac16
mfnsta DCB "mul_frac16", 0
ALIGN
mfnend DCD &ff000000 + mfnend - mfnsta
mul_frac16
mul16 a1, a2, a1, a3, a4, ip
MOVS pc, lr
; mul_frac16c
; a leaf APCS function
;
; C prototype:
; int mul_frac16c(int x, int a)
;
; returns 32-bit signed integer x*a/65536
; assumes abs a <=65536
; if it is possible that abs a > 65536, caller must check range & either not call fn or round down to 65536
;
EXPORT mul_frac16c
mfcnsta DCB "mul_frac16c", 0
ALIGN
mfcnend DCD &ff000000 + mfcnend - mfcnsta
mul_frac16c
mul16c a1, a2, a1, a3
MOVS pc, lr
; sqrt_frac16
; a leaf APCS function
;
; C prototype:
; int sqrt_frac16(unsigned int x)
;
; returns 32-bit integer sqrt(x<<16)
;
EXPORT sqrt_frac16
sqfnsta DCB "sqrt_frac16", 0
ALIGN
sqfnend DCD &ff000000 + sqfnend - sqfnsta
sqrt_frac16
sqrt16 a1, a1, a2, a3, a4, ip
MOVS pc, lr
END