; 32-bit math ; ; The 32-bit evaluation routines represent numbers from -2.0 to +2.0 using ; a fixed-point format containing one sign bit, one bit to the left of the ; binary point, and 30 bits to the right of the binary point. ; ; This fixed-point format is the same as Apple's Fract format (see Inside ; Mac vol. 4, p. 63.) My routines are faster than Apple's routines because ; they are register-based, and because they need to compute only three ; partial products in the case when both factors are equal. ; Macro to multiply a longint by itself. This is used often enough to ; justify the macro. (32 x 32 -> 64 version) ; ; s, high, low and tmp must be data ragisters; no two may be the same. ; macro s32_1 s,high,low,tmp = tst.l {s} ; If argument is negative, make it positive bpl.s @1 neg.l {s} @1 move {s},{low} ; Compute b^2 part of (a+b)^2 = a^2 + 2ab + b^2 mulu {s},{low} swap {s} ; Compute a^2 part. move {s},{high} mulu {s},{high} | macro s32_2 s,high,low,tmp = move {s},{tmp} ; Compute 2ab part. swap {s} mulu {s},{tmp} ; {tmp} = a*b add.l {tmp},{tmp} ; {tmp} = 2*a*b move.l {tmp},{s} ; Clone this result so we can split it into two ; halves. clr {tmp} ; Shift the MS half of 2ab into position swap {tmp} swap {s} ; Shift less significant half of 2ab into position clr {s} add.l {s},{low} ; Finally, add them together! addx.l {tmp},{high} | macro square_32 s,high,low,tmp = s32_1 {s},{high},{low},{tmp} s32_2 {s},{high},{low},{tmp} | ; Longint x Longint multiply subroutine, with a little bit (but not much) ; in common with the LongMul routine in the ROM, (C) Apple Computer Co. ; Oops, I guess I shouldn't say that I looked at the ROM (-: ; ; D6 used as sign flag. ; ; factor 1 a b ; times factor 2 x c d ; ------------ ; b * d p q ; a * d r s ; b * c t u ; a * c + v w ; ---------------------------- ; product v r+t+w p+s+u q ; ; ; D0 D1 D2 D3 D4 D5 ; ;------ ------- ------- ------- ------- ------- _long_mult ; a:b c:d tst.l d0 ; Make both factors positive and remember sign smi d6 ; of product. bpl.s _lm_0 neg.l d0 _lm_0 tst.l d1 bpl.s _lm_00 not.b d6 neg.l d1 ; D0 D1 D2 D3 D4 D5 ;------ ------- ------- ------- ------- ------- _lm_00 move d0,d3 ; a:b c:d 0:b mulu d1,d3 ; a:b c:d p:q swap d0 ; b:a c:d p:q swap d1 ; b:a d:c p:q move d0,d2 ; b:a d:c 0:a p:q mulu d1,d2 ; b:a d:c v:w p:q ; D0 D1 D2 D3 D4 D5 ;------ ------- ------- ------- ------- ------- swap d0 ; a:b d:c v:w p:q move d0,d4 ; a:b d:c v:w p:q 0:b mulu d1,d4 ; a:b d:c v:w p:q t:u move.l d4,d5 ; a:b d:c v:w p:q t:u t:u clr d4 ; a:b d:c v:w p:q t:0 t:u swap d4 ; a:b d:c v:w p:q 0:t t:u swap d5 ; a:b d:c v:w p:q 0:t u:t clr d5 ; a:b d:c v:w p:q 0:t u:0 add.l d5,d3 ; a:b d:c v:w pu:q 0:t u:0 addx.l d4,d2 ; a:b d:c v:tw pu:q 0:t u:0 ; D0 D1 D2 D3 D4 D5 ;------ ------- ------- ------- ------- ------- swap d0 ; b:a d:c v:tw pu:q 0:t u:0 swap d1 ; b:a c:d v:tw pu:q 0:t u:0 move d0,d4 ; b:a c:d v:tw pu:q 0:a u:0 mulu d1,d4 ; b:a c:d v:tw pu:q r:s u:0 move.l d4,d5 ; b:a c:d v:tw pu:q r:s r:s swap d5 ; b:a c:d v:tw pu:q r:s s:r clr d5 ; b:a c:d v:tw pu:q r:s s:0 clr d4 ; b:a c:d v:tw pu:q r:0 s:0 swap d4 ; b:a c:d v:tw pu:q 0:r s:0 add.l d5,d3 ; b:a c:d v:tw psu:q 0:r s:0 addx.l d4,d2 ; b:a c:d v:rtw psu:q 0:r s:0 tst.b d6 ; Set sign of product correctly. beq.s _lm_3 neg.l d3 negx.l d2 _lm_3 rts ; Subroutine to compute Z', given Z and C ; ; Z = a + b i ; ; 2 2 2 ; Z' = Z + C = ( a - b + Cr ) + ( 2ab + Ci ) i ; ; In the comments below, a^-b^ means a^2-b^2, a' means a^2+b^2+Cr, h means high 32 bits, l means low 32 bits, J ; means high 32 bits adjusted to put decimal in correct position, * means this step trashes the register. ; ; ; D0 D1 D2 D3 D4 D5 D6 A0 A1 A2 A3 A4 ; At startup ; a b Cr Ci ; At finish ; * * a' b' * * * Cr Ci a b a' ; ;------ ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- ; At startup ; a b Cr Ci _z_prime_32 move.l d2,a2 ; a b Cr Ci a move.l d3,a3 ; a b Cr Ci a b ; square_32 d3,d0,d1,d5 ; b^2h b^2l a * * Cr Ci a b ; square_32 d2,d5,d6,d3 ; b^2h b^2l * * a^2h a^2l Cr Ci a b ; sub.l d1,d6 subx.l d0,d5 ; b^2h b^2l a^-b^h a^-b^l Cr Ci a b ; 60 frac. bits ; D5 has 28 frac bits swap d6 add.w d6,d6 addx.l d5,d5 ; D5 has 29 frac bits add.l a0,d5 ; b^2h b^2l a'h a'l Cr Ci a b bvs.s _z_exit ; Check for overflow. ; D0 D1 D2 D3 D4 D5 D6 A0 A1 A2 A3 A4 add.w d6,d6 ;------ ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- addx.l d5,d5 ; b^2h b^2l a'J * Cr Ci a b bvs.s _z_exit ; D5 has 30 frac bits move.l d5,a4 ; b^2h b^2l a'J * Cr Ci a b a' move.l a2,d0 move.l a3,d1 ; a b a' Cr Ci a b a' jsr _long_mult ; * * abh abl * * * Cr Ci a b a' swap d3 add.w d3,d3 addx.l d2,d2 ; 2abh 2abl Cr Ci a b a' ; 60 frac. bits ; D2 has 28 frac bits add.w d3,d3 addx.l d2,d2 ; D2 has 29 frac bits bvs.s _z_exit ; [ Leave this out for neat little patterns... ] add.l a1,d2 ; b'h b'l Cr Ci a b a' bvs.s _z_exit add.w d3,d3 addx.l d2,d2 ; b'J * Cr Ci a b a' bvs.s _z_exit ; D2 has 30 frac bits move.l d2,d3 ; b' b' Cr Ci a b a' move.l a4,d2 ; a' b' Cr Ci a b a' _z_exit rts ; This subroutine is like _in_set, but uses 32-bit math for all compu- ; tations. ; ; Reg. Input Output ; ---- ------------------------- ------ ; D0 Real component of point Unchanged. ; (29 frac. bits) ; D1 Imag. component of point Unchanged. ; (29 frac. bits) ; D2 used as longint real component of Z ; D3 used as longint imag. component of Z ; D4 used as longint real component of Z' ; D5 used as longint imag. component of Z' ; D6 ; D7.w Ignored. Number of iterations it took. ; (If this is -1, assume that the ; point is in the Mandelbrot Set.) ; A0 used as real component of point ; A1 used as imag. component of point ; ; _in_set_32 movem.l d0-d6/a0-a4,-(sp) moveq #0,d2 ; Initial Z in Ls1.30 format (real) moveq #0,d3 ; (imag.) move n_max,d7 move.l d0,a0 move.l d1,a1 _is3_loop jsr _z_prime_32 bvs.s _is3_exit _is3_next dbf d7,_is3_loop move #-1,d7 ; -1 means it's in the set. bra _is3_rts _is3_exit sub n_max,d7 ; Compute n = n_max - d7 neg d7 jsr _is32_rpm move.l a2,d2 ; Get old a and b move.l a3,d3 _is3_z1 ; Compute a^2 + b^2 for old Z square_32 d3,d0,d1,d5 ; d0,d1 = b^2 (60 frac. bits) _is3_z2 square_32 d2,d3,d4,d5 ; d3,d4 = a^2 (60 frac. bits) _is3_z3 add.l d3,d0 ; d0 = MS word of a^2 + b^2, 28 frac. bits compare_l d0,#$40000000 ; Is it less than 4.0000 ? blt.s _is3_rts ; If so, it was still inside the circle. subq #1,d7 ; Otherwise Z was outside the circle; adjust count ; to make countour lines smooth. _is3_rts movem.l (sp)+,d0-d6/a0-a4 rts ; This routine makes my initials "RPM" appear at certain places in the ; picture. The initials can only be found if you use 32-bit math, and ; inspect a region near a lemniscate of order greater than 20. ; _is32_rpm movem.l d2-d3/a0,-(sp) compare d7,#20 ble _is3r_rts move.l a2,d2 move.l a3,d3 swap d2 swap d3 compare d3,#$7c00 blt _is3r_rts compare d3,#$7e00 bge _is3r_rts tst d2 blt _is3r_rts compare d2,#$0800 bge _is3r_rts sub #$7c00,d3 lsr #6,d3 lsr #6,d2 lea _is3r_data,a0 not d3 and #$0007,d3 asl #2,d3 adda d3,a0 move.l (a0),d3 asl.l d2,d3 tst.l d3 bpl _is3r_rts subq #1,d7 _is3r_rts movem.l (sp)+,d2-d3/a0 rts _is3r_data dc.l $f1e20800, $89131800, $8912a800, $f1e24800 dc.l $a1020800, $91020800, $89020800, $00000000 ; Subroutine to compute Z', given Z and C ; ; Z = a + b i ; ; 2 2 2 ; Z' = Z + C = ( a - b + Cr ) + ( 2ab + Ci ) i ; ; In the comments below, a^-b^ means a^2-b^2, a' means a^2+b^2+Cr, h means high 32 bits, l means low 32 bits, J ; means high 32 bits adjusted to put decimal in correct position, * means this step trashes the register. ; ; ; D0 D1 D2 D3 D4 D5 D6 A0 A1 A2 A3 A4 ; At startup ; a b Cr Ci ; At finish ; * * a' b' a' * Cr Ci a b ; ;------ ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- ; At startup ; a b Cr Ci _z_prime_020: move.l d2,a2 ; a b Cr Ci a move.l d3,a3 ; a b Cr Ci a b move.l d3,d1 muls3264 3,0,1 ; b^2h b^2l a b Cr Ci a b move.l d2,d6 muls3264 2,5,6 ; b^2h b^2l a b a^2h a^2l Cr Ci a b sub.l d1,d6 subx.l d0,d5 ; b^2h b^2l a b a^-b^h a^-b^l Cr Ci a b ; 60 frac. bits ; D5 has 28 frac bits ; At this point, we swap d6 and use add.w rather than add.l, because add.w is faster and we ; only need the top two bits anyway. swap d6 add.w d6,d6 addx.l d5,d5 ; a b D5 has 29 frac bits add.l a0,d5 ; b^2h b^2l a b a'h a'l Cr Ci a b bvs.s _z020_exit ; Check for overflow. ; D0 D1 D2 D3 D4 D5 D6 A0 A1 A2 A3 A4 add.w d6,d6 ;------ ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- addx.l d5,d5 ; b^2h b^2l a b a'J * Cr Ci a b bvs.s _z020_exit ; D5 has 30 frac bits move.l d2,d0 ; a b^2l a b a'J Cr Ci a b muls3264 0,2,3 ; a b^2l abh abl a'J Cr Ci a b swap d3 add.w d3,d3 addx.l d2,d2 ; 2abh 2abl a' Cr Ci a b ; 60 frac. bits ; D2 has 28 frac bits add.w d3,d3 addx.l d2,d2 ; D2 has 29 frac bits bvs.s _z020_exit ; [ Leave this out for neat little patterns... ] ; D0 D1 D2 D3 D4 D5 D6 A0 A1 A2 A3 A4 ;------ ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- ------- add.l a1,d2 ; b'h b'l a' Cr Ci a b bvs.s _z020_exit add.w d3,d3 addx.l d2,d2 ; b'J * a' Cr Ci a b bvs.s _z020_exit ; D2 has 30 frac bits move.l d2,d3 ; b' b' a' Cr Ci a b move.l d5,d2 ; a' b' a' Cr Ci a b _z020_exit: rts ; This subroutine is like _in_set, but uses 32-bit math for all compu- ; tations. It also uses 68020 instructions to do the multiplications. ; ; Reg. Input Output ; ---- ------------------------- ------ ; D0 Real component of point Unchanged. ; (29 frac. bits) ; D1 Imag. component of point Unchanged. ; (29 frac. bits) ; D2 used as longint real component of Z ; D3 used as longint imag. component of Z ; D4 used as longint real component of Z' ; D5 used as longint imag. component of Z' ; D6 ; D7.w Ignored. Number of iterations it took. ; (If this is -1, assume that the ; point is in the Mandelbrot Set.) ; A0 used as real component of point ; A1 used as imag. component of point ; ; _in_set_020 movem.l d0-d6/a0-a4,-(sp) moveq #0,d2 ; Initial Z in Ls1.30 format (real) moveq #0,d3 ; (imag.) move n_max,d7 move.l d0,a0 move.l d1,a1 _is0_loop jsr _z_prime_020 bvs.s _is0_exit _is0_next dbf d7,_is0_loop move #-1,d7 ; -1 means it's in the set. bra _is0_rts _is0_exit sub n_max,d7 ; Compute n = n_max - d7 neg d7 jsr _is32_rpm move.l a2,d2 ; Get old a and b move.l a3,d3 _is0_z1 ; Compute a^2 + b^2 for old Z square_32 d3,d0,d1,d5 ; d0,d1 = b^2 (60 frac. bits) _is0_z2 square_32 d2,d3,d4,d5 ; d3,d4 = a^2 (60 frac. bits) _is0_z3 add.l d3,d0 ; d0 = MS word of a^2 + b^2, 28 frac. bits compare_l d0,#$40000000 ; Is it less than 4.0000 ? blt.s _is0_rts ; If so, it was still inside the circle. subq #1,d7 ; Otherwise Z was outside the circle; adjust count ; to make countour lines smooth. _is0_rts movem.l (sp)+,d0-d6/a0-a4 rts