; 30-bit math
;
; The 30-bit evaluation routines represent numbers from -2.0 to +2.0 using
; a fixed-point format containing one sign bit, one bit to the left of the
; binary point, 28 bits to the right of the binary point, and two bits
; which are ignored.  This format takes 32 bits to store in memory or a
; register, but because the last two bits are ignored, there are actually
; only 30 bits of precision.
;
; This fixed-point format is interchangeable with the format used in the
; 32-bit routines (see below) which in turn is equivalent to Apple's
; Fract format (see Inside Mac vol. 4, p. 63.)  Multiplying two numbers
; of this format involves a two-bit left shift to put the decimal point
; in the proper position.  In these 30-bit routines, the entire bottom
; half of the 64-bit product is ignored, and the two bits which would
; normally be shifted in are undefined.  This makes it significantly
; faster, while sacrificing two bits of precision.

; Macro to multiply a longint by itself.  This is used often enough to
; justify the macro. (32 x 32 -> 32 version)
;
; s and ans must be data ragisters (not both the same one)
;
macro	square_30 s,ans =
	tst.l {s}	  ; If argument is negative, make it positive
	bpl.s @1
	neg.l {s}

@1	move {s},{ans}
	swap {s}
	mulu {s},{ans}
	add.l {ans},{ans} 	; {ans} = 2*a*b

	clr {ans}	  	; Shift the MS half of 2ab into position
	swap {ans}

	mulu {s},{s}   		; Compute a^2 part.
	add.l {s},{ans}		; Add to answer.
	|

; 32 x 32 -> 32 version of _long_mult
;
;  D6	   used as sign flag.
;
; factor 1			a	b
; times factor 2	     x	c	d
;			     ------------
; b * d				p	q
; a * d			r	s
; b * c			t	u
; a * c	     +	v	w
;	     ----------------------------
; product	v	r+t+w	p+s+u	q
;
;			; D0	D1	D3	D4	D6
;			;------	-------	-------	-------	-------
_long_mult_30 		; a:b	c:d
	tst.l d0	; Make both factors positive and remember sign
	smi d6		; of product.
	bpl.s @1
	neg.l d0
@1	tst.l d1
	bpl.s @2
	not.b d6
	neg.l d1
			; D0	D1	D3	D4	D6
			;------	-------	-------	-------	-------
@2	move d0,d4	; a:b	c:d		*:b
	swap d1		; a:b	d:c		*:b
	mulu d1,d4	; a:b	d:c		t:u
	
	
	swap d0		; b:a	d:c		t:u	
	move d0,d3	; b:a	d:c	*:a	t:u	
	mulu d1,d3	; b:a	d:c	v:w	t:u	

	clr d4		; b:a	d:c	v:w	t:0	
	swap d4		; b:a	d:c	v:w	0:t	
	add.l d4,d3	; b:a	d:c	v:tw	0:t	

			; D0	D1	D3	D4	D6
			;------	-------	-------	-------	-------
	swap d1		; b:a	c:d	v:tw	0:t	
	move d0,d4	; b:a	c:d	v:tw	0:a	
	mulu d1,d4	; b:a	c:d	v:tw	r:s	
	clr d4		; b:a	c:d	v:tw	r:0	
	swap d4		; b:a	c:d	v:tw	0:r	
	add.l d4,d3	; b:a	c:d	v:rtw	0:r	

	tst.b d6	; Set sign of product correctly.
	beq.s _lm3_3
	neg.l d3
_lm3_3	rts

; Subroutine to compute Z', given Z and C   (30 bit version) 
;
;               Z = a + b i
;
;                     2         2    2
;               Z' = Z + C = ( a  - b  + Cr ) + ( 2ab + Ci ) i
;
; In the comments below, a^-b^ means a^2-b^2, a' means a^2+b^2+Cr, h means high 32 bits, l means low 32 bits, J means
; high 32 bits adjusted to put decimal in correct position, * means this step trashes the register.
;
;			; D0	D1	D2	D3	D4	D5	D6	A0	A1	A2	A3	A4
; At startup		;		a	b				Cr	Ci
; At finish		; *	*	a'	b'	*	a'	*	Cr	Ci	a	b	
;			;------	-------	-------	-------	-------	-------	-------	-------	-------	-------	-------	-------
; At startup		;		a	b				Cr	Ci
_z_prime_30 
	move.l d2,a2	;		a	b				Cr	Ci	a
	move.l d3,a3	;		a	b				Cr	Ci	a	b
_z3_1	;
	square_30 d3,d0	; b^2h		a	*				Cr	Ci	a	b
_z3_2	;
	square_30 d2,d5	; b^2h		*			a^2h		Cr	Ci	a	b
_z3_3	;
	sub.l d0,d5	; b^2h					a^-b^h		Cr	Ci	a	b
			;					28 frac. bits
			
	add.l d5,d5	;					29 frac bits
	
	add.l a0,d5	; b^2h					a'h	 	Cr	Ci	a	b
	bvs.s _z3_exit	; Check for overflow.
	
			; D0	D1	D2	D3	D4	D5	D6	A0	A1	A2	A3	A4
			;------	-------	-------	-------	-------	-------	-------	-------	-------	-------	------- -------
	add.l d5,d5	; b^2h					a'J		Cr	Ci	a	b
	bvs.s _z3_exit	;					D5 has 30 frac bits

	move.l a2,d0	; a					a'		Cr	Ci	a	b
	move.l a3,d1	; a	b				a'	 	Cr	Ci	a	b	
      jsr _long_mult_30	; *	*		abh	*	a'	*	Cr	Ci	a	b	

	asl.l #2,d3	;  			2abh		a'		Cr	Ci	a	b	
			;			29 frac bits
	bvs.s _z3_exit	; [ Leave this instruction out to get nifty "glitches" in picture... ]

	add.l a1,d3	;			b'h		a'		Cr	Ci	a	b	
	bvs.s _z3_exit
	add.l d3,d3	;			b'J		a'		Cr	Ci	a	b	
	bvs.s _z3_exit	;			30 frac bits

	move.l d5,d2	;		a'	b'		a'		Cr	Ci	a	b	
_z3_exit rts

; This subroutine is like _in_set_32, but it uses _long_mult_30 and
; square_30, for slightly lower prescision (but about 30% faster
; speed)
;
; Reg.	Input				Output
; ----	-------------------------	------
;  D0	Real component of point		Unchanged.
;	   (29 frac. bits)
;  D1	Imag. component of point	Unchanged.
;	   (29 frac. bits)
;  D2	   used as longint real component of Z
;  D3	   used as longint imag. component of Z
;  D4	   used as longint real component of Z'
;  D5	   used as longint imag. component of Z'
;  D6	   used as loop counter
;  D7	Ignored.			Number of iterations it took.
;					(If this is -1, assume that the
;					point is in the Mandelbrot Set.)
;
_in_set_30 movem.l d0-d6/a0-a4,-(sp)
	and.w #$fffc,d1		; Clear bottom two bits of imag. component.  This helps make
				; the filament on the real axis invisible in the default view.
	moveq #0,d2		; Initial Z in Ls1.30 format (real)
	moveq #0,d3		; (imag.)
	move n_max,d7
	move.l d0,a0
	move.l d1,a1

_is30_loop jsr _z_prime_30
	bvs.s _is30_exit

_is30_next dbf d7,_is30_loop
	move #-1,d7		; -1 means it's in the set.
	bra _is30_rts

_is30_exit sub n_max,d7		; Compute n = n_max - d7
	neg d7

	move.l a2,d2		; Get old a and b
	move.l a3,d3
_is30_z1				; Compute a^2 + b^2 for old Z
	square_30 d3,d0		; d0 = b^2 (28 frac. bits)
_is30_z2
	square_30 d2,d3		; d3 = a^2 (28 frac. bits)
_is30_z3
	add.l d3,d0		; d0 = MS word of a^2 + b^2, 28 frac. bits
	compare_l d0,#$40000000	; Is it less than 4.0000 ?
	blt.s _is30_rts		; If so, it was still inside the circle.
	subq #1,d7	; Otherwise Z was outside the circle; adjust count
			; to make countour lines smooth.
_is30_rts movem.l (sp)+,d0-d6/a0-a4
	rts