Today I had fun optimizing the sprite rotation algorithm I wrote a while ago for my homebrew game. It runs in the CPU time left over in each frame (whenever the game is not calculating game logic) and automatically fills RAM with as many pre-rotated frames as possible (a compromise between smoothness, setup time, and the number of unique sprites), so that by the time you approach a certain part of the level, the rotation has already been calculated.
I got it just fast enough to rotate one 32x32 sprite per frame when nothing else is happening onscreen. That doesn't look very fast on paper, but a rotating 32x32 sprite with 64 angles only needs 32 generated frames because of horizontal/vertical flipping, so building the whole set takes only about half a second.
I'm not very good at taking notes, but I used tricks such as relying on positive numbers to ALWAYS leave the carry bit clear and negative numbers to ALWAYS leave the carry bit set, so that most clc and sec instructions can be dropped. I also used lookup tables to convert pixels to planar format.
Code
// rotate_sprites_for_modular_animation
// Walks a list of 16-bit entries starting at $0000,y and calls rotate_sprite
// once per entry, passing the entry value in X (rotate_sprite reads its
// parameters through $0000,x .. $000a,x).
// In:   Y = offset of the first list entry, read through the data bank
// Stop: at the first zero entry, or as soon as {terminate_rotation} reads
//       non-zero after a sprite has been processed.
// Out:  {modular_animation_data} and {terminate_rotation} are cleared.
// NOTE(review): presumably entered with 16-bit A and index registers - confirm against the caller.
rotate_sprites_for_modular_animation:
-;
lda $0000,y //fetch the next list entry
beq + //a zero entry ends the list
tax //rotate_sprite expects the entry in X
phy //keep the list position across the call
jsr rotate_sprite
ply
iny #2 //step to the next list entry
lda.l {terminate_rotation}
beq - //continue while no termination has been requested
+;
stz {modular_animation_data}
stz {terminate_rotation}
rts
// rotate_sprite
// Sets up the rotation variables and pointer banks for one sprite, then calls
// convert_bitmap to render its rotated frames.
// In:  X = address of a parameter record laid out as follows
//        $0000,x  ROM address of the source image (low byte -> {x_pixel_hi},
//                 high byte -> {y_pixel_hi})
//        $0002,x  ROM bank of the source image
//        $0004,x  rotation step amount
//        $0006,x  RAM destination address
//        $0008,x  RAM destination bank
//        $000a,x  size, 2=16x16, 4=32x32
// Saves and restores the data bank, processor status and direct page register.
// While convert_bitmap runs, the direct page is $0000, the data bank is the
// RAM destination bank and X is the RAM destination address.
// NOTE(review): {rotation_step}, {rotation_angle}, {size} and {d} are written
// here with 8-bit stores but are read with a 16-bit accumulator in
// new_rotation_step / convert_bitmap / convert_bitmap_loop. This appears to
// assume their high bytes are already zero - confirm.
rotate_sprite:
phb
php
sep #$20 //8-bit accumulator for the byte-sized setup below
lda #$80
sta {scratch_pad_ram}+34 //the LUTs are in bank $80, so this is the
sta {scratch_pad_ram}+38 //bank byte of the four 24-bit LUT pointers
sta {scratch_pad_ram}+42 //at scratch_pad_ram+32/+36/+40/+44
sta {scratch_pad_ram}+46
lda $0004,x
sta {rotation_step} //rotation step amount
stz {rotation_angle} //rotation starts at angle 0
lda $000a,x
asl #3
sta {size} //size field * 8 (2 -> 16, 4 -> 32)
asl #2
sta {d} //size field * 32
lda $0000,x
stz {x_pixel} //low byte of the X position starts at 0
sta {x_pixel_hi} //high byte comes from the ROM address low byte
lda $0001,x
stz {y_pixel} //low byte of the Y position starts at 0
sta {y_pixel_hi} //high byte comes from the ROM address high byte
lda $0002,x
sta {scratch_pad_ram}+3 //these are the banks of the eight
sta {scratch_pad_ram}+7 //"pixel pointers" used by convert_pixel
sta {scratch_pad_ram}+11
sta {scratch_pad_ram}+15
sta {scratch_pad_ram}+19
sta {scratch_pad_ram}+23
sta {scratch_pad_ram}+27
sta {scratch_pad_ram}+31
lda $0008,x
pha //RAM bank, pulled into the data bank register below
rep #$20
lda $0006,x
tax //X now holds the destination address
plb //make data bank hold the RAM bank
phd
lda #$0000
tcd //direct page = $0000 for the .b accesses in convert_bitmap
jsr convert_bitmap
pld
plp
plb
rts
// new_rotation_step
// Reached from convert_bitmap_loop when the line counter {c} is zero, i.e.
// after the last line of the current frame. It pulls back the {y_pixel} and
// {x_pixel} values that convert_bitmap pushed, then either returns or
// advances {rotation_angle} by {rotation_step} and branches to convert_bitmap
// for the next frame.
// Returns (rts) when {terminate_rotation} is non-zero or when the new angle
// is $0080 or above.
// Author's note kept from the original comments: "I forgot how I did this
// math stuff but it's kind of like setting up mode 7 registers" (this most
// likely refers to the math in convert_bitmap).
new_rotation_step:
pla //pulled in the reverse of convert_bitmap's push order
sta.b {y_pixel}
pla
sta.b {x_pixel}
lda.l {terminate_rotation}
beq +
rts //termination requested, stop rendering frames
+;
lda.b {rotation_step}
clc
adc.b {rotation_angle}
sta.b {rotation_angle}
cmp #$0080
bcc convert_bitmap //angle still below $0080, render the next frame
rts
// convert_bitmap
// Per-frame setup for one rotation angle. Falls through into
// convert_bitmap_loop, which renders the frame line by line.
// In:  X = destination address, direct page = $0000, 16-bit accumulator
//      (as set up by rotate_sprite, or when re-entered from new_rotation_step)
// Steps:
//   1. Write $00 to $004200, and $81 to it again once the multiplications
//      are done.
//      NOTE(review): presumably this masks interrupts so a handler cannot
//      disturb the multiplier registers - confirm.
//   2. Load {sine} and {cosine} from the sine/cosine tables, indexed by
//      ({rotation_angle} * 2) AND $01fe.
//   3. Push {x_pixel} then {y_pixel}. new_rotation_step pulls them back.
//   4. Offset {x_pixel} and {y_pixel} using products read back from $002134
//      after writing $00211b (16-bit value, low byte first) and $00211c.
//      NOTE(review): these look like the PPU mode 7 multiplier registers -
//      confirm against the hardware documentation.
//   5. Initialise the loop counters: {c} = {size}, {b} = {size} >> 3,
//      {a} = {size} << 2.
//   6. Decrement {cosine} and {sine} by one when they are negative.
convert_bitmap:
sep #$20
lda #$00
sta $004200
rep #$20
phx //keep the destination address while X indexes the tables
lda.b {rotation_angle}
asl
and #$01fe //word index into the tables
tax
lda $000000+sine,x
sta.b {sine}
lda $000000+cosine,x
sta.b {cosine}
plx
lda.b {x_pixel}
pha //saved for new_rotation_step
lda.b {y_pixel}
pha //saved for new_rotation_step
lda.b {sine}
clc
adc.b {cosine}
sta.b {a} //{a} = sine + cosine
sep #$20
sta $00211b //low byte of {a}
xba
sta $00211b //high byte of {a}
lda.b {size}
lsr
sta $00211c //{size} / 2
rep #$20
lda.b {size}
xba //{size} * 256
clc
adc.b {a}
lsr //({size} * 256 + {a}) / 2
sec
sbc $002134 //minus the product read back from the multiplier
clc
adc.b {x_pixel}
sta.b {x_pixel}
lda.b {cosine}
sec
sbc.b {sine}
sta.b {a} //{a} = cosine - sine
sep #$20
sta $00211b //low byte of {a}
xba
sta $00211b //high byte of {a}
lda.b {size}
lsr
sta $00211c //{size} / 2
rep #$20
lda.b {size}
xba //{size} * 256
clc
adc.b {a}
lsr //({size} * 256 + {a}) / 2
sec
sbc $002134 //minus the product read back from the multiplier
clc
adc.b {y_pixel}
sta.b {y_pixel}
lda.b {size}
sta.b {c} //{c} = lines left in this frame
lsr #3
sta.b {b} //{b} = 8-pixel groups left in the current line
lda.b {size}
asl #2
sta.b {a} //{a} = amount X advances while one line is drawn
sep #$20
lda #$81
sta $004200
rep #$20
lda.b {cosine} //adjust sine and cosine so clc and sec
bpl + //are not needed (author's note): negative values are stored minus one
dec
sta.b {cosine}
+;
lda.b {sine}
bpl +
dec
sta.b {sine}
+;
// convert_bitmap_loop
// Line loop of the rotation renderer. Entered by falling through from
// convert_bitmap with the counters initialised:
//   {c} = lines left in the frame
//   {b} = 8-pixel groups left in the current line
//   {a} = amount X advances while one line is drawn
//   {d} = amount added when moving down to the next row of tiles
//   X   = destination address of the next 8-pixel group
// Each line pushes its start position ({x_pixel}, {y_pixel}), lets
// convert_pixel draw {b} groups (convert_pixel jumps back to
// convert_pixel_done after each one), then pulls the start position back and
// steps it by ({sine}, {cosine}) to get the start of the next line.
// When {c} reaches zero, control goes to new_rotation_step.
// Change from the original: a dead "lda.b {size}" that sat directly before
// the "txa" in the end-of-line code was removed. txa overwrote A and the N/Z
// flags immediately, so the load only cost cycles.
convert_bitmap_loop:
    lda.b {c}
    bne old_rotation_step
    jmp new_rotation_step //no lines left, this frame is finished
old_rotation_step:
    lda.b {x_pixel} //remember where this line started
    pha
    lda.b {y_pixel}
    pha
convert_line:
    jmp convert_pixel
convert_pixel_done:
    txa
    clc
    adc #$0020 //move X on by $20 for the next 8-pixel group
    tax
    lda.b {b}
    bne convert_pixel //more groups left in this line
    pla //line start Y (pushed last, so pulled first)
    clc
    adc.b {cosine}
    adc #$0000 //add back the carry (negative steps are stored minus one)
    sta.b {y_pixel}
    pla //line start X
    clc
    adc.b {sine}
    adc #$0000
    sta.b {x_pixel}
    dec.b {c}
    lda.b {size}
    lsr #3
    sta.b {b} //reload the group counter for the next line
    txa
    sec
    sbc.b {a} //rewind X to the start of the line just drawn
    inc #2 //then move down one pixel row
    tax
    bit #$000e
    bne convert_bitmap_loop //still inside the same row of tiles
    clc
    adc.b {d} //bits 1-3 wrapped to zero: move to the next row of tiles
    sec
    sbc #$0010
    tax
    jmp convert_bitmap_loop
// convert_pixel
// This is the most important part of this code: this is where pixels get
// drawn. Each pass handles one group of 8 pixels.
// In:  X = destination address, {x_pixel}/{y_pixel} = source position of the
//      first pixel, {sine}/{cosine} = per-pixel steps, direct page = $0000.
//      The bank bytes of the pointers used here were stored by rotate_sprite.
// Out: one word stored at $0000,x and one at $0010,x. {x_pixel}/{y_pixel}
//      are advanced past the 8 pixels and {b} is decremented.
//      Leaves through jmp convert_pixel_done. A and Y are overwritten.
// The sbc/adc chains below have only one sec/clc each. They depend on the
// carry left by the previous instruction, together with the minus-one
// adjustment convert_bitmap applies to negative {sine}/{cosine} values.
convert_pixel:
dec.b {b} //one fewer 8-pixel group left in this line
lda.b {y_pixel}
sta.b {scratch_pad_ram}+1
sec
sbc.b {sine}
sbc #$0000
sta.b {scratch_pad_ram}+5 //First it calculates the Y position
sbc.b {sine} //of every pixel, stepping by -{sine} each time
sta.b {scratch_pad_ram}+9
sbc.b {sine}
sta.b {scratch_pad_ram}+13
sbc.b {sine}
sta.b {scratch_pad_ram}+17
sbc.b {sine}
sta.b {scratch_pad_ram}+21
sbc.b {sine}
sta.b {scratch_pad_ram}+25
sbc.b {sine}
sta.b {scratch_pad_ram}+29
sbc.b {sine}
sta.b {y_pixel} //Y position for the next group
lda.b {x_pixel} //Then it calculates the X position
sta.b {scratch_pad_ram} //of every pixel, stepping by +{cosine} each time.
clc
adc.b {cosine}
adc #$0000
sta.b {scratch_pad_ram}+4 //The top byte of the X position
adc.b {cosine} //overwrites the low byte of the
sta.b {scratch_pad_ram}+8 //Y position, creating the ROM
adc.b {cosine} //address of the pixel, in the
sta.b {scratch_pad_ram}+12 //format: bbbbbbbbyyyyyyyyxxxxxxxx
adc.b {cosine} //where b is the bank, and x and y
sta.b {scratch_pad_ram}+16 //are coordinates in a 256x256
adc.b {cosine} //bitmap image in the bank that
sta.b {scratch_pad_ram}+20 //contains the rotatable sprites.
adc.b {cosine}
sta.b {scratch_pad_ram}+24
adc.b {cosine}
sta.b {scratch_pad_ram}+28
adc.b {cosine}
sta.b {x_pixel} //X position for the next group
lda [{scratch_pad_ram}+1] //now it calculates the offsets into
asl #4 //the planar look up tables: pixel 0 in the high nibble,
ora [{scratch_pad_ram}+17] //pixel 4 in the low nibble
and #$00ff
asl //word index
sta.b {scratch_pad_ram}+32
lda [{scratch_pad_ram}+5] //pixels 1 and 5
asl #4
ora [{scratch_pad_ram}+21]
and #$00ff
asl
sta.b {scratch_pad_ram}+36
lda [{scratch_pad_ram}+9] //pixels 2 and 6
asl #4
ora [{scratch_pad_ram}+25]
and #$00ff
asl
sta.b {scratch_pad_ram}+40
lda [{scratch_pad_ram}+13] //pixels 3 and 7
asl #4
ora [{scratch_pad_ram}+29]
and #$00ff
asl
sta.b {scratch_pad_ram}+44
ldy #packed_to_planar_lo //Y = table base, the pointers hold the entry offsets
lda [{scratch_pad_ram}+32],y //now it packs together bitplanes
asl //0 and 1: each shift makes room for the next pixel pair
ora [{scratch_pad_ram}+36],y
asl
ora [{scratch_pad_ram}+40],y
asl
ora [{scratch_pad_ram}+44],y
sta $0000,x
ldy #packed_to_planar_hi //now it packs together bitplanes
lda [{scratch_pad_ram}+32],y //2 and 3
asl
ora [{scratch_pad_ram}+36],y
asl
ora [{scratch_pad_ram}+40],y
asl
ora [{scratch_pad_ram}+44],y
sta $0010,x
jmp convert_pixel_done
// packed_to_planar_lo
// 256-entry word table used by convert_pixel for bitplanes 0 and 1.
// The index is a byte holding two 4-bit pixels: DCBA in the high nibble and
// dcba in the low nibble. Each entry spreads bit 0 (A/a) and bit 1 (B/b) of
// both pixels across a word as ---B---b---A---a. The three bits above each
// one are left clear so that four lookups can be shifted and ORed together.
packed_to_planar_lo:
dw $0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101 //DCBAdcba > ---B---b---A---a
dw $0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111
dw $1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101
dw $1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111
dw $0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101
dw $0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111
dw $1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101
dw $1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111
dw $0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101
dw $0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111
dw $1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101
dw $1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111
dw $0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101,$0000,$0001,$0100,$0101
dw $0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111,$0010,$0011,$0110,$0111
dw $1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101,$1000,$1001,$1100,$1101
dw $1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111,$1010,$1011,$1110,$1111
// packed_to_planar_hi
// 256-entry word table used by convert_pixel for bitplanes 2 and 3.
// The index is a byte holding two 4-bit pixels: DCBA in the high nibble and
// dcba in the low nibble. Each entry spreads bit 2 (C/c) and bit 3 (D/d) of
// both pixels across a word as ---D---d---C---c. The three bits above each
// one are left clear so that four lookups can be shifted and ORed together.
packed_to_planar_hi:
dw $0000,$0000,$0000,$0000,$0001,$0001,$0001,$0001,$0100,$0100,$0100,$0100,$0101,$0101,$0101,$0101 //DCBAdcba > ---D---d---C---c
dw $0000,$0000,$0000,$0000,$0001,$0001,$0001,$0001,$0100,$0100,$0100,$0100,$0101,$0101,$0101,$0101
dw $0000,$0000,$0000,$0000,$0001,$0001,$0001,$0001,$0100,$0100,$0100,$0100,$0101,$0101,$0101,$0101
dw $0000,$0000,$0000,$0000,$0001,$0001,$0001,$0001,$0100,$0100,$0100,$0100,$0101,$0101,$0101,$0101
dw $0010,$0010,$0010,$0010,$0011,$0011,$0011,$0011,$0110,$0110,$0110,$0110,$0111,$0111,$0111,$0111
dw $0010,$0010,$0010,$0010,$0011,$0011,$0011,$0011,$0110,$0110,$0110,$0110,$0111,$0111,$0111,$0111
dw $0010,$0010,$0010,$0010,$0011,$0011,$0011,$0011,$0110,$0110,$0110,$0110,$0111,$0111,$0111,$0111
dw $0010,$0010,$0010,$0010,$0011,$0011,$0011,$0011,$0110,$0110,$0110,$0110,$0111,$0111,$0111,$0111
dw $1000,$1000,$1000,$1000,$1001,$1001,$1001,$1001,$1100,$1100,$1100,$1100,$1101,$1101,$1101,$1101
dw $1000,$1000,$1000,$1000,$1001,$1001,$1001,$1001,$1100,$1100,$1100,$1100,$1101,$1101,$1101,$1101
dw $1000,$1000,$1000,$1000,$1001,$1001,$1001,$1001,$1100,$1100,$1100,$1100,$1101,$1101,$1101,$1101
dw $1000,$1000,$1000,$1000,$1001,$1001,$1001,$1001,$1100,$1100,$1100,$1100,$1101,$1101,$1101,$1101
dw $1010,$1010,$1010,$1010,$1011,$1011,$1011,$1011,$1110,$1110,$1110,$1110,$1111,$1111,$1111,$1111
dw $1010,$1010,$1010,$1010,$1011,$1011,$1011,$1011,$1110,$1110,$1110,$1110,$1111,$1111,$1111,$1111
dw $1010,$1010,$1010,$1010,$1011,$1011,$1011,$1011,$1110,$1110,$1110,$1110,$1111,$1111,$1111,$1111
dw $1010,$1010,$1010,$1010,$1011,$1011,$1011,$1011,$1110,$1110,$1110,$1110,$1111,$1111,$1111,$1111