From e0ef57d9e6a9668389bf12b54f0753dced95c023 Mon Sep 17 00:00:00 2001 From: Matthew Berry Date: Sun, 30 Oct 2022 13:47:22 -0700 Subject: [PATCH] prefer Slice to Array in most cases This change updates most uses of Array to Slice in order to avoid an extra object on the heap and an extra reference to follow when indexing into these objects. This amounts to a roughly 5% performance improvement in the games tested. This change also updates the GB PPU's BGP/OBP0/OBP1 palette writing code (0xFF47-0xFF49) to update the existing palette in place instead of allocating a new array each time a palette is written. --- src/crab/gb/apu/channel1.cr | 2 +- src/crab/gb/apu/channel2.cr | 2 +- src/crab/gb/fifo_ppu.cr | 2 +- src/crab/gb/memory.cr | 2 +- src/crab/gb/ppu.cr | 33 ++++++++++++++------------ src/crab/gb/scanline_ppu.cr | 2 +- src/crab/gba/apu/channel1.cr | 2 +- src/crab/gba/apu/channel2.cr | 2 +- src/crab/gba/apu/channel3.cr | 2 +- src/crab/gba/apu/dma_channels.cr | 12 +++++----- src/crab/gba/bus.cr | 4 ++-- src/crab/gba/cpu.cr | 4 ++-- src/crab/gba/dma.cr | 27 ++++++++++----------- src/crab/gba/ppu.cr | 40 +++++++++++--------------------- src/crab/gba/timer.cr | 24 ++++++++++--------- 15 files changed, 77 insertions(+), 83 deletions(-) diff --git a/src/crab/gb/apu/channel1.cr b/src/crab/gb/apu/channel1.cr index 1794abc..bb84c4a 100644 --- a/src/crab/gb/apu/channel1.cr +++ b/src/crab/gb/apu/channel1.cr @@ -1,6 +1,6 @@ module GB class Channel1 < VolumeEnvelopeChannel - WAVE_DUTY = [ + WAVE_DUTY = Slice[ [0, 0, 0, 0, 0, 0, 0, 1], # 12.5% [1, 0, 0, 0, 0, 0, 0, 1], # 25% [1, 0, 0, 0, 0, 1, 1, 1], # 50% diff --git a/src/crab/gb/apu/channel2.cr b/src/crab/gb/apu/channel2.cr index 286863a..0c6f678 100644 --- a/src/crab/gb/apu/channel2.cr +++ b/src/crab/gb/apu/channel2.cr @@ -1,6 +1,6 @@ module GB class Channel2 < VolumeEnvelopeChannel - WAVE_DUTY = [ + WAVE_DUTY = Slice[ [0, 0, 0, 0, 0, 0, 0, 1], # 12.5% [1, 0, 0, 0, 0, 0, 0, 1], # 25% [1, 0, 0, 0, 0, 1, 1, 1], # 50% diff --git a/src/crab/gb/fifo_ppu.cr b/src/crab/gb/fifo_ppu.cr index d631cce..5bdf24c 100644 
--- a/src/crab/gb/fifo_ppu.cr +++ b/src/crab/gb/fifo_ppu.cr @@ -39,7 +39,7 @@ module GB SLEEP end - FETCHER_ORDER = [ + FETCHER_ORDER = Slice[ FetchStage::SLEEP, FetchStage::GET_TILE, FetchStage::SLEEP, FetchStage::GET_TILE_DATA_LOW, FetchStage::SLEEP, FetchStage::GET_TILE_DATA_HIGH, diff --git a/src/crab/gb/memory.cr b/src/crab/gb/memory.cr index 9301668..ef45ab0 100644 --- a/src/crab/gb/memory.cr +++ b/src/crab/gb/memory.cr @@ -22,7 +22,7 @@ module GB @scheduler : Scheduler @cgb_ptr : Pointer(Bool) - @wram = Array(Bytes).new 8 { Bytes.new GB::Memory::WORK_RAM_N.size } + @wram = Slice(Bytes).new 8 { Bytes.new GB::Memory::WORK_RAM_N.size } @wram_bank : UInt8 = 1 @hram = Bytes.new HRAM.size @ff72 : UInt8 = 0x00 diff --git a/src/crab/gb/ppu.cr b/src/crab/gb/ppu.cr index 233edcf..06ab56f 100644 --- a/src/crab/gb/ppu.cr +++ b/src/crab/gb/ppu.cr @@ -80,7 +80,7 @@ module GB end end - POST_BOOT_VRAM = [ + POST_BOOT_VRAM = Slice[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00, 0xF0, 0x00, 0xFC, 0x00, 0xFC, 0x00, 0xFC, 0x00, 0xFC, 0x00, 0xF3, 0x00, 0xF3, 0x00, 0x3C, 0x00, 0x3C, 0x00, 0x3C, 0x00, 0x3C, 0x00, 0x3C, 0x00, 0x3C, 0x00, 0x3C, 0x00, 0x3C, 0x00, @@ -116,7 +116,7 @@ module GB WIDTH = 160 HEIGHT = 144 - DMG_COLORS = [0x6BDF_u16, 0x3ABF_u16, 0x35BD_u16, 0x2CEF_u16] + DMG_COLORS = Slice[0x6BDF_u16, 0x3ABF_u16, 0x35BD_u16, 0x2CEF_u16] getter framebuffer = Slice(UInt16).new WIDTH * HEIGHT property frame = false @@ -129,7 +129,7 @@ module GB @obj_palette_index : UInt8 = 0 @obj_auto_increment = false - @vram = Array(Bytes).new 2 { Bytes.new GB::Memory::VRAM.size } # 0x8000..0x9FFF + @vram = Slice(Bytes).new 2 { Bytes.new GB::Memory::VRAM.size } # 0x8000..0x9FFF @vram_bank : UInt8 = 0 # track which bank is active @sprite_table = Bytes.new Memory::OAM.size # 0xFE00..0xFE9F @lcd_control : UInt8 = 0x00_u8 # 0xFF40 @@ -139,9 +139,9 @@ module GB @ly : UInt8 = 0x00_u8 # 0xFF44 @lyc : UInt8 = 0x00_u8 # 0xFF45 @dma : UInt8 
= 0x00_u8 # 0xFF46 - @bgp : Array(UInt8) = Array(UInt8).new 4, 0 # 0xFF47 - @obp0 : Array(UInt8) = Array(UInt8).new 4, 0 # 0xFF48 - @obp1 : Array(UInt8) = Array(UInt8).new 4, 0 # 0xFF49 + @bgp : Slice(UInt8) = Slice(UInt8).new 4, 0 # 0xFF47 + @obp0 : Slice(UInt8) = Slice(UInt8).new 4, 0 # 0xFF48 + @obp1 : Slice(UInt8) = Slice(UInt8).new 4, 0 # 0xFF49 @wy : UInt8 = 0x00_u8 # 0xFF4A @wx : UInt8 = 0x00_u8 # 0xFF4B @@ -247,9 +247,9 @@ module GB when 0xFF44 then @ly when 0xFF45 then @lyc when 0xFF46 then @dma - when 0xFF47 then palette_from_array @bgp - when 0xFF48 then palette_from_array @obp0 - when 0xFF49 then palette_from_array @obp1 + when 0xFF47 then palette_from_enumerable @bgp + when 0xFF48 then palette_from_enumerable @obp0 + when 0xFF49 then palette_from_enumerable @obp1 when 0xFF4A then @wy when 0xFF4B then @wx when 0xFF4F then @cgb_ptr.value ? 0xFE_u8 | @vram_bank : 0xFF_u8 @@ -289,9 +289,9 @@ module GB @lyc = value handle_stat_interrupt when 0xFF46 then @dma = value - when 0xFF47 then @bgp = palette_to_array value - when 0xFF48 then @obp0 = palette_to_array value - when 0xFF49 then @obp1 = palette_to_array value + when 0xFF47 then update_palette(@bgp, value) + when 0xFF48 then update_palette(@obp0, value) + when 0xFF49 then update_palette(@obp1, value) when 0xFF4A then @wy = value when 0xFF4B then @wx = value when 0xFF4F then @vram_bank = value & 1 if @cgb_ptr.value @@ -400,11 +400,14 @@ module GB # palettes - def palette_to_array(palette : UInt8) : Array(UInt8) - [palette & 0x3, (palette >> 2) & 0x3, (palette >> 4) & 0x3, (palette >> 6) & 0x3] + def update_palette(palette : Indexable(UInt8), val : UInt8) : Nil + palette[0] = val & 0x3 + palette[1] = (val >> 2) & 0x3 + palette[2] = (val >> 4) & 0x3 + palette[3] = (val >> 6) & 0x3 end - def palette_from_array(palette_array : Array(UInt8)) : UInt8 + def palette_from_enumerable(palette_array : Enumerable(UInt8)) : UInt8 palette_array.each_with_index.reduce(0x00_u8) do |palette, (color, idx)| palette | color << 
(idx * 2) end diff --git a/src/crab/gb/scanline_ppu.cr b/src/crab/gb/scanline_ppu.cr index 9453af1..6e8dbf0 100644 --- a/src/crab/gb/scanline_ppu.cr +++ b/src/crab/gb/scanline_ppu.cr @@ -22,7 +22,7 @@ module GB end # color idx, BG-to-OAM priority bit - @scanline_color_vals = Array(Tuple(UInt8, Bool)).new WIDTH, {0_u8, false} + @scanline_color_vals = Slice(Tuple(UInt8, Bool)).new WIDTH, {0_u8, false} def scanline @current_window_line = 0 if @ly == 0 diff --git a/src/crab/gba/apu/channel1.cr b/src/crab/gba/apu/channel1.cr index 5ac734d..e9013e3 100644 --- a/src/crab/gba/apu/channel1.cr +++ b/src/crab/gba/apu/channel1.cr @@ -1,6 +1,6 @@ module GBA class Channel1 < VolumeEnvelopeChannel - WAVE_DUTY = [ + WAVE_DUTY = Slice[ [-8, -8, -8, -8, -8, -8, -8, +8], # 12.5% [+8, -8, -8, -8, -8, -8, -8, +8], # 25% [+8, -8, -8, -8, -8, +8, +8, +8], # 50% diff --git a/src/crab/gba/apu/channel2.cr b/src/crab/gba/apu/channel2.cr index 3b2aaab..8603954 100644 --- a/src/crab/gba/apu/channel2.cr +++ b/src/crab/gba/apu/channel2.cr @@ -1,6 +1,6 @@ module GBA class Channel2 < VolumeEnvelopeChannel - WAVE_DUTY = [ + WAVE_DUTY = Slice[ [-8, -8, -8, -8, -8, -8, -8, +8], # 12.5% [+8, -8, -8, -8, -8, -8, -8, +8], # 25% [+8, -8, -8, -8, -8, +8, +8, +8], # 50% diff --git a/src/crab/gba/apu/channel3.cr b/src/crab/gba/apu/channel3.cr index 057d5d6..082bd95 100644 --- a/src/crab/gba/apu/channel3.cr +++ b/src/crab/gba/apu/channel3.cr @@ -7,7 +7,7 @@ module GBA other.is_a?(Int) && RANGE.includes?(other) || WAVE_RAM_RANGE.includes?(other) end - @wave_ram = Array(Bytes).new 2, Bytes.new(WAVE_RAM_RANGE.size) { |idx| idx & 1 == 0 ? 0x00_u8 : 0xFF_u8 } + @wave_ram = Slice(Bytes).new 2, Bytes.new(WAVE_RAM_RANGE.size) { |idx| idx & 1 == 0 ? 
0x00_u8 : 0xFF_u8 } @wave_ram_position : UInt8 = 0 @wave_ram_sample_buffer : UInt8 = 0x00 diff --git a/src/crab/gba/apu/dma_channels.cr b/src/crab/gba/apu/dma_channels.cr index 241f097..9ef8629 100644 --- a/src/crab/gba/apu/dma_channels.cr +++ b/src/crab/gba/apu/dma_channels.cr @@ -2,18 +2,18 @@ module GBA class DMAChannels RANGE = 0xA0..0xA7 - @fifos = Array(Array(Int8)).new 2 { Array(Int8).new 32, 0 } - @positions = Array(Int32).new 2, 0 - @sizes = Array(Int32).new 2, 0 - @timers : Array(Proc(UInt16)) - @latches = Array(Int16).new 2, 0 + @fifos = Slice(Slice(Int8)).new 2 { Slice(Int8).new 32, 0 } + @positions = Slice(Int32).new 2, 0 + @sizes = Slice(Int32).new 2, 0 + @timers : Slice(Proc(UInt16)) + @latches = Slice(Int16).new 2, 0 def ===(other) : Bool other.is_a?(Int) && RANGE.includes?(other) end def initialize(@gba : GBA, @control : Reg::SOUNDCNT_H) - @timers = [ + @timers = Slice[ ->{ @control.dma_sound_a_timer }, ->{ @control.dma_sound_b_timer }, ] diff --git a/src/crab/gba/bus.cr b/src/crab/gba/bus.cr index aee7860..3810d44 100644 --- a/src/crab/gba/bus.cr +++ b/src/crab/gba/bus.cr @@ -3,7 +3,7 @@ require "./gpio" module GBA class Bus # Timings for rom are estimated for game compatibility. - ACCESS_TIMING_TABLE = [ + ACCESS_TIMING_TABLE = Slice[ [1, 1, 3, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2], # 8-bit and 16-bit accesses [1, 1, 6, 1, 1, 2, 2, 1, 4, 4, 4, 4, 4, 4, 4, 4], # 32-bit accesses ] @@ -172,7 +172,7 @@ module GBA when 0x5 then (@gba.ppu.pram.to_unsafe + (address & 0x3FE)).as(UInt16*).value = 0x0101_u16 * value when 0x6 limit = @gba.ppu.bitmap? ? 
0x13FFF : 0x0FFFF # (u8 write only) upper limit depends on display mode - address = 0x1FFFE_u32 & address # (u8 write only) UInt16-aligned + address = 0x1FFFE_u32 & address # (u8 write only) UInt16-aligned address -= 0x8000 if address > 0x17FFF # todo: determine if this happens before or after the limit check (@gba.ppu.vram.to_unsafe + address).as(UInt16*).value = 0x0101_u16 * value if address <= limit when 0x7 # can't write bytes to oam diff --git a/src/crab/gba/cpu.cr b/src/crab/gba/cpu.cr index b60cad0..5e7fe87 100644 --- a/src/crab/gba/cpu.cr +++ b/src/crab/gba/cpu.cr @@ -48,8 +48,8 @@ module GBA getter pipeline = Pipeline.new getter lut : Slice(Proc(UInt32, Nil)) { fill_lut } getter thumb_lut : Slice(Proc(UInt32, Nil)) { fill_thumb_lut } - @reg_banks = Array(Array(UInt32)).new 6 { Array(UInt32).new 7, 0 } - @spsr_banks = Array(UInt32).new 6, CPU::Mode::SYS.value # logically independent of typical register banks + @reg_banks = Slice(Slice(UInt32)).new 6 { Slice(UInt32).new 7, 0 } + @spsr_banks = Slice(UInt32).new 6, CPU::Mode::SYS.value # logically independent of typical register banks property halted = false def initialize(@gba : GBA) diff --git a/src/crab/gba/dma.cr b/src/crab/gba/dma.cr index b72da81..0e547df 100644 --- a/src/crab/gba/dma.cr +++ b/src/crab/gba/dma.cr @@ -22,23 +22,24 @@ module GBA end end - SRC_MASK = [0x07FFFFFF_u32, 0x0FFFFFFF_u32, 0x0FFFFFFF_u32, 0x0FFFFFFF_u32] - DST_MASK = [0x07FFFFFF_u32, 0x07FFFFFF_u32, 0x07FFFFFF_u32, 0x0FFFFFFF_u32] - LEN_MASK = [0x3FFF_u16, 0x3FFF_u16, 0x3FFF_u16, 0xFFFF_u16] + SRC_MASK = Slice[0x07FFFFFF_u32, 0x0FFFFFFF_u32, 0x0FFFFFFF_u32, 0x0FFFFFFF_u32] + DST_MASK = Slice[0x07FFFFFF_u32, 0x07FFFFFF_u32, 0x07FFFFFF_u32, 0x0FFFFFFF_u32] + LEN_MASK = Slice[0x3FFF_u16, 0x3FFF_u16, 0x3FFF_u16, 0xFFFF_u16] - getter dmacnt_l : Array(UInt16) + getter dmacnt_l : Slice(UInt16) - @interrupt_flags : Array(Proc(Nil)) + @interrupt_flags : Slice(Proc(Nil)) def initialize(@gba : GBA) - @dmasad = Array(UInt32).new 4, 0 - 
@dmadad = Array(UInt32).new 4, 0 - @dmacnt_l = Array(UInt16).new 4, 0 - @dmacnt_h = Array(Reg::DMACNT).new 4 { Reg::DMACNT.new 0 } - @src = Array(UInt32).new 4, 0 - @dst = Array(UInt32).new 4, 0 - @interrupt_flags = [->{ @gba.interrupts.reg_if.dma0 = true }, ->{ @gba.interrupts.reg_if.dma1 = true }, - ->{ @gba.interrupts.reg_if.dma2 = true }, ->{ @gba.interrupts.reg_if.dma3 = true }] + @dmasad = Slice(UInt32).new 4, 0 + @dmadad = Slice(UInt32).new 4, 0 + @dmacnt_l = Slice(UInt16).new 4, 0 + @dmacnt_h = Slice(Reg::DMACNT).new 4 { Reg::DMACNT.new 0 } + @src = Slice(UInt32).new 4, 0 + @dst = Slice(UInt32).new 4, 0 + @interrupt_flags = Slice[ + ->{ @gba.interrupts.reg_if.dma0 = true }, ->{ @gba.interrupts.reg_if.dma1 = true }, + ->{ @gba.interrupts.reg_if.dma2 = true }, ->{ @gba.interrupts.reg_if.dma3 = true }] end def [](io_addr : UInt32) : UInt8 diff --git a/src/crab/gba/ppu.cr b/src/crab/gba/ppu.cr index 52a332e..6afd72d 100644 --- a/src/crab/gba/ppu.cr +++ b/src/crab/gba/ppu.cr @@ -4,7 +4,7 @@ module GBA getter framebuffer : Slice(UInt16) = Slice(UInt16).new 0x9600 # framebuffer as 16-bit xBBBBBGGGGGRRRRR property frame = false - @layer_palettes : Array(Bytes) = Array.new 4 { Bytes.new 240 } + @layer_palettes : Slice(Bytes) = Slice.new 4 { Bytes.new 240 } @sprite_pixels : Slice(SpritePixel) = Slice(SpritePixel).new 240, SPRITE_PIXEL getter pram = Bytes.new 0x400 @@ -14,12 +14,12 @@ module GBA @dispcnt = Reg::DISPCNT.new 0 @dispstat = Reg::DISPSTAT.new 0 @vcount : UInt16 = 0x0000_u16 - @bgcnt = Array(Reg::BGCNT).new 4 { GBA::Reg::BGCNT.new 0 } - @bghofs = Array(Reg::BGOFS).new 4 { GBA::Reg::BGOFS.new 0 } - @bgvofs = Array(Reg::BGOFS).new 4 { GBA::Reg::BGOFS.new 0 } - @bgaff = Array(Array(Reg::BGAFF)).new 2 { Array(GBA::Reg::BGAFF).new 4 { GBA::Reg::BGAFF.new 0 } } - @bgref = Array(Array(Reg::BGREF)).new 2 { Array(GBA::Reg::BGREF).new 2 { GBA::Reg::BGREF.new 0 } } - @bgref_int = Array(Array(Int32)).new 2 { Array(Int32).new 2, 0 } + @bgcnt = Slice(Reg::BGCNT).new 4 { 
GBA::Reg::BGCNT.new 0 } + @bghofs = Slice(Reg::BGOFS).new 4 { GBA::Reg::BGOFS.new 0 } + @bgvofs = Slice(Reg::BGOFS).new 4 { GBA::Reg::BGOFS.new 0 } + @bgaff = Slice(Slice(Reg::BGAFF)).new 2 { Slice(GBA::Reg::BGAFF).new 4 { GBA::Reg::BGAFF.new 0 } } + @bgref = Slice(Slice(Reg::BGREF)).new 2 { Slice(GBA::Reg::BGREF).new 2 { GBA::Reg::BGREF.new 0 } } + @bgref_int = Slice(Slice(Int32)).new 2 { Slice(Int32).new 2, 0 } @win0h = Reg::WINH.new 0 @win1h = Reg::WINH.new 0 @win0v = Reg::WINV.new 0 @@ -454,25 +454,13 @@ module GBA end # SIZES[SHAPE][SIZE] - SIZES = [ - [ # square - {8, 8}, - {16, 16}, - {32, 32}, - {64, 64}, - ], - [ # horizontal rectangle - {16, 8}, - {32, 8}, - {32, 16}, - {64, 32}, - ], - [ # vertical rectangle - {8, 16}, - {8, 32}, - {16, 32}, - {32, 64}, - ], + SIZES = Slice[ + # square + Slice[{8, 8}, {16, 16}, {32, 32}, {64, 64}], + # horizontal rectangle + Slice[{16, 8}, {32, 8}, {32, 16}, {64, 32}], + # vertical rectangle + Slice[{8, 16}, {8, 32}, {16, 32}, {32, 64}], ] record Sprite, attr0 : UInt16, attr1 : UInt16, attr2 : UInt16, aff_param : Int16 do diff --git a/src/crab/gba/timer.cr b/src/crab/gba/timer.cr index 0ce3d3e..9f9b13f 100644 --- a/src/crab/gba/timer.cr +++ b/src/crab/gba/timer.cr @@ -1,19 +1,21 @@ module GBA class Timer - PERIODS = [1, 64, 256, 1024] - EVENT_TYPES = [Scheduler::EventType::Timer0, Scheduler::EventType::Timer1, - Scheduler::EventType::Timer2, Scheduler::EventType::Timer3] + PERIODS = Slice[1, 64, 256, 1024] + EVENT_TYPES = Slice[ + Scheduler::EventType::Timer0, Scheduler::EventType::Timer1, + Scheduler::EventType::Timer2, Scheduler::EventType::Timer3] - @interrupt_flags : Array(Proc(Nil)) + @interrupt_flags : Slice(Proc(Nil)) def initialize(@gba : GBA) - @tmcnt = Array(Reg::TMCNT).new 4 { Reg::TMCNT.new 0 } # control registers - @tmd = Array(UInt16).new 4, 0 # reload values - @tm = Array(UInt16).new 4, 0 # counted values - @cycle_enabled = Array(UInt64).new 4, 0 # cycle that the timer was enabled - @events = 
Array(Proc(Nil)).new 4 { |i| overflow i } # overflow closures for each timer - @interrupt_flags = [->{ @gba.interrupts.reg_if.timer0 = true }, ->{ @gba.interrupts.reg_if.timer1 = true }, - ->{ @gba.interrupts.reg_if.timer2 = true }, ->{ @gba.interrupts.reg_if.timer3 = true }] + @tmcnt = Slice(Reg::TMCNT).new 4 { Reg::TMCNT.new 0 } # control registers + @tmd = Slice(UInt16).new 4, 0 # reload values + @tm = Slice(UInt16).new 4, 0 # counted values + @cycle_enabled = Slice(UInt64).new 4, 0 # cycle that the timer was enabled + @events = Slice(Proc(Nil)).new 4 { |i| overflow i } # overflow closures for each timer + @interrupt_flags = Slice[ + ->{ @gba.interrupts.reg_if.timer0 = true }, ->{ @gba.interrupts.reg_if.timer1 = true }, + ->{ @gba.interrupts.reg_if.timer2 = true }, ->{ @gba.interrupts.reg_if.timer3 = true }] end def overflow(num : Int) : Proc(Nil)