| `timescale 1ns / 1ps |
| ////////////////////////////////////////////////////////////////////////////////// |
| // Company: |
| // Engineer: Wenting Zhang |
| // |
| // Create Date: 18:48:36 02/14/2018 |
| // Design Name: |
| // Module Name: ppu |
| // Project Name: |
| // Target Devices: |
| // Tool versions: |
| // Description: |
| // GameBoy PPU |
| // Additional Comments: |
| // There are three hardware layers in the GameBoy PPU: Background, Window, and |
| // Object (or sprites). |
| // |
| // Window will render above the background and the object can render above the |
| // background or under the background. Each object have a priority bit to |
| // indicate where it should be rendered. |
| // |
| // Background, Window, and Object can be individually turned on or off. When |
| // nothing is turned on, it displays white. |
| // |
| // The whole render logic does NOT require a scanline buffer to work, and it |
| // runs at 4MHz (VRAM runs at 2MHz) |
| // |
| // There are two main parts of the logic, implemented in a big FSM. The first |
| // one is the fetch unit, and the other is the pixel FIFO. |
| // |
| // The pixel FIFO shifts out one pixel when it contains more than 8 pixels, the |
| // fetch unit would generally render 8 pixels in 6 cycles (so 2 wait cycles are |
| // inserted so they are in sync generally). When there is no enough pixels, |
| // the FIFO would stop and wait for the fetch unit. |
| // |
| // Windows Trigger is handled in the next state logic, there is a distinct state |
| // for the PPU to switch from background rendering to window rendering (flush |
| // the fifo and add wait cycles.) |
| // |
| // Object Trigger is handled in the state change block, in order to backup the |
| // previous state. Current RAM address is also backed up during the handling of |
| // object rendering. Once all the objects at this position has been rendered, |
| // the render state machine could be restored to its previous state. |
| // |
| // The output pixel clock is the inverted main clock, which is the same as the |
| // real Game Boy Pixel data would be put on the pixel bus on the negedge of |
| // clock, so the LCD would latch the data on the posedge. The original Game Boy |
| // used a gated clock to control if output is valid. Since gated clock is not |
| // recommend, I used a valid signal to indicate is output should be considered |
| // valid. |
| ////////////////////////////////////////////////////////////////////////////////// |
| `default_nettype wire |
| module ppu( |
| input clk, |
| input rst, |
| input wire [1:0] ct, |
| // MMIO Bus, 0xFF40 - 0xFF4B, always visible to CPU |
| input wire [15:0] mmio_a, |
| output reg [7:0] mmio_dout, |
| input wire [7:0] mmio_din, |
| input wire mmio_rd, |
| input wire mmio_wr, |
| // OAM Bus, 0xFE00 - 0xFE9F |
| input wire [15:0] oam_a, |
| output wire [7:0] oam_dout, |
| input wire [7:0] oam_din, |
| input wire oam_rd, |
| input wire oam_wr, |
| // Interrupt interface |
| output reg int_vblank_req, |
| output reg int_lcdc_req, |
| input int_vblank_ack, |
| input int_lcdc_ack, |
| // Pixel output |
| output cpl, // Pixel Clock, = ~clk |
| output reg [1:0] pixel, // Pixel Output |
| output reg valid, // Pixel Valid |
| output reg hs, // Horizontal Sync, High Valid |
| output reg vs, // Vertical Sync, High Valid |
| // Video RAM interface |
| output wire [12:0] vram_a, |
| output wire vram_wr, |
| output wire vram_rd, |
| output wire [7:0] vram_din, |
| input wire [7:0] vram_dout, |
| //Debug output |
| output [7:0] scx, |
| output [7:0] scy, |
| output [4:0] state |
| ); |
| |
| // Global Wires ? |
| integer i; |
| |
| // PPU registers |
| reg [7:0] reg_lcdc; //$FF40 LCD Control (R/W) |
| reg [7:0] reg_stat; //$FF41 LCDC Status (R/W) |
| reg [7:0] reg_scy; //$FF42 Scroll Y (R/W) |
| reg [7:0] reg_scx; //$FF43 Scroll X (R/W) |
| reg [7:0] reg_ly; //$FF44 LCDC Y-Coordinate (R) Write will reset the counter |
| reg [7:0] reg_dma; //$FF46 DMA, actually handled outside of PPU for now |
| reg [7:0] reg_lyc; //$FF45 LY Compare (R/W) |
| reg [7:0] reg_bgp; //$FF47 BG Palette Data (R/W) Non-CGB mode only |
| reg [7:0] reg_obp0; //$FF48 Object Palette 0 Data (R/W) Non-CGB mode only |
| reg [7:0] reg_obp1; //$FF49 Object Palette 1 Data (R/W) Non-CGB mode only |
| reg [7:0] reg_wy; //$FF4A Window Y Position (R/W) |
| reg [7:0] reg_wx; //$FF4B Window X Position (R/W) |
| |
| // Some interrupt related register |
| reg [7:0] reg_ly_last; |
| reg [1:0] reg_mode_last; // Next mode based on next state |
| |
| wire reg_lcd_en = reg_lcdc[7]; //0=Off, 1=On |
| wire reg_win_disp_sel = reg_lcdc[6]; //0=9800-9BFF, 1=9C00-9FFF |
| wire reg_win_en = reg_lcdc[5]; //0=Off, 1=On |
| wire reg_bg_win_data_sel = reg_lcdc[4]; //0=8800-97FF, 1=8000-8FFF |
| wire reg_bg_disp_sel = reg_lcdc[3]; //0=9800-9BFF, 1=9C00-9FFF |
| wire reg_obj_size = reg_lcdc[2]; //0=8x8, 1=8x16 |
| wire reg_obj_en = reg_lcdc[1]; //0=Off, 1=On |
| wire reg_bg_disp = reg_lcdc[0]; //0=Off, 1=On |
| wire reg_lyc_int = reg_stat[6]; |
| wire reg_oam_int = reg_stat[5]; |
| wire reg_vblank_int = reg_stat[4]; |
| wire reg_hblank_int = reg_stat[3]; |
| wire reg_coin_flag = reg_stat[2]; |
| wire [1:0] reg_mode = reg_stat[1:0]; |
| |
| localparam PPU_MODE_H_BLANK = 2'b00; |
| localparam PPU_MODE_V_BLANK = 2'b01; |
| localparam PPU_MODE_OAM_SEARCH = 2'b10; |
| localparam PPU_MODE_PIX_TRANS = 2'b11; |
| |
| localparam PPU_PAL_BG = 2'b00; |
| localparam PPU_PAL_OB0 = 2'b01; |
| localparam PPU_PAL_OB1 = 2'b10; |
| |
| reg [12:0] vram_addr_bg; |
| reg [12:0] vram_addr_obj; |
| wire [12:0] vram_addr_int; |
| wire [12:0] vram_addr_ext; |
| wire vram_addr_int_sel; // 0 - BG, 1 - OBJ |
| |
| assign vram_addr_int = (vram_addr_int_sel == 1'b1) ? (vram_addr_obj) : (vram_addr_bg); |
| |
| wire oam_access_ext = ((reg_mode == PPU_MODE_H_BLANK)|| |
| (reg_mode == PPU_MODE_V_BLANK)); |
| |
| wire [12:0] window_map_addr = (reg_win_disp_sel) ? (13'h1C00) : (13'h1800); |
| wire [12:0] bg_map_addr = (reg_bg_disp_sel) ? (13'h1C00) : (13'h1800); |
| wire [12:0] bg_window_tile_addr = (reg_bg_win_data_sel) ? (13'h0000) : (13'h0800); |
| |
| // PPU Memories |
| |
| // 8 bit WR, 16 bit RD, 160Bytes OAM |
| reg [7:0] oam_u [0: 79]; |
| reg [7:0] oam_l [0: 79]; |
| reg [7:0] oam_rd_addr_int; |
| wire [7:0] oam_rd_addr; |
| wire [7:0] oam_wr_addr; |
| reg [15:0] oam_data_out; |
| wire [7:0] oam_data_out_byte; |
| wire [7:0] oam_data_in; |
| wire oam_we; |
| |
| always @ (negedge clk) |
| begin |
| if (oam_we) begin |
| if (oam_wr_addr[0]) |
| oam_u[oam_wr_addr[7:1]] <= oam_data_in; |
| else |
| oam_l[oam_wr_addr[7:1]] <= oam_data_in; |
| end |
| else begin |
| oam_data_out <= {oam_u[oam_rd_addr[7:1]], oam_l[oam_rd_addr[7:1]]}; |
| end |
| end |
| |
| assign oam_wr_addr = oam_a[7:0]; |
| assign oam_rd_addr = (oam_access_ext) ? (oam_a[7:0]) : (oam_rd_addr_int); |
| assign oam_data_in = oam_din; |
| assign oam_data_out_byte = (oam_rd_addr[0]) ? oam_data_out[15:8] : oam_data_out[7:0]; |
| //assign oam_we = (wr)&(oam_access_ext); |
| assign oam_we = oam_wr; // What if always allow OAM access? |
| assign oam_dout = (oam_access_ext) ? (oam_data_out_byte) : (8'hFF); |
| |
| // 8 bit WR, 8 bit RD, 8KB VRAM |
| wire [7:0] vram_data_out; |
| |
| assign vram_a = vram_addr_int; |
| assign vram_wr = 1'b0; // PPU doesn't write to VRAM |
| assign vram_din = 8'd0; |
| assign vram_data_out = vram_dout; |
| |
| // Pixel Pipeline |
| |
| // The pixel FIFO: 16 pixels, 4 bits each (2 bits color index, 2 bits palette index) |
| // Since in and out are 8 pixels aligned, it can be modeled as a ping-pong buffer |
| // of two 32 bits (8 pixels * 4 bits) group |
| reg [63:0] pf_data; // Pixel FIFO Data |
| wire [1:0] pf_output_pixel; |
| wire [7:0] pf_output_palette; |
| wire [1:0] pf_output_pixel_id; |
| wire [1:0] pf_output_palette_id; |
| assign {pf_output_pixel_id, pf_output_palette_id} = pf_data[63:60]; |
| assign pf_output_palette = (pf_output_palette_id == PPU_PAL_BG) ? (reg_bgp) : |
| (pf_output_palette_id == PPU_PAL_OB0) ? (reg_obp0) : |
| (pf_output_palette_id == PPU_PAL_OB1) ? (reg_obp1) : (8'hFF); |
| assign pf_output_pixel = (pf_output_pixel_id == 2'b11) ? (pf_output_palette[7:6]) : |
| (pf_output_pixel_id == 2'b10) ? (pf_output_palette[5:4]) : |
| (pf_output_pixel_id == 2'b01) ? (pf_output_palette[3:2]) : |
| (pf_output_pixel_id == 2'b00) ? (pf_output_palette[1:0]) : (2'b00); |
| reg [2:0] pf_empty; // Indicate if the Pixel FIFO is empty. |
| localparam PF_INITA = 3'd5; // When a line start... |
| localparam PF_INITB = 3'd4; // Line start, 2 pixels out, 8 rendered |
| localparam PF_EMPTY = 3'd3; // When the pipeline get flushed |
| localparam PF_HALF = 3'd2; // After flushed, 8 pixels in |
| localparam PF_FIN = 3'd1; // 16 pixels in, but still no wait cycles |
| localparam PF_FULL = 3'd0; // Normal |
| |
| assign cpl = ~clk; |
| //assign pixel = pf_output_pixel; |
| |
| // HV Timing |
| localparam PPU_H_FRONT = 9'd76; |
| localparam PPU_H_SYNC = 9'd4; // So front porch + sync = OAM search |
| localparam PPU_H_TOTAL = 9'd456; |
| localparam PPU_H_PIXEL = 9'd160; |
| // 8 null pixels in the front for objects which have x < 8, 8 bit counter |
| localparam PPU_H_OUTPUT = 8'd168; |
| localparam PPU_V_ACTIVE = 8'd144; |
| localparam PPU_V_BACK = 8'd9; |
| localparam PPU_V_SYNC = 8'd1; |
| localparam PPU_V_BLANK = 8'd10; |
| localparam PPU_V_TOTAL = 8'd154; |
| |
| // Raw timing counter |
| reg [8:0] h_count; |
| reg [7:0] v_count; |
| |
| // HV counter |
| always @(posedge clk) |
| begin |
| if (rst) begin |
| h_count <= 0; |
| hs <= 0; |
| v_count <= 0; |
| vs <= 0; |
| end |
| else begin |
| if(h_count < PPU_H_TOTAL - 1) |
| h_count <= h_count + 1'b1; |
| else begin |
| h_count <= 0; |
| if(v_count < PPU_V_TOTAL - 1) |
| v_count <= v_count + 1'b1; |
| else |
| v_count <= 0; |
| if(v_count == PPU_V_ACTIVE + PPU_V_BACK - 1) |
| vs <= 1; |
| if(v_count == PPU_V_ACTIVE + PPU_V_BACK + PPU_V_SYNC - 1) |
| vs <= 0; |
| end |
| if(h_count == PPU_H_FRONT - 1) |
| hs <= 1; |
| if(h_count == PPU_H_FRONT + PPU_H_SYNC - 1) |
| hs <= 0; |
| end |
| end |
| |
| // Render FSM |
| localparam S_IDLE = 5'd0; |
| localparam S_BLANK = 5'd1; // H Blank and V Blank |
| localparam S_OAMX = 5'd2; // OAM Search X check |
| localparam S_OAMY = 5'd3; // OAM Search Y check |
| localparam S_FTIDA = 5'd4; // Fetch Read Tile ID Stage A (Address Setup) |
| localparam S_FTIDB = 5'd5; // Fetch Read Tile ID Stage B (Data Read) |
| localparam S_FRD0A = 5'd6; // Fetch Read Data 0 Stage A |
| localparam S_FRD0B = 5'd7; // Fetch Read Data 0 Stage B |
| localparam S_FRD1A = 5'd8; // Fetch Read Data 1 Stage A |
| localparam S_FRD1B = 5'd9; // Fetch Read Data 1 Stage B |
| localparam S_FWAITA = 5'd10; // Fetch Wait Stage A (Idle) |
| localparam S_FWAITB = 5'd11; // Fetch Wait Stage B (Load to FIFO?) |
| localparam S_SWW = 5'd12; // Fetch Switch to Window |
| localparam S_OAMRDA = 5'd13; // OAM Read Stage A |
| localparam S_OAMRDB = 5'd14; // OAM Read Stage B |
| localparam S_OFRD0A = 5'd15; // Object Fetch Read Data 0 Stage A |
| localparam S_OFRD0B = 5'd16; // Object Fetch Read Data 0 Stage B |
| localparam S_OFRD1A = 5'd17; // Object Fetch Read Data 1 Stage A |
| localparam S_OFRD1B = 5'd18; // Object Fetch Read Data 1 Stage B |
| localparam S_OWAIT = 5'd19; // Object Wait |
| localparam S_OWB = 5'd20; // Object Write Back |
| |
| localparam PPU_OAM_SEARCH_LENGTH = 6'd40; |
| |
| reg [2:0] h_drop; //Drop pixels when SCX % 8 != 0 |
| wire [2:0] h_extra = reg_scx[2:0]; //Extra line length when SCX % 8 != 0 |
| reg [7:0] h_pix_render; // Horizontal Render Pixel pointer |
| reg [7:0] h_pix_output; // Horizontal Output Pixel counter |
| wire [7:0] h_pix_obj = h_pix_output + 1'b1; // Coordinate used to trigger the object rendering |
| wire [7:0] v_pix = v_count; |
| wire [7:0] v_pix_in_map = v_pix + reg_scy; |
| wire [7:0] v_pix_in_win = v_pix - reg_wy; |
| |
| reg [4:0] r_state = 0; |
| reg [4:0] r_next_backup; |
| reg [4:0] r_next_state; |
| wire is_in_v_blank = ((v_count >= PPU_V_ACTIVE) && (v_count < PPU_V_ACTIVE + PPU_V_BLANK)); |
| |
| reg window_triggered; // Indicate whether window has been triggered, should be replaced by a edge detector |
| wire render_window_or_bg = window_triggered; |
| wire window_trigger = (((h_pix_output) == (reg_wx))&&(v_pix >= reg_wy)&&(reg_win_en)&&(~window_triggered)) ? 1 : 0; |
| |
| wire [2:0] line_to_tile_v_offset_bg = v_pix_in_map[2:0]; // Current line in a tile being rendered |
| wire [4:0] line_in_tile_v_bg = v_pix_in_map[7:3]; // Current tile Y coordinate being rendered |
| wire [2:0] line_to_tile_v_offset_win = v_pix_in_win[2:0]; |
| wire [4:0] line_in_tile_v_win = v_pix_in_win[7:3]; |
| wire [2:0] line_to_tile_v_offset = (render_window_or_bg) ? (line_to_tile_v_offset_win) : (line_to_tile_v_offset_bg); |
| wire [4:0] line_in_tile_v = (render_window_or_bg) ? (line_in_tile_v_win) : (line_in_tile_v_bg); |
| |
| wire [4:0] h_tile_bg = h_pix_render[7:3] + reg_scx[7:3]; // Current tile X coordinate being rendered |
| wire [4:0] h_tile_win = h_pix_render[7:3]; |
| wire [4:0] h_tile = (render_window_or_bg) ? (h_tile_win) : (h_tile_bg); |
| |
| wire [12:0] current_map_address = (((render_window_or_bg) ? (window_map_addr) : (bg_map_addr)) + (line_in_tile_v) * 32 + {8'd0, h_tile}); //Background address |
| reg [7:0] current_tile_id; |
| wire [7:0] current_tile_id_adj = {~((reg_bg_win_data_sel)^(current_tile_id[7])), current_tile_id[6:0]}; // Adjust for 8800 Adressing mode |
| wire [12:0] current_tile_address_0 = (bg_window_tile_addr) + current_tile_id_adj * 16 + (line_to_tile_v_offset * 2); |
| wire [12:0] current_tile_address_1 = (current_tile_address_0) | 13'h0001; |
| reg [7:0] current_tile_data_0; |
| reg [7:0] current_tile_data_1; |
| |
| // Data that will be pushed into pixel FIFO |
| // Organized in pixels |
| reg [31:0] current_fetch_result; |
| always@(current_tile_data_1, current_tile_data_0) begin |
| for (i = 0; i < 8; i = i + 1) begin |
| current_fetch_result[i*4+3] = current_tile_data_1[i]; |
| current_fetch_result[i*4+2] = current_tile_data_0[i]; |
| current_fetch_result[i*4+1] = PPU_PAL_BG[1]; // Fetch could only fetch BG |
| current_fetch_result[i*4+0] = PPU_PAL_BG[0]; |
| end |
| end |
| |
| reg [5:0] oam_search_count; // Counter during OAM search stage |
| reg [5:0] obj_visible_list [0:9]; // Total visible list |
| reg [7:0] obj_trigger_list [0:9]; // Where the obj should be triggered |
| reg [7:0] obj_y_list [0:9]; // Where the obj is |
| reg obj_valid_list [0:9]; // Is obj visible entry valid |
| reg [3:0] oam_visible_count; // ??? |
| |
| wire [7:0] oam_search_x; |
| wire [7:0] oam_search_y; |
| wire [7:0] obj_size_h = (reg_obj_size == 1'b1) ? (8'd16) : (8'd8); |
| wire [7:0] obj_h_upper_boundary = (v_pix + 8'd16); |
| wire [7:0] obj_h_lower_boundary = obj_h_upper_boundary - obj_size_h; |
| |
| reg [3:0] obj_trigger_id; // The object currently being/ or have been rendered, in the visible list |
| |
| localparam OBJ_TRIGGER_NOT_FOUND = 4'd15; |
| |
| // Cascade mux used to implement the searching of next id would be triggered |
| reg [3:0] obj_trigger_id_from[0:10]; |
| reg [3:0] obj_trigger_id_next; |
| always@(h_pix_obj, obj_trigger_id) begin |
| obj_trigger_id_from[10] = OBJ_TRIGGER_NOT_FOUND; // There is no more after the 10th |
| for (i = 9; i >= 0; i = i - 1) begin |
| /* verilator lint_off WIDTH */ |
| obj_trigger_id_from[i] = |
| ((h_pix_obj == obj_trigger_list[i])&&(obj_valid_list[i])) ? (i) : (obj_trigger_id_from[i+1]); |
| // See if this one match, if not, cascade down. |
| /* verilator lint_on WIDTH */ |
| end |
| if (obj_trigger_id == OBJ_TRIGGER_NOT_FOUND) // currently not triggered yet |
| obj_trigger_id_next = obj_trigger_id_from[0]; // Search from start |
| else |
| obj_trigger_id_next = obj_trigger_id_from[obj_trigger_id + 1]; // Search start from next one |
| end |
| |
| //!-- DEBUG -- |
| //wire [3:0] obj_trigger_id_next = ((h_pix_obj == obj_trigger_list[4'd0])&&(obj_valid_list[4'd0])) ? (4'd0) : (4'd15); |
| |
| wire obj_trigger = ((reg_obj_en)&&(obj_trigger_id_next != OBJ_TRIGGER_NOT_FOUND)) ? 1 : 0; |
| //wire obj_trigger = 0; |
| |
| wire [5:0] obj_triggered = obj_visible_list[obj_trigger_id]; // The global id of object being rendered |
| wire [7:0] current_obj_y = obj_y_list[obj_trigger_id]; |
| wire [7:0] current_obj_x = obj_trigger_list[obj_trigger_id]; //h_pix gets incremented before render |
| reg [7:0] current_obj_tile_id_raw; // Tile ID without considering the object size |
| reg [7:0] current_obj_flags; // Flags |
| wire current_obj_to_bg_priority = current_obj_flags[7]; |
| wire current_obj_y_flip = current_obj_flags[6]; |
| wire current_obj_x_flip = current_obj_flags[5]; |
| wire current_obj_pal_id = current_obj_flags[4]; |
| wire [1:0] current_obj_pal= (current_obj_pal_id) ? (PPU_PAL_OB1) : (PPU_PAL_OB0); |
| /* verilator lint_off WIDTH */ |
| wire [3:0] line_to_obj_v_offset_raw = (v_pix + 8'd16 - current_obj_y); // Compensate 16 pixel offset and truncate to 4 bits |
| /* verilator lint_on WIDTH */ |
| wire [7:0] current_obj_tile_id = (reg_obj_size == 1'b1) ? |
| ({current_obj_tile_id_raw[7:1], (((line_to_obj_v_offset_raw[3])^(current_obj_y_flip)) ? 1'b1 : 1'b0)}) : // Select Hi or Lo tile |
| (current_obj_tile_id_raw); // Use tile ID directly |
| wire [2:0] line_to_obj_v_offset = (current_obj_y_flip) ? (~line_to_obj_v_offset_raw[2:0]) : (line_to_obj_v_offset_raw[2:0]); |
| |
| wire [12:0] current_obj_address_0 = current_obj_tile_id * 16 + line_to_obj_v_offset * 2; |
| wire [12:0] current_obj_address_1 = current_obj_address_0 | 13'h0001; |
| reg [7:0] current_obj_tile_data_0; |
| reg [7:0] current_obj_tile_data_1; |
| // Data that will be merged into pixel FIFO |
| // Organized in pixels |
| reg [31:0] merge_result; |
| always@(*) begin |
| for (i = 0; i < 8; i = i + 1) begin |
| if ( |
| ((current_obj_tile_data_1[i] != 1'b0)||(current_obj_tile_data_0[i] != 1'b0))&& |
| ((pf_data[32+i*4+1] == PPU_PAL_BG[1])&&(pf_data[32+i*4+0] == PPU_PAL_BG[0]))&& |
| ( |
| ((current_obj_to_bg_priority)&&(pf_data[32+i*4+3] == 1'b0)&&(pf_data[32+i*4+2] == 1'b0))|| |
| (~current_obj_to_bg_priority) |
| ) |
| ) //(OBJ is not transparent) and ((BG priority and BG is transparent) or (OBJ priority)) |
| begin |
| merge_result[i*4+3] = current_obj_tile_data_1[i]; |
| merge_result[i*4+2] = current_obj_tile_data_0[i]; |
| merge_result[i*4+1] = current_obj_pal[1]; |
| merge_result[i*4+0] = current_obj_pal[0]; |
| end |
| else begin |
| merge_result[i*4+3] = pf_data[32+i*4+3]; |
| merge_result[i*4+2] = pf_data[32+i*4+2]; |
| merge_result[i*4+1] = pf_data[32+i*4+1]; |
| merge_result[i*4+0] = pf_data[32+i*4+0]; |
| end |
| end |
| end |
| |
| assign vram_addr_int_sel = |
| ((r_state == S_OAMRDB) || (r_state == S_OFRD0A) || (r_state == S_OFRD0B) |
| || (r_state == S_OFRD1A) || (r_state == S_OFRD1B)) ? 1'b1 : 1'b0; |
| assign vram_rd = (r_state == S_FTIDA) || (r_state == S_FRD0A) || |
| (r_state == S_FRD1A) || (r_state == S_OFRD0A) || (r_state == S_OFRD1A); |
| |
| // Current mode logic, based on current state |
| always @ (posedge clk) |
| begin |
| if (rst) begin |
| reg_stat[1:0] <= PPU_MODE_V_BLANK; |
| end |
| else begin |
| case (r_state) |
| S_IDLE: reg_stat[1:0] <= (reg_lcd_en) ? (PPU_MODE_V_BLANK) : (PPU_MODE_H_BLANK); |
| S_BLANK: reg_stat[1:0] <= (is_in_v_blank) ? (PPU_MODE_V_BLANK) : (PPU_MODE_H_BLANK); |
| S_OAMX: reg_stat[1:0] <= PPU_MODE_OAM_SEARCH; |
| S_OAMY: reg_stat[1:0] <= PPU_MODE_OAM_SEARCH; |
| S_FTIDA: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_FTIDB: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_FRD0A: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_FRD0B: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_FRD1A: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_FRD1B: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_FWAITA: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_FWAITB: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_SWW: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OAMRDA: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OAMRDB: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OFRD0A: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OFRD0B: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OFRD1A: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OFRD1B: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OWAIT: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| S_OWB: reg_stat[1:0] <= PPU_MODE_PIX_TRANS; |
| default: reg_stat[1:0] <= PPU_MODE_V_BLANK; |
| endcase |
| end |
| end |
| |
| assign oam_search_y = oam_data_out[7:0]; |
| assign oam_search_x = oam_data_out[15:8]; |
| |
| // Render logic |
| always @(posedge clk) |
| begin |
| reg_ly <= v_pix[7:0]; |
| |
| case (r_state) |
| // nothing to do for S_IDLE |
| S_IDLE: begin end |
| S_BLANK: begin |
| h_pix_render <= 8'd0; // Render pointer |
| oam_search_count <= 6'd0; |
| oam_visible_count <= 4'd0; |
| for (i = 0; i < 10; i = i + 1) begin |
| obj_valid_list[i] <= 1'b0; |
| end |
| oam_rd_addr_int <= 8'b0; |
| window_triggered <= 1'b0; |
| // Line start, need to render 16 pixels in 12 clocks |
| // and output 8 null pixels starting from the 4th clock |
| end |
| S_OAMX: begin |
| oam_rd_addr_int <= oam_search_count * 4; |
| end |
| S_OAMY: begin |
| if ((oam_search_y <= obj_h_upper_boundary)&& |
| (oam_search_y > obj_h_lower_boundary)&& |
| (oam_search_x != 8'd0)&& |
| (oam_visible_count < 4'd10)) begin |
| obj_visible_list[oam_visible_count] <= oam_search_count; |
| obj_trigger_list[oam_visible_count] <= oam_search_x; |
| obj_y_list[oam_visible_count] <= oam_search_y; |
| obj_valid_list[oam_visible_count] <= 1'b1; |
| oam_visible_count <= oam_visible_count + 1'b1; |
| end |
| oam_search_count <= oam_search_count + 1'b1; |
| end |
| S_FTIDA: vram_addr_bg <= current_map_address; |
| S_FTIDB: current_tile_id <= vram_data_out; |
| S_FRD0A: vram_addr_bg <= current_tile_address_0; |
| S_FRD0B: current_tile_data_0 <= vram_data_out; |
| S_FRD1A: vram_addr_bg <= current_tile_address_1; |
| S_FRD1B: begin |
| current_tile_data_1 <= vram_data_out; |
| h_pix_render <= h_pix_render + 8'd8; |
| end |
| // nothing to do for S_FWAITA, S_FWAITB |
| S_FWAITA: begin end |
| S_FWAITB: begin end |
| S_SWW: begin |
| h_pix_render <= 8'd0; |
| window_triggered <= 1'b1; |
| end |
| S_OAMRDA: oam_rd_addr_int <= obj_triggered * 4 + 8'd2; |
| S_OAMRDB: begin |
| current_obj_tile_id_raw <= oam_data_out[7:0]; |
| current_obj_flags <= oam_data_out[15:8]; |
| end |
| S_OFRD0A: vram_addr_obj <= current_obj_address_0; |
| S_OFRD0B: |
| if (current_obj_x_flip == 1'b1) |
| current_obj_tile_data_0[7:0] <= { |
| vram_data_out[0], vram_data_out[1], vram_data_out[2], vram_data_out[3], |
| vram_data_out[4], vram_data_out[5], vram_data_out[6], vram_data_out[7] |
| }; |
| else |
| current_obj_tile_data_0 <= vram_data_out; |
| S_OFRD1A: vram_addr_obj <= current_obj_address_1; |
| S_OFRD1B: |
| if (current_obj_x_flip == 1'b1) |
| current_obj_tile_data_1[7:0] <= { |
| vram_data_out[0], vram_data_out[1], vram_data_out[2], vram_data_out[3], |
| vram_data_out[4], vram_data_out[5], vram_data_out[6], vram_data_out[7] |
| }; |
| else |
| current_obj_tile_data_1 <= vram_data_out; |
| // nothing to do for S_OWB |
| S_OWAIT: begin end |
| S_OWB: begin end |
| default: begin |
| $display("Invalid state!"); |
| end |
| endcase |
| end |
| |
| reg [31:0] half_merge_result; |
| always @(current_fetch_result, pf_data) begin |
| for (i = 0; i < 8; i = i + 1) begin |
| if ((pf_data[32+i*4+1] == PPU_PAL_BG[1])&&(pf_data[32+i*4+0] == PPU_PAL_BG[0])) begin |
| half_merge_result[i*4+3] = current_fetch_result[i*4+3]; |
| half_merge_result[i*4+2] = current_fetch_result[i*4+2]; |
| half_merge_result[i*4+1] = current_fetch_result[i*4+1]; |
| half_merge_result[i*4+0] = current_fetch_result[i*4+0]; |
| end |
| else begin |
| half_merge_result[i*4+3] = pf_data[32+i*4+3]; |
| half_merge_result[i*4+2] = pf_data[32+i*4+2]; |
| half_merge_result[i*4+1] = pf_data[32+i*4+1]; |
| half_merge_result[i*4+0] = pf_data[32+i*4+0]; |
| end |
| end |
| end |
| |
| // Output logic |
| always @(posedge clk) |
| begin |
| if (r_state == S_BLANK) begin |
| valid <= 1'b0; |
| h_pix_output <= 8'd0; // Output pointer |
| h_drop <= reg_scx[2:0]; |
| pf_empty <= PF_INITA; |
| end |
| else if ((r_state == S_FTIDA) || (r_state == S_FTIDB) || (r_state == S_FRD0A) || (r_state == S_FRD0B) || |
| (r_state == S_FRD1A) || (r_state == S_FRD1B) || (r_state == S_FWAITA) || (r_state == S_FWAITB)) |
| begin |
| |
| if (r_state == S_FRD1B) begin |
| if (pf_empty == PF_INITA) pf_empty <= PF_INITB; |
| if (pf_empty == PF_INITB) pf_empty <= PF_FIN; |
| if (pf_empty == PF_EMPTY) pf_empty <= PF_HALF; |
| if (pf_empty == PF_HALF) pf_empty <= PF_FIN; |
| end else |
| if (pf_empty == PF_FIN) pf_empty <= PF_FULL; // should NOT wait through end |
| |
| // If it is in one of the output stages |
| if (pf_empty == PF_EMPTY) begin |
| // Just started, no data available |
| valid <= 1'b0; |
| end |
| else if (pf_empty == PF_HALF) begin |
| valid <= 1'b0; |
| if (r_state == S_FTIDA) begin |
| // One batch done, and they can be push into pipeline, but could not be output yet |
| // We need to be careful not to overwrite the sprites... |
| pf_data[63:32] <= half_merge_result[31:0]; |
| end |
| end |
| else if (((pf_empty == PF_INITA)&&((r_state == S_FRD1A)||(r_state == S_FRD1B))) |
| ||(pf_empty == PF_INITB)||(pf_empty == PF_FULL)||(pf_empty == PF_FIN)) begin |
| if (r_state == S_FTIDA) begin // reload and shift |
| if (pf_empty == PF_INITB) begin |
| pf_data[63:0] <= {20'b0, current_fetch_result[31:0], 12'b0}; |
| end |
| else begin // PF_FULL or PF_FIN |
| pf_data[63:0] <= {pf_data[59:32], current_fetch_result[31:0], 4'b0}; |
| end |
| end |
| else begin // just shift |
| pf_data <= {pf_data[59:0], 4'b0}; |
| end |
| |
| if (h_drop != 3'd0) begin |
| h_drop <= h_drop - 1'd1; |
| valid <= 0; |
| end |
| else begin |
| if (h_pix_output >= 8) |
| valid <= 1; |
| else |
| valid <= 0; |
| pixel <= pf_output_pixel; |
| h_pix_output <= h_pix_output + 1'b1; |
| end |
| end |
| end |
| else if (r_state == S_OAMRDA) begin |
| h_pix_output <= h_pix_output - 1'b1; //revert adding |
| valid <= 1'b0; |
| end |
| else if (r_state == S_OWB) begin |
| h_pix_output <= h_pix_output + 1'b1; //restore adding |
| pf_data <= {merge_result[31:0], pf_data[31:0]}; |
| valid <= 1'b0; |
| end |
| else if (r_state == S_SWW) begin |
| pf_empty <= PF_EMPTY; // Flush the pipeline |
| valid <= 1'b0; |
| end |
| else begin |
| // Not even in output stages |
| valid <= 1'b0; |
| end |
| end |
| |
| wire ram_ready = ((ct == 2'b00) || (ct == 2'b10)); |
| wire ppu_ram_stall = !ram_ready && vram_rd; |
| wire need_prewait = (r_state == S_FTIDA) || (r_state == S_FRD0A) || |
| (r_state == S_FRD1A) || (r_state == S_OFRD0A) || (r_state == S_OFRD1A) || |
| (r_state == S_FWAITA); |
| reg postwait; |
| |
| // Enter Next State |
| // and handle object interrupt |
| // (sorry but I need to backup next state so I could not handle these in the next state logic) |
| always @(posedge clk) |
| begin |
| if (rst) begin |
| //h_pix_obj <= 8'b0; |
| r_state <= 0; |
| r_next_backup <= 0; |
| obj_trigger_id <= OBJ_TRIGGER_NOT_FOUND;//not triggered |
| end |
| else |
| begin |
| if (obj_trigger && (reg_mode == PPU_MODE_PIX_TRANS)) begin |
| // If already in object rendering stages |
| if ((r_state == S_OFRD0A)||(r_state == S_OFRD0B)|| |
| (r_state == S_OFRD1A)||(r_state == S_OFRD1B)||(r_state == S_OWAIT)|| |
| (r_state == S_OAMRDA)||(r_state == S_OAMRDB)) begin |
| r_state <= r_next_state; |
| end |
| // Finished one object, but there is more |
| else if (r_state == S_OWB) begin |
| if (postwait) |
| r_state <= S_OAMRDA; |
| else |
| r_state <= S_OWAIT; |
| obj_trigger_id <= obj_trigger_id_next; |
| end |
| // Not rendering object before, start now |
| else begin |
| if (need_prewait) begin |
| r_state <= S_OWAIT; |
| postwait <= 1'b0; |
| end |
| else begin |
| r_state <= S_OAMRDA; |
| postwait <= 1'b1; |
| end |
| r_next_backup <= r_next_state; |
| |
| obj_trigger_id <= obj_trigger_id_next; |
| end |
| end |
| else begin |
| //h_pix_obj <= h_pix_output + 8'd2; |
| //if (!ppu_ram_stall) |
| r_state <= r_next_state; |
| // Finished one object, and there is no more currently |
| if (r_state == S_OWB) begin |
| obj_trigger_id <= OBJ_TRIGGER_NOT_FOUND; |
| end |
| end |
| end |
| end |
| |
| // Next State Logic |
| // Since new state get updated during posedge |
| always @(*) |
| begin |
| case (r_state) |
| S_IDLE: r_next_state = ((reg_lcd_en)&(is_in_v_blank)) ? (S_BLANK) : (S_IDLE); |
| S_BLANK: r_next_state = |
| (reg_lcd_en) ? ( |
| (is_in_v_blank) ? |
| (((v_count == (PPU_V_TOTAL - 1))&&(h_count == (PPU_H_TOTAL - 1))) ? |
| (S_OAMX) : (S_BLANK) |
| ) : |
| ((h_count == (PPU_H_TOTAL - 1)) ? |
| ((v_count == (PPU_V_ACTIVE - 1)) ? |
| (S_BLANK) : (S_OAMX)): |
| (S_BLANK) |
| ) |
| ) : (S_IDLE); |
| S_OAMX: r_next_state = (reg_lcd_en) ? (S_OAMY) : (S_IDLE); |
| S_OAMY: r_next_state = (reg_lcd_en) ? ((oam_search_count == (PPU_OAM_SEARCH_LENGTH - 1'b1)) ? (S_FTIDA) : (S_OAMX)) : (S_IDLE); |
| S_FTIDA: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FTIDB))) : (S_IDLE); |
| S_FTIDB: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD0A))) : (S_IDLE); |
| S_FRD0A: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD0B))) : (S_IDLE); |
| S_FRD0B: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD1A))) : (S_IDLE); |
| S_FRD1A: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD1B))) : (S_IDLE); |
| S_FRD1B: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : ((pf_empty != PF_FULL) ? (S_FTIDA) : (S_FWAITA)))) : (S_IDLE); // If fifo not full, no wait state is needed |
| S_FWAITA: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FWAITB))) : (S_IDLE); |
| S_FWAITB: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FTIDA))) : (S_IDLE); |
| S_SWW: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : (S_FTIDA)) : (S_IDLE); |
| S_OAMRDA: r_next_state = (reg_lcd_en) ? (S_OAMRDB) : (S_IDLE); |
| S_OAMRDB: r_next_state = (reg_lcd_en) ? (S_OFRD0A) : (S_IDLE); |
| S_OFRD0A: r_next_state = (reg_lcd_en) ? (S_OFRD0B) : (S_IDLE); |
| S_OFRD0B: r_next_state = (reg_lcd_en) ? (S_OFRD1A) : (S_IDLE); |
| S_OFRD1A: r_next_state = (reg_lcd_en) ? (S_OFRD1B) : (S_IDLE); |
| S_OFRD1B: r_next_state = (reg_lcd_en) ? (postwait ? S_OWAIT : S_OWB) : (S_IDLE); |
| S_OWAIT: r_next_state = (reg_lcd_en) ? (postwait ? S_OWB : S_OAMRDA) : (S_IDLE); |
| S_OWB: r_next_state = (reg_lcd_en) ? (r_next_backup) : (S_IDLE); |
| default: r_next_state = S_IDLE; |
| endcase |
| end |
| |
| // Interrupt |
| always @(posedge clk) |
| if (rst) |
| reg_stat[2] <= 0; |
| else |
| // TODO: what's the timing for this? |
| reg_stat[2] <= (reg_ly == reg_lyc) ? 1 : 0; |
| |
| always @(posedge clk) |
| begin |
| if (rst) begin |
| int_vblank_req <= 0; |
| int_lcdc_req <= 0; |
| reg_ly_last[7:0] <= 0; |
| //reg_stat[1:0] <= PPU_MODE_V_BLANK; |
| end |
| else |
| begin |
| if ((reg_mode == PPU_MODE_V_BLANK)&&(reg_mode_last != PPU_MODE_V_BLANK)) |
| int_vblank_req <= 1; |
| else if (int_vblank_ack) |
| int_vblank_req <= 0; |
| if (((reg_lyc_int == 1'b1)&&(reg_ly == reg_lyc)&&(reg_ly_last != reg_lyc))|| |
| ((reg_oam_int == 1'b1)&&(reg_mode == PPU_MODE_OAM_SEARCH)&&(reg_mode_last != PPU_MODE_OAM_SEARCH))|| |
| ((reg_vblank_int == 1'b1)&&(reg_mode == PPU_MODE_V_BLANK)&&(reg_mode_last != PPU_MODE_V_BLANK))|| |
| ((reg_hblank_int == 1'b1)&&(reg_mode == PPU_MODE_H_BLANK)&&(reg_mode_last != PPU_MODE_H_BLANK))) |
| int_lcdc_req <= 1; |
| else if (int_lcdc_ack) |
| int_lcdc_req <= 0; |
| reg_ly_last <= reg_ly; |
| reg_mode_last <= reg_mode; |
| end |
| end |
| |
| // Bus RW |
| // Bus RW - Combinational Read |
| always @(*) |
| begin |
| // MMIO Bus |
| mmio_dout = 8'hFF; |
| case (mmio_a) |
| 16'hFF40: mmio_dout = reg_lcdc; |
| 16'hFF41: mmio_dout = reg_stat; |
| 16'hFF42: mmio_dout = reg_scy; |
| 16'hFF43: mmio_dout = reg_scx; |
| 16'hFF44: mmio_dout = reg_ly; |
| 16'hFF45: mmio_dout = reg_lyc; |
| 16'hFF46: mmio_dout = reg_dma; |
| 16'hFF47: mmio_dout = reg_bgp; |
| 16'hFF48: mmio_dout = reg_obp0; |
| 16'hFF49: mmio_dout = reg_obp1; |
| 16'hFF4A: mmio_dout = reg_wy; |
| 16'hFF4B: mmio_dout = reg_wx; |
| endcase |
| end |
| |
| // Bus RW - Sequential Write |
| always @(posedge clk) |
| begin |
| if (rst) begin |
| reg_lcdc <= 8'h00; |
| reg_stat[7:3] <= 5'h00; |
| reg_scy <= 8'h00; |
| reg_scx <= 8'h00; |
| reg_lyc <= 8'h00; |
| reg_dma <= 8'h00; |
| reg_bgp <= 8'hFC; |
| reg_obp0 <= 8'h00; |
| reg_obp1 <= 8'h00; |
| reg_wy <= 8'h00; |
| reg_wx <= 8'h00; |
| end |
| else |
| begin |
| if (mmio_wr) begin |
| case (mmio_a) |
| 16'hFF40: reg_lcdc <= mmio_din; |
| 16'hFF41: reg_stat[7:3] <= mmio_din[7:3]; |
| 16'hFF42: reg_scy <= mmio_din; |
| 16'hFF43: reg_scx <= mmio_din; |
| //16'hFF44: reg_ly <= mmio_din; |
| 16'hFF45: reg_lyc <= mmio_din; |
| 16'hFF46: reg_dma <= mmio_din; |
| 16'hFF47: reg_bgp <= mmio_din; |
| 16'hFF48: reg_obp0 <= mmio_din; |
| 16'hFF49: reg_obp1 <= mmio_din; |
| 16'hFF4A: reg_wy <= mmio_din; |
| 16'hFF4B: reg_wx <= mmio_din; |
| endcase |
| // VRAM and OAM access are not handled here |
| end |
| end |
| end |
| |
| // Debug Outputs |
| assign scx = reg_scx; |
| assign scy = reg_scy; |
| assign state = r_state; |
| |
| endmodule |