// blob: 662fbb19fe25775c4accbe97d5e5f6c37bafcae2 [file] [log] [blame]
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer: Wenting Zhang
//
// Create Date: 18:48:36 02/14/2018
// Design Name:
// Module Name: ppu
// Project Name:
// Target Devices:
// Tool versions:
// Description:
// GameBoy PPU
// Additional Comments:
// There are three hardware layers in the GameBoy PPU: Background, Window, and
// Object (or sprites).
//
// Window will render above the background and the object can render above the
// background or under the background. Each object has a priority bit to
// indicate where it should be rendered.
//
// Background, Window, and Object can be individually turned on or off. When
// nothing is turned on, it displays white.
//
// The whole render logic does NOT require a scanline buffer to work, and it
// runs at 4MHz (VRAM runs at 2MHz)
//
// There are two main parts of the logic, implemented in a big FSM. The first
// one is the fetch unit, and the other is the pixel FIFO.
//
// The pixel FIFO shifts out one pixel when it contains more than 8 pixels. The
// fetch unit generally renders 8 pixels in 6 cycles, so 2 wait cycles are
// inserted to keep the two in sync. When there are not enough pixels,
// the FIFO stops and waits for the fetch unit.
//
// Window trigger is handled in the next state logic; there is a distinct state
// for the PPU to switch from background rendering to window rendering (flush
// the FIFO and add wait cycles).
//
// Object trigger is handled in the state change block, in order to back up the
// previous state. The current RAM address is also backed up during the handling
// of object rendering. Once all the objects at this position have been
// rendered, the render state machine is restored to its previous state.
//
// The output pixel clock is the inverted main clock, which is the same as on
// the real Game Boy. Pixel data is put on the pixel bus on the negedge of the
// clock, so the LCD latches the data on the posedge. The original Game Boy
// used a gated clock to control whether the output is valid. Since a gated
// clock is not recommended, I used a valid signal to indicate whether the
// output should be considered valid.
//////////////////////////////////////////////////////////////////////////////////
`default_nettype wire
// GameBoy PPU. Port groups: clock/reset, CPU MMIO bus, CPU OAM bus, interrupt
// handshake, pixel output, external video RAM interface, and debug taps.
module ppu(
input clk, // Main clock; registers update on posedge, the OAM memory on negedge
input rst, // Synchronous reset, active high
input wire [1:0] ct, // Only used to derive ram_ready (high when ct is 2'b00 or 2'b10)
// MMIO Bus, 0xFF40 - 0xFF4B, always visible to CPU
input wire [15:0] mmio_a,
output reg [7:0] mmio_dout, // Combinational read data; 8'hFF for addresses not decoded here
input wire [7:0] mmio_din,
input wire mmio_rd, // Not referenced inside this module
input wire mmio_wr, // Register write strobe, sampled on posedge clk
// OAM Bus, 0xFE00 - 0xFE9F
input wire [15:0] oam_a, // Only bits [7:0] are used
output wire [7:0] oam_dout, // Reads 8'hFF unless the PPU mode is H-Blank or V-Blank
input wire [7:0] oam_din,
input wire oam_rd, // Not referenced inside this module
input wire oam_wr, // Write strobe; drives the OAM write enable directly (not gated by mode)
// Interrupt interface
output reg int_vblank_req, // Set on entry to V-Blank mode, cleared by int_vblank_ack
output reg int_lcdc_req, // Set on any enabled STAT source, cleared by int_lcdc_ack
input int_vblank_ack,
input int_lcdc_ack,
// Pixel output
output cpl, // Pixel Clock, = ~clk
output reg [1:0] pixel, // Pixel Output
output reg valid, // Pixel Valid
output reg hs, // Horizontal Sync, High Valid
output reg vs, // Vertical Sync, High Valid
// Video RAM interface
output wire [12:0] vram_a, // Selected between the BG/window fetch address and the object fetch address
output wire vram_wr, // Tied low: the PPU never writes VRAM
output wire vram_rd, // High in the address-setup ("A") fetch states
output wire [7:0] vram_din, // Tied to zero
input wire [7:0] vram_dout,
//Debug output
output [7:0] scx, // Copy of reg_scx
output [7:0] scy, // Copy of reg_scy
output [4:0] state // Copy of the render FSM state register
);
// Loop index shared by the for-loops in the always blocks of this module
integer i;
// PPU registers
reg [7:0] reg_lcdc; //$FF40 LCD Control (R/W)
reg [7:0] reg_stat; //$FF41 LCDC Status: [7:3] CPU-writable, [2] LY==LYC flag, [1:0] mode driven by the FSM
reg [7:0] reg_scy; //$FF42 Scroll Y (R/W)
reg [7:0] reg_scx; //$FF43 Scroll X (R/W)
reg [7:0] reg_ly; //$FF44 LCDC Y-Coordinate (R), reloaded from v_pix every clock; CPU writes are not decoded
reg [7:0] reg_dma; //$FF46 DMA, actually handled outside of PPU for now
reg [7:0] reg_lyc; //$FF45 LY Compare (R/W)
reg [7:0] reg_bgp; //$FF47 BG Palette Data (R/W) Non-CGB mode only
reg [7:0] reg_obp0; //$FF48 Object Palette 0 Data (R/W) Non-CGB mode only
reg [7:0] reg_obp1; //$FF49 Object Palette 1 Data (R/W) Non-CGB mode only
reg [7:0] reg_wy; //$FF4A Window Y Position (R/W)
reg [7:0] reg_wx; //$FF4B Window X Position (R/W)
// Previous-cycle copies used by the interrupt block for edge detection
reg [7:0] reg_ly_last; // reg_ly as sampled on the previous clock
reg [1:0] reg_mode_last; // reg_mode as sampled on the previous clock
// LCDC bit fields
wire reg_lcd_en = reg_lcdc[7]; //0=Off, 1=On
wire reg_win_disp_sel = reg_lcdc[6]; //0=9800-9BFF, 1=9C00-9FFF
wire reg_win_en = reg_lcdc[5]; //0=Off, 1=On
wire reg_bg_win_data_sel = reg_lcdc[4]; //0=8800-97FF, 1=8000-8FFF
wire reg_bg_disp_sel = reg_lcdc[3]; //0=9800-9BFF, 1=9C00-9FFF
wire reg_obj_size = reg_lcdc[2]; //0=8x8, 1=8x16
wire reg_obj_en = reg_lcdc[1]; //0=Off, 1=On
wire reg_bg_disp = reg_lcdc[0]; //0=Off, 1=On
// STAT bit fields: interrupt source enables, coincidence flag, current mode
wire reg_lyc_int = reg_stat[6];
wire reg_oam_int = reg_stat[5];
wire reg_vblank_int = reg_stat[4];
wire reg_hblank_int = reg_stat[3];
wire reg_coin_flag = reg_stat[2];
wire [1:0] reg_mode = reg_stat[1:0];
// Encodings of reg_mode
localparam PPU_MODE_H_BLANK = 2'b00;
localparam PPU_MODE_V_BLANK = 2'b01;
localparam PPU_MODE_OAM_SEARCH = 2'b10;
localparam PPU_MODE_PIX_TRANS = 2'b11;
// Palette selector stored in the low 2 bits of every pixel FIFO entry
localparam PPU_PAL_BG = 2'b00;
localparam PPU_PAL_OB0 = 2'b01;
localparam PPU_PAL_OB1 = 2'b10;
// VRAM address registers: one for the BG/window fetcher, one for the object fetcher
reg [12:0] vram_addr_bg;
reg [12:0] vram_addr_obj;
wire [12:0] vram_addr_int;
wire [12:0] vram_addr_ext;
wire vram_addr_int_sel; // 0 - BG, 1 - OBJ
assign vram_addr_int = (vram_addr_int_sel == 1'b1) ? (vram_addr_obj) : (vram_addr_bg);
// The OAM bus owns the OAM read address only while the mode is H-Blank or V-Blank
wire oam_access_ext = ((reg_mode == PPU_MODE_H_BLANK)||
(reg_mode == PPU_MODE_V_BLANK));
// Base addresses as 13-bit offsets into VRAM (offset 0 corresponds to 0x8000)
wire [12:0] window_map_addr = (reg_win_disp_sel) ? (13'h1C00) : (13'h1800);
wire [12:0] bg_map_addr = (reg_bg_disp_sel) ? (13'h1C00) : (13'h1800);
wire [12:0] bg_window_tile_addr = (reg_bg_win_data_sel) ? (13'h0000) : (13'h0800);
// PPU Memories
// 8 bit WR, 16 bit RD, 160Bytes OAM
// OAM is split into two byte-wide arrays so that one read returns a 16-bit
// pair: oam_l holds the even-addressed bytes and oam_u the odd-addressed ones.
reg [7:0] oam_u [0: 79];
reg [7:0] oam_l [0: 79];
reg [7:0] oam_rd_addr_int; // Read address driven by the render FSM
wire [7:0] oam_rd_addr;
wire [7:0] oam_wr_addr;
reg [15:0] oam_data_out; // Registered read data, {odd byte, even byte}
wire [7:0] oam_data_out_byte;
wire [7:0] oam_data_in;
wire oam_we;
// Clocked on the falling edge of clk. A write takes priority over a read:
// oam_data_out is only refreshed in cycles where oam_we is low.
// NOTE(review): the 7-bit index addr[7:1] can exceed 79; presumably callers
// never present addresses above 0x9F -- confirm.
always @ (negedge clk)
begin
if (oam_we) begin
if (oam_wr_addr[0])
oam_u[oam_wr_addr[7:1]] <= oam_data_in;
else
oam_l[oam_wr_addr[7:1]] <= oam_data_in;
end
else begin
oam_data_out <= {oam_u[oam_rd_addr[7:1]], oam_l[oam_rd_addr[7:1]]};
end
end
assign oam_wr_addr = oam_a[7:0];
// The OAM bus address is used for reads in H-Blank/V-Blank, the FSM address otherwise
assign oam_rd_addr = (oam_access_ext) ? (oam_a[7:0]) : (oam_rd_addr_int);
assign oam_data_in = oam_din;
// Byte select uses the current (unregistered) address bit 0
assign oam_data_out_byte = (oam_rd_addr[0]) ? oam_data_out[15:8] : oam_data_out[7:0];
//assign oam_we = (wr)&(oam_access_ext);
assign oam_we = oam_wr; // What if always allow OAM access?
// Reads outside H-Blank/V-Blank return 8'hFF
assign oam_dout = (oam_access_ext) ? (oam_data_out_byte) : (8'hFF);
// External VRAM port (8 bit WR, 8 bit RD, 8KB). The PPU is a read-only
// client: the write strobe and write data are tied off, the address comes
// from the internal BG/OBJ address mux, and read data is passed through.
wire [7:0] vram_data_out = vram_dout;
assign vram_a   = vram_addr_int;
assign vram_din = 8'h00;
assign vram_wr  = 1'b0; // PPU doesn't write to VRAM
// Pixel Pipeline
// The pixel FIFO holds 16 entries of 4 bits each: {2-bit color index, 2-bit
// palette selector}. Pushes and pops are 8-pixel aligned, so it behaves like a
// ping-pong buffer made of two 32-bit (8 pixels * 4 bits) halves.
reg [63:0] pf_data; // Pixel FIFO Data, entry at [63:60] is the next one out

// Head entry split into its two fields
wire [1:0] pf_output_pixel_id   = pf_data[63:62];
wire [1:0] pf_output_palette_id = pf_data[61:60];

// Palette register picked by the head entry's selector (unused code reads 8'hFF)
wire [7:0] pf_output_palette =
    (pf_output_palette_id == PPU_PAL_BG)  ? (reg_bgp)  :
    (pf_output_palette_id == PPU_PAL_OB0) ? (reg_obp0) :
    (pf_output_palette_id == PPU_PAL_OB1) ? (reg_obp1) : (8'hFF);

// Shade looked up from the palette by color index
wire [1:0] pf_output_pixel =
    (pf_output_pixel_id == 2'b11) ? (pf_output_palette[7:6]) :
    (pf_output_pixel_id == 2'b10) ? (pf_output_palette[5:4]) :
    (pf_output_pixel_id == 2'b01) ? (pf_output_palette[3:2]) :
    (pf_output_pixel_id == 2'b00) ? (pf_output_palette[1:0]) : (2'b00);

// Fill level of the pixel FIFO
reg [2:0] pf_empty;
localparam PF_FULL  = 3'd0; // Normal
localparam PF_FIN   = 3'd1; // 16 pixels in, but still no wait cycles
localparam PF_HALF  = 3'd2; // After flushed, 8 pixels in
localparam PF_EMPTY = 3'd3; // When the pipeline get flushed
localparam PF_INITB = 3'd4; // Line start, 2 pixels out, 8 rendered
localparam PF_INITA = 3'd5; // When a line start...

assign cpl = ~clk;
//assign pixel = pf_output_pixel;
// HV Timing
// Horizontal values are in clocks (9-bit), vertical values in lines (8-bit).
localparam PPU_H_FRONT  = 9'd76;
localparam PPU_H_SYNC   = 9'd4;   // So front porch + sync = OAM search
localparam PPU_H_TOTAL  = 9'd456;
localparam PPU_H_PIXEL  = 9'd160;
// 8 null pixels in the front for objects which have x < 8, 8 bit counter
localparam PPU_H_OUTPUT = 8'd168;
localparam PPU_V_ACTIVE = 8'd144;
localparam PPU_V_BACK   = 8'd9;
localparam PPU_V_SYNC   = 8'd1;
localparam PPU_V_BLANK  = 8'd10;
localparam PPU_V_TOTAL  = 8'd154;

// Raw timing counter
reg [8:0] h_count;
reg [7:0] v_count;

// HV counter
// Free-running line/frame counters plus the hs/vs pulses derived from them.
// v_count and vs only change on the clock where h_count wraps.
always @(posedge clk)
begin
    if (rst) begin
        h_count <= 0;
        v_count <= 0;
        hs <= 0;
        vs <= 0;
    end
    else begin
        // hs is high from h_count == PPU_H_FRONT up to PPU_H_FRONT + PPU_H_SYNC - 1
        if (h_count == PPU_H_FRONT - 1)
            hs <= 1;
        else if (h_count == PPU_H_FRONT + PPU_H_SYNC - 1)
            hs <= 0;

        if (h_count < PPU_H_TOTAL - 1)
            h_count <= h_count + 1'b1;
        else begin
            // End of line: wrap horizontally and step the line counter
            h_count <= 0;
            if (v_count < PPU_V_TOTAL - 1)
                v_count <= v_count + 1'b1;
            else
                v_count <= 0;

            // vs spans PPU_V_SYNC lines after the vertical back porch
            if (v_count == PPU_V_ACTIVE + PPU_V_BACK - 1)
                vs <= 1;
            else if (v_count == PPU_V_ACTIVE + PPU_V_BACK + PPU_V_SYNC - 1)
                vs <= 0;
        end
    end
end
// Render FSM
// State encodings. "A" states set up an address, "B" states capture the data.
localparam S_IDLE = 5'd0;
localparam S_BLANK = 5'd1; // H Blank and V Blank
localparam S_OAMX = 5'd2; // OAM Search X check
localparam S_OAMY = 5'd3; // OAM Search Y check
localparam S_FTIDA = 5'd4; // Fetch Read Tile ID Stage A (Address Setup)
localparam S_FTIDB = 5'd5; // Fetch Read Tile ID Stage B (Data Read)
localparam S_FRD0A = 5'd6; // Fetch Read Data 0 Stage A
localparam S_FRD0B = 5'd7; // Fetch Read Data 0 Stage B
localparam S_FRD1A = 5'd8; // Fetch Read Data 1 Stage A
localparam S_FRD1B = 5'd9; // Fetch Read Data 1 Stage B
localparam S_FWAITA = 5'd10; // Fetch Wait Stage A (Idle)
localparam S_FWAITB = 5'd11; // Fetch Wait Stage B (Load to FIFO?)
localparam S_SWW = 5'd12; // Fetch Switch to Window
localparam S_OAMRDA = 5'd13; // OAM Read Stage A
localparam S_OAMRDB = 5'd14; // OAM Read Stage B
localparam S_OFRD0A = 5'd15; // Object Fetch Read Data 0 Stage A
localparam S_OFRD0B = 5'd16; // Object Fetch Read Data 0 Stage B
localparam S_OFRD1A = 5'd17; // Object Fetch Read Data 1 Stage A
localparam S_OFRD1B = 5'd18; // Object Fetch Read Data 1 Stage B
localparam S_OWAIT = 5'd19; // Object Wait
localparam S_OWB = 5'd20; // Object Write Back
localparam PPU_OAM_SEARCH_LENGTH = 6'd40; // Number of OAM entries examined per line
reg [2:0] h_drop; //Drop pixels when SCX % 8 != 0
wire [2:0] h_extra = reg_scx[2:0]; //Extra line length when SCX % 8 != 0
reg [7:0] h_pix_render; // Horizontal Render Pixel pointer
reg [7:0] h_pix_output; // Horizontal Output Pixel counter
wire [7:0] h_pix_obj = h_pix_output + 1'b1; // Coordinate used to trigger the object rendering
// Line coordinates: raw line, line inside the scrolled BG map, line inside the window
wire [7:0] v_pix = v_count;
wire [7:0] v_pix_in_map = v_pix + reg_scy;
wire [7:0] v_pix_in_win = v_pix - reg_wy;
reg [4:0] r_state = 0; // Current FSM state
reg [4:0] r_next_backup; // Fetch state to resume after object rendering
reg [4:0] r_next_state; // Combinational next state
wire is_in_v_blank = ((v_count >= PPU_V_ACTIVE) && (v_count < PPU_V_ACTIVE + PPU_V_BLANK));
reg window_triggered; // Indicate whether window has been triggered, should be replaced by a edge detector
wire render_window_or_bg = window_triggered; // 1 = fetch from the window, 0 = from the background
// Fires once per line, when the output counter equals WX on a line at or below WY
wire window_trigger = (((h_pix_output) == (reg_wx))&&(v_pix >= reg_wy)&&(reg_win_en)&&(~window_triggered)) ? 1 : 0;
wire [2:0] line_to_tile_v_offset_bg = v_pix_in_map[2:0]; // Current line in a tile being rendered
wire [4:0] line_in_tile_v_bg = v_pix_in_map[7:3]; // Current tile Y coordinate being rendered
wire [2:0] line_to_tile_v_offset_win = v_pix_in_win[2:0];
wire [4:0] line_in_tile_v_win = v_pix_in_win[7:3];
wire [2:0] line_to_tile_v_offset = (render_window_or_bg) ? (line_to_tile_v_offset_win) : (line_to_tile_v_offset_bg);
wire [4:0] line_in_tile_v = (render_window_or_bg) ? (line_in_tile_v_win) : (line_in_tile_v_bg);
wire [4:0] h_tile_bg = h_pix_render[7:3] + reg_scx[7:3]; // Current tile X coordinate being rendered
wire [4:0] h_tile_win = h_pix_render[7:3];
wire [4:0] h_tile = (render_window_or_bg) ? (h_tile_win) : (h_tile_bg);
// Tile map entry address: map base + 32 entries per tile row + tile column
wire [12:0] current_map_address = (((render_window_or_bg) ? (window_map_addr) : (bg_map_addr)) + (line_in_tile_v) * 32 + {8'd0, h_tile}); //Background address
reg [7:0] current_tile_id;
// Bit 7 of the tile ID is inverted when reg_bg_win_data_sel is 0, so that
// together with the 13'h0800 base, ID 0 lands at offset 13'h1000 and ID 0x80
// at offset 13'h0800.
wire [7:0] current_tile_id_adj = {~((reg_bg_win_data_sel)^(current_tile_id[7])), current_tile_id[6:0]}; // Adjust for 8800 Adressing mode
// 16 bytes per tile, 2 bytes per tile line; _0 is the even byte, _1 the odd byte
wire [12:0] current_tile_address_0 = (bg_window_tile_addr) + current_tile_id_adj * 16 + (line_to_tile_v_offset * 2);
wire [12:0] current_tile_address_1 = (current_tile_address_0) | 13'h0001;
reg [7:0] current_tile_data_0;
reg [7:0] current_tile_data_1;
// Data that will be pushed into pixel FIFO
// Organized in pixels: entry n occupies bits [4n+3:4n] and is
// {tile_data_1[n], tile_data_0[n], PPU_PAL_BG}. The BG/window fetcher can only
// ever produce background-palette pixels.
reg [31:0] current_fetch_result;
always @(*) begin : fetch_result_pack
    integer px;
    for (px = 0; px < 8; px = px + 1)
        current_fetch_result[px*4 +: 4] =
            {current_tile_data_1[px], current_tile_data_0[px], PPU_PAL_BG};
end
// OAM search results: up to 10 objects are recorded per line
reg [5:0] oam_search_count; // Counter during OAM search stage
reg [5:0] obj_visible_list [0:9]; // Total visible list (OAM entry number of each recorded object)
reg [7:0] obj_trigger_list [0:9]; // Where the obj should be triggered (the entry's X byte)
reg [7:0] obj_y_list [0:9]; // Where the obj is (the entry's Y byte)
reg obj_valid_list [0:9]; // Is obj visible entry valid
reg [3:0] oam_visible_count; // Number of list slots filled so far on this line
wire [7:0] oam_search_x;
wire [7:0] oam_search_y;
wire [7:0] obj_size_h = (reg_obj_size == 1'b1) ? (8'd16) : (8'd8);
// An object is on the current line when lower < Y <= upper, where
// upper = v_pix + 16 and lower = upper - object height
wire [7:0] obj_h_upper_boundary = (v_pix + 8'd16);
wire [7:0] obj_h_lower_boundary = obj_h_upper_boundary - obj_size_h;
reg [3:0] obj_trigger_id; // The object currently being/ or have been rendered, in the visible list
localparam OBJ_TRIGGER_NOT_FOUND = 4'd15; // Sentinel: no list slot selected
// Cascade mux used to implement the searching of next id would be triggered
//
// obj_trigger_id_from[n] is the lowest visible-list slot >= n whose trigger X
// equals h_pix_obj and whose valid bit is set, or OBJ_TRIGGER_NOT_FOUND if
// there is none. obj_trigger_id_next then picks the search start: slot 0 when
// no object is currently selected, otherwise the slot after the current one.
//
// The block is sensitive to everything it reads (@*). The previous explicit
// list named only h_pix_obj and obj_trigger_id, so in simulation the result
// was not re-evaluated when obj_trigger_list[] or obj_valid_list[] changed,
// which does not match the combinational logic that synthesis builds.
// A block-local loop index is used so the shared module-level index is not
// part of this block's sensitivity.
reg [3:0] obj_trigger_id_from[0:10];
reg [3:0] obj_trigger_id_next;
always @(*) begin : obj_trigger_search
    integer slot;
    obj_trigger_id_from[10] = OBJ_TRIGGER_NOT_FOUND; // There is no more after the 10th
    for (slot = 9; slot >= 0; slot = slot - 1) begin
        /* verilator lint_off WIDTH */
        // See if this one match, if not, cascade down.
        obj_trigger_id_from[slot] =
            ((h_pix_obj == obj_trigger_list[slot]) && (obj_valid_list[slot])) ?
                (slot) : (obj_trigger_id_from[slot + 1]);
        /* verilator lint_on WIDTH */
    end
    if (obj_trigger_id == OBJ_TRIGGER_NOT_FOUND) // currently not triggered yet
        obj_trigger_id_next = obj_trigger_id_from[0]; // Search from start
    else
        obj_trigger_id_next = obj_trigger_id_from[obj_trigger_id + 1]; // Search start from next one
end
//!-- DEBUG --
//wire [3:0] obj_trigger_id_next = ((h_pix_obj == obj_trigger_list[4'd0])&&(obj_valid_list[4'd0])) ? (4'd0) : (4'd15);
// High when objects are enabled and the search found a slot to render at h_pix_obj
wire obj_trigger = ((reg_obj_en)&&(obj_trigger_id_next != OBJ_TRIGGER_NOT_FOUND)) ? 1 : 0;
//wire obj_trigger = 0;
wire [5:0] obj_triggered = obj_visible_list[obj_trigger_id]; // The global id of object being rendered
wire [7:0] current_obj_y = obj_y_list[obj_trigger_id];
wire [7:0] current_obj_x = obj_trigger_list[obj_trigger_id]; //h_pix gets incremented before render
reg [7:0] current_obj_tile_id_raw; // Tile ID without considering the object size
reg [7:0] current_obj_flags; // Flags
// Flag byte fields
wire current_obj_to_bg_priority = current_obj_flags[7]; // 1 = object only shows over BG color index 0
wire current_obj_y_flip = current_obj_flags[6];
wire current_obj_x_flip = current_obj_flags[5];
wire current_obj_pal_id = current_obj_flags[4];
wire [1:0] current_obj_pal= (current_obj_pal_id) ? (PPU_PAL_OB1) : (PPU_PAL_OB0);
/* verilator lint_off WIDTH */
wire [3:0] line_to_obj_v_offset_raw = (v_pix + 8'd16 - current_obj_y); // Compensate 16 pixel offset and truncate to 4 bits
/* verilator lint_on WIDTH */
// In 8x16 mode bit 0 of the tile ID is replaced by (row bit 3 XOR y_flip)
wire [7:0] current_obj_tile_id = (reg_obj_size == 1'b1) ?
({current_obj_tile_id_raw[7:1], (((line_to_obj_v_offset_raw[3])^(current_obj_y_flip)) ? 1'b1 : 1'b0)}) : // Select Hi or Lo tile
(current_obj_tile_id_raw); // Use tile ID directly
// Row inside the 8-line tile; inverted bitwise when the object is Y-flipped
wire [2:0] line_to_obj_v_offset = (current_obj_y_flip) ? (~line_to_obj_v_offset_raw[2:0]) : (line_to_obj_v_offset_raw[2:0]);
// Object tiles are addressed from VRAM offset 0: 16 bytes per tile, 2 bytes per row
wire [12:0] current_obj_address_0 = current_obj_tile_id * 16 + line_to_obj_v_offset * 2;
wire [12:0] current_obj_address_1 = current_obj_address_0 | 13'h0001;
reg [7:0] current_obj_tile_data_0;
reg [7:0] current_obj_tile_data_1;
// Data that will be merged into pixel FIFO
// Organized in pixels. For each of the 8 entries in the upper half of the
// FIFO (pf_data[63:32]) the object pixel replaces the FIFO entry when:
//   (OBJ is not transparent) and (the entry still carries the BG palette) and
//   ((BG priority and BG is color 0) or (OBJ priority))
// Otherwise the existing FIFO entry is kept unchanged.
reg [31:0] merge_result;
always @(*) begin : obj_merge_pack
    integer px;
    reg obj_opaque;   // object color index is non-zero
    reg slot_is_bg;   // FIFO entry has not been claimed by an earlier object
    reg bg_is_color0; // FIFO entry's color index is 0
    for (px = 0; px < 8; px = px + 1) begin
        obj_opaque   = (current_obj_tile_data_1[px] != 1'b0) || (current_obj_tile_data_0[px] != 1'b0);
        slot_is_bg   = (pf_data[32 + px*4 +: 2] == PPU_PAL_BG);
        bg_is_color0 = (pf_data[32 + px*4 + 2 +: 2] == 2'b00);
        if (obj_opaque && slot_is_bg &&
            ((current_obj_to_bg_priority && bg_is_color0) || (~current_obj_to_bg_priority)))
            merge_result[px*4 +: 4] =
                {current_obj_tile_data_1[px], current_obj_tile_data_0[px], current_obj_pal};
        else
            merge_result[px*4 +: 4] = pf_data[32 + px*4 +: 4];
    end
end
// VRAM address source: the object address register is selected from S_OAMRDB
// through the end of the object data fetch, the BG/window one otherwise.
assign vram_addr_int_sel =
    (r_state == S_OAMRDB) ||
    (r_state == S_OFRD0A) || (r_state == S_OFRD0B) ||
    (r_state == S_OFRD1A) || (r_state == S_OFRD1B);

// VRAM read strobe: asserted in every address-setup ("A") fetch state.
assign vram_rd =
    (r_state == S_FTIDA)  ||
    (r_state == S_FRD0A)  || (r_state == S_FRD1A) ||
    (r_state == S_OFRD0A) || (r_state == S_OFRD1A);
// Current mode logic, based on current state
// STAT[1:0] follows the FSM with one clock of delay:
//   idle        -> V-Blank while the LCD is enabled, H-Blank while it is off
//   blank       -> V-Blank or H-Blank depending on the line counter
//   OAM search  -> OAM search
//   any BG/window/object fetch state -> pixel transfer
// Unused state encodings report V-Blank.
always @ (posedge clk)
begin
    if (rst)
        reg_stat[1:0] <= PPU_MODE_V_BLANK;
    else begin
        case (r_state)
            S_IDLE:
                reg_stat[1:0] <= (reg_lcd_en) ? (PPU_MODE_V_BLANK) : (PPU_MODE_H_BLANK);
            S_BLANK:
                reg_stat[1:0] <= (is_in_v_blank) ? (PPU_MODE_V_BLANK) : (PPU_MODE_H_BLANK);
            S_OAMX, S_OAMY:
                reg_stat[1:0] <= PPU_MODE_OAM_SEARCH;
            S_FTIDA, S_FTIDB,
            S_FRD0A, S_FRD0B,
            S_FRD1A, S_FRD1B,
            S_FWAITA, S_FWAITB,
            S_SWW,
            S_OAMRDA, S_OAMRDB,
            S_OFRD0A, S_OFRD0B,
            S_OFRD1A, S_OFRD1B,
            S_OWAIT, S_OWB:
                reg_stat[1:0] <= PPU_MODE_PIX_TRANS;
            default:
                reg_stat[1:0] <= PPU_MODE_V_BLANK;
        endcase
    end
end
// The 16-bit OAM read returns {X byte, Y byte} of the entry being searched
assign {oam_search_x, oam_search_y} = oam_data_out;
// Render logic
// Datapath side effects of each FSM state: OAM search bookkeeping, BG/window
// tile fetch registers, and object fetch registers. This block has no reset
// branch; the per-line registers are (re)initialized while in S_BLANK.
always @(posedge clk)
begin
reg_ly <= v_pix[7:0]; // LY follows the line counter, one clock later
case (r_state)
// nothing to do for S_IDLE
S_IDLE: begin end
S_BLANK: begin
h_pix_render <= 8'd0; // Render pointer
oam_search_count <= 6'd0;
oam_visible_count <= 4'd0;
// Invalidate the whole visible-object list for the coming line
for (i = 0; i < 10; i = i + 1) begin
obj_valid_list[i] <= 1'b0;
end
oam_rd_addr_int <= 8'b0;
window_triggered <= 1'b0;
// Line start, need to render 16 pixels in 12 clocks
// and output 8 null pixels starting from the 4th clock
end
S_OAMX: begin
// 4 bytes per OAM entry: point at byte 0 so the 16-bit read returns {X, Y}
oam_rd_addr_int <= oam_search_count * 4;
end
S_OAMY: begin
// Record the entry if it overlaps this line, its X is non-zero, and
// fewer than 10 objects have been recorded so far
if ((oam_search_y <= obj_h_upper_boundary)&&
(oam_search_y > obj_h_lower_boundary)&&
(oam_search_x != 8'd0)&&
(oam_visible_count < 4'd10)) begin
obj_visible_list[oam_visible_count] <= oam_search_count;
obj_trigger_list[oam_visible_count] <= oam_search_x;
obj_y_list[oam_visible_count] <= oam_search_y;
obj_valid_list[oam_visible_count] <= 1'b1;
oam_visible_count <= oam_visible_count + 1'b1;
end
oam_search_count <= oam_search_count + 1'b1;
end
// BG/window fetch: tile ID, then the two bytes of the tile row
S_FTIDA: vram_addr_bg <= current_map_address;
S_FTIDB: current_tile_id <= vram_data_out;
S_FRD0A: vram_addr_bg <= current_tile_address_0;
S_FRD0B: current_tile_data_0 <= vram_data_out;
S_FRD1A: vram_addr_bg <= current_tile_address_1;
S_FRD1B: begin
current_tile_data_1 <= vram_data_out;
h_pix_render <= h_pix_render + 8'd8; // One tile (8 pixels) fetched
end
// nothing to do for S_FWAITA, S_FWAITB
S_FWAITA: begin end
S_FWAITB: begin end
S_SWW: begin
// Restart the fetch pointer and fetch from the window for the rest of the line
h_pix_render <= 8'd0;
window_triggered <= 1'b1;
end
// Object fetch: bytes 2 and 3 of the OAM entry are the tile ID and flags
S_OAMRDA: oam_rd_addr_int <= obj_triggered * 4 + 8'd2;
S_OAMRDB: begin
current_obj_tile_id_raw <= oam_data_out[7:0];
current_obj_flags <= oam_data_out[15:8];
end
S_OFRD0A: vram_addr_obj <= current_obj_address_0;
S_OFRD0B:
// X flip is applied by bit-reversing the tile row as it is captured
if (current_obj_x_flip == 1'b1)
current_obj_tile_data_0[7:0] <= {
vram_data_out[0], vram_data_out[1], vram_data_out[2], vram_data_out[3],
vram_data_out[4], vram_data_out[5], vram_data_out[6], vram_data_out[7]
};
else
current_obj_tile_data_0 <= vram_data_out;
S_OFRD1A: vram_addr_obj <= current_obj_address_1;
S_OFRD1B:
if (current_obj_x_flip == 1'b1)
current_obj_tile_data_1[7:0] <= {
vram_data_out[0], vram_data_out[1], vram_data_out[2], vram_data_out[3],
vram_data_out[4], vram_data_out[5], vram_data_out[6], vram_data_out[7]
};
else
current_obj_tile_data_1 <= vram_data_out;
// nothing to do for S_OWAIT, S_OWB
S_OWAIT: begin end
S_OWB: begin end
default: begin
$display("Invalid state!");
end
endcase
end
// Merge of freshly fetched BG/window pixels into the upper half of the FIFO
// (pf_data[63:32]): an entry that still carries the BG palette selector is
// replaced by the fetched pixel, while an entry already claimed by an object
// is kept as it is.
reg [31:0] half_merge_result;
always @(*) begin : half_merge_pack
    integer px;
    for (px = 0; px < 8; px = px + 1) begin
        if (pf_data[32 + px*4 +: 2] == PPU_PAL_BG)
            half_merge_result[px*4 +: 4] = current_fetch_result[px*4 +: 4];
        else
            half_merge_result[px*4 +: 4] = pf_data[32 + px*4 +: 4];
    end
end
// Output logic
// Owns the pixel FIFO contents (pf_data), its fill state (pf_empty), the
// output pixel counter and the pixel/valid outputs. Behaviour by FSM state:
//   S_BLANK            - reset the per-line output state
//   BG/window fetch    - advance pf_empty, reload/shift the FIFO, emit pixels
//   S_OAMRDA / S_OWB   - pause output around an object merge
//   S_SWW              - flush the FIFO for the switch to the window
always @(posedge clk)
begin
if (r_state == S_BLANK) begin
valid <= 1'b0;
h_pix_output <= 8'd0; // Output pointer
h_drop <= reg_scx[2:0]; // Number of leading pixels to discard for fine X scroll
pf_empty <= PF_INITA;
end
else if ((r_state == S_FTIDA) || (r_state == S_FTIDB) || (r_state == S_FRD0A) || (r_state == S_FRD0B) ||
(r_state == S_FRD1A) || (r_state == S_FRD1B) || (r_state == S_FWAITA) || (r_state == S_FWAITB))
begin
// Fill state advances once per completed tile fetch (S_FRD1B):
// INITA -> INITB -> FIN, and EMPTY -> HALF -> FIN. In any other fetch
// state FIN moves on to FULL.
if (r_state == S_FRD1B) begin
if (pf_empty == PF_INITA) pf_empty <= PF_INITB;
if (pf_empty == PF_INITB) pf_empty <= PF_FIN;
if (pf_empty == PF_EMPTY) pf_empty <= PF_HALF;
if (pf_empty == PF_HALF) pf_empty <= PF_FIN;
end else
if (pf_empty == PF_FIN) pf_empty <= PF_FULL; // should NOT wait through end
// If it is in one of the output stages
if (pf_empty == PF_EMPTY) begin
// Just started, no data available
valid <= 1'b0;
end
else if (pf_empty == PF_HALF) begin
valid <= 1'b0;
if (r_state == S_FTIDA) begin
// One batch done, and they can be push into pipeline, but could not be output yet
// We need to be careful not to overwrite the sprites...
pf_data[63:32] <= half_merge_result[31:0];
end
end
// The FIFO shifts in INITB/FULL/FIN, and in INITA only during S_FRD1A/S_FRD1B
else if (((pf_empty == PF_INITA)&&((r_state == S_FRD1A)||(r_state == S_FRD1B)))
||(pf_empty == PF_INITB)||(pf_empty == PF_FULL)||(pf_empty == PF_FIN)) begin
if (r_state == S_FTIDA) begin // reload and shift
if (pf_empty == PF_INITB) begin
// Place the first fetched tile behind 20 zero bits (12 zero bits follow it)
pf_data[63:0] <= {20'b0, current_fetch_result[31:0], 12'b0};
end
else begin // PF_FULL or PF_FIN
// Shift by one entry and load the new tile right behind the 7 remaining entries
pf_data[63:0] <= {pf_data[59:32], current_fetch_result[31:0], 4'b0};
end
end
else begin // just shift
pf_data <= {pf_data[59:0], 4'b0};
end
if (h_drop != 3'd0) begin
// Discard this pixel: the output counter is not advanced
h_drop <= h_drop - 1'd1;
valid <= 0;
end
else begin
// The first 8 counted pixels are output with valid low
if (h_pix_output >= 8)
valid <= 1;
else
valid <= 0;
pixel <= pf_output_pixel;
h_pix_output <= h_pix_output + 1'b1;
end
end
end
else if (r_state == S_OAMRDA) begin
h_pix_output <= h_pix_output - 1'b1; //revert adding
valid <= 1'b0;
end
else if (r_state == S_OWB) begin
h_pix_output <= h_pix_output + 1'b1; //restore adding
// Write the object merge result back into the upper half of the FIFO
pf_data <= {merge_result[31:0], pf_data[31:0]};
valid <= 1'b0;
end
else if (r_state == S_SWW) begin
pf_empty <= PF_EMPTY; // Flush the pipeline
valid <= 1'b0;
end
else begin
// Not even in output stages
valid <= 1'b0;
end
end
// ram_ready is high when ct is 2'b00 or 2'b10
wire ram_ready = ((ct == 2'b00) || (ct == 2'b10));
// High when a VRAM read is requested while ram_ready is low; only referenced
// by commented-out code in the state register block
wire ppu_ram_stall = !ram_ready && vram_rd;
// States from which a new object fetch is entered through S_OWAIT first
// (the address-setup fetch states and S_FWAITA)
wire need_prewait = (r_state == S_FTIDA) || (r_state == S_FRD0A) ||
(r_state == S_FRD1A) || (r_state == S_OFRD0A) || (r_state == S_OFRD1A) ||
(r_state == S_FWAITA);
// 0: the wait cycle was taken before the object fetch; 1: it is taken after it
reg postwait;
// Enter Next State
// and handle object interrupt
// (sorry but I need to backup next state so I could not handle these in the next state logic)
//
// While an object is pending (obj_trigger) during pixel transfer, the FSM is
// diverted into the object fetch states; the fetch state it would have entered
// is kept in r_next_backup and resumed from S_OWB. Otherwise r_state simply
// follows r_next_state.
always @(posedge clk)
begin
if (rst) begin
//h_pix_obj <= 8'b0;
r_state <= 0;
r_next_backup <= 0;
obj_trigger_id <= OBJ_TRIGGER_NOT_FOUND;//not triggered
end
else
begin
if (obj_trigger && (reg_mode == PPU_MODE_PIX_TRANS)) begin
// If already in object rendering stages
if ((r_state == S_OFRD0A)||(r_state == S_OFRD0B)||
(r_state == S_OFRD1A)||(r_state == S_OFRD1B)||(r_state == S_OWAIT)||
(r_state == S_OAMRDA)||(r_state == S_OAMRDB)) begin
r_state <= r_next_state;
end
// Finished one object, but there is more
else if (r_state == S_OWB) begin
// Keep the wait placement (before/after) chosen for the first object
if (postwait)
r_state <= S_OAMRDA;
else
r_state <= S_OWAIT;
obj_trigger_id <= obj_trigger_id_next;
end
// Not rendering object before, start now
else begin
if (need_prewait) begin
// Take the wait cycle first, then read OAM
r_state <= S_OWAIT;
postwait <= 1'b0;
end
else begin
// Read OAM right away; the wait cycle follows the data fetch
r_state <= S_OAMRDA;
postwait <= 1'b1;
end
r_next_backup <= r_next_state; // Fetch state to return to
obj_trigger_id <= obj_trigger_id_next;
end
end
else begin
//h_pix_obj <= h_pix_output + 8'd2;
//if (!ppu_ram_stall)
r_state <= r_next_state;
// Finished one object, and there is no more currently
if (r_state == S_OWB) begin
obj_trigger_id <= OBJ_TRIGGER_NOT_FOUND;
end
end
end
end
// Next State Logic
// Combinational successor of r_state; the state register samples it on posedge.
// Common rules:
//   - LCD disabled sends every state to S_IDLE.
//   - In any BG/window fetch state, reaching the last output pixel ends the
//     line (S_BLANK), a window trigger diverts to S_SWW, otherwise the fetch
//     sequence TID -> RD0 -> RD1 -> (WAIT) continues.
//   - Object states run linearly; the wait cycle sits before or after the
//     fetch depending on postwait, and S_OWB resumes the backed-up state.
always @(*)
begin : next_state_logic
    reg line_done;        // output counter is at the last pixel of the line
    reg h_wrap;           // horizontal counter is at its last value
    reg [4:0] fetch_next; // successor inside the BG/window fetch sequence

    line_done = (h_pix_output == (PPU_H_OUTPUT - 1'b1));
    h_wrap    = (h_count == (PPU_H_TOTAL - 1));

    case (r_state)
        S_FTIDA:  fetch_next = S_FTIDB;
        S_FTIDB:  fetch_next = S_FRD0A;
        S_FRD0A:  fetch_next = S_FRD0B;
        S_FRD0B:  fetch_next = S_FRD1A;
        S_FRD1A:  fetch_next = S_FRD1B;
        // If fifo not full, no wait state is needed
        S_FRD1B:  fetch_next = (pf_empty != PF_FULL) ? (S_FTIDA) : (S_FWAITA);
        S_FWAITA: fetch_next = S_FWAITB;
        S_FWAITB: fetch_next = S_FTIDA;
        default:  fetch_next = S_FTIDA; // not used outside the fetch states
    endcase

    case (r_state)
        S_IDLE:
            r_next_state = ((reg_lcd_en) & (is_in_v_blank)) ? (S_BLANK) : (S_IDLE);
        S_BLANK:
            r_next_state =
                (reg_lcd_en) ? (
                    (is_in_v_blank) ?
                        // V-Blank: leave only at the very end of the frame
                        (((v_count == (PPU_V_TOTAL - 1)) && h_wrap) ? (S_OAMX) : (S_BLANK)) :
                        // H-Blank: start the next line unless it is the first V-Blank line
                        (h_wrap ?
                            ((v_count == (PPU_V_ACTIVE - 1)) ? (S_BLANK) : (S_OAMX)) :
                            (S_BLANK))
                ) : (S_IDLE);
        S_OAMX:
            r_next_state = (reg_lcd_en) ? (S_OAMY) : (S_IDLE);
        S_OAMY:
            r_next_state = (reg_lcd_en) ?
                ((oam_search_count == (PPU_OAM_SEARCH_LENGTH - 1'b1)) ? (S_FTIDA) : (S_OAMX)) :
                (S_IDLE);
        S_FTIDA, S_FTIDB, S_FRD0A, S_FRD0B,
        S_FRD1A, S_FRD1B, S_FWAITA, S_FWAITB:
            r_next_state = (reg_lcd_en) ?
                (line_done ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (fetch_next))) :
                (S_IDLE);
        S_SWW:
            r_next_state = (reg_lcd_en) ? (line_done ? (S_BLANK) : (S_FTIDA)) : (S_IDLE);
        S_OAMRDA: r_next_state = (reg_lcd_en) ? (S_OAMRDB) : (S_IDLE);
        S_OAMRDB: r_next_state = (reg_lcd_en) ? (S_OFRD0A) : (S_IDLE);
        S_OFRD0A: r_next_state = (reg_lcd_en) ? (S_OFRD0B) : (S_IDLE);
        S_OFRD0B: r_next_state = (reg_lcd_en) ? (S_OFRD1A) : (S_IDLE);
        S_OFRD1A: r_next_state = (reg_lcd_en) ? (S_OFRD1B) : (S_IDLE);
        S_OFRD1B: r_next_state = (reg_lcd_en) ? (postwait ? S_OWAIT : S_OWB) : (S_IDLE);
        S_OWAIT:  r_next_state = (reg_lcd_en) ? (postwait ? S_OWB : S_OAMRDA) : (S_IDLE);
        S_OWB:    r_next_state = (reg_lcd_en) ? (r_next_backup) : (S_IDLE);
        default:  r_next_state = S_IDLE;
    endcase
end
// Interrupt
// STAT bit 2 (coincidence flag): registered result of the LY == LYC compare.
// TODO: what's the timing for this?
always @(posedge clk)
begin
    if (rst)
        reg_stat[2] <= 1'b0;
    else
        reg_stat[2] <= (reg_ly == reg_lyc);
end
// Interrupt request generation.
//
// int_vblank_req is set on the clock where the mode becomes V-Blank (mode is
// V-Blank now and was not on the previous clock) and cleared by
// int_vblank_ack. int_lcdc_req is set when any enabled STAT source fires:
//   - LY becomes equal to LYC            (enable: STAT bit 6)
//   - mode becomes OAM search            (enable: STAT bit 5)
//   - mode becomes V-Blank               (enable: STAT bit 4)
//   - mode becomes H-Blank               (enable: STAT bit 3)
// and cleared by int_lcdc_ack. A new request takes priority over an
// acknowledge arriving in the same clock.
//
// reg_ly_last / reg_mode_last hold the previous-clock values used for the
// edge detection. Both are reset: reg_mode_last is reset to V-Blank, the same
// value reg_stat[1:0] takes on reset, so the first comparison after reset is
// deterministic and cannot raise a spurious request from an uninitialized
// previous-mode value.
always @(posedge clk)
begin
    if (rst) begin
        int_vblank_req <= 0;
        int_lcdc_req <= 0;
        reg_ly_last[7:0] <= 0;
        reg_mode_last <= PPU_MODE_V_BLANK;
        //reg_stat[1:0] <= PPU_MODE_V_BLANK;
    end
    else
    begin
        if ((reg_mode == PPU_MODE_V_BLANK)&&(reg_mode_last != PPU_MODE_V_BLANK))
            int_vblank_req <= 1;
        else if (int_vblank_ack)
            int_vblank_req <= 0;

        if (((reg_lyc_int == 1'b1)&&(reg_ly == reg_lyc)&&(reg_ly_last != reg_lyc))||
            ((reg_oam_int == 1'b1)&&(reg_mode == PPU_MODE_OAM_SEARCH)&&(reg_mode_last != PPU_MODE_OAM_SEARCH))||
            ((reg_vblank_int == 1'b1)&&(reg_mode == PPU_MODE_V_BLANK)&&(reg_mode_last != PPU_MODE_V_BLANK))||
            ((reg_hblank_int == 1'b1)&&(reg_mode == PPU_MODE_H_BLANK)&&(reg_mode_last != PPU_MODE_H_BLANK)))
            int_lcdc_req <= 1;
        else if (int_lcdc_ack)
            int_lcdc_req <= 0;

        // Remember this clock's values for the next edge comparison
        reg_ly_last <= reg_ly;
        reg_mode_last <= reg_mode;
    end
end
// Bus RW
// Bus RW - Combinational Read
// Register read mux for 0xFF40-0xFF4B. The data is driven regardless of
// mmio_rd; any address that is not decoded here reads back as 8'hFF.
always @(*)
begin
    case (mmio_a)
        16'hFF40: mmio_dout = reg_lcdc;
        16'hFF41: mmio_dout = reg_stat;
        16'hFF42: mmio_dout = reg_scy;
        16'hFF43: mmio_dout = reg_scx;
        16'hFF44: mmio_dout = reg_ly;
        16'hFF45: mmio_dout = reg_lyc;
        16'hFF46: mmio_dout = reg_dma;
        16'hFF47: mmio_dout = reg_bgp;
        16'hFF48: mmio_dout = reg_obp0;
        16'hFF49: mmio_dout = reg_obp1;
        16'hFF4A: mmio_dout = reg_wy;
        16'hFF4B: mmio_dout = reg_wx;
        default:  mmio_dout = 8'hFF;
    endcase
end
// Bus RW - Sequential Write
// CPU writes to the PPU registers, taken on posedge clk while mmio_wr is high.
// Only STAT[7:3] is writable from the bus; STAT[2:0] is driven by other
// blocks. 0xFF44 (LY) is intentionally not decoded.
always @(posedge clk)
begin
    if (rst) begin
        reg_lcdc      <= 8'h00;
        reg_stat[7:3] <= 5'h00;
        reg_scy       <= 8'h00;
        reg_scx       <= 8'h00;
        reg_lyc       <= 8'h00;
        reg_dma       <= 8'h00;
        reg_bgp       <= 8'hFC;
        reg_obp0      <= 8'h00;
        reg_obp1      <= 8'h00;
        reg_wy        <= 8'h00;
        reg_wx        <= 8'h00;
    end
    else if (mmio_wr) begin
        case (mmio_a)
            16'hFF40: reg_lcdc      <= mmio_din;
            16'hFF41: reg_stat[7:3] <= mmio_din[7:3];
            16'hFF42: reg_scy       <= mmio_din;
            16'hFF43: reg_scx       <= mmio_din;
            //16'hFF44: reg_ly <= mmio_din;
            16'hFF45: reg_lyc       <= mmio_din;
            16'hFF46: reg_dma       <= mmio_din;
            16'hFF47: reg_bgp       <= mmio_din;
            16'hFF48: reg_obp0      <= mmio_din;
            16'hFF49: reg_obp1      <= mmio_din;
            16'hFF4A: reg_wy        <= mmio_din;
            16'hFF4B: reg_wx        <= mmio_din;
            default: ; // address not handled here
        endcase
        // VRAM and OAM access are not handled here
    end
end
// Debug Outputs
// Read-only taps of the scroll registers and the render FSM state.
assign scx   = reg_scx,
       scy   = reg_scy,
       state = r_state;
endmodule