Import VerilogBoy RTL
diff --git a/verilog/rtl/alu.v b/verilog/rtl/alu.v
new file mode 100644
index 0000000..944e11f
--- /dev/null
+++ b/verilog/rtl/alu.v
@@ -0,0 +1,263 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 17:30:26 02/08/2018
+// Module Name: alu
+// Project Name: VerilogBoy
+// Description:
+// The Game Boy ALU.
+// Dependencies:
+//
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+
+module alu(
+ input [7:0] alu_b, // Operand B (second operand of the two-input operations)
+ input [7:0] alu_a, // Operand A (first operand; sole operand of shifts, rotates, SWAP, DAA, CPL)
+ input [2:0] alu_bit_index, // Bit number used by BIT/SET/RES
+ output reg [7:0] alu_result, // 8-bit result
+ input [3:0] alu_flags_in, // Incoming flags, indexed by F_Z/F_N/F_H/F_C
+ output reg [3:0] alu_flags_out, // Resulting flags, same bit layout as alu_flags_in
+ input [4:0] alu_op // Operation select, one of the OP_* codes below
+ );
+ // Purely combinational ALU: everything is computed in the single always @(*) block below.
+ localparam OP_ADD = 5'b00000;
+ localparam OP_ADC = 5'b00001;
+ localparam OP_SUB = 5'b00010;
+ localparam OP_SBC = 5'b00011;
+ localparam OP_AND = 5'b00100;
+ localparam OP_XOR = 5'b00101;
+ localparam OP_OR = 5'b00110;
+ localparam OP_CP = 5'b00111;
+ localparam OP_RLC = 5'b01000;
+ localparam OP_RRC = 5'b01001;
+ localparam OP_RL = 5'b01010;
+ localparam OP_RR = 5'b01011;
+ localparam OP_SLA = 5'b01100;
+ localparam OP_SRA = 5'b01101;
+ localparam OP_SWAP= 5'b01110;
+ localparam OP_SRL = 5'b01111;
+ localparam OP_LF = 5'b10000; // Load Flags: alu_flags_in is placed in alu_result[7:4]
+ // unused 5'b10001
+ localparam OP_SF = 5'b10010; // Save Flags: alu_b[7:4] is placed in alu_flags_out
+ // unused 5'b10011
+ localparam OP_DAA = 5'b10100;
+ localparam OP_CPL = 5'b10101;
+ localparam OP_SCF = 5'b10110;
+ localparam OP_CCF = 5'b10111;
+ // unused 5'b11000
+ // unused 5'b11001
+ // unused 5'b11010
+ // unused 5'b11011
+ // unused 5'b11100
+ localparam OP_BIT = 5'b11101;
+ localparam OP_RES = 5'b11110;
+ localparam OP_SET = 5'b11111;
+ // Bit positions of the Z/N/H/C flags inside alu_flags_in / alu_flags_out
+ localparam F_Z = 2'd3;
+ localparam F_N = 2'd2;
+ localparam F_H = 2'd1;
+ localparam F_C = 2'd0;
+ // Scratch values used inside the combinational block
+ reg [8:0] intermediate_result1, intermediate_result2; // DAA working values, bit 8 = carry/borrow out
+ reg [4:0] result_low; // low-nibble sum/difference, bit 4 = half carry/borrow
+ reg [4:0] result_high; // high-nibble sum/difference, bit 4 = carry/borrow
+ wire [2:0] bit_index;
+ reg carry; // carry-in actually applied by ADC/SBC (0 for the other operations)
+
+ assign bit_index = alu_bit_index;
+ // Flags and scratch values get defaults first; alu_result has none and is fully assigned by every case branch.
+ always@(*) begin
+ alu_flags_out = 4'b0;
+ carry = 1'b0;
+ result_low = 5'd0;
+ result_high = 5'd0;
+ intermediate_result1 = 9'd0;
+ intermediate_result2 = 9'd0;
+ case (alu_op)
+ OP_ADD, OP_ADC: begin // A + B (+ carry-in for ADC), added per nibble to obtain H
+ carry = (alu_op == OP_ADC) ? alu_flags_in[F_C] : 1'b0;
+ result_low = {1'b0, alu_a[3:0]} + {1'b0, alu_b[3:0]} +
+ {4'b0, carry};
+ alu_flags_out[F_H] = result_low[4]; // carry out of bit 3
+ result_high = {1'b0, alu_a[7:4]} +
+ {1'b0, alu_b[7:4]} +
+ {4'b0, result_low[4]};
+ alu_flags_out[F_C] = result_high[4]; // carry out of bit 7
+ alu_result = {result_high[3:0], result_low[3:0]};
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_SUB, OP_SBC, OP_CP: begin // A - B (- carry-in for SBC); CP only updates the flags
+ alu_flags_out[F_N] = 1'b1;
+ carry = (alu_op == OP_SBC) ? alu_flags_in[F_C] : 1'b0;
+ result_low = {1'b0, alu_a[3:0]} +
+ ~({1'b0, alu_b[3:0]} +
+ {4'b0, carry}) + 5'b1;
+ alu_flags_out[F_H] = result_low[4]; // borrow out of the low nibble
+ result_high = {1'b0, alu_a[7:4]} +
+ ~({1'b0, alu_b[7:4]}) +
+ {4'b0, ~result_low[4]}; // the +1 of the two's complement is dropped when the low nibble borrowed
+ alu_flags_out[F_C] = result_high[4]; // borrow out of bit 7
+ alu_result = (alu_op == OP_CP) ? (alu_a[7:0]) : {result_high[3:0], result_low[3:0]}; // CP returns A unchanged
+ alu_flags_out[F_Z] = ({result_high[3:0], result_low[3:0]} == 8'd0) ? 1'b1 : 1'b0; // Z reflects the difference, also for CP
+ end
+ OP_AND: begin
+ alu_flags_out[F_H] = 1'b1; // AND always sets H
+ alu_result = alu_a & alu_b;
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_OR: begin
+ alu_result = alu_a | alu_b;
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_XOR: begin
+ alu_result = alu_a ^ alu_b;
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_DAA: begin // Decimal adjust of A; direction is chosen by the incoming N flag
+ if (~alu_flags_in[F_N]) begin // N clear: adjust upwards (+0x06 and/or +0x60)
+ if (alu_flags_in[F_H] |
+ ((alu_a & 8'h0f) > 8'h9)) begin
+ intermediate_result1 = {1'b0, alu_a} + 9'h6;
+ end
+ else begin
+ intermediate_result1 = {1'b0, alu_a};
+ end
+ if (alu_flags_in[F_C] | (intermediate_result1 > 9'h9f)) begin
+ intermediate_result2 = intermediate_result1 + 9'h60;
+ end
+ else begin
+ intermediate_result2 = intermediate_result1;
+ end
+ end
+ else begin // N set: adjust downwards (-0x06 and/or -0x60), driven only by H and C
+ if (alu_flags_in[F_H]) begin
+ intermediate_result1 = {1'b0, (alu_a - 8'h6)};
+ end
+ else begin
+ intermediate_result1 = {1'b0, alu_a};
+ end
+ if (alu_flags_in[F_C]) begin
+ intermediate_result2 = intermediate_result1 - 9'h60;
+ end
+ else begin
+ intermediate_result2 = intermediate_result1;
+ end
+ end // else: !if(alu_flags_in[F_N])
+
+ alu_result = intermediate_result2[7:0];
+
+ alu_flags_out[F_N] = alu_flags_in[F_N];
+ alu_flags_out[F_H] = 1'b0;
+ alu_flags_out[F_C] = intermediate_result2[8] ? 1'b1 : // bit 8 of the adjusted value sets C ...
+ alu_flags_in[F_C]; // ... otherwise the incoming C is kept
+ alu_flags_out[F_Z] = (intermediate_result2[7:0] == 8'd0) ?
+ 1'b1 : 1'b0;
+ end
+ OP_CPL: begin // Complement A; Z and C pass through, N and H are set
+ alu_flags_out[F_Z] = alu_flags_in[F_Z];
+ alu_flags_out[F_N] = 1'b1;
+ alu_flags_out[F_H] = 1'b1;
+ alu_flags_out[F_C] = alu_flags_in[F_C];
+ alu_result = ~alu_a;
+ end
+ OP_CCF: begin // Complement the carry flag; result is operand B passed through
+ alu_flags_out[F_Z] = alu_flags_in[F_Z];
+ alu_flags_out[F_N] = 1'b0;
+ alu_flags_out[F_H] = 1'b0;
+ alu_flags_out[F_C] = ~alu_flags_in[F_C];
+ alu_result = alu_b;
+ end
+ OP_SCF: begin // Set the carry flag; result is operand B passed through
+ alu_flags_out[F_Z] = alu_flags_in[F_Z];
+ alu_flags_out[F_N] = 1'b0;
+ alu_flags_out[F_H] = 1'b0;
+ alu_flags_out[F_C] = 1'b1;
+ alu_result = alu_b;
+ end
+ OP_RLC: begin // Rotate left, bit 7 wraps into bit 0 and into C
+ alu_result[0] = alu_a[7];
+ alu_result[7:1] = alu_a[6:0];
+ alu_flags_out[F_C] = alu_a[7];
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_RL: begin // Rotate left through carry: old C enters bit 0, bit 7 becomes C
+ alu_result[0] = alu_flags_in[F_C];
+ alu_result[7:1] = alu_a[6:0];
+ alu_flags_out[F_C] = alu_a[7];
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_RRC: begin // Rotate right, bit 0 wraps into bit 7 and into C
+ alu_result[7] = alu_a[0];
+ alu_result[6:0] = alu_a[7:1];
+ alu_flags_out[F_C] = alu_a[0];
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_RR: begin // Rotate right through carry: old C enters bit 7, bit 0 becomes C
+ alu_result[7] = alu_flags_in[F_C];
+ alu_result[6:0] = alu_a[7:1];
+ alu_flags_out[F_C] = alu_a[0];
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_SLA: begin // Shift left, 0 enters bit 0, bit 7 becomes C
+ alu_result[7:1] = alu_a[6:0];
+ alu_result[0] = 1'b0;
+ alu_flags_out[F_C] = alu_a[7];
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_SRA: begin // Shift right keeping bit 7, bit 0 becomes C
+ alu_result[7] = alu_a[7];
+ alu_result[6:0] = alu_a[7:1];
+ alu_flags_out[F_C] = alu_a[0];
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_SRL: begin // Shift right, 0 enters bit 7, bit 0 becomes C
+ alu_result[7] = 1'b0;
+ alu_result[6:0] = alu_a[7:1];
+ alu_flags_out[F_C] = alu_a[0];
+ alu_flags_out[F_Z] = (alu_result == 8'd0) ? 1'b1 : 1'b0;
+ end
+ OP_BIT: begin
+ // Bit index must be in data0[5:3] (NOTE(review): no data0 in this module; the index arrives on alu_bit_index)
+ alu_flags_out[F_C] = alu_flags_in[F_C];
+ alu_flags_out[F_H] = 1'b1;
+ alu_flags_out[F_N] = 1'b0;
+ alu_flags_out[F_Z] = ~alu_a[bit_index]; // Z is set when the tested bit of A is 0
+ alu_result = alu_b; // result is operand B passed through
+ end
+ OP_SET: begin // A with the selected bit forced to 1; flags pass through
+ alu_flags_out = alu_flags_in;
+ alu_result = alu_a;
+ alu_result[bit_index] = 1'b1;
+ end
+ OP_RES: begin // A with the selected bit forced to 0; flags pass through
+ alu_flags_out = alu_flags_in;
+ alu_result = alu_a;
+ alu_result[bit_index] = 1'b0;
+ end
+ OP_SWAP: begin // Exchange the two nibbles of A; only Z can be set
+ alu_flags_out[F_Z] = (alu_a == 8'd0) ? 1'd1: 1'd0;
+ alu_flags_out[F_H] = 1'b0;
+ alu_flags_out[F_C] = 1'b0;
+ alu_flags_out[F_N] = 1'b0;
+ alu_result = {alu_a[3:0], alu_a[7:4]};
+ end
+ OP_SF: begin // flags_out takes B[7:4]; A passes through as the result
+ alu_flags_out = alu_b[7:4];
+ alu_result = alu_a;
+ end
+ OP_LF: begin // result is {flags_in, 0000}; flags_out stays at its default of 0
+ alu_result = {alu_flags_in, 4'b0};
+ end
+ default: begin // unused opcodes: pass B and the flags through
+ alu_result = alu_b;
+ alu_flags_out = alu_flags_in;
+ end
+ endcase
+ end
+
+endmodule
diff --git a/verilog/rtl/bootrom.mif b/verilog/rtl/bootrom.mif
new file mode 100644
index 0000000..77d25b3
--- /dev/null
+++ b/verilog/rtl/bootrom.mif
@@ -0,0 +1,256 @@
+31
+FE
+FF
+AF
+21
+FF
+9F
+32
+CB
+7C
+20
+FB
+3E
+00
+E0
+42
+3E
+91
+E0
+40
+3E
+01
+C3
+FE
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+00
+E0
+50
diff --git a/verilog/rtl/boy.v b/verilog/rtl/boy.v
new file mode 100644
index 0000000..69c2d65
--- /dev/null
+++ b/verilog/rtl/boy.v
@@ -0,0 +1,459 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 17:30:26 02/08/2018
+// Module Name: boy
+// Project Name: VerilogBoy
+// Description:
+// VerilogBoy portable top level file. This file connects the CPU and
+// all the peripherals in the LR35902 together.
+// Dependencies:
+// cpu
+// Additional Comments:
+// Hardware specific code should be implemented outside of this file,
+// so normally in an implementation, this will not be the top level.
+////////////////////////////////////////////////////////////////////////////////
+
+module boy(
+ input wire rst, // Reset Input (labelled async; the logic in this file only samples it on posedge clk)
+ input wire clk, // 4.19MHz Clock Input
+ output wire phi, // 1.05MHz Reference Clock Output
+ // Cartridge interface
+ output wire [15:0] a, // Address Bus
+ output wire [7:0] dout, // Data Bus
+ input wire [7:0] din, // Read data from the external bus
+ output wire wr, // Write Enable
+ output wire rd, // Read Enable
+ // Keyboard input
+ input wire [7:0] key, // Key inputs; [7:4] and [3:0] are selected through the 0xFF00 register
+ // LCD output
+ output wire hs, // Horizontal Sync Output
+ output wire vs, // Vertical Sync Output
+ output wire cpl, // Pixel Data Latch
+ output wire [1:0] pixel, // Pixel Data
+ output wire valid, // Driven by the PPU's valid output
+ // Sound output
+ output reg [15:0] left, // Registered copy of the sound unit's left output
+ output reg [15:0] right, // Registered copy of the sound unit's right output
+ // Debug interface
+ output wire done, // Driven by the CPU's done output
+ output wire fault // Driven by the CPU's fault output
+ );
+
+ // CPU
+ wire cpu_rd; // CPU Read Enable
+ wire cpu_wr; // CPU Write Enable
+ reg [7:0] cpu_din; // CPU Data Bus, to CPU
+ wire [7:0] cpu_dout; // CPU Data Bus, from CPU
+ wire [15:0] cpu_a; // CPU Address Bus
+ wire [4:0] cpu_int_en; // CPU Interrupt Enable input
+ wire [4:0] cpu_int_flags_in; // CPU Interrupt Flags input
+ wire [4:0] cpu_int_flags_out; // CPU Interrupt Flags output
+ wire [1:0] cpu_ct; // 0-3 T cycle number inside one M cycle
+
+ cpu cpu(
+ .clk(clk),
+ .rst(rst),
+ .phi(phi),
+ .ct(cpu_ct),
+ .a(cpu_a),
+ .dout(cpu_dout),
+ .din(cpu_din),
+ .rd(cpu_rd),
+ .wr(cpu_wr),
+ .int_en(cpu_int_en),
+ .int_flags_in(cpu_int_flags_in),
+ .int_flags_out(cpu_int_flags_out),
+ .key_in(key),
+ .done(done),
+ .fault(fault));
+
+ // High RAM
+ reg [7:0] high_ram [0:127];
+ wire high_ram_rd = cpu_rd;
+ reg high_ram_wr;
+ wire [6:0] high_ram_a = cpu_a[6:0]; // 128 entries, addressed by the low 7 address bits
+ wire [7:0] high_ram_din = cpu_dout;
+ reg [7:0] high_ram_dout;
+ always @(posedge clk) begin
+ if (high_ram_wr)
+ high_ram[high_ram_a] <= high_ram_din;
+ else
+ high_ram_dout <= (high_ram_rd) ? high_ram[high_ram_a] : 8'bx; // registered read; X when rd is low
+ end
+
+ //DMA
+ wire dma_rd; // DMA Memory Read Enable
+ wire dma_wr; // DMA Memory Write Enable
+ wire [15:0] dma_a; // Main Address Bus
+ reg [7:0] dma_din; // Main Data Bus
+ wire [7:0] dma_dout;
+ wire [7:0] dma_mmio_dout;
+ reg dma_mmio_wr; // actually wire
+ wire dma_occupy_extbus; // 0x0000 - 0x7FFF, 0xA000 - 0xFFFF
+ wire dma_occupy_vidbus; // 0x8000 - 0x9FFF
+ wire dma_occupy_oambus; // 0xFE00 - 0xFE9F
+ dma dma(
+ .clk(clk),
+ .rst(rst),
+ .dma_rd(dma_rd),
+ .dma_wr(dma_wr),
+ .dma_a(dma_a),
+ .dma_din(dma_din),
+ .dma_dout(dma_dout),
+ .mmio_wr(dma_mmio_wr),
+ .mmio_din(cpu_dout),
+ .mmio_dout(dma_mmio_dout),
+ .dma_occupy_extbus(dma_occupy_extbus),
+ .dma_occupy_vidbus(dma_occupy_vidbus),
+ .dma_occupy_oambus(dma_occupy_oambus)
+ );
+
+ // Interrupt
+ // int_req is the request signal from peripherals.
+ // When an interrupt is generated, the peripheral should send a pulse on
+ // the int_req for exactly one clock (using 4MHz clock).
+ wire [4:0] int_req;
+
+ wire int_key_req;
+ wire int_serial_req;
+ wire int_serial_ack;
+ wire int_tim_req;
+ wire int_tim_ack;
+ wire int_lcdc_req;
+ wire int_lcdc_ack;
+ wire int_vblank_req;
+ wire int_vblank_ack;
+
+ assign int_req[4] = int_key_req;
+ assign int_req[3] = int_serial_req;
+ assign int_req[2] = int_tim_req;
+ assign int_req[1] = int_lcdc_req;
+ assign int_req[0] = int_vblank_req;
+
+ //reg reg_ie_rd;
+ reg reg_ie_wr;
+ reg [4:0] reg_ie;
+ wire [4:0] reg_ie_din = cpu_dout[4:0];
+ wire [4:0] reg_ie_dout;
+ always @(posedge clk) begin // NOTE(review): no rst term, unlike brom_disable/keypad_high below - confirm intended
+ if (reg_ie_wr)
+ reg_ie <= reg_ie_din;
+ end
+
+ assign reg_ie_dout = reg_ie;
+ assign cpu_int_en = reg_ie_dout;
+
+ // Interrupt may be manually triggered
+ // int_req should only stay high for only 1 cycle for each interrupt
+ //reg reg_if_rd;
+ reg reg_if_wr;
+ reg [4:0] reg_if;
+ wire [4:0] reg_if_din = cpu_dout[4:0];
+ wire [4:0] reg_if_dout;
+ always @(posedge clk) begin // NOTE(review): no rst term here either - confirm intended
+ if (reg_if_wr)
+ reg_if <= reg_if_din | int_req; // CPU write, merged with requests arriving in the same cycle
+ else
+ reg_if <= cpu_int_flags_out | int_req; // otherwise follow the flags returned by the CPU plus new requests
+ end
+ assign reg_if_dout = reg_if | int_req; // new requests are visible in the cycle they arrive
+ assign cpu_int_flags_in = reg_if_dout;
+
+ assign int_serial_ack = reg_if[3]; // each ack is simply the stored IF bit
+ assign int_tim_ack = reg_if[2];
+ assign int_lcdc_ack = reg_if[1];
+ assign int_vblank_ack = reg_if[0];
+
+ // PPU
+ wire [7:0] ppu_mmio_dout;
+ reg ppu_mmio_wr; // actually wire
+ wire [15:0] vram_a;
+ wire [7:0] vram_dout;
+ //wire [7:0] vram_din;
+ wire vram_rd;
+ wire vram_wr;
+ reg vram_cpu_wr;
+ wire [15:0] oam_a;
+ wire [7:0] oam_dout;
+ wire [7:0] oam_din;
+ wire oam_rd;
+ wire oam_wr;
+ reg oam_cpu_wr;
+
+ assign vram_a = (dma_occupy_vidbus) ? (dma_a) : (cpu_a); // DMA takes over the VRAM port while it occupies it
+ //assign vram_din = (dma_occupy_vidbus) ? (dma_dout) : (cpu_dout);
+ assign vram_rd = (dma_occupy_vidbus) ? (dma_rd) : (cpu_rd);
+ assign vram_wr = (dma_occupy_vidbus) ? (1'b0) : (vram_cpu_wr);
+ assign oam_a = (dma_occupy_oambus) ? (dma_a) : (cpu_a); // DMA takes over the OAM port while it occupies it
+ assign oam_din = (dma_occupy_oambus) ? (dma_dout) : (cpu_dout);
+ assign oam_rd = (dma_occupy_oambus) ? (1'b0) : (cpu_rd);
+ assign oam_wr = (dma_occupy_oambus) ? (dma_wr) : (oam_cpu_wr);
+
+ ppu ppu(
+ .clk(clk),
+ .rst(rst),
+ .mmio_a(cpu_a), // mmio bus is always accessible to CPU
+ .mmio_dout(ppu_mmio_dout),
+ .mmio_din(cpu_dout),
+ .mmio_rd(cpu_rd),
+ .mmio_wr(ppu_mmio_wr),
+ .vram_a(vram_a),
+ .vram_dout(vram_dout),
+ .vram_din(cpu_dout), // DMA never writes to VRAM
+ .vram_rd(vram_rd),
+ .vram_wr(vram_wr),
+ .oam_a(oam_a),
+ .oam_dout(oam_dout),
+ .oam_din(oam_din),
+ .oam_rd(oam_rd),
+ .oam_wr(oam_wr),
+ .int_vblank_req(int_vblank_req),
+ .int_lcdc_req(int_lcdc_req),
+ .int_vblank_ack(int_vblank_ack),
+ .int_lcdc_ack(int_lcdc_ack),
+ .cpl(cpl), // Pixel clock
+ .pixel(pixel), // Pixel Data (2bpp)
+ .valid(valid),
+ .hs(hs), // Horizontal Sync, Low Active
+ .vs(vs), // Vertical Sync, Low Active
+ // Ignore the debugging interface
+ /* verilator lint_off PINCONNECTEMPTY */
+ .scx(),
+ .scy(),
+ .state()
+ /* verilator lint_on PINCONNECTEMPTY */
+ );
+
+ // Timer
+ wire [7:0] timer_dout;
+ reg timer_wr; // actually wire
+
+ timer timer(
+ .clk(clk),
+ .rst(rst),
+ .ct(cpu_ct),
+ .a(cpu_a),
+ .dout(timer_dout),
+ .din(cpu_dout),
+ .rd(cpu_rd),
+ .wr(timer_wr),
+ .int_tim_req(int_tim_req),
+ .int_tim_ack(int_tim_ack)
+ );
+
+ // Dummy Serial
+ wire [7:0] serial_dout;
+ reg serial_wr; // actually wire
+
+ serial serial(
+ .clk(clk),
+ .rst(rst),
+ .a(cpu_a),
+ .dout(serial_dout),
+ .din(cpu_dout),
+ .rd(cpu_rd),
+ .wr(serial_wr),
+ .int_serial_req(int_serial_req),
+ .int_serial_ack(int_serial_ack)
+ );
+
+ // Sound
+ wire [7:0] sound_dout;
+ reg sound_wr; // wire
+ wire [15:0] left_pre;
+ wire [15:0] right_pre;
+
+ sound sound(
+ .clk(clk),
+ .rst(rst),
+ .a(cpu_a),
+ .dout(sound_dout),
+ .din(cpu_dout),
+ .rd(cpu_rd),
+ .wr(sound_wr),
+ .left(left_pre),
+ .right(right_pre),
+ // Ignore the debugging signals
+ /* verilator lint_off PINCONNECTEMPTY */
+ .ch1_level(),
+ .ch2_level(),
+ .ch3_level(),
+ .ch4_level()
+ /* verilator lint_on PINCONNECTEMPTY */
+ );
+
+ always @(posedge clk) begin // register the sound outputs once before they leave the module
+ left <= left_pre;
+ right <= right_pre;
+ end
+
+ // Boot ROM Enable Register
+ reg brom_disable;
+ reg brom_disable_wr; // actually wire
+ always @(posedge clk) begin
+ if (rst)
+ brom_disable <= 1'b0;
+ else
+ if (brom_disable_wr && (!brom_disable)) // writes are ignored once set; only rst clears it
+ brom_disable <= cpu_dout[0];
+ end
+
+ wire [7:0] brom_dout;
+ brom brom(
+ .a(cpu_a[7:0]),
+ .d(brom_dout)
+ );
+
+ // Work RAM
+ wire [7:0] wram_dout;
+ wire [12:0] wram_a;
+ wire wram_wr;
+ reg wram_cpu_wr; // actually wire
+
+ assign wram_a = (dma_occupy_extbus) ? (dma_a[12:0]) : (cpu_a[12:0]);
+ assign wram_wr = (dma_occupy_extbus) ? (1'b0) : (wram_cpu_wr);
+
+ singleport_ram #(
+ .WORDS(8192)
+ ) br_wram (
+ .clka(clk),
+ .wea(wram_wr),
+ .addra(wram_a),
+ .dina(cpu_dout), // DMA never writes to Work RAM
+ .douta(wram_dout)
+ );
+
+ // Keypad
+ wire [7:0] keypad_reg;
+ reg keypad_reg_wr; // actually wire
+ reg [1:0] keypad_high; // bits [5:4] of the 0xFF00 register
+ always @(posedge clk) begin
+ if (rst)
+ keypad_high <= 2'b11;
+ else
+ if (keypad_reg_wr)
+ keypad_high <= cpu_dout[5:4];
+ end
+ assign keypad_reg[7:6] = 2'b11;
+ assign keypad_reg[5:4] = keypad_high[1:0];
+ assign keypad_reg[3:0] = // inverted OR of the key groups whose select bit is 1
+ ~(((keypad_high[1] == 1'b1) ? (key[7:4]) : 4'h0) |
+ ((keypad_high[0] == 1'b1) ? (key[3:0]) : 4'h0));
+ assign int_key_req = (keypad_reg[3:0] != 4'hf) ? (1'b1) : (1'b0); // NOTE(review): level, not the one-clock pulse asked for above - confirm
+
+ // External Bus
+ reg ext_cpu_wr; // wire
+ assign a = (dma_occupy_extbus) ? (dma_a) : (cpu_a);
+ assign dout = cpu_dout; // DMA never writes to external bus
+ assign wr = (dma_occupy_extbus) ? (1'b0) : (ext_cpu_wr);
+ assign rd = (dma_occupy_extbus) ? (dma_rd) : (cpu_rd);
+
+ // Bus Multiplexing, CPU: decode cpu_a, route cpu_wr to one target and select the read data
+ always @(*) begin
+ reg_ie_wr = 1'b0; // every write strobe defaults to inactive
+ reg_if_wr = 1'b0;
+ keypad_reg_wr = 1'b0;
+ timer_wr = 1'b0;
+ serial_wr = 1'b0;
+ dma_mmio_wr = 1'b0;
+ brom_disable_wr = 1'b0;
+ high_ram_wr = 1'b0;
+ sound_wr = 1'b0;
+ ppu_mmio_wr = 1'b0;
+ vram_cpu_wr = 1'b0;
+ oam_cpu_wr = 1'b0;
+ wram_cpu_wr = 1'b0;
+ ext_cpu_wr = 1'b0;
+ // -- These are exclusive to CPU --
+ if (cpu_a == 16'hffff) begin // 0xFFFF - IE
+ //reg_ie_rd = bus_rd;
+ reg_ie_wr = cpu_wr;
+ cpu_din = {3'b0, reg_ie_dout};
+ end
+ else if (cpu_a == 16'hff0f) begin // 0xFF0F - IF
+ //reg_if_rd = bus_rd;
+ reg_if_wr = cpu_wr;
+ cpu_din = {3'b111, reg_if_dout};
+ end
+ else if (cpu_a == 16'hff00) begin // 0xFF00 - Keypad
+ keypad_reg_wr = cpu_wr;
+ cpu_din = keypad_reg;
+ end
+ else if ((cpu_a == 16'hff04) || (cpu_a == 16'hff05) || // Timer
+ (cpu_a == 16'hff06) || (cpu_a == 16'hff07)) begin
+ timer_wr = cpu_wr;
+ cpu_din = timer_dout;
+ end
+ else if ((cpu_a == 16'hff01) || (cpu_a == 16'hff02)) begin // Serial
+ serial_wr = cpu_wr;
+ cpu_din = serial_dout;
+ end
+ else if (cpu_a == 16'hff46) begin // 0xFF46 - DMA
+ dma_mmio_wr = cpu_wr;
+ cpu_din = dma_mmio_dout;
+ end
+ else if (cpu_a == 16'hff50) begin // 0xFF50 - BROM DISABLE
+ brom_disable_wr = cpu_wr;
+ cpu_din = {7'b0, brom_disable};
+ end
+ else if (cpu_a >= 16'hff80) begin // 0xFF80 - High RAM (0xFFFF is already taken by IE above)
+ high_ram_wr = cpu_wr;
+ cpu_din = high_ram_dout;
+ end
+ else if ((cpu_a >= 16'hff10 && cpu_a <= 16'hff1e) ||
+ (cpu_a >= 16'hff20 && cpu_a <= 16'hff26) ||
+ (cpu_a >= 16'hff30 && cpu_a <= 16'hff3f)) begin // Sound
+ sound_wr = cpu_wr;
+ cpu_din = sound_dout;
+ end
+ else if (cpu_a >= 16'hff40 && cpu_a <= 16'hff4b) begin // PPU MMIO
+ ppu_mmio_wr = cpu_wr;
+ cpu_din = ppu_mmio_dout;
+ end
+ else if ((cpu_a <= 16'h00ff) && (!brom_disable)) begin // Boot ROM (read only; no write strobe is raised)
+ cpu_din = brom_dout;
+ end
+ // -- These are shared between CPU and DMA --
+ else if (cpu_a >= 16'h8000 && cpu_a <= 16'h9fff) begin // VRAM
+ vram_cpu_wr = cpu_wr;
+ cpu_din = (dma_occupy_vidbus) ? (8'hff) : (vram_dout); // CPU reads 0xFF while DMA occupies the bus
+ end
+ else if (cpu_a >= 16'hfe00 && cpu_a <= 16'hfe9f) begin // OAM
+ oam_cpu_wr = cpu_wr;
+ cpu_din = (dma_occupy_oambus) ? (8'hff) : (oam_dout);
+ end
+ else if ((cpu_a >= 16'hc000 && cpu_a <= 16'hdfff) ||
+ (cpu_a >= 16'he000 && cpu_a <= 16'hfdff)) begin // WRAM
+ wram_cpu_wr = cpu_wr;
+ cpu_din = (dma_occupy_extbus) ? (8'hff) : (wram_dout);
+ end
+ else if ((cpu_a <= 16'h7fff) ||
+ (cpu_a >= 16'ha000 && cpu_a <= 16'hbfff)) begin // External
+ ext_cpu_wr = cpu_wr;
+ cpu_din = (dma_occupy_extbus) ? (8'hff) : (din);
+ end
+ else begin
+ // Unmapped area
+ cpu_din = 8'hff;
+ end
+ end
+
+ // Bus Multiplexing, DMA: select the DMA read data from the region dma_a falls in
+ always @(*) begin
+ if (dma_a >= 16'h8000 && dma_a <= 16'h9fff) begin // VRAM
+ dma_din = vram_dout;
+ end
+ else if ((dma_a >= 16'hc000 && dma_a <= 16'hdfff) ||
+ (dma_a >= 16'he000 && dma_a <= 16'hfdff)) begin // WRAM
+ dma_din = wram_dout;
+ end
+ else begin
+ dma_din = din; // everything else is read from the external bus
+ end
+ end
+
+endmodule
diff --git a/verilog/rtl/brom.v b/verilog/rtl/brom.v
new file mode 100644
index 0000000..1b6896b
--- /dev/null
+++ b/verilog/rtl/brom.v
@@ -0,0 +1,35 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer:
+//
+// Create Date: 21:10:17 02/09/2018
+// Design Name:
+// Module Name: brom
+// Project Name:
+// Target Devices:
+// Tool versions:
+// Description:
+//
+// Dependencies:
+//
+// Revision:
+// Revision 0.01 - File Created
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+module brom(
+ input [7:0] a, // Byte address into the boot ROM
+ output [7:0] d // Byte stored at address a (combinational read)
+ );
+ // Boot ROM: a 256 x 8 array preloaded from "bootrom.mif" by the initial block.
+ reg [7:0] brom_array [0:255]; // 256 Bytes BROM array
+
+ initial begin
+ $readmemh("bootrom.mif", brom_array, 0, 255); // hex image, fills addresses 0 through 255
+ end
+
+ assign d = brom_array[a];
+
+endmodule
diff --git a/verilog/rtl/clk_div.v b/verilog/rtl/clk_div.v
new file mode 100644
index 0000000..715bfed
--- /dev/null
+++ b/verilog/rtl/clk_div.v
@@ -0,0 +1,35 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 09:50:37 04/07/2018
+// Module Name: clk_div
+// Project Name: VerilogBoy
+// Description:
+//
+// Dependencies:
+//
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+module clk_div(
+ input i, // Input clock
+ output reg o = 0 // Divided clock, starts at 0
+ );
+ // o toggles once every DIV/2 rising edges of i, so an even DIV gives an output period of DIV input cycles.
+ parameter WIDTH = 15, DIV = 1000; // WIDTH must be wide enough for counter to reach DIV/2 - 1
+
+ reg [WIDTH - 1:0] counter = 0; // counts rising edges of i since the last toggle
+
+ always @(posedge i)
+ begin
+ if (counter == (DIV / 2 - 1)) begin // half period elapsed: flip the output and restart the count
+ o <= ~o;
+ counter <= 0;
+ end
+ else
+ counter <= counter + 1'b1;
+ end
+endmodule
diff --git a/verilog/rtl/common.v b/verilog/rtl/common.v
new file mode 100644
index 0000000..f71c77f
--- /dev/null
+++ b/verilog/rtl/common.v
@@ -0,0 +1,93 @@
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 17:30:26 02/08/2018
+// Module Name: None
+// Project Name: VerilogBoy
+// Description:
+// Common definitions for VerilogBoy. Use as a header inclusion.
+// Dependencies:
+//
+// Additional Comments:
+// It is also used in Verilated simulation
+//////////////////////////////////////////////////////////////////////////////////
+
+
+`define ALU_ADD 3'b000
+`define ALU_ADC 3'b001
+`define ALU_SUB 3'b010
+`define ALU_SBC 3'b011
+`define ALU_AND 3'b100
+`define ALU_XOR 3'b101
+`define ALU_OR 3'b110
+`define ALU_CP 3'b111
+// Interrupt bit numbers. NOTE(review): boy.v wires int_req[0] to V-blank and int_req[1] to LCDC - confirm these names.
+`define INT_LCDC 0
+`define INT_STAT 1
+`define INT_TIMER 2
+`define INT_SERIAL 3
+`define INT_JOYPAD 4
+// Field encodings for the control signals (ALU source/op/destination, PC, register file, bus, counter, flags).
+`define ALU_SRC_A_ACC 2'b00
+`define ALU_SRC_A_PC 2'b01
+`define ALU_SRC_A_REG 2'b10
+`define ALU_SRC_A_DB 2'b11
+`define ALU_SRC_B_ACC 3'b000
+`define ALU_SRC_B_CARRY 3'b001
+`define ALU_SRC_B_ZERO 3'b010
+`define ALU_SRC_B_ONE 3'b011
+`define ALU_SRC_B_H 3'b100
+`define ALU_SRC_B_L 3'b101
+`define ALU_SRC_B_ABSIMM 3'b110
+`define ALU_SRC_B_IMM 3'b111
+`define ALU_OP_PREFIX_NORMAL 2'b00
+`define ALU_OP_PREFIX_SHIFT_ROTATE 2'b01
+`define ALU_OP_PREFIX_SPECIAL 2'b10
+`define ALU_OP_PREFIX_CB 2'b11
+`define ALU_OP_SRC_INSTR_5TO3 2'b00
+`define ALU_OP_SRC_INSTR_7TO6 2'b01
+`define ALU_OP_SRC_ADD_FTOR 2'b10
+`define ALU_OP_SRC_SUB_ATOF 2'b11
+`define ALU_OP_SIGNED_FORCE 1'b1
+`define ALU_OP_SIGNED_AUTO 1'b0
+`define ALU_DST_ACC 2'b00
+`define ALU_DST_PC 2'b01
+`define ALU_DST_REG 2'b10
+`define ALU_DST_DB 2'b11
+`define PC_SRC_REG 2'b00
+`define PC_SRC_RST 2'b01
+`define PC_SRC_TEMP 2'b10
+`define PC_WRITE_ENABLE 1'b1
+`define RF_SEL_B 3'b000
+`define RF_SEL_C 3'b001
+`define RF_SEL_D 3'b010
+`define RF_SEL_E 3'b011
+`define RF_SEL_H 3'b100
+`define RF_SEL_L 3'b101
+`define RF_SEL_SP_H 3'b110
+`define RF_SEL_SP_L 3'b111
+`define RF_SEL_BC 3'b001
+`define RF_SEL_DE 3'b011
+`define RF_SEL_HL 3'b101
+`define RF_SEL_SP 3'b111
+`define BUS_OP_IDLE 2'b00
+`define BUS_OP_IF 2'b01
+`define BUS_OP_WRITE 2'b10
+`define BUS_OP_READ 2'b11
+`define DB_SRC_ACC 2'b00
+`define DB_SRC_ALU 2'b01
+`define DB_SRC_REG 2'b10
+`define DB_SRC_DB 2'b11
+`define AB_SRC_PC 2'b00
+`define AB_SRC_TEMP 2'b01
+`define AB_SRC_REG 2'b10
+`define AB_SRC_SP 2'b11
+`define CT_OP_IDLE 2'b00
+`define CT_OP_PC_INC 2'b01
+`define CT_OP_SP_DEC 2'b10
+`define CT_OP_SP_INC 2'b11
+`define FLAGS_ZNHC 2'b00
+`define FLAGS_x0HC 2'b01
+`define FLAGS_00HC 2'b10
+`define FLAGS_ZNHx 2'b11
diff --git a/verilog/rtl/control.v b/verilog/rtl/control.v
new file mode 100644
index 0000000..e272690
--- /dev/null
+++ b/verilog/rtl/control.v
@@ -0,0 +1,1165 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+`include "common.v"
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Module Name: control
+// Project Name: VerilogBoy
+// Description:
+// The control unit of Game Boy CPU.
+// Dependencies:
+//
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+
+module control(
+ input clk,
+ input rst,
+ input [7:0] opcode_early,
+ /* verilator lint_off UNUSED */
+ input [7:0] imm,
+ /* verilator lint_on UNUSED */
+ input [7:0] cb,
+ input [2:0] m_cycle_early,
+ input [1:0] ct_state,
+ input f_z,
+ input f_c,
+ output reg [1:0] alu_src_a,
+ output reg [2:0] alu_src_b,
+ output reg alu_src_xchg,
+ output reg [1:0] alu_op_prefix,
+ output reg [1:0] alu_op_src,
+ output reg alu_op_signed,
+ output reg [1:0] alu_dst,
+ output reg [1:0] pc_src,
+ output reg pc_we,
+ output reg pc_b_sel,
+ output reg pc_jr,
+ output reg pc_revert,
+ output reg [2:0] rf_wr_sel,
+ output reg [2:0] rf_rd_sel,
+ output reg [1:0] rf_rdw_sel,
+ output reg temp_redir,
+ output reg opcode_redir,
+ output reg [1:0] bus_op,
+ output reg [1:0] db_src,
+ output reg [1:0] ab_src,
+ output reg [1:0] ct_op,
+ output reg flags_we,
+ output reg [1:0] flags_pattern,
+ output reg high_mask,
+ output int_master_en,
+ input int_dispatch,
+ output reg int_ack,
+ output reg next,
+ output reg stop,
+ output reg halt,
+ input wake,
+ output reg fault
+ );
+
+ // Comb signal generated by control logic
+ reg ime_clear;
+ reg ime_set;
+ reg ime_delay_set;
+ // FF
+ reg ime_delay_set_ff;
+ reg ime;
+ assign int_master_en = ime;
+
+ wire [7:0] opcode = opcode_early;
+ wire [2:0] m_cycle = m_cycle_early;
+
+ always @(posedge clk)
+ if (ct_state == 2'd2)
+ ime_delay_set_ff <= ime_delay_set;
+
+ always @(posedge clk) begin
+ if (rst)
+ ime <= 1'b0;
+ else if (ime_clear)
+ ime <= 1'b0;
+ else if (ime_set)
+ ime <= 1'b1;
+ else if (ime_delay_set_ff)
+ ime <= 1'b1;
+ end
+
+ reg halt_last;
+ reg stop_last;
+ reg fault_last;
+ always @(posedge clk) begin
+ if (rst) begin
+ halt_last <= 1'b0;
+ stop_last <= 1'b0;
+ fault_last <= 1'b0;
+ end
+ else begin
+ halt_last <= halt;
+ stop_last <= stop;
+ fault_last <= fault;
+ end
+ end
+
+ reg wake_by_int;
+ always @(posedge clk) begin
+ if (rst)
+ wake_by_int <= 1'b0;
+ else begin
+ if (int_dispatch && (m_cycle == 0)) begin
+ wake_by_int <= wake;
+ end
+ end
+ end
+
+ // Combinational control signal
+ reg [1:0] comb_alu_src_a;
+ reg [2:0] comb_alu_src_b;
+ reg comb_alu_src_xchg;
+ reg [1:0] comb_alu_op_prefix;
+ reg [1:0] comb_alu_op_src;
+ reg comb_alu_op_signed;
+ reg [1:0] comb_alu_dst;
+ reg [1:0] comb_pc_src;
+ reg comb_pc_we;
+ reg comb_pc_b_sel;
+ reg comb_pc_jr;
+ reg comb_pc_revert;
+ reg [2:0] comb_rf_wr_sel;
+ reg [2:0] comb_rf_rd_sel;
+ reg [1:0] comb_rf_rdw_sel;
+ reg comb_temp_redir;
+ reg comb_opcode_redir;
+ reg [1:0] comb_bus_op;
+ reg [1:0] comb_db_src;
+ reg [1:0] comb_ab_src;
+ reg [1:0] comb_ct_op;
+ reg comb_flags_we;
+ reg [1:0] comb_flags_pattern;
+ reg comb_high_mask;
+ reg comb_int_ack;
+ reg comb_next;
+ reg comb_stop;
+ reg comb_halt;
+ reg comb_fault;
+
+ // All these nonsense will be replaced by a vector decoding ROM...
+ // in the future
+ always @(*) begin
+ // Set default output
+ // ACC = ACC + 0
+ comb_alu_src_a = `ALU_SRC_A_ACC;
+ comb_alu_src_b = `ALU_SRC_B_ZERO;
+ comb_alu_op_prefix = `ALU_OP_PREFIX_NORMAL;
+ comb_alu_op_src = `ALU_OP_SRC_ADD_FTOR;
+ comb_alu_dst = `ALU_DST_ACC;
+ comb_pc_we = 0;
+ comb_rf_wr_sel = `RF_SEL_B; // Doesn't matter
+ comb_rf_rd_sel = `RF_SEL_B; // Doesn't matter
+ comb_bus_op = `BUS_OP_IF; // Fetch comb_next instruction
+ comb_db_src = `DB_SRC_DB; // Should != ACC
+ comb_ab_src = `AB_SRC_PC; // Output PC
+ comb_ct_op = `CT_OP_PC_INC; // PC = PC + 1
+ comb_flags_we = 0;
+ comb_next = 0;
+ comb_alu_src_xchg = 0;
+ comb_rf_rdw_sel = 2'b10; // Select HL
+ comb_pc_src = 2'b00;
+ comb_pc_b_sel = m_cycle[0];
+ comb_pc_jr = 1'b0;
+ comb_pc_revert = 1'b0;
+ comb_stop = 1'b0;
+ comb_halt = 1'b0;
+ comb_fault = 1'b0;
+ comb_high_mask = 1'b0;
+ comb_alu_op_signed = 1'b0;
+ comb_temp_redir = 1'b0;
+ comb_opcode_redir = 1'b0;
+ ime_set = 1'b0;
+ ime_delay_set = 1'b0;
+ ime_clear = 1'b0;
+ comb_int_ack = 1'b0;
+ comb_flags_pattern = 2'b00;
+ // Though the idea behind the original GB is that when in comb_halt or comb_stop
+ // mode, the clock can be comb_stopped, thus lower the power consumption and
+ // save the battery. On FPGA, this is hard to achieve since clocking in
+ // FPGA works very differently than on ASIC. So here, when comb_halted, CPU
+ // would executing NOP in place as if it was comb_halted.
+ if (halt_last || stop_last || fault_last) begin
+ if (wake) begin
+ comb_halt = 1'b0;
+ comb_stop = 1'b0;
+ // Fault could not be waked up
+ end
+ else begin
+ // Keep sleeping
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_halt = halt_last;
+ comb_stop = stop_last;
+ end
+ // Fault cannot be waken up
+ comb_fault = fault_last;
+ end
+ if (int_dispatch) begin
+ // Interrupt dispatch process
+ case (m_cycle)
+ 0: begin
+ // Revert PC
+ comb_pc_revert = 1'b1;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_next = 1'b1;
+ end
+ 1: begin
+ // Save PCh
+ comb_alu_src_a = `ALU_SRC_A_PC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_next = 1'b1;
+ end
+ 2: begin
+ // Save PCl
+ comb_alu_src_a = `ALU_SRC_A_PC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_pc_we = 1;
+ comb_next = 1'b1;
+ end
+ 3: begin
+ // Delay
+ if (wake_by_int) begin
+ ime_clear = 1'b1;
+ comb_int_ack = 1'b1;
+ end
+ else begin
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1'b1;
+ end
+ end
+ 4: begin
+ // Normal instruction fetch process
+ ime_clear = 1'b1;
+ comb_int_ack = 1'b1;
+ end
+ endcase
+ end
+ //else begin
+ // If waken up
+ if (!comb_halt && !comb_stop && !comb_fault && !int_dispatch) begin
+ if (opcode == 8'h00) begin // NOP
+ // Default behavior is enough
+ end
+ else if (opcode == 8'h10) begin // STOP
+ comb_stop = 1;
+ end
+ else if (opcode == 8'h76) begin // HALT
+ comb_halt = 1;
+ end
+ else if (opcode == 8'hF3) begin // DI
+ ime_clear = 1'b1;
+ end
+ else if (opcode == 8'hFB) begin // EI
+ // EI here need to be delayed for 1 clock?
+ ime_delay_set = 1'b1;
+ end
+ // 16-bit IMM to register LD instructions
+ else if ((opcode[7:6] == 2'b00) && (opcode[3:0] == 4'b0001)) begin
+ comb_alu_src_a = `ALU_SRC_A_DB; // Load from databus
+ comb_alu_dst = `ALU_DST_REG; // Load to register
+ comb_db_src = `DB_SRC_DB; // DB destination to databus buffer
+ if ((m_cycle == 0) || (m_cycle == 1)) begin
+ comb_rf_wr_sel = {opcode[5:4], 1'b1}; // Register no based on opcode
+ comb_bus_op = `BUS_OP_READ; // Read from databus
+ comb_next = 1;
+ end
+ else begin
+ comb_rf_wr_sel = {opcode[5:4], 1'b0};
+ comb_next = 0;
+ end
+ end
+ // LD (nn), SP
+ else if (opcode == 8'h08) begin
+ if ((m_cycle == 0) || (m_cycle == 1)) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_PC;
+ comb_ct_op = `CT_OP_PC_INC;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_ab_src = `AB_SRC_TEMP;
+ comb_db_src = `DB_SRC_REG;
+ comb_rf_rd_sel = `RF_SEL_SP_L;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ct_op = `CT_OP_SP_INC;
+ comb_temp_redir = 1'b1;
+ comb_next = 1'b1;
+ end
+ else if (m_cycle == 3) begin
+ comb_ab_src = `AB_SRC_TEMP;
+ comb_db_src = `DB_SRC_REG;
+ comb_rf_rd_sel = `RF_SEL_SP_H;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1'b1;
+ end
+ else begin
+ // Default behaviour is enough.
+ end
+ end
+ // 8 bit reg-to-reg, mem-to-reg, or reg-to-mem LD instructions
+ else if (opcode[7:6] == 2'b01) begin
+ if (opcode[2:0] == 3'b110)
+ comb_alu_src_a = `ALU_SRC_A_DB; // Src A from data bus
+ else if (opcode[2:0] == 3'b111)
+ comb_alu_src_a = `ALU_SRC_A_ACC; // Src A from accumulator
+ else
+ comb_alu_src_a = `ALU_SRC_A_REG; // Src A from register file
+
+ if (opcode[5:3] == 3'b110)
+ comb_alu_dst = `ALU_DST_DB; // Destination is (HL)
+ else if (opcode[5:3] == 3'b111)
+ comb_alu_dst = `ALU_DST_ACC; // Destination is A
+ else
+ comb_alu_dst = `ALU_DST_REG; // Destination is register
+
+ comb_rf_wr_sel = opcode[5:3];
+ comb_rf_rd_sel = opcode[2:0];
+
+ if (opcode[5:3] == 3'b110) begin // Register to Memory
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_db_src = `DB_SRC_ALU;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ else if (opcode[2:0] == 3'b110) begin // Memory to Register
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ end
+ // 8 bit imm-to-reg, imm-to-mem LD instructions
+ else if ((opcode[7:6] == 2'b00) && (opcode[2:0] == 3'b110)) begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+
+ if (opcode[5:3] == 3'b110) begin // imm to mem
+ comb_alu_dst = `ALU_DST_DB;
+ comb_rf_rd_sel = `RF_SEL_HL;
+ end
+ else if (opcode[5:3] == 3'b111) begin
+ comb_alu_dst = `ALU_DST_ACC;
+ end
+ else begin
+ comb_alu_dst = `ALU_DST_REG;
+ comb_rf_wr_sel = opcode[5:3];
+ end
+
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ if (opcode[5:3] == 3'b110) begin
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ end
+ // LD (BC)/(DE), A
+ else if ((opcode == 8'h02) || (opcode == 8'h12)) begin
+ comb_alu_dst = `ALU_DST_DB;
+ if (opcode == 8'h02)
+ comb_rf_rdw_sel = 2'b00; // Select BC
+ else
+ comb_rf_rdw_sel = 2'b01; // Select DE
+ if (m_cycle == 0) begin
+ comb_next = 1;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ end
+ end
+ // LD (HL+)/(HL-), A
+ else if ((opcode == 8'h22) || (opcode == 8'h32)) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_dst = `ALU_DST_REG;
+ if (opcode == 8'h22)
+ comb_alu_op_src = `ALU_OP_SRC_ADD_FTOR;
+ else
+ comb_alu_op_src = `ALU_OP_SRC_SUB_ATOF;
+ if (m_cycle == 0) begin
+ // A being written to the memory, calculate L +/- 1
+ comb_alu_src_b = `ALU_SRC_B_ONE;
+ comb_rf_rd_sel = `RF_SEL_L;
+ comb_rf_wr_sel = `RF_SEL_L;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_db_src = `DB_SRC_ACC;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else begin
+ // calculate H +/- carry
+ comb_alu_src_b = `ALU_SRC_B_CARRY;
+ comb_rf_rd_sel = `RF_SEL_H;
+ comb_rf_wr_sel = `RF_SEL_H;
+ end
+ end
+ // LD A, (BC)/(DE)
+ else if ((opcode == 8'h0A) || (opcode == 8'h1A)) begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ if (opcode == 8'h0A) begin
+ comb_rf_rdw_sel = 2'b00; // Select BC
+ end
+ else begin
+ comb_rf_rdw_sel = 2'b01; // Select DE
+ end
+
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // LD A, (HL+)/(HL-)
+ else if ((opcode == 8'h2A) || (opcode == 8'h3A)) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_src_b = `ALU_SRC_B_ONE;
+ if (opcode == 8'h2A)
+ comb_alu_op_src = `ALU_OP_SRC_ADD_FTOR;
+ else
+ comb_alu_op_src = `ALU_OP_SRC_SUB_ATOF;
+ comb_alu_dst = `ALU_DST_REG;
+ if (m_cycle == 0) begin
+ comb_alu_src_b = `ALU_SRC_B_ONE;
+ comb_rf_rd_sel = `RF_SEL_L;
+ comb_rf_wr_sel = `RF_SEL_L;
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_ACC;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else begin
+ comb_alu_src_b = `ALU_SRC_B_CARRY;
+ comb_rf_rd_sel = `RF_SEL_H;
+ comb_rf_wr_sel = `RF_SEL_H;
+ end
+ end
+ // 16-bit INC/DEC
+ else if ((opcode[7:6] == 2'b00) && (opcode[2:0] == 3'b011)) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_dst = `ALU_DST_REG;
+ if (opcode[3] == 1) begin
+ comb_alu_op_src = `ALU_OP_SRC_SUB_ATOF;
+ end
+ if (m_cycle == 0) begin
+ comb_alu_src_b = `ALU_SRC_B_ONE;
+ comb_rf_rd_sel = {opcode[5:4], 1'b1};
+ comb_rf_wr_sel = {opcode[5:4], 1'b1};
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else begin
+ comb_alu_src_b = `ALU_SRC_B_CARRY;
+ comb_rf_rd_sel = {opcode[5:4], 1'b0};
+ comb_rf_wr_sel = {opcode[5:4], 1'b0};
+ end
+ end
+ // 8-bit INC/DEC
+ else if ((opcode[7:6] == 2'b00) && (opcode[2:1] == 2'b10)) begin
+ comb_alu_src_b = `ALU_SRC_B_ONE;
+ comb_flags_pattern = `FLAGS_ZNHx;
+ comb_flags_we = 1'b1;
+
+ // INC or DEC
+ if (opcode[0])
+ comb_alu_op_src = `ALU_OP_SRC_SUB_ATOF;
+ else
+ comb_alu_op_src = `ALU_OP_SRC_ADD_FTOR;
+
+ if (opcode[5:3] == 3'b110) begin
+ // INC/DEC (HL)
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_DB;
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_REG;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else begin
+ // End cycle
+ comb_flags_we = 0;
+ end
+ end
+ else if (opcode[5:3] == 3'b111) begin
+ // INC/DEC A
+ comb_alu_src_a = `ALU_SRC_A_ACC;
+ comb_alu_dst = `ALU_DST_ACC;
+ end
+ else begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_dst = `ALU_DST_REG;
+ comb_rf_rd_sel = opcode[5:3];
+ comb_rf_wr_sel = opcode[5:3];
+ end
+ end
+ // ADD HL, r16
+ else if ((opcode[7:6] == 2'b00) && (opcode[3:0] == 4'b1001)) begin
+ comb_alu_dst = `ALU_DST_REG;
+ comb_flags_we = 1'b1;
+ comb_flags_pattern = `FLAGS_x0HC;
+ if (m_cycle == 0) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_src_b = `ALU_SRC_B_L;
+ comb_rf_wr_sel = `RF_SEL_L;
+ comb_rf_rd_sel = {opcode[5:4], 1'b1};
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_src_b = `ALU_SRC_B_H;
+ comb_rf_wr_sel = `RF_SEL_H;
+ comb_rf_rd_sel = {opcode[5:4], 1'b0};
+ comb_alu_op_signed = 1'b1;
+ end
+ end
+ // 8 bit reg-to-reg, mem-to-reg ALU operation
+ else if (opcode[7:6] == 2'b10) begin
+ comb_alu_src_b = `ALU_SRC_B_ACC;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_5TO3;
+ comb_rf_rd_sel = opcode[2:0];
+ comb_flags_we = 1'b1;
+ if ((opcode[5:4] == 2'b01) || (opcode[5:3] == 3'b111)) begin
+ // Sub or CP
+ comb_alu_src_xchg = 1'b1;
+ end
+ if (opcode[2:0] == 3'b110) begin // Source from HL
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_ab_src = `AB_SRC_REG;
+ // Do not writeback in the first cycle
+ comb_alu_dst = `ALU_DST_DB;
+ comb_flags_we = 1'b0;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ else if (opcode[2:0] == 3'b111) begin // Source from A
+ comb_alu_src_a = `ALU_SRC_A_ACC;
+ end
+ else begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ end
+ end
+ // 8 bit imm-to-reg ALU operation
+ else if ((opcode[7:6] == 2'b11) && (opcode[2:0] == 3'b110)) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_next = 1;
+ end
+ else begin
+ if ((opcode[5:4] == 2'b01) || (opcode[5:3] == 3'b111)) begin
+ // Sub or CP
+ comb_alu_src_xchg = 1'b1;
+ end
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_src_b = `ALU_SRC_B_ACC;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_5TO3;
+ comb_flags_we = 1'b1;
+ end
+ end
+ // 16-bit PUSH
+ else if ((opcode[7:6] == 2'b11) && (opcode[3:0] == 4'b0101)) begin
+ if (opcode[5:4] == 2'b11) begin
+ // AF
+ comb_alu_op_prefix = `ALU_OP_PREFIX_SPECIAL;
+ comb_db_src = `DB_SRC_ACC;
+ end
+ else begin
+ comb_db_src = `DB_SRC_DB;
+ end
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_dst = `ALU_DST_DB;
+
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ab_src = `AB_SRC_SP;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_SP;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_rf_rd_sel = {opcode[5:4], 1'b0};
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_SP;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_rf_rd_sel = {opcode[5:4], 1'b1};
+ if (opcode[5:4] == 2'b11) begin
+ comb_db_src = `DB_SRC_ALU;
+ end
+ comb_next = 1;
+ end
+ end
+ // 16-bit POP
+ else if ((opcode[7:6] == 2'b11) && (opcode[3:0] == 4'b0001)) begin
+ if ((m_cycle == 1) || (m_cycle == 2)) begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ if (opcode[5:4] == 2'b11) begin
+ comb_alu_dst = `ALU_DST_ACC;
+ end
+ else begin
+ comb_alu_dst = `ALU_DST_REG;
+ end
+ end
+
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_SP;
+ comb_ct_op = `CT_OP_SP_INC;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_SP;
+ comb_ct_op = `CT_OP_SP_INC;
+ comb_rf_wr_sel = {opcode[5:4], 1'b1};
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_rf_wr_sel = {opcode[5:4], 1'b0};
+ if (opcode[5:4] == 2'b11) begin
+ // Copy from memory to flags
+ comb_alu_op_prefix = `ALU_OP_PREFIX_SPECIAL;
+ comb_alu_op_src = `ALU_OP_SRC_SUB_ATOF;
+ comb_alu_src_b = `ALU_SRC_B_ACC;
+ comb_flags_we = 1'b1;
+ end
+ end
+ end
+ // LD (C), A
+ else if (opcode == 8'he2) begin
+ comb_rf_rdw_sel = 2'b00; // Select BC
+ comb_high_mask = 1'b1; // Select C only
+ comb_alu_src_a = `ALU_SRC_A_ACC;
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_db_src = `DB_SRC_ACC;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // LD A, (C)
+ else if (opcode == 8'hf2) begin
+ comb_rf_rdw_sel = 2'b00; // Select BC
+ comb_high_mask = 1'b1; // Select C only
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1'b1;
+ end
+ end
+ // ADD SP, r8
+ else if (opcode == 8'he8) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_src_b = `ALU_SRC_B_IMM;
+ comb_alu_dst = `ALU_DST_REG;
+ comb_rf_rd_sel = `RF_SEL_SP_L;
+ comb_rf_wr_sel = `RF_SEL_SP_L;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_flags_pattern = `FLAGS_00HC;
+ comb_flags_we = 1'b1;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_src_b = `ALU_SRC_B_IMM;
+ comb_alu_dst = `ALU_DST_REG;
+ comb_alu_op_signed = 1'b1;
+ comb_rf_rd_sel = `RF_SEL_SP_H;
+ comb_rf_wr_sel = `RF_SEL_SP_H;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // LD HL, SP+r8
+ else if (opcode == 8'hf8) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_src_b = `ALU_SRC_B_IMM;
+ comb_alu_dst = `ALU_DST_REG;
+ comb_rf_rd_sel = `RF_SEL_SP_L;
+ comb_rf_wr_sel = `RF_SEL_L;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_flags_pattern = `FLAGS_00HC;
+ comb_flags_we = 1'b1;
+ comb_next = 1;
+ end
+ else begin
+ comb_alu_op_signed = 1'b1;
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_src_b = `ALU_SRC_B_IMM;
+ comb_alu_dst = `ALU_DST_REG;
+ comb_rf_rd_sel = `RF_SEL_SP_H;
+ comb_rf_wr_sel = `RF_SEL_H;
+ end
+ end
+ // LD SP, HL
+ else if (opcode == 8'hf9) begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_dst = `ALU_DST_REG;
+
+ if (m_cycle == 0) begin
+ comb_rf_wr_sel = `RF_SEL_SP_H;
+ comb_rf_rd_sel = `RF_SEL_H;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else begin
+ comb_rf_wr_sel = `RF_SEL_SP_L;
+ comb_rf_rd_sel = `RF_SEL_L;
+ end
+ end
+ // LDH (a8), A
+ else if (opcode == 8'hE0) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_alu_src_a = `ALU_SRC_A_ACC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_db_src = `DB_SRC_ACC;
+ comb_ab_src = `AB_SRC_TEMP;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_high_mask = 1;
+ comb_next = 1;
+ end
+ end
+ // LDH A, (a8)
+ else if (opcode == 8'hF0) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_ab_src = `AB_SRC_TEMP;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_high_mask = 1;
+ comb_next = 1;
+ end
+ else begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_ACC;
+ end
+ end
+ // LD (a16), A
+ else if (opcode == 8'hEA) begin
+ if ((m_cycle == 0) || (m_cycle == 1)) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_alu_src_a = `ALU_SRC_A_ACC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_db_src = `DB_SRC_ACC;
+ comb_ab_src = `AB_SRC_TEMP;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // LD A, (a16)
+ else if (opcode == 8'hFA) begin
+ if ((m_cycle == 0) || (m_cycle == 1)) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_ab_src = `AB_SRC_TEMP;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_ACC;
+ end
+ end
+ // JP HL
+ else if (opcode == 8'hE9) begin
+ comb_rf_rd_sel = `RF_SEL_H;
+ comb_ab_src = `AB_SRC_REG;
+ comb_pc_we = 1;
+ end
+ // JP CC, a16
+ else if ((opcode == 8'hC3) || (opcode == 8'hC2) || (opcode == 8'hD2)
+ || (opcode == 8'hCA) || (opcode == 8'hDA)) begin
+ if ((m_cycle == 0) || (m_cycle == 1)) begin
+ // Read 16 bit imm
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ if (((opcode == 8'hC2) && (!f_z)) || // JP NZ
+ ((opcode == 8'hD2) && (!f_c)) || // JP NC
+ ((opcode == 8'hC3)) || // JP
+ ((opcode == 8'hCA) && (f_z)) || // JP Z
+ ((opcode == 8'hDA) && (f_c))) begin // JP C
+ // Branch taken
+ comb_pc_src = `PC_SRC_TEMP;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_pc_we = 1;
+ comb_next = 1;
+ end
+ // Branch not taken
+ end
+ end
+ // CALL CC, a16
+ else if ((opcode == 8'hCD) || (opcode == 8'hCC) || (opcode == 8'hDC)
+ || (opcode == 8'hC4) || (opcode == 8'hD4)) begin
+ if ((m_cycle == 0) || (m_cycle == 1)) begin
+ // Read 16 bit imm
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ if (((opcode == 8'hC4) && (!f_z)) || // CALL NZ
+ ((opcode == 8'hD4) && (!f_c)) || // CALL NC
+ ((opcode == 8'hCD)) || // CALL
+ ((opcode == 8'hCC) && (f_z)) || // CALL Z
+ ((opcode == 8'hDC) && (f_c))) begin // CALL C
+ // Call taken
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_next = 1;
+ end
+ end
+ else if (m_cycle == 3) begin
+ comb_alu_src_a = `ALU_SRC_A_PC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_next = 1;
+ end
+ else if (m_cycle == 4) begin
+ comb_alu_src_a = `ALU_SRC_A_PC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_pc_src = `PC_SRC_TEMP;
+ comb_pc_we = 1;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // JR CC, imm8
+ else if ((opcode == 8'h20) || (opcode == 8'h30) || (opcode == 8'h18)
+ || (opcode == 8'h28) || (opcode == 8'h38)) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ if (((opcode == 8'h20) && (!f_z)) || // JR NZ
+ ((opcode == 8'h30) && (!f_c)) || // JR NC
+ ((opcode == 8'h18)) || // JR
+ ((opcode == 8'h28) && (f_z)) || // JR Z
+ ((opcode == 8'h38) && (f_c))) begin // JR C
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_pc_jr = 1;
+ comb_next = 1;
+ end
+ end
+ end
+ // RET, RETI
+ else if ((opcode == 8'hC9) || (opcode == 8'hD9)) begin
+ if (m_cycle == 0) begin
+ if (opcode == 8'hD9) begin
+ ime_set = 1;
+ end
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_bus_op = `BUS_OP_READ;
+ comb_ct_op = `CT_OP_SP_INC;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_bus_op = `BUS_OP_READ;
+ comb_ct_op = `CT_OP_SP_INC;
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_PC;
+ comb_pc_b_sel = 0;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_PC;
+ comb_pc_b_sel = 1;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // RET CC
+ else if ((opcode[7:5] == 3'b110) && (opcode[2:0] == 3'b000)) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ if (((opcode == 8'hC0) && (!f_z)) || // RET NZ
+ ((opcode == 8'hD0) && (!f_c)) || // RET NC
+ ((opcode == 8'hC8) && (f_z)) || // RET Z
+ ((opcode == 8'hD8) && (f_c))) begin // RET C
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_bus_op = `BUS_OP_READ;
+ comb_ct_op = `CT_OP_SP_INC;
+ comb_next = 1;
+ end
+ end
+ else if (m_cycle == 2) begin
+ comb_ab_src = `AB_SRC_SP;
+ comb_db_src = `DB_SRC_DB;
+ comb_bus_op = `BUS_OP_READ;
+ comb_ct_op = `CT_OP_SP_INC;
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_PC;
+ comb_pc_b_sel = 0;
+ comb_next = 1;
+ end
+ else if (m_cycle == 3) begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_PC;
+ comb_pc_b_sel = 1;
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // RST
+ else if ((opcode[7:6] == 2'b11) && (opcode[2:0] == 3'b111)) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_IDLE;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_alu_src_a = `ALU_SRC_A_PC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_SP;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ct_op = `CT_OP_SP_DEC;
+ comb_next = 1;
+ end
+ else if (m_cycle == 2) begin
+ comb_alu_src_a = `ALU_SRC_A_PC;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_db_src = `DB_SRC_DB;
+ comb_ab_src = `AB_SRC_SP;
+ comb_pc_src = `PC_SRC_RST;
+ comb_pc_we = 1;
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ end
+ // RLCA, RRCA, RLA, RRA
+ else if ((opcode[7:5] == 3'b000) && (opcode[2:0] == 3'b111)) begin
+ comb_alu_src_b = `ALU_SRC_B_ACC;
+ comb_alu_op_prefix = `ALU_OP_PREFIX_SHIFT_ROTATE;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_5TO3;
+ comb_flags_pattern = `FLAGS_00HC;
+ comb_flags_we = 1;
+ end
+ // DAA, CPL, SCF, CCF
+ else if ((opcode[7:5] == 3'b001) && (opcode[2:0] == 3'b111)) begin
+ comb_alu_src_b = `ALU_SRC_B_ACC;
+ comb_alu_op_prefix = `ALU_OP_PREFIX_SPECIAL;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_5TO3;
+ comb_flags_we = 1;
+ end
+ // CB prefix
+ else if (opcode == 8'hCB) begin
+ if (m_cycle == 0) begin
+ comb_bus_op = `BUS_OP_READ;
+ comb_db_src = `DB_SRC_DB;
+ comb_next = 1;
+ end
+ else if (m_cycle == 1) begin
+ comb_opcode_redir = 1'b1;
+ if (cb[2:0] == 3'b110) begin
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_DB;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_ab_src = `AB_SRC_REG;
+ comb_bus_op = `BUS_OP_READ;
+ comb_flags_we = 0;
+ comb_next = 1;
+ end
+ else if (cb[2:0] == 3'b111) begin
+ comb_alu_src_a = `ALU_SRC_A_ACC;
+ comb_alu_dst = `ALU_DST_ACC;
+ comb_flags_we = !cb[7];
+ end
+ else begin
+ comb_alu_src_a = `ALU_SRC_A_REG;
+ comb_alu_dst = `ALU_DST_REG;
+ comb_rf_rd_sel = cb[2:0];
+ comb_rf_wr_sel = cb[2:0];
+ comb_flags_we = !cb[7];
+ end
+ if (cb[7:6] == 2'b00) begin
+ comb_alu_op_prefix = `ALU_OP_PREFIX_SHIFT_ROTATE;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_5TO3;
+ end
+ else begin
+ comb_alu_op_prefix = `ALU_OP_PREFIX_CB;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_7TO6;
+ end
+ if (cb[7:6] == 2'b01) begin
+ // Only affects flags
+ comb_alu_dst = `ALU_DST_DB;
+ end
+ end
+ else if (m_cycle == 2) begin
+ comb_opcode_redir = 1'b1;
+ comb_alu_src_a = `ALU_SRC_A_DB;
+ comb_alu_dst = `ALU_DST_DB;
+ if (cb[7:6] == 2'b00) begin
+ comb_alu_op_prefix = `ALU_OP_PREFIX_SHIFT_ROTATE;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_5TO3;
+ end
+ else begin
+ comb_alu_op_prefix = `ALU_OP_PREFIX_CB;
+ comb_alu_op_src = `ALU_OP_SRC_INSTR_7TO6;
+ end
+ if (cb[7:6] != 2'b01) begin
+ // Write-back cycle required.
+ comb_bus_op = `BUS_OP_WRITE;
+ comb_db_src = `DB_SRC_ALU;
+ comb_ab_src = `AB_SRC_REG;
+ comb_ct_op = `CT_OP_IDLE;
+ comb_next = 1;
+ end
+ comb_flags_we = !cb[7];
+ end
+ end
+ end
+ end
+
+ always @(posedge clk) begin // Output register stage: latches the combinational comb_* decode results
+ if ((ct_state == 2'd3) || (rst == 1'b1)) begin // update only when ct_state is 3, or on any clock while rst is high
+ alu_src_a <= comb_alu_src_a;
+ alu_src_b <= comb_alu_src_b;
+ alu_src_xchg <= comb_alu_src_xchg;
+ alu_op_prefix <= comb_alu_op_prefix;
+ alu_op_src <= comb_alu_op_src;
+ alu_op_signed <= comb_alu_op_signed;
+ alu_dst <= comb_alu_dst;
+ pc_src <= comb_pc_src;
+ pc_we <= comb_pc_we;
+ pc_b_sel <= comb_pc_b_sel;
+ pc_jr <= comb_pc_jr;
+ pc_revert <= comb_pc_revert;
+ rf_wr_sel <= comb_rf_wr_sel;
+ rf_rd_sel <= comb_rf_rd_sel;
+ rf_rdw_sel <= comb_rf_rdw_sel;
+ temp_redir <= comb_temp_redir;
+ opcode_redir <= comb_opcode_redir;
+ bus_op <= comb_bus_op;
+ db_src <= comb_db_src;
+ ab_src <= comb_ab_src;
+ ct_op <= comb_ct_op;
+ flags_we <= comb_flags_we;
+ flags_pattern <= comb_flags_pattern;
+ high_mask <= comb_high_mask;
+ int_ack <= comb_int_ack;
+ next <= comb_next;
+ stop <= comb_stop;
+ halt <= comb_halt;
+ fault <= comb_fault;
+ end // note: ime_clear / ime_set / ime_delay_set are not registered in this block
+ end
+
+endmodule
diff --git a/verilog/rtl/cpu.v b/verilog/rtl/cpu.v
new file mode 100644
index 0000000..6b59982
--- /dev/null
+++ b/verilog/rtl/cpu.v
@@ -0,0 +1,647 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 17:30:26 02/08/2018
+// Module Name: cpu
+// Project Name: VerilogBoy
+// Description:
+// The Game Boy CPU.
+// Dependencies:
+//
+// Additional Comments:
+// See doc/cpu_internal.md for signal definitions
+////////////////////////////////////////////////////////////////////////////////
+
+module cpu(
+ input clk,
+ input rst,
+ output reg phi,
+ output wire [1:0] ct,
+ output reg [15:0] a,
+ output reg [7:0] dout,
+ input [7:0] din,
+ output reg rd,
+ output reg wr,
+ input [4:0] int_en,
+ input [4:0] int_flags_in,
+ output wire [4:0] int_flags_out,
+ input [7:0] key_in,
+ output reg done,
+ output wire fault
+ );
+
+ reg [7:0] opcode;
+ reg [7:0] cb;
+ wire [2:0] m_cycle;
+ reg [2:0] m_cycle_early;
+ wire [1:0] alu_src_a;
+ wire [2:0] alu_src_b;
+ wire alu_src_xchg;
+ wire [1:0] alu_op_prefix;
+ wire [1:0] alu_op_src;
+ wire [1:0] alu_dst;
+ wire [1:0] pc_src;
+ wire pc_we;
+ wire [2:0] rf_wr_sel;
+ wire [2:0] rf_rd_sel;
+ wire [1:0] rf_rdw_sel;
+ wire [1:0] bus_op;
+ wire [1:0] db_src;
+ wire [1:0] ab_src;
+ wire [1:0] ct_op;
+ wire flags_we;
+ wire [1:0] flags_pattern;
+ wire high_mask;
+ wire next;
+ wire stop;
+ wire halt;
+ reg wake;
+ //wire fault;
+ reg int_dispatch;
+ wire int_master_en;
+ wire int_ack;
+
+ wire [2:0] rf_rdn;
+ wire [7:0] rf_rd;
+ reg [7:0] rf_rd_ex; // Buffer Rd selected during EX stage
+ wire [1:0] rf_rdwn;
+ wire [15:0] rf_rdw;
+ wire [7:0] rf_h;
+ wire [7:0] rf_l;
+ wire [15:0] rf_sp;
+ wire [2:0] rf_wrn;
+ wire [7:0] rf_wr;
+ wire rf_we;
+
+ wire [7:0] alu_a;
+ wire [7:0] alu_b;
+ wire [7:0] alu_result;
+ reg [7:0] alu_result_buffer;
+ wire [3:0] alu_flags_in;
+ wire [3:0] alu_flags_out;
+ wire [4:0] alu_op;
+ wire alu_op_signed;
+ wire alu_carry_out;
+ reg alu_carry_out_ex;
+ reg alu_carry_out_ct;
+
+ wire [7:0] acc_wr;
+ wire acc_we;
+ wire [7:0] acc_rd;
+
+ wire [15:0] pc_rd;
+ wire [7:0] pc_rd_b;
+ wire pc_b_sel; // byte select
+ wire [15:0] pc_wr;
+ wire [7:0] pc_wr_b;
+ wire pc_we_h;
+ wire pc_we_l;
+
+ wire [15:0] temp_rd; // temp value for 16bit imm
+
+ wire [3:0] flags_rd;
+ wire [3:0] flags_wr;
+
+ wire [7:0] db_wr; // Data into buffer
+ wire [7:0] db_rd; // Data out from buffer
+ wire db_we;
+
+ wire [7:0] imm_abs;
+ wire [7:0] imm_low;
+ wire [7:0] imm_ext;
+
+ reg [1:0] ct_state;
+
+ // Control Logic
+ // Control Logic is only used in EX stage
+ // Signals are gated.
+ wire [1:0] alu_src_a_ex;
+ wire [2:0] alu_src_b_ex;
+ wire [1:0] alu_op_prefix_ex;
+ wire [1:0] alu_op_src_ex;
+ wire alu_op_signed_ex;
+ wire [1:0] alu_dst_ex;
+ wire [2:0] rf_wr_sel_ex;
+ wire [2:0] rf_rd_sel_ex;
+ wire flags_we_ex;
+ wire pc_b_sel_ex;
+ wire pc_jr;
+ wire pc_we_ex;
+ wire pc_revert;
+ wire temp_redir; // redirect regfile operation to temp register
+ wire opcode_redir;
+
+ control control( // control module instance; nets suffixed _ex receive its outputs directly
+ .clk(clk),
+ .rst(rst),
+ .opcode_early(opcode),
+ .cb(cb),
+ .imm(imm_low), // imm_low is imm_reg[7:0]
+ .m_cycle_early(m_cycle_early),
+ .ct_state(ct_state),
+ .f_z(flags_rd[3]), // flags bit 3 (Z, matching F_Z in the ALU)
+ .f_c(flags_rd[0]), // flags bit 0 (C, matching F_C in the ALU)
+ .alu_src_a(alu_src_a_ex),
+ .alu_src_b(alu_src_b_ex),
+ .alu_src_xchg(alu_src_xchg),
+ .alu_op_prefix(alu_op_prefix_ex),
+ .alu_op_src(alu_op_src_ex),
+ .alu_op_signed(alu_op_signed_ex),
+ .alu_dst(alu_dst_ex),
+ .pc_src(pc_src),
+ .pc_we(pc_we_ex),
+ .pc_b_sel(pc_b_sel_ex),
+ .pc_jr(pc_jr),
+ .pc_revert(pc_revert),
+ .rf_wr_sel(rf_wr_sel_ex),
+ .rf_rd_sel(rf_rd_sel_ex),
+ .rf_rdw_sel(rf_rdw_sel),
+ .temp_redir(temp_redir),
+ .opcode_redir(opcode_redir),
+ .bus_op(bus_op),
+ .db_src(db_src),
+ .ab_src(ab_src),
+ .ct_op(ct_op),
+ .flags_we(flags_we_ex),
+ .flags_pattern(flags_pattern),
+ .high_mask(high_mask),
+ .int_master_en(int_master_en),
+ .int_dispatch(int_dispatch),
+ .int_ack(int_ack),
+ .next(next),
+ .stop(stop),
+ .halt(halt),
+ .wake(wake), // two-stage delayed wake request, see wake_comb / wake_delay
+ .fault(fault) // also exported as the cpu module's fault output
+ );
+
+ always @(posedge clk) begin
+ // Simulation aid only: can be used to stop a simulation run.
+ // Registered, so done lags stop/halt/fault by one clk.
+ done <= |{stop, halt, fault};
+ end
+
+ wire wake_comb = // raw wake request, evaluated only while halt or stop is set
+ // A halted CPU wakes on any pending interrupt enabled in int_en; IME is not consulted
+ (halt) ? ((int_flags_in & int_en) != 0) : (
+ // A stopped CPU wakes on any enabled pending interrupt or any non-zero key_in.
+ // IME is not consulted. Typical software clears IE before entering STOP,
+ // so in practice only the keypad wakes the CPU.
+ (stop) ? (((int_flags_in & int_en) != 0) || (key_in != 0)) :
+ (1'b0));
+ reg wake_delay; // Wake should be delayed for 1 M-cycle; neither wake_delay nor wake has a reset
+ always @(posedge clk) begin
+ if (ct_state == 2'b10) begin // sampled once per pass of the 2-bit ct_state counter
+ wake_delay <= wake_comb;
+ wake <= wake_delay; // takes the value wake_delay held before this edge
+ end
+ end
+
+ wire [7:3] current_opcode;
+
+ // Data Bus Buffer
+ reg [7:0] db_wr_buffer;
+ reg [7:0] db_rd_buffer;
+
+ // Logic: if the buffer is selected (db_src == 2'b11), use the data held in it;
+ // otherwise the buffer is bypassed by one of the other sources below.
+ always @(posedge clk) begin
+ if (db_we)
+ db_wr_buffer <= alu_result; // capture the ALU result whenever db_we is set
+ end
+ assign db_rd = db_rd_buffer;
+ assign db_wr = ( // source of the byte driven out on a write
+ (db_src == 2'b00) ? (acc_rd) : ( // accumulator
+ (db_src == 2'b01) ? (alu_result_buffer) : ( // buffered ALU result
+ (db_src == 2'b10) ? (rf_rd_ex) : ( // register-file read latched in rf_rd_ex
+ (db_src == 2'b11) ? (db_wr_buffer) : (8'b0))))); // write buffer
+ assign db_we = (alu_dst == 2'b11); // ALU destination code 3 targets the data bus buffer
+
+ // Address Bus Buffer: selects the 16-bit address source; high_mask forces the upper byte to 8'hFF
+ wire [15:0] ab_wr;
+ assign ab_wr = (
+ (ab_src == 2'b00) ? (pc_rd) : ( // program counter
+ (ab_src == 2'b01) ? ((high_mask) ? ({8'hFF, temp_rd[7:0]}) : (temp_rd)) : ( // immediate/temp register
+ (ab_src == 2'b10) ? ((high_mask) ? ({8'hFF, rf_rdw[7:0]}) : (rf_rdw)) : ( // 16-bit register-file read
+ (ab_src == 2'b11) ? (rf_sp) : (16'b0))))); // stack pointer
+
+ // Interrupt: pending-flag masking and acknowledge (flag clear) logic
+ wire [4:0] int_flags_masked = int_flags_in & int_en & {5{int_master_en}}; // pending AND enabled AND master enable
+ wire [4:0] int_flags_out_cleared = // int_flags_in with the lowest-numbered masked flag cleared
+ (int_flags_masked[0]) ? (int_flags_in & 5'b11110) : ( // bit 0 is checked first
+ (int_flags_masked[1]) ? (int_flags_in & 5'b11101) : (
+ (int_flags_masked[2]) ? (int_flags_in & 5'b11011) : (
+ (int_flags_masked[3]) ? (int_flags_in & 5'b10111) : (
+ (int_flags_masked[4]) ? (int_flags_in & 5'b01111) : (
+ int_flags_in
+ )))));
+
+ assign int_flags_out = // flags pass through unchanged except while int_dispatch and pc_we are both set
+ ((int_dispatch)&&(pc_we)) ? (int_flags_out_cleared) : (int_flags_in);
+
+ // Register file
+ wire [7:0] rf_rd_raw;
+ regfile regfile(
+ .clk(clk),
+ .rst(rst),
+ .rdn(rf_rdn),
+ .rd(rf_rd_raw),
+ .rdwn(rf_rdwn),
+ .rdw(rf_rdw),
+ .h(rf_h),
+ .l(rf_l),
+ .sp(rf_sp),
+ .wrn(rf_wrn),
+ .wr(rf_wr),
+ .we(rf_we)
+ );
+ assign rf_wr = alu_result;
+ assign rf_we = (alu_dst == 2'b10) && (!temp_redir); // write suppressed while temp_redir is set
+ assign rf_wrn = rf_wr_sel;
+ assign rf_rdn = rf_rd_sel;
+ assign rf_rdwn = rf_rdw_sel;
+ assign rf_rd = (!temp_redir) ? (rf_rd_raw) : ((rf_rd_sel[0]) ? (temp_rd[7:0]) : (temp_rd[15:8])); // temp_redir reads temp_rd instead: low byte if rf_rd_sel[0], else high byte
+ always@(posedge clk) begin // rf_rd_ex: snapshot of rf_rd_raw, used as a db_wr source
+ if (rst)
+ rf_rd_ex <= 8'b0;
+ else
+ if (ct_state == 2'b00) // captured only when ct_state is 0
+ rf_rd_ex <= rf_rd_raw;
+ end
+
+ // Register A (accumulator); imm_reg is also declared here because acc_wr uses it
+ reg [15:0] imm_reg;
+ singlereg #(8) acc(
+ .clk(clk),
+ .rst(rst),
+ .wr(acc_wr),
+ .we(acc_we),
+ .rd(acc_rd)
+ );
+ assign acc_wr = ((db_src == 2'b00) && (bus_op == 2'b11)) ? (imm_reg[7:0]) : (alu_result); // special case loads imm_reg[7:0]; NOTE(review): presumably a bus read with db_src = ACC, as in LD A,(HL+/-) -- confirm encodings
+ assign acc_we = ((alu_dst == 2'b00) || ((db_src == 2'b00) && (bus_op == 2'b11))); // write on ALU destination code 0 or on the special case above
+
+ // Register PC
+ reg [15:0] pc;
+ reg [15:0] last_pc; // previous PC value, restored when pc_revert is set
+ assign pc_rd = pc;
+ assign pc_rd_b = (pc_b_sel == 1'b0) ? (pc[7:0]) : (pc[15:8]); // byte view of PC: low byte when pc_b_sel is 0, else high byte
+ assign pc_wr_b = alu_result;
+ assign pc_wr = ( // 16-bit value loaded on a full-width PC write
+ (pc_src == 2'b00) ? (rf_rdw) : ( // 16-bit register-file read
+ (pc_src == 2'b01) ? ({10'b00, opcode[5:3], 3'b000}) : ( // zero-extended opcode[5:3] * 8
+ (pc_src == 2'b10) ? (temp_rd) : ( // immediate/temp register
+ (pc_src == 2'b11) ? (16'b0) : (16'b0)))));
+ wire [15:0] pc_int = // interrupt vector, chosen by the lowest-numbered masked flag
+ (int_flags_masked[0]) ? (16'h0040) : (
+ (int_flags_masked[1]) ? (16'h0048) : (
+ (int_flags_masked[2]) ? (16'h0050) : (
+ (int_flags_masked[3]) ? (16'h0058) : (
+ (int_flags_masked[4]) ? (16'h0060) : (
+ // No interrupt is pending any more, so dispatching is cancelled:
+ // jump to 0000 instead.
+ // This behavior is tested by acceptance/interrupts/ie_push
+ 16'h0000
+ )))));
+ assign pc_we_l = ((alu_dst == 2'b01) && (pc_b_sel == 1'b0)) ? (1'b1) : (1'b0); // ALU destination code 1 writes one PC byte: low byte
+ assign pc_we_h = ((alu_dst == 2'b01) && (pc_b_sel == 1'b1)) ? (1'b1) : (1'b0); // ... or high byte
+ always @(posedge clk) begin // priority: low-byte write, high-byte write, revert, full-width write
+ if (rst)
+ pc <= 16'b0;
+ else begin
+ if (pc_we_l) begin
+ pc[7:0] <= pc_wr_b;
+ last_pc[7:0] <= pc[7:0];
+ end
+ else if (pc_we_h) begin
+ pc[15:8] <= pc_wr_b;
+ last_pc[15:8] <= pc[15:8];
+ end
+ else if (pc_revert)
+ pc <= last_pc;
+ else if (pc_we)
+ if (int_dispatch)
+ // this might need to be deferred
+ pc <= pc_int; // last_pc is left unchanged on this path
+ else begin
+ pc <= pc_wr;
+ last_pc <= pc;
+ end
+ end
+ end
+
+ // Register F: flags[3:0] = {Z, N, H, C}, matching F_Z..F_C in the ALU; the commented-out singlereg instance below is unused
+ /*singlereg #(4) flags(
+ .clk(clk),
+ .rst(rst),
+ .wr(flags_wr),
+ .we((flags_we != 2'b00) ? 1'b1 : 1'b0),
+ .rd(flags_rd)
+ );*/
+ reg [3:0] flags;
+ always @(posedge clk) begin // flags_pattern selects which bits take the ALU flags
+ if (rst)
+ flags <= 4'b0;
+ else if (flags_we)
+ if (flags_pattern == 2'b00)
+ flags[3:0] <= flags_wr[3:0]; // all four flags from the ALU
+ else if (flags_pattern == 2'b01)
+ flags[2:0] <= {1'b0, flags_wr[1:0]}; // bit 3 kept, bit 2 cleared, bits 1:0 from the ALU
+ else if (flags_pattern == 2'b10)
+ flags[3:0] <= {2'b0, flags_wr[1:0]}; // bits 3:2 cleared, bits 1:0 from the ALU
+ else if (flags_pattern == 2'b11)
+ flags[3:1] <= flags_wr[3:1]; // bits 3:1 from the ALU, bit 0 kept
+ end
+ assign flags_rd = flags;
+ assign flags_wr = alu_flags_out;
+
+
+ // ALU
+ wire [2:0] alu_op_mux;
+ wire [7:0] alu_a_pre;
+ wire [7:0] alu_b_pre;
+
+ alu alu(
+ .alu_a(alu_a),
+ .alu_b(alu_b),
+ .alu_bit_index(imm_reg[5:3]),
+ .alu_result(alu_result),
+ .alu_flags_in(alu_flags_in),
+ .alu_flags_out(alu_flags_out),
+ .alu_op(alu_op)
+ );
+
+ assign alu_a_pre = (
+ (alu_src_a == 2'b00) ? (acc_rd) : (
+ (alu_src_a == 2'b01) ? (pc_rd_b) : (
+ (alu_src_a == 2'b10) ? (rf_rd) : (
+ (alu_src_a == 2'b11) ? (db_rd) : (8'b0)))));
+
+ assign alu_b_pre = (
+ (alu_src_b == 3'b000) ? (acc_rd) : (
+ (alu_src_b == 3'b001) ? ({7'b0, alu_carry_out}) : (
+ (alu_src_b == 3'b010) ? (8'd0) : (
+ (alu_src_b == 3'b011) ? (8'd1) : (
+ (alu_src_b == 3'b100) ? (rf_h) : (
+ (alu_src_b == 3'b101) ? (rf_l) : (
+ (alu_src_b == 3'b110) ? (imm_abs) : (
+ (alu_src_b == 3'b111) ? ((pc_b_sel) ? (imm_low) : (imm_ext)) : (8'b0))))))))); // cursed
+
+ assign alu_a = (alu_src_xchg) ? (alu_b_pre) : (alu_a_pre);
+ assign alu_b = (alu_src_xchg) ? (alu_a_pre) : (alu_b_pre);
+
+ assign alu_op_mux = (
+ (alu_op_src == 2'b00) ? (current_opcode[5:3]) : (
+ (alu_op_src == 2'b01) ? ({1'b1, current_opcode[7:6]}) : (
+ (alu_op_src == 2'b10) ? ((alu_op_signed) ? (3'b001) : (3'b000)) : (
+ (alu_op_src == 2'b11) ? ((alu_op_signed) ? (3'b011) : (3'b010)) : (3'b0)))));
+
+ assign alu_flags_in = flags_rd;
+ assign alu_op = {alu_op_prefix, alu_op_mux};
+
+ assign current_opcode[7:3] = (opcode_redir) ? (imm_reg[7:3]) : (opcode[7:3]);
+
+ // CT FSM
+ wire [1:0] ct_next_state;
+
+ assign ct_next_state = ct_state + 2'b01;
+ always @(posedge clk) begin
+ if (rst)
+ ct_state <= 2'b00;
+ else
+ ct_state <= ct_next_state;
+ end
+
+ assign ct = ct_state;
+
+ //reg [15:0] imm_reg; declared earlier
+ assign temp_rd = imm_reg;
+ assign imm_low = imm_reg[7:0];
+ assign imm_ext = {8{imm_reg[7]}};
+ assign imm_abs = (imm_reg[7]) ? (~imm_reg[7:0] + 1'b1) : (imm_reg[7:0]);
+
+ // CT - FSM / Bus Operation
+ always @(posedge clk) begin // bus sequencing: one case arm per ct_state value
+ if (rst) begin // NOTE(review): cb is written below but not cleared here; confirm it is initialized elsewhere
+ a <= 16'b0;
+ rd <= 1'b0;
+ wr <= 1'b0;
+ phi <= 1;
+ opcode <= 8'b0;
+ imm_reg <= 16'b0;
+ db_rd_buffer <= 8'b0;
+ dout <= 8'b0;
+ int_dispatch <= 1'b0;
+ alu_result_buffer <= 8'b0;
+ end
+ else begin
+ if ((alu_dst == 2'b10) && temp_redir && !(ct_state == 2'b10 && bus_op == 2'b11)) // redirected register write, skipped while the data-read latch below runs
+ if (rf_wr_sel[0]) imm_reg[7:0] <= rf_wr; // odd select -> low byte of imm_reg
+ else imm_reg[15:8] <= rf_wr; // even select -> high byte; this else pairs with if (rf_wr_sel[0])
+
+ case (ct_state)
+ 2'b00: begin
+ // Setup Address
+ a <= ab_wr;
+ rd <= ((bus_op == 2'b01)||(bus_op == 2'b11)) ? (1'b1) : (1'b0); // read strobe for instruction fetch and data read
+ wr <= 0;
+ phi <= 1;
+ // Backup ALU results
+ alu_result_buffer <= alu_result;
+ end
+ 2'b01: begin
+ // Read in progress
+ end
+ 2'b10: begin
+ if (bus_op == 2'b10) begin
+ // Write cycle
+ wr <= 1;
+ dout <= db_wr;
+ end
+ else if (bus_op == 2'b01) begin
+ // Instruction Fetch Cycle
+ wr <= 0;
+ opcode <= din;
+ end
+ else if (bus_op == 2'b11) begin
+ // Data Read cycle
+ wr <= 0;
+ db_rd_buffer <= din;
+ if ((opcode == 8'hCB) && (m_cycle == 0)) cb <= din[7:0]; // byte read in M-cycle 0 after a CB opcode is kept in cb
+ // mcycle is slower
+ if (m_cycle == 3'd0) imm_reg[7:0] <= din; // first data byte -> low half of imm_reg
+ else if (m_cycle == 3'd1) imm_reg[15:8] <= din; // second data byte -> high half of imm_reg
+ end
+ else begin
+ wr <= 0;
+ end
+ rd <= 0; // strobes and phi drop for every bus_op in this state
+ phi <= 0;
+
+ // Interrupt dispatch happens here
+ // Guaranteed if it is at instruction fetch cycle,
+ // It is at instruction boundaries,
+ // and m_cycle will start from 0.
+ if ((!int_dispatch) && (int_flags_masked != 0) && (int_master_en) && ((bus_op == 2'b01) || (halt == 1'b1)))
+ int_dispatch <= 1'b1; // start dispatch on a fetch cycle or while halted
+ else if ((int_dispatch) && (int_ack)) begin
+ int_dispatch <= 1'b0; // dispatch ends once acknowledged
+ end
+ end
+ 2'b11: begin
+ // Bus Idle
+ rd <= 0;
+ wr <= 0;
+ dout <= 8'b0;
+ end
+ endcase
+ end
+ end
+
+ // CT - FSM / Instruction Execution
+ reg [1:0] alu_src_a_ct;
+ reg [2:0] alu_src_b_ct;
+ wire [1:0] alu_op_prefix_ct = 2'b00;
+ reg [1:0] alu_op_src_ct;
+ reg [1:0] alu_dst_ct;
+ reg [2:0] rf_wr_sel_ct;
+ reg [2:0] rf_rd_sel_ct;
+ reg pc_b_sel_ct;
+
+
+ always @(*) begin // ALU/register controls used while ct_state != 2'b00 (see the *_ct muxes below)
+ // Do nothing by default
+ alu_src_a_ct = 2'b00; // From A
+ alu_src_b_ct = 3'b010; // Constant 0
+ alu_op_src_ct = 2'b10; // Add
+ alu_dst_ct = 2'b00; // To A
+ rf_wr_sel_ct = 3'b000;
+ rf_rd_sel_ct = 3'b000;
+ pc_b_sel_ct = 1'b0;
+ case (ct_state)
+ 2'b00: begin
+ // Decoding and Execution
+ // Actually cannot control anything
+ end
+ 2'b01: begin
+ // CT_OP first clock
+ case (ct_op)
+ 2'b00: begin
+ // Do nothing
+ end
+ 2'b01: begin
+ // Calculate PC low + 1
+ pc_b_sel_ct = 1'b0;
+ alu_src_a_ct = 2'b01; // From PC byte
+ alu_src_b_ct = (pc_jr) ? (3'b110) : (3'b011); // Imm Abs or Constant 1
+ alu_op_src_ct = (pc_jr) ? (imm_low[7] ? 2'b11 : 2'b10) : 2'b10; // Add, or Sub of |imm| when pc_jr is set and the offset is negative
+ alu_dst_ct = 2'b01; // To PC byte
+ end
+ 2'b10: begin
+ // Calculate SP low - 1
+ rf_rd_sel_ct = 3'b111; // Read from SP low
+ rf_wr_sel_ct = 3'b111; // Write to SP low
+ alu_src_a_ct = 2'b10; // From register file
+ alu_src_b_ct = 3'b011; // Constant 1
+ alu_op_src_ct = 2'b11; // Sub
+ alu_dst_ct = 2'b10; // To register file
+ end
+ 2'b11: begin
+ // Calculate SP low + 1
+ rf_rd_sel_ct = 3'b111; // Read from SP low
+ rf_wr_sel_ct = 3'b111; // Write to SP low
+ alu_src_a_ct = 2'b10; // From register file
+ alu_src_b_ct = 3'b011; // Constant 1
+ alu_op_src_ct = 2'b10; // Add
+ alu_dst_ct = 2'b10; // To register file
+ end
+ endcase
+ end
+ 2'b10: begin
+ // CT_OP second clock
+ case (ct_op)
+ 2'b00: begin
+ // Do nothing
+ end
+ 2'b01: begin
+ // Calculate PC high + carry
+ pc_b_sel_ct = 1'b1;
+ alu_src_a_ct = 2'b01; // From PC byte
+ alu_src_b_ct = 3'b001; // Carry
+ alu_op_src_ct = (pc_jr) ? (imm_low[7] ? 2'b11 : 2'b10) : 2'b10; // Add the carry, or Sub it when pc_jr is set and the offset is negative
+ alu_dst_ct = 2'b01; // To PC byte
+ end
+ 2'b10: begin
+ // Calculate SP high - carry
+ rf_rd_sel_ct = 3'b110; // Read from SP high
+ rf_wr_sel_ct = 3'b110; // Write to SP high
+ alu_src_a_ct = 2'b10; // From register file
+ alu_src_b_ct = 3'b001; // Carry
+ alu_op_src_ct = 2'b11; // Sub
+ alu_dst_ct = 2'b10; // To register file
+ end
+ 2'b11: begin
+ // Calculate SP high + carry
+ rf_rd_sel_ct = 3'b110; // Read from SP high
+ rf_wr_sel_ct = 3'b110; // Write to SP high
+ alu_src_a_ct = 2'b10; // From register file
+ alu_src_b_ct = 3'b001; // Carry
+ alu_op_src_ct = 2'b10; // Add
+ alu_dst_ct = 2'b10; // To register file
+ end
+ endcase
+ end
+ 2'b11: begin
+ // End, it is safe to overwrite DB as doing nothing
+ alu_dst_ct = 2'b11;
+ end
+ endcase
+ end
+
+ assign alu_src_a = (ct_state == 2'b00) ? (alu_src_a_ex) : (alu_src_a_ct);
+ assign alu_src_b = (ct_state == 2'b00) ? (alu_src_b_ex) : (alu_src_b_ct);
+ assign alu_op_prefix = (ct_state == 2'b00) ? (alu_op_prefix_ex) : (alu_op_prefix_ct);
+ assign alu_op_src = (ct_state == 2'b00) ? (alu_op_src_ex) : (alu_op_src_ct);
+ assign alu_op_signed = (ct_state == 2'b00) ? (alu_op_signed_ex) : (1'b0);
+ assign alu_dst = (ct_state == 2'b00) ? (alu_dst_ex) : (alu_dst_ct);
+ assign rf_wr_sel = (ct_state == 2'b00) ? (rf_wr_sel_ex) : (rf_wr_sel_ct);
+ assign rf_rd_sel = (ct_state == 2'b00) ? (rf_rd_sel_ex) : (rf_rd_sel_ct);
+ assign flags_we = (ct_state == 2'b00) ? (flags_we_ex) : (1'b0);
+ assign pc_b_sel = (ct_state == 2'b00) ? (pc_b_sel_ex) : (pc_b_sel_ct);
+ assign pc_we = (ct_state == 2'b00) ? (pc_we_ex) : (1'b0);
+ assign alu_carry_out = (ct_state == 2'b00) ? (alu_carry_out_ex) : (alu_carry_out_ct);
+
+ // EX - FSM / Multi-M-cycle Instruction Handling
+ reg [2:0] ex_state;
+ wire [2:0] ex_next_state;
+
+ assign ex_next_state = (next) ? (ex_state + 3'd1) : (3'd0);
+
+ always @(posedge clk) begin // M-cycle counter and carry bookkeeping
+ if (rst) begin
+ ex_state <= 3'd0;
+ m_cycle_early <= 3'd0;
+ alu_carry_out_ex <= 1'b0;
+ alu_carry_out_ct <= 1'b0;
+ end
+ else begin
+ alu_carry_out_ct <= alu_flags_out[0]; // captured every clock: carry of the ALU operation in the previous clock
+ if (ct_state == 2'b11) begin
+ ex_state <= ex_next_state; // advance (or clear, when next is low) at the last ct_state of the M-cycle
+ end
+ else if (ct_state == 2'b10) begin
+ m_cycle_early <= ex_next_state; // same value, registered one clock before ex_state
+ end
+ else if (ct_state == 2'b00) begin
+ // Backup flag output
+ alu_carry_out_ex <= alu_flags_out[0]; // carry of the operation executed while ct_state is 2'b00
+ end
+ end
+ end
+
+ assign m_cycle = ex_state;
+
+endmodule
diff --git a/verilog/rtl/dma.v b/verilog/rtl/dma.v
new file mode 100644
index 0000000..5aaadaa
--- /dev/null
+++ b/verilog/rtl/dma.v
@@ -0,0 +1,164 @@
+`timescale 1ns / 1ps
+/**
+ * Block transfer unit for the GB80 CPU.
+ *
+ * Original Author: Joseph Carlos (jdcarlos1@gmail.com)
+ * Modified: Wenting Zhang (zephray@outlook.com)
+ */
+
+/**
+ * The DMA unit.
+ *
+ * Contains the DMA register and performs a 160-byte DMA transfer when the
+ * register is written to. Each byte takes four clk cycles (read address, read
+ * data, write data, write wait), i.e. 640 clk cycles after a short start delay.
+ *
+ * @input  clk, rst              The clock and the synchronous reset.
+ * @output dma_rd, dma_wr, dma_a Read strobe, write strobe and address of the transfer.
+ * @input  dma_din               Byte read from the source address.
+ * @output dma_dout              Byte written to the destination address {8'hfe, count}.
+ * @input  mmio_wr, mmio_din     Write strobe and data for the DMA register.
+ * @output mmio_dout             Current value of the DMA register.
+ * @output dma_occupy_*          1 while a transfer occupies the ext/video/OAM bus.
+ */
+module dma(
+    input wire clk,
+    //input wire phi,
+    input wire rst,                   // synchronous, active high
+    output reg dma_rd,                // read strobe, set while a source byte is fetched
+    output reg dma_wr,                // write strobe, set while a byte is stored
+    //output wire dma_rd_comb,
+    //output wire dma_wr_comb,
+    output reg [15:0] dma_a,          // source or destination address of the current access
+    input wire [7:0] dma_din,         // byte read from the source
+    output reg [7:0] dma_dout,        // byte written to the destination
+    input wire mmio_wr,               // write to the DMA register: (re)starts a transfer
+    input wire [7:0] mmio_din,
+    output wire [7:0] mmio_dout,      // reads back the DMA register
+    output wire dma_occupy_extbus,
+    output wire dma_occupy_vidbus,
+    output wire dma_occupy_oambus
+    );
+
+    // DMA data blocks /////////////////////////////////////////////////////////
+
+    reg [7:0] dma_start_addr;         // high byte of the source address
+    reg [7:0] count;                  // byte index during a transfer, delay counter in DMA_DELAY
+
+    assign mmio_dout = dma_start_addr;
+
+    reg cpu_mem_disable;              // set from the first read until the FSM is back in DMA_IDLE
+
+    assign dma_occupy_extbus = cpu_mem_disable &
+        ((dma_start_addr <= 8'h7f) || (dma_start_addr >= 8'ha0));  // source outside 0x80xx-0x9fxx
+    assign dma_occupy_vidbus = cpu_mem_disable &
+        ((dma_start_addr >= 8'h80) && (dma_start_addr <= 8'h9f));  // source inside 0x80xx-0x9fxx
+    assign dma_occupy_oambus = cpu_mem_disable;                    // destination is always 0xfexx
+
+    // DMA transfer logic //////////////////////////////////////////////////////
+
+    localparam [2:0] DMA_IDLE                = 3'd0;  // sized to match the state register
+    localparam [2:0] DMA_TRANSFER_READ_ADDR  = 3'd1;
+    localparam [2:0] DMA_TRANSFER_READ_DATA  = 3'd2;
+    localparam [2:0] DMA_TRANSFER_WRITE_DATA = 3'd3;
+    localparam [2:0] DMA_TRANSFER_WRITE_WAIT = 3'd4;
+    localparam [2:0] DMA_DELAY               = 3'd5;
+
+    reg [2:0] state;
+
+    always @(posedge clk) begin
+        if (rst) begin
+            dma_start_addr <= 8'h00;
+        end
+        else begin
+            if (mmio_wr) begin
+                // Writing is always valid regardless of the state
+                dma_start_addr <= mmio_din;
+            end
+        end
+    end
+
+    always @(posedge clk) begin
+        if (rst) begin
+            state <= DMA_IDLE;
+            count <= 8'd0;
+            dma_wr <= 1'b0;
+            dma_rd <= 1'b0;
+            dma_a <= 16'h0000;   // give the address output a defined value before the first transfer
+            dma_dout <= 8'h00;   // likewise for the data output
+            cpu_mem_disable <= 1'b0;
+        end
+        else begin
+            case (state)
+                DMA_IDLE: begin
+                    dma_wr <= 1'b0;
+                    dma_rd <= 1'b0;
+                    cpu_mem_disable <= 1'b0;
+                    if (mmio_wr) begin
+                        // Transfer starts on next cycle
+                        state <= DMA_DELAY;
+                        count <= 8'd3; // Delay before start
+                    end
+                    else count <= 8'd0;
+                end
+                DMA_DELAY: begin
+                    if (count != 8'd0) begin
+                        count <= count - 1;
+                    end
+                    else begin
+                        state <= DMA_TRANSFER_READ_ADDR; // count is 0 here, so the transfer starts at byte 0
+                    end
+                end
+                DMA_TRANSFER_READ_ADDR: begin
+                    dma_wr <= 1'b0;
+                    cpu_mem_disable <= 1'b1;
+                    // Load the temp register with data from memory
+                    dma_a <= {dma_start_addr, count}; // Output read address
+                    dma_rd <= 1'b1;
+                    if (mmio_wr) begin // Allow re-triggering
+                        state <= DMA_DELAY;
+                        count <= 8'd3; // Delay before start
+                    end
+                    else
+                        state <= DMA_TRANSFER_READ_DATA;
+                end
+                DMA_TRANSFER_READ_DATA: begin
+                    state <= DMA_TRANSFER_WRITE_DATA;
+                    // Basically wait
+                end
+                DMA_TRANSFER_WRITE_DATA: begin
+                    // Read data
+                    dma_dout <= dma_din;
+                    dma_rd <= 1'b0;
+                    // Write the temp register to memory
+                    dma_a <= {8'hfe, count}; // Output write address
+                    dma_wr <= 1'b1;
+                    if (mmio_wr) begin // Allow re-triggering
+                        state <= DMA_DELAY;
+                        count <= 8'd3; // Delay before start
+                    end
+                    else
+                        state <= DMA_TRANSFER_WRITE_WAIT;
+                end
+                DMA_TRANSFER_WRITE_WAIT: begin
+                    // Wait
+                    if (mmio_wr) begin // Allow re-triggering
+                        state <= DMA_DELAY;
+                        count <= 8'd3; // Delay before start
+                    end
+                    else
+                        if (count == 8'h9f) begin // byte 0x9f was the last of the 160 bytes
+                            state <= DMA_IDLE;
+                            count <= 8'd0;
+                        end
+                        else begin
+                            state <= DMA_TRANSFER_READ_ADDR;
+                            count <= count + 8'd1;
+                        end
+                end
+                default: state <= DMA_IDLE; // unused encodings 6 and 7: fall back to idle instead of hanging
+            endcase
+        end
+    end
+
+endmodule // dma
diff --git a/verilog/rtl/mbc5.v b/verilog/rtl/mbc5.v
new file mode 100644
index 0000000..97ffd67
--- /dev/null
+++ b/verilog/rtl/mbc5.v
@@ -0,0 +1,82 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 23:34:43 03/15/2018
+// Design Name:
+// Module Name: mbc5
+// Project Name:
+// Target Devices:
+// Tool versions:
+// Description:
+//
+// Dependencies:
+//
+// Revision:
+// Revision 0.01 - File Created
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+module mbc5(
+    input vb_clk,
+    input [15:12] vb_a,
+    input [7:0] vb_d,
+    input vb_wr,
+    input vb_rd,
+    input vb_rst,
+    output [22:14] rom_a,
+    output [16:13] ram_a,
+    output rom_cs_n,
+    output ram_cs_n
+    );
+
+    // MBC5 bank controller. Only address bits [15:12] are visible, so every
+    // region check below is a direct decode of that nibble.
+
+    // Mapper state
+    reg [8:0] bank_rom;          // ROM bank driven onto rom_a outside the low window
+    reg [3:0] bank_ram;          // RAM bank driven onto ram_a
+    reg ram_unlocked = 1'b0;     // RAM access enable, written through 0x0000-0x1FFF
+    reg wr_prev;                 // vb_wr as sampled on the previous vb_clk edge
+
+    // Address region decode
+    wire in_rom = ~vb_a[15];                    // 0x0000-0x7FFF
+    wire in_rom_lo = (vb_a[15:14] == 2'b00);    // 0x0000-0x3FFF
+    wire in_ram = (vb_a[15:13] == 3'b101);      // 0xA000-0xBFFF
+
+    // Register writes are taken on the rising edge of vb_wr
+    wire wr_rise = vb_wr & ~wr_prev;
+
+    // Active-low chip selects, both held inactive during reset
+    assign rom_cs_n = !(in_rom & !vb_rst);
+    assign ram_cs_n = !(in_ram & ram_unlocked & !vb_rst);
+
+    // Bank outputs: the low ROM window always maps bank 0
+    assign rom_a = in_rom_lo ? 9'd0 : bank_rom;
+    assign ram_a = bank_ram;
+
+    // Mapper registers, asynchronous reset
+    always @(posedge vb_clk or posedge vb_rst) begin
+        if (vb_rst) begin
+            wr_prev <= 1'b0;
+            bank_rom <= 9'd1;
+            bank_ram <= 4'd0;
+            ram_unlocked <= 1'b0;
+        end
+        else begin
+            wr_prev <= vb_wr;
+            if (wr_rise) begin
+                // The written register is selected by the address nibble
+                case (vb_a[15:12])
+                    4'h0, 4'h1: ram_unlocked <= (vb_d[3:0] == 4'hA); // low nibble 0xA enables, anything else disables
+                    4'h2: bank_rom[7:0] <= vb_d[7:0];                // ROM bank bits 7:0
+                    4'h3: bank_rom[8] <= vb_d[0];                    // ROM bank bit 8
+                    4'h4, 4'h5: bank_ram <= vb_d[3:0];               // RAM bank
+                    default: ;                                       // no mapper register in the other regions
+                endcase
+            end
+        end
+    end
+
+endmodule
diff --git a/verilog/rtl/ppu.v b/verilog/rtl/ppu.v
new file mode 100644
index 0000000..bf4e1e8
--- /dev/null
+++ b/verilog/rtl/ppu.v
@@ -0,0 +1,855 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 18:48:36 02/14/2018
+// Design Name:
+// Module Name: ppu
+// Project Name:
+// Target Devices:
+// Tool versions:
+// Description:
+// GameBoy PPU
+// Additional Comments:
+// There are three hardware layers in the GameBoy PPU: Background, Window, and
+// Object (or sprites).
+//
+// Window will render above the background and the object can render above the
+// background or under the background. Each object have a priority bit to
+// indicate where it should be rendered.
+//
+// Background, Window, and Object can be individually turned on or off. When
+// nothing is turned on, it displays white.
+//
+// The whole render logic does NOT require a scanline buffer to work, and it
+// runs at 4MHz (VRAM runs at 2MHz)
+//
+// There are two main parts of the logic, implemented in a big FSM. The first
+// one is the fetch unit, and the other is the pixel FIFO.
+//
+// The pixel FIFO shifts out one pixel when it contains more than 8 pixels, the
+// fetch unit would generally render 8 pixels in 6 cycles (so 2 wait cycles are
+// inserted so they are in sync generally). When there are not enough pixels,
+// the FIFO would stop and wait for the fetch unit.
+//
+// Window Trigger is handled in the next state logic; there is a distinct state
+// for the PPU to switch from background rendering to window rendering (flush
+// the FIFO and add wait cycles).
+//
+// Object Trigger is handled in the state change block, in order to back up the
+// previous state. The current RAM address is also backed up during the handling
+// of object rendering. Once all the objects at this position have been rendered,
+// the render state machine can be restored to its previous state.
+//
+// The output pixel clock is the inverted main clock, which is the same as the
+// real Game Boy. Pixel data is put on the pixel bus on the negedge of the
+// clock, so the LCD latches the data on the posedge. The original Game Boy
+// used a gated clock to control whether the output is valid. Since gated clocks
+// are not recommended, I use a valid signal to indicate whether the output
+// should be considered valid.
+//////////////////////////////////////////////////////////////////////////////////
+`default_nettype wire
+module ppu(
+ input clk,
+ input rst,
+ // MMIO Bus, 0xFF40 - 0xFF4B, always visible to CPU
+ input wire [15:0] mmio_a,
+ output reg [7:0] mmio_dout,
+ input wire [7:0] mmio_din,
+ input wire mmio_rd,
+ input wire mmio_wr,
+ // VRAM Bus, 0x8000 - 0x9FFF
+ input wire [15:0] vram_a,
+ output wire [7:0] vram_dout,
+ input wire [7:0] vram_din,
+ input wire vram_rd,
+ input wire vram_wr,
+ // OAM Bus, 0xFE00 - 0xFE9F
+ input wire [15:0] oam_a,
+ output wire [7:0] oam_dout,
+ input wire [7:0] oam_din,
+ input wire oam_rd,
+ input wire oam_wr,
+ // Interrupt interface
+ output reg int_vblank_req,
+ output reg int_lcdc_req,
+ input int_vblank_ack,
+ input int_lcdc_ack,
+ // Pixel output
+ output cpl, // Pixel Clock, = ~clk
+ output reg [1:0] pixel, // Pixel Output
+ output reg valid, // Pixel Valid
+ output reg hs, // Horizontal Sync, High Valid
+ output reg vs, // Vertical Sync, High Valid
+ //Debug output
+ output [7:0] scx,
+ output [7:0] scy,
+ output [4:0] state
+ );
+
+ // Global Wires ?
+ integer i;
+
+ // PPU registers
+ reg [7:0] reg_lcdc; //$FF40 LCD Control (R/W)
+ reg [7:0] reg_stat; //$FF41 LCDC Status (R/W)
+ reg [7:0] reg_scy; //$FF42 Scroll Y (R/W)
+ reg [7:0] reg_scx; //$FF43 Scroll X (R/W)
+ reg [7:0] reg_ly; //$FF44 LCDC Y-Coordinate (R) Write will reset the counter
+ reg [7:0] reg_dma; //$FF46 DMA, actually handled outside of PPU for now
+ reg [7:0] reg_lyc; //$FF45 LY Compare (R/W)
+ reg [7:0] reg_bgp; //$FF47 BG Palette Data (R/W) Non-CGB mode only
+ reg [7:0] reg_obp0; //$FF48 Object Palette 0 Data (R/W) Non-CGB mode only
+ reg [7:0] reg_obp1; //$FF49 Object Palette 1 Data (R/W) Non-CGB mode only
+ reg [7:0] reg_wy; //$FF4A Window Y Position (R/W)
+ reg [7:0] reg_wx; //$FF4B Window X Position (R/W)
+
+ // Some interrupt related register
+ reg [7:0] reg_ly_last;
+ reg [1:0] reg_mode_last; // Next mode based on next state
+
+ wire reg_lcd_en = reg_lcdc[7]; //0=Off, 1=On
+ wire reg_win_disp_sel = reg_lcdc[6]; //0=9800-9BFF, 1=9C00-9FFF
+ wire reg_win_en = reg_lcdc[5]; //0=Off, 1=On
+ wire reg_bg_win_data_sel = reg_lcdc[4]; //0=8800-97FF, 1=8000-8FFF
+ wire reg_bg_disp_sel = reg_lcdc[3]; //0=9800-9BFF, 1=9C00-9FFF
+ wire reg_obj_size = reg_lcdc[2]; //0=8x8, 1=8x16
+ wire reg_obj_en = reg_lcdc[1]; //0=Off, 1=On
+ wire reg_bg_disp = reg_lcdc[0]; //0=Off, 1=On
+ wire reg_lyc_int = reg_stat[6];
+ wire reg_oam_int = reg_stat[5];
+ wire reg_vblank_int = reg_stat[4];
+ wire reg_hblank_int = reg_stat[3];
+ wire reg_coin_flag = reg_stat[2];
+ wire [1:0] reg_mode = reg_stat[1:0];
+
+ localparam PPU_MODE_H_BLANK = 2'b00;
+ localparam PPU_MODE_V_BLANK = 2'b01;
+ localparam PPU_MODE_OAM_SEARCH = 2'b10;
+ localparam PPU_MODE_PIX_TRANS = 2'b11;
+
+ localparam PPU_PAL_BG = 2'b00;
+ localparam PPU_PAL_OB0 = 2'b01;
+ localparam PPU_PAL_OB1 = 2'b10;
+
+ reg [12:0] vram_addr_bg;
+ reg [12:0] vram_addr_obj;
+ wire [12:0] vram_addr_int;
+ wire [12:0] vram_addr_ext;
+ wire vram_addr_int_sel; // 0 - BG, 1 - OBJ
+
+ assign vram_addr_int = (vram_addr_int_sel == 1'b1) ? (vram_addr_obj) : (vram_addr_bg);
+
+ wire vram_access_ext = ((reg_mode == PPU_MODE_H_BLANK)||
+ (reg_mode == PPU_MODE_V_BLANK)||
+ (reg_mode == PPU_MODE_OAM_SEARCH));
+ wire vram_access_int = ~vram_access_ext;
+ wire oam_access_ext = ((reg_mode == PPU_MODE_H_BLANK)||
+ (reg_mode == PPU_MODE_V_BLANK));
+
+ wire [12:0] window_map_addr = (reg_win_disp_sel) ? (13'h1C00) : (13'h1800);
+ wire [12:0] bg_map_addr = (reg_bg_disp_sel) ? (13'h1C00) : (13'h1800);
+ wire [12:0] bg_window_tile_addr = (reg_bg_win_data_sel) ? (13'h0000) : (13'h0800);
+
+ // PPU Memories
+
+ // 8 bit WR, 16 bit RD, 160Bytes OAM
+ reg [7:0] oam_u [0: 79];
+ reg [7:0] oam_l [0: 79];
+ reg [7:0] oam_rd_addr_int;
+ wire [7:0] oam_rd_addr;
+ wire [7:0] oam_wr_addr;
+ reg [15:0] oam_data_out;
+ wire [7:0] oam_data_out_byte;
+ wire [7:0] oam_data_in;
+ wire oam_we;
+
+ always @ (negedge clk) // OAM storage is accessed on the falling edge of clk
+ begin
+ if (oam_we) begin
+ if (oam_wr_addr[0]) // odd byte address -> oam_u
+ oam_u[oam_wr_addr[7:1]] <= oam_data_in;
+ else // even byte address -> oam_l
+ oam_l[oam_wr_addr[7:1]] <= oam_data_in;
+ end
+ else begin // the read register is only updated on edges without a write
+ oam_data_out <= {oam_u[oam_rd_addr[7:1]], oam_l[oam_rd_addr[7:1]]}; // 16-bit word: {odd byte, even byte}
+ end
+ end
+
+ assign oam_wr_addr = oam_a[7:0];
+ assign oam_rd_addr = (oam_access_ext) ? (oam_a[7:0]) : (oam_rd_addr_int);
+ assign oam_data_in = oam_din;
+ assign oam_data_out_byte = (oam_rd_addr[0]) ? oam_data_out[15:8] : oam_data_out[7:0];
+ //assign oam_we = (wr)&(oam_access_ext);
+ assign oam_we = oam_wr; // What if always allow OAM access?
+ assign oam_dout = (oam_access_ext) ? (oam_data_out_byte) : (8'hFF);
+
+ // 8 bit WR, 8 bit RD, 8KB VRAM
+ wire vram_we;
+ wire [12:0] vram_addr;
+ wire [7:0] vram_data_in;
+ wire [7:0] vram_data_out;
+
+ singleport_ram #(
+ .WORDS(8192)
+ ) br_vram (
+ .clka(~clk),
+ .wea(vram_we),
+ .addra(vram_addr[12:0]),
+ .dina(vram_data_in),
+ .douta(vram_data_out));
+
+ assign vram_addr_ext = vram_a[12:0];
+ assign vram_addr = (vram_access_ext) ? (vram_addr_ext) : (vram_addr_int);
+ assign vram_data_in = vram_din;
+ assign vram_we = (vram_wr)&(vram_access_ext);
+ assign vram_dout = (vram_access_ext) ? (vram_data_out) : (8'hFF);
+
+ // Pixel Pipeline
+
+ // The pixel FIFO: 16 pixels, 4 bits each (2 bits color index, 2 bits palette index)
+ // Since in and out are 8 pixels aligned, it can be modeled as a ping-pong buffer
+ // of two 32 bits (8 pixels * 4 bits) group
+ reg [63:0] pf_data; // Pixel FIFO Data
+ wire [1:0] pf_output_pixel;
+ wire [7:0] pf_output_palette;
+ wire [1:0] pf_output_pixel_id;
+ wire [1:0] pf_output_palette_id;
+ assign {pf_output_pixel_id, pf_output_palette_id} = pf_data[63:60];
+ assign pf_output_palette = (pf_output_palette_id == PPU_PAL_BG) ? (reg_bgp) :
+ (pf_output_palette_id == PPU_PAL_OB0) ? (reg_obp0) :
+ (pf_output_palette_id == PPU_PAL_OB1) ? (reg_obp1) : (8'hFF);
+ assign pf_output_pixel = (pf_output_pixel_id == 2'b11) ? (pf_output_palette[7:6]) :
+ (pf_output_pixel_id == 2'b10) ? (pf_output_palette[5:4]) :
+ (pf_output_pixel_id == 2'b01) ? (pf_output_palette[3:2]) :
+ (pf_output_pixel_id == 2'b00) ? (pf_output_palette[1:0]) : (2'b00);
+ reg [2:0] pf_empty; // Indicate if the Pixel FIFO is empty.
+ localparam PF_INITA = 3'd5; // When a line start...
+ localparam PF_INITB = 3'd4; // Line start, 2 pixels out, 8 rendered
+ localparam PF_EMPTY = 3'd3; // When the pipeline get flushed
+ localparam PF_HALF = 3'd2; // After flushed, 8 pixels in
+ localparam PF_FIN = 3'd1; // 16 pixels in, but still no wait cycles
+ localparam PF_FULL = 3'd0; // Normal
+
+ assign cpl = ~clk;
+ //assign pixel = pf_output_pixel;
+
+ // HV Timing
+ localparam PPU_H_FRONT = 9'd76;
+ localparam PPU_H_SYNC = 9'd4; // So front porch + sync = OAM search
+ localparam PPU_H_TOTAL = 9'd456;
+ localparam PPU_H_PIXEL = 9'd160;
+ // 8 null pixels in the front for objects which have x < 8, 8 bit counter
+ localparam PPU_H_OUTPUT = 8'd168;
+ localparam PPU_V_ACTIVE = 8'd144;
+ localparam PPU_V_BACK = 8'd9;
+ localparam PPU_V_SYNC = 8'd1;
+ localparam PPU_V_BLANK = 8'd10;
+ localparam PPU_V_TOTAL = 8'd154;
+
+ // Raw timing counter
+ reg [8:0] h_count;
+ reg [7:0] v_count;
+
+ // HV counter
+ always @(posedge clk) // raw line/frame timing: h_count runs 0..PPU_H_TOTAL-1, v_count runs 0..PPU_V_TOTAL-1
+ begin
+ if (rst) begin
+ h_count <= 0;
+ hs <= 0;
+ v_count <= 0;
+ vs <= 0;
+ end
+ else begin
+ if(h_count < PPU_H_TOTAL - 1)
+ h_count <= h_count + 1'b1;
+ else begin // last clock of the line: wrap h_count and step the line counter
+ h_count <= 0;
+ if(v_count < PPU_V_TOTAL - 1)
+ v_count <= v_count + 1'b1;
+ else
+ v_count <= 0;
+ if(v_count == PPU_V_ACTIVE + PPU_V_BACK - 1) // end of line 152: vs goes high for the next line
+ vs <= 1;
+ if(v_count == PPU_V_ACTIVE + PPU_V_BACK + PPU_V_SYNC - 1) // end of line 153: vs goes low again
+ vs <= 0;
+ end
+ if(h_count == PPU_H_FRONT - 1) // hs is high while h_count is 76..79
+ hs <= 1;
+ if(h_count == PPU_H_FRONT + PPU_H_SYNC - 1)
+ hs <= 0;
+ end
+ end
+
+ // Render FSM
+ localparam S_IDLE = 5'd0;
+ localparam S_BLANK = 5'd1; // H Blank and V Blank
+ localparam S_OAMX = 5'd2; // OAM Search X check
+ localparam S_OAMY = 5'd3; // OAM Search Y check
+ localparam S_FTIDA = 5'd4; // Fetch Read Tile ID Stage A (Address Setup)
+ localparam S_FTIDB = 5'd5; // Fetch Read Tile ID Stage B (Data Read)
+ localparam S_FRD0A = 5'd6; // Fetch Read Data 0 Stage A
+ localparam S_FRD0B = 5'd7; // Fetch Read Data 0 Stage B
+ localparam S_FRD1A = 5'd8; // Fetch Read Data 1 Stage A
+ localparam S_FRD1B = 5'd9; // Fetch Read Data 1 Stage B
+ localparam S_FWAITA = 5'd10; // Fetch Wait Stage A (Idle)
+ localparam S_FWAITB = 5'd11; // Fetch Wait Stage B (Load to FIFO?)
+ localparam S_SWW = 5'd12; // Fetch Switch to Window
+ localparam S_OAMRDA = 5'd13; // OAM Read Stage A
+ localparam S_OAMRDB = 5'd14; // OAM Read Stage B
+ localparam S_OFRD0A = 5'd15; // Object Fetch Read Data 0 Stage A
+ localparam S_OFRD0B = 5'd16; // Object Fetch Read Data 0 Stage B
+ localparam S_OFRD1A = 5'd17; // Object Fetch Read Data 1 Stage A
+ localparam S_OFRD1B = 5'd18; // Object Fetch Read Data 1 Stage B
+ localparam S_OWB = 5'd19; // Object Write Back
+
+ localparam PPU_OAM_SEARCH_LENGTH = 6'd40;
+
+ reg [2:0] h_drop; //Drop pixels when SCX % 8 != 0
+ wire [2:0] h_extra = reg_scx[2:0]; //Extra line length when SCX % 8 != 0
+ reg [7:0] h_pix_render; // Horizontal Render Pixel pointer
+ reg [7:0] h_pix_output; // Horizontal Output Pixel counter
+ wire [7:0] h_pix_obj = h_pix_output + 1'b1; // Coordinate used to trigger the object rendering
+ wire [7:0] v_pix = v_count;
+ wire [7:0] v_pix_in_map = v_pix + reg_scy;
+ wire [7:0] v_pix_in_win = v_pix - reg_wy;
+
+ reg [4:0] r_state = 0;
+ reg [4:0] r_next_backup;
+ reg [4:0] r_next_state;
+ wire is_in_v_blank = ((v_count >= PPU_V_ACTIVE) && (v_count < PPU_V_ACTIVE + PPU_V_BLANK));
+
+ reg window_triggered; // Indicate whether window has been triggered, should be replaced by a edge detector
+ wire render_window_or_bg = window_triggered;
+ wire window_trigger = (((h_pix_output) == (reg_wx))&&(v_pix >= reg_wy)&&(reg_win_en)&&(~window_triggered)) ? 1 : 0;
+
+ wire [2:0] line_to_tile_v_offset_bg = v_pix_in_map[2:0]; // Current line in a tile being rendered
+ wire [4:0] line_in_tile_v_bg = v_pix_in_map[7:3]; // Current tile Y coordinate being rendered
+ wire [2:0] line_to_tile_v_offset_win = v_pix_in_win[2:0];
+ wire [4:0] line_in_tile_v_win = v_pix_in_win[7:3];
+ wire [2:0] line_to_tile_v_offset = (render_window_or_bg) ? (line_to_tile_v_offset_win) : (line_to_tile_v_offset_bg);
+ wire [4:0] line_in_tile_v = (render_window_or_bg) ? (line_in_tile_v_win) : (line_in_tile_v_bg);
+
+ wire [4:0] h_tile_bg = h_pix_render[7:3] + reg_scx[7:3]; // Current tile X coordinate being rendered
+ wire [4:0] h_tile_win = h_pix_render[7:3];
+ wire [4:0] h_tile = (render_window_or_bg) ? (h_tile_win) : (h_tile_bg);
+
+ wire [12:0] current_map_address = (((render_window_or_bg) ? (window_map_addr) : (bg_map_addr)) + (line_in_tile_v) * 32 + {8'd0, h_tile}); //Background address
+ reg [7:0] current_tile_id;
+ wire [7:0] current_tile_id_adj = {~((reg_bg_win_data_sel)^(current_tile_id[7])), current_tile_id[6:0]}; // Adjust for 8800 Adressing mode
+ wire [12:0] current_tile_address_0 = (bg_window_tile_addr) + current_tile_id_adj * 16 + (line_to_tile_v_offset * 2);
+ wire [12:0] current_tile_address_1 = (current_tile_address_0) | 13'h0001;
+ reg [7:0] current_tile_data_0;
+ reg [7:0] current_tile_data_1;
+
+ // Data that will be pushed into pixel FIFO
+ // Organized in pixels
+ reg [31:0] current_fetch_result;
+ always@(current_tile_data_1, current_tile_data_0) begin // repack the two tile data bytes into eight 4-bit FIFO entries
+ for (i = 0; i < 8; i = i + 1) begin // NOTE(review): i is the module-level integer that other always blocks also use
+ current_fetch_result[i*4+3] = current_tile_data_1[i]; // color index, upper bit
+ current_fetch_result[i*4+2] = current_tile_data_0[i]; // color index, lower bit
+ current_fetch_result[i*4+1] = PPU_PAL_BG[1]; // Fetch could only fetch BG
+ current_fetch_result[i*4+0] = PPU_PAL_BG[0];
+ end
+ end
+
+ reg [5:0] oam_search_count; // Counter during OAM search stage
+ reg [5:0] obj_visible_list [0:9]; // Total visible list
+ reg [7:0] obj_trigger_list [0:9]; // Where the obj should be triggered
+ reg [7:0] obj_y_list [0:9]; // Where the obj is
+ reg obj_valid_list [0:9]; // Is obj visible entry valid
+ reg [3:0] oam_visible_count; // ???
+
+ wire [7:0] oam_search_x;
+ wire [7:0] oam_search_y;
+ wire [7:0] obj_size_h = (reg_obj_size == 1'b1) ? (8'd16) : (8'd8);
+ wire [7:0] obj_h_upper_boundary = (v_pix + 8'd16);
+ wire [7:0] obj_h_lower_boundary = obj_h_upper_boundary - obj_size_h;
+
+ reg [3:0] obj_trigger_id; // The object currently being/ or have been rendered, in the visible list
+
+ localparam OBJ_TRIGGER_NOT_FOUND = 4'd15;
+
+ // Cascade mux used to implement the searching of next id would be triggered
+ reg [3:0] obj_trigger_id_from[0:10];
+ reg [3:0] obj_trigger_id_next;
+ always @(*) begin : obj_trigger_search // also re-evaluates when obj_trigger_list / obj_valid_list change
+     integer k; // block-local index: this block no longer touches the module-level i
+     obj_trigger_id_from[10] = OBJ_TRIGGER_NOT_FOUND; // There is no more after the 10th
+     for (k = 9; k >= 0; k = k - 1) begin // entry k: k itself on a valid match, else the result cascaded from k+1
+         /* verilator lint_off WIDTH */
+         obj_trigger_id_from[k] =
+             ((h_pix_obj == obj_trigger_list[k])&&(obj_valid_list[k])) ? (k) : (obj_trigger_id_from[k+1]);
+         /* verilator lint_on WIDTH */
+     end
+     if (obj_trigger_id == OBJ_TRIGGER_NOT_FOUND) // currently not triggered yet
+         obj_trigger_id_next = obj_trigger_id_from[0]; // Search from start
+     else
+         obj_trigger_id_next = obj_trigger_id_from[obj_trigger_id + 1]; // Search start from next one
+ end
+
+ //!-- DEBUG --
+ //wire [3:0] obj_trigger_id_next = ((h_pix_obj == obj_trigger_list[4'd0])&&(obj_valid_list[4'd0])) ? (4'd0) : (4'd15);
+
+ wire obj_trigger = ((reg_obj_en)&&(obj_trigger_id_next != OBJ_TRIGGER_NOT_FOUND)) ? 1 : 0;
+ //wire obj_trigger = 0;
+
+ wire [5:0] obj_triggered = obj_visible_list[obj_trigger_id]; // The global id of object being rendered
+ wire [7:0] current_obj_y = obj_y_list[obj_trigger_id];
+ wire [7:0] current_obj_x = obj_trigger_list[obj_trigger_id]; //h_pix gets incremented before render
+ reg [7:0] current_obj_tile_id_raw; // Tile ID without considering the object size
+ reg [7:0] current_obj_flags; // Flags
+ wire current_obj_to_bg_priority = current_obj_flags[7];
+ wire current_obj_y_flip = current_obj_flags[6];
+ wire current_obj_x_flip = current_obj_flags[5];
+ wire current_obj_pal_id = current_obj_flags[4];
+ wire [1:0] current_obj_pal= (current_obj_pal_id) ? (PPU_PAL_OB1) : (PPU_PAL_OB0);
+ /* verilator lint_off WIDTH */
+ wire [3:0] line_to_obj_v_offset_raw = (v_pix + 8'd16 - current_obj_y); // Compensate 16 pixel offset and truncate to 4 bits
+ /* verilator lint_on WIDTH */
+ wire [7:0] current_obj_tile_id = (reg_obj_size == 1'b1) ?
+ ({current_obj_tile_id_raw[7:1], (((line_to_obj_v_offset_raw[3])^(current_obj_y_flip)) ? 1'b1 : 1'b0)}) : // Select Hi or Lo tile
+ (current_obj_tile_id_raw); // Use tile ID directly
+ wire [2:0] line_to_obj_v_offset = (current_obj_y_flip) ? (~line_to_obj_v_offset_raw[2:0]) : (line_to_obj_v_offset_raw[2:0]);
+
+ wire [12:0] current_obj_address_0 = current_obj_tile_id * 16 + line_to_obj_v_offset * 2;
+ wire [12:0] current_obj_address_1 = current_obj_address_0 | 13'h0001;
+ reg [7:0] current_obj_tile_data_0;
+ reg [7:0] current_obj_tile_data_1;
+ // Data that will be merged into pixel FIFO
+ // Organized in pixels
+ reg [31:0] merge_result;
+ always@(*) begin // overlay the fetched object row onto the upper 32 bits of the pixel FIFO
+ for (i = 0; i < 8; i = i + 1) begin // one pass per 4-bit FIFO entry
+ if (
+ ((current_obj_tile_data_1[i] != 1'b0)||(current_obj_tile_data_0[i] != 1'b0))&& // object color index is non-zero
+ ((pf_data[32+i*4+1] == PPU_PAL_BG[1])&&(pf_data[32+i*4+0] == PPU_PAL_BG[0]))&& // FIFO entry still carries the BG palette id
+ (
+ ((current_obj_to_bg_priority)&&(pf_data[32+i*4+3] == 1'b0)&&(pf_data[32+i*4+2] == 1'b0))|| // priority flag set: only over BG color index 0
+ (~current_obj_to_bg_priority)
+ )
+ ) //(OBJ is not transparent) and (FIFO pixel uses the BG palette) and ((BG priority and BG is transparent) or (OBJ priority))
+ begin
+ merge_result[i*4+3] = current_obj_tile_data_1[i]; // take color index and palette id from the object
+ merge_result[i*4+2] = current_obj_tile_data_0[i];
+ merge_result[i*4+1] = current_obj_pal[1];
+ merge_result[i*4+0] = current_obj_pal[0];
+ end
+ else begin // otherwise the FIFO entry passes through unchanged
+ merge_result[i*4+3] = pf_data[32+i*4+3];
+ merge_result[i*4+2] = pf_data[32+i*4+2];
+ merge_result[i*4+1] = pf_data[32+i*4+1];
+ merge_result[i*4+0] = pf_data[32+i*4+0];
+ end
+ end
+ end
+
+    // High during the OAM read-back state and the four object tile fetch states
+    assign vram_addr_int_sel =
+        (r_state == S_OAMRDB) || (r_state == S_OFRD0A) || (r_state == S_OFRD0B) ||
+        (r_state == S_OFRD1A) || (r_state == S_OFRD1B);
+
+    // Current mode logic: reg_stat[1:0] is registered from the current state
+    always @ (posedge clk)
+    begin
+        if (rst)
+            reg_stat[1:0] <= PPU_MODE_V_BLANK;
+        else
+            case (r_state)
+                // Idle reports V-blank while the LCD is enabled, H-blank otherwise
+                S_IDLE:
+                    reg_stat[1:0] <= (reg_lcd_en) ? (PPU_MODE_V_BLANK) : (PPU_MODE_H_BLANK);
+                S_BLANK:
+                    reg_stat[1:0] <= (is_in_v_blank) ? (PPU_MODE_V_BLANK) : (PPU_MODE_H_BLANK);
+                // OAM search states
+                S_OAMX, S_OAMY:
+                    reg_stat[1:0] <= PPU_MODE_OAM_SEARCH;
+                // Background fetch, wait and window switch states
+                S_FTIDA, S_FTIDB,
+                S_FRD0A, S_FRD0B,
+                S_FRD1A, S_FRD1B,
+                S_FWAITA, S_FWAITB,
+                S_SWW,
+                // Object attribute read, object tile fetch and write-back states
+                S_OAMRDA, S_OAMRDB,
+                S_OFRD0A, S_OFRD0B,
+                S_OFRD1A, S_OFRD1B,
+                S_OWB:
+                    reg_stat[1:0] <= PPU_MODE_PIX_TRANS;
+                // Any other state encoding reports V-blank
+                default:
+                    reg_stat[1:0] <= PPU_MODE_V_BLANK;
+            endcase
+    end
+
+ assign oam_search_y = oam_data_out[7:0]; // Low byte of the OAM read data is compared as Y in S_OAMY
+ assign oam_search_x = oam_data_out[15:8]; // High byte of the OAM read data is used as X in S_OAMY
+
+ // Render logic: per-state datapath actions (this block has no reset branch)
+ always @(posedge clk)
+ begin
+ reg_ly <= v_pix[7:0]; // LY follows the low 8 bits of v_pix on every clock
+
+ case (r_state)
+ // nothing to do for S_IDLE
+ S_IDLE: begin end
+ S_BLANK: begin
+ h_pix_render <= 8'd0; // Render pointer
+ oam_search_count <= 6'd0; // Restart the OAM scan
+ oam_visible_count <= 4'd0; // Drop the objects collected for the previous line
+ for (i = 0; i < 10; i = i + 1) begin
+ obj_valid_list[i] <= 1'b0;
+ end
+ oam_rd_addr_int <= 8'b0;
+ window_triggered <= 1'b0;
+ // Line start, need to render 16 pixels in 12 clocks
+ // and output 8 null pixels starting from the 4th clock
+ end
+ S_OAMX: begin
+ oam_rd_addr_int <= oam_search_count * 4; // Entries are 4 addresses apart
+ end
+ S_OAMY: begin
+ if ((oam_search_y <= obj_h_upper_boundary)&&
+ (oam_search_y > obj_h_lower_boundary)&&
+ (oam_search_x != 8'd0)&&
+ (oam_visible_count < 4'd10)) begin // Y inside the boundaries, X non-zero, fewer than 10 collected
+ obj_visible_list[oam_visible_count] <= oam_search_count;
+ obj_trigger_list[oam_visible_count] <= oam_search_x;
+ obj_y_list[oam_visible_count] <= oam_search_y;
+ obj_valid_list[oam_visible_count] <= 1'b1;
+ oam_visible_count <= oam_visible_count + 1'b1;
+ end
+ oam_search_count <= oam_search_count + 1'b1; // Advance to the next entry whether or not it was collected
+ end
+ S_FTIDA: vram_addr_bg <= current_map_address; // A states present an address, B states capture vram_data_out
+ S_FTIDB: current_tile_id <= vram_data_out;
+ S_FRD0A: vram_addr_bg <= current_tile_address_0;
+ S_FRD0B: current_tile_data_0 <= vram_data_out;
+ S_FRD1A: vram_addr_bg <= current_tile_address_1;
+ S_FRD1B: begin
+ current_tile_data_1 <= vram_data_out;
+ h_pix_render <= h_pix_render + 8'd8; // One tile row (8 pixels) fetched
+ end
+ // nothing to do for S_FWAITA, S_FWAITB
+ S_FWAITA: begin end
+ S_FWAITB: begin end
+ S_SWW: begin
+ h_pix_render <= 8'd0; // Fetching restarts from pixel 0 for the window
+ window_triggered <= 1'b1;
+ end
+ S_OAMRDA: oam_rd_addr_int <= obj_triggered * 4 + 8'd2; // Offset 2 inside the entry: tile ID and flags
+ S_OAMRDB: begin
+ current_obj_tile_id_raw <= oam_data_out[7:0];
+ current_obj_flags <= oam_data_out[15:8];
+ end
+ S_OFRD0A: vram_addr_obj <= current_obj_address_0;
+ S_OFRD0B:
+ if (current_obj_x_flip == 1'b1) // X flip: store the byte bit-reversed
+ current_obj_tile_data_0[7:0] <= {
+ vram_data_out[0], vram_data_out[1], vram_data_out[2], vram_data_out[3],
+ vram_data_out[4], vram_data_out[5], vram_data_out[6], vram_data_out[7]
+ };
+ else
+ current_obj_tile_data_0 <= vram_data_out;
+ S_OFRD1A: vram_addr_obj <= current_obj_address_1;
+ S_OFRD1B:
+ if (current_obj_x_flip == 1'b1) // X flip: store the byte bit-reversed
+ current_obj_tile_data_1[7:0] <= {
+ vram_data_out[0], vram_data_out[1], vram_data_out[2], vram_data_out[3],
+ vram_data_out[4], vram_data_out[5], vram_data_out[6], vram_data_out[7]
+ };
+ else
+ current_obj_tile_data_1 <= vram_data_out;
+ // nothing to do for S_OWB
+ S_OWB: begin end
+ default: begin
+ $display("Invalid state!");
+ end
+ endcase
+ end
+
+    // Refill data for the upper half of the pixel FIFO: entries tagged with
+    // the BG palette take the newly fetched pixel, all other entries are kept
+    reg [31:0] half_merge_result;
+    always @(current_fetch_result or pf_data) begin
+        for (i = 0; i < 8; i = i + 1) begin
+            if ((pf_data[32+i*4+1] != PPU_PAL_BG[1])||(pf_data[32+i*4+0] != PPU_PAL_BG[0]))
+                // Not a BG entry: keep what is already in the FIFO
+                {half_merge_result[i*4+3], half_merge_result[i*4+2], half_merge_result[i*4+1], half_merge_result[i*4+0]} =
+                    {pf_data[32+i*4+3], pf_data[32+i*4+2], pf_data[32+i*4+1], pf_data[32+i*4+0]};
+            else
+                // BG entry: take the fetched pixel
+                {half_merge_result[i*4+3], half_merge_result[i*4+2], half_merge_result[i*4+1], half_merge_result[i*4+0]} =
+                    {current_fetch_result[i*4+3], current_fetch_result[i*4+2],
+                     current_fetch_result[i*4+1], current_fetch_result[i*4+0]};
+        end
+    end
+
+
+ // Output logic: pixel FIFO (pf_data) shifting, refill and pixel/valid generation
+ always @(posedge clk)
+ begin
+ if (r_state == S_BLANK) begin
+ valid <= 1'b0;
+ h_pix_output <= 8'd0; // Output pointer
+ h_drop <= reg_scx[2:0]; // Number of leading pixels to discard on this line
+ pf_empty <= PF_INITA;
+ end
+ else if ((r_state == S_FTIDA) || (r_state == S_FTIDB) || (r_state == S_FRD0A) || (r_state == S_FRD0B) ||
+ (r_state == S_FRD1A) || (r_state == S_FRD1B) || (r_state == S_FWAITA) || (r_state == S_FWAITB))
+ begin
+
+ if (r_state == S_FRD1B) begin // A fetch completes: advance the FIFO fill status
+ if (pf_empty == PF_INITA) pf_empty <= PF_INITB;
+ if (pf_empty == PF_INITB) pf_empty <= PF_FIN;
+ if (pf_empty == PF_EMPTY) pf_empty <= PF_HALF;
+ if (pf_empty == PF_HALF) pf_empty <= PF_FIN;
+ end else
+ if (pf_empty == PF_FIN) pf_empty <= PF_FULL; // should NOT wait through end
+
+ // If it is in one of the output stages
+ if (pf_empty == PF_EMPTY) begin
+ // Just started, no data available
+ valid <= 1'b0;
+ end
+ else if (pf_empty == PF_HALF) begin
+ valid <= 1'b0;
+ if (r_state == S_FTIDA) begin
+ // One batch done, and they can be push into pipeline, but could not be output yet
+ // We need to be careful not to overwrite the sprites...
+ pf_data[63:32] <= half_merge_result[31:0];
+ end
+ end
+ else if (((pf_empty == PF_INITA)&&((r_state == S_FRD1A)||(r_state == S_FRD1B)))
+ ||(pf_empty == PF_INITB)||(pf_empty == PF_FULL)||(pf_empty == PF_FIN)) begin
+ if (r_state == S_FTIDA) begin // reload and shift
+ if (pf_empty == PF_INITB) begin
+ pf_data[63:0] <= {20'b0, current_fetch_result[31:0], 12'b0};
+ end
+ else begin // PF_FULL or PF_FIN
+ pf_data[63:0] <= {pf_data[59:32], current_fetch_result[31:0], 4'b0};
+ end
+ end
+ else begin // just shift
+ pf_data <= {pf_data[59:0], 4'b0}; // One pixel (4 bits) per clock
+ end
+
+ if (h_drop != 3'd0) begin // Still discarding: shift without output
+ h_drop <= h_drop - 1'd1;
+ valid <= 0;
+ end
+ else begin
+ if (h_pix_output >= 8) // The first 8 counted pixels are not flagged valid
+ valid <= 1;
+ else
+ valid <= 0;
+ pixel <= pf_output_pixel;
+ h_pix_output <= h_pix_output + 1'b1;
+ end
+ end
+ end
+ else if (r_state == S_OAMRDA) begin
+ h_pix_output <= h_pix_output - 1'b1; //revert adding
+ valid <= 1'b0;
+ end
+ else if (r_state == S_OWB) begin
+ h_pix_output <= h_pix_output + 1'b1; //restore adding
+ pf_data <= {merge_result[31:0], pf_data[31:0]}; // Write the merged object pixels into the upper half
+ valid <= 1'b0;
+ end
+ else if (r_state == S_SWW) begin
+ pf_empty <= PF_EMPTY; // Flush the pipeline
+ valid <= 1'b0;
+ end
+ else begin
+ // Not even in output stages
+ valid <= 1'b0;
+ end
+ end
+
+ // Enter Next State
+ // and handle object interrupt
+ // (sorry but I need to backup next state so I could not handle these in the next state logic)
+ always @(posedge clk)
+ begin
+ if (rst) begin
+ //h_pix_obj <= 8'b0;
+ r_state <= 0;
+ r_next_backup <= 0;
+ obj_trigger_id <= OBJ_TRIGGER_NOT_FOUND;//not triggered
+ end
+ else
+ begin
+ if (obj_trigger && (reg_mode == PPU_MODE_PIX_TRANS)) begin // An object is pending while pixels are being transferred
+ // If already in object rendering stages
+ if ((r_state == S_OFRD0A)||(r_state == S_OFRD0B)||
+ (r_state == S_OFRD1A)||(r_state == S_OFRD1B)||
+ (r_state == S_OAMRDA)||(r_state == S_OAMRDB)) begin
+ r_state <= r_next_state; // Let the running object fetch finish first
+ end
+ // Finished one object, but there is more
+ else if (r_state == S_OWB) begin
+ r_state <= S_OAMRDA; // r_next_backup is kept, so the background fetch still resumes later
+ obj_trigger_id <= obj_trigger_id_next;
+ end
+ // Not rendering object before, start now
+ else begin
+ r_next_backup <= r_next_state; // State to continue with after S_OWB
+ r_state <= S_OAMRDA;
+ obj_trigger_id <= obj_trigger_id_next;
+ end
+ end
+ else begin
+ //h_pix_obj <= h_pix_output + 8'd2;
+ r_state <= r_next_state;
+ // Finished one object, and there is no more currently
+ if (r_state == S_OWB) begin
+ obj_trigger_id <= OBJ_TRIGGER_NOT_FOUND;
+ end
+ end
+ end
+ end
+
+    // Next State Logic
+    // Combinational, since r_state itself only gets updated during posedge
+    always @(*) begin
+        if (!reg_lcd_en) r_next_state = S_IDLE; // LCD off: every state returns to idle
+        else case (r_state)
+            S_IDLE: r_next_state = ((reg_lcd_en)&(is_in_v_blank)) ? (S_BLANK) : (S_IDLE);
+            S_BLANK:
+                if (is_in_v_blank) // V-blank lasts until the final clock of the frame
+                    r_next_state = ((v_count == (PPU_V_TOTAL - 1))&&(h_count == (PPU_H_TOTAL - 1))) ? (S_OAMX) : (S_BLANK);
+                else // H-blank ends with the line, except after the last active line
+                    r_next_state = ((h_count == (PPU_H_TOTAL - 1))&&(v_count != (PPU_V_ACTIVE - 1))) ? (S_OAMX) : (S_BLANK);
+            S_OAMX: r_next_state = S_OAMY;
+            S_OAMY: r_next_state = (oam_search_count == (PPU_OAM_SEARCH_LENGTH - 1'b1)) ? (S_FTIDA) : (S_OAMX);
+            S_FTIDA, S_FTIDB, S_FRD0A, S_FRD0B, S_FRD1A, S_FRD1B, S_FWAITA, S_FWAITB:
+                if (h_pix_output == (PPU_H_OUTPUT - 1'b1)) r_next_state = S_BLANK; // line complete
+                else if (window_trigger)
+                    r_next_state = S_SWW;
+                else
+                    case (r_state)
+                        S_FTIDA:  r_next_state = S_FTIDB;
+                        S_FTIDB:  r_next_state = S_FRD0A;
+                        S_FRD0A:  r_next_state = S_FRD0B;
+                        S_FRD0B:  r_next_state = S_FRD1A;
+                        S_FRD1A:  r_next_state = S_FRD1B;
+                        S_FRD1B:  r_next_state = (pf_empty != PF_FULL) ? (S_FTIDA) : (S_FWAITA); // If fifo not full, no wait state is needed
+                        S_FWAITA: r_next_state = S_FWAITB;
+                        default:  r_next_state = S_FTIDA; // S_FWAITB
+                    endcase
+            S_SWW: r_next_state = (h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : (S_FTIDA);
+            S_OAMRDA: r_next_state = S_OAMRDB;
+            S_OAMRDB: r_next_state = S_OFRD0A;
+            S_OFRD0A: r_next_state = S_OFRD0B;
+            S_OFRD0B: r_next_state = S_OFRD1A;
+            S_OFRD1A: r_next_state = S_OFRD1B;
+            S_OFRD1B: r_next_state = S_OWB;
+            S_OWB: r_next_state = r_next_backup; // resume the state saved when the object fetch started
+            default: r_next_state = S_IDLE;
+        endcase
+    end
+
+    // Interrupt
+    // reg_stat[2] is the registered result of comparing reg_ly with reg_lyc
+    always @(posedge clk) begin
+        // TODO: what's the timing for this?
+        if (rst) reg_stat[2] <= 1'b0;
+        else     reg_stat[2] <= (reg_ly == reg_lyc);
+    end
+
+ always @(posedge clk) // Interrupt requests: set on an edge of the watched condition, cleared by the matching ack
+ begin
+ if (rst) begin
+ int_vblank_req <= 0;
+ int_lcdc_req <= 0;
+ reg_ly_last[7:0] <= 0; // NOTE(review): reg_mode_last has no reset value in this block - confirm that is intended
+ //reg_stat[1:0] <= PPU_MODE_V_BLANK;
+ end
+ else
+ begin
+ if ((reg_mode == PPU_MODE_V_BLANK)&&(reg_mode_last != PPU_MODE_V_BLANK)) // Mode just became V-blank
+ int_vblank_req <= 1;
+ else if (int_vblank_ack)
+ int_vblank_req <= 0;
+ if (((reg_lyc_int == 1'b1)&&(reg_ly == reg_lyc)&&(reg_ly_last != reg_lyc))|| // LY just became equal to LYC
+ ((reg_oam_int == 1'b1)&&(reg_mode == PPU_MODE_OAM_SEARCH)&&(reg_mode_last != PPU_MODE_OAM_SEARCH))|| // Entered OAM search
+ ((reg_vblank_int == 1'b1)&&(reg_mode == PPU_MODE_V_BLANK)&&(reg_mode_last != PPU_MODE_V_BLANK))|| // Entered V-blank
+ ((reg_hblank_int == 1'b1)&&(reg_mode == PPU_MODE_H_BLANK)&&(reg_mode_last != PPU_MODE_H_BLANK))) // Entered H-blank
+ int_lcdc_req <= 1;
+ else if (int_lcdc_ack)
+ int_lcdc_req <= 0;
+ reg_ly_last <= reg_ly; // Previous-cycle copies used for the edge detection above
+ reg_mode_last <= reg_mode;
+ end
+ end
+
+    // Bus RW
+    // Bus RW - Combinational Read
+    // MMIO Bus: addresses outside FF40-FF4B read back as 8'hFF
+    always @(*)
+    begin
+        case (mmio_a)
+            16'hFF40: mmio_dout = reg_lcdc;
+            16'hFF41: mmio_dout = reg_stat;
+            16'hFF42: mmio_dout = reg_scy;
+            16'hFF43: mmio_dout = reg_scx;
+            16'hFF44: mmio_dout = reg_ly;
+            16'hFF45: mmio_dout = reg_lyc;
+            16'hFF46: mmio_dout = reg_dma;
+            16'hFF47: mmio_dout = reg_bgp;
+            16'hFF48: mmio_dout = reg_obp0;
+            16'hFF49: mmio_dout = reg_obp1;
+            16'hFF4A: mmio_dout = reg_wy;
+            16'hFF4B: mmio_dout = reg_wx;
+            default:  mmio_dout = 8'hFF;
+        endcase
+    end
+
+    // Bus RW - Sequential Write
+    // VRAM and OAM access are not handled here
+    always @(posedge clk)
+    begin
+        if (rst) begin
+            // Reset values
+            reg_lcdc      <= 8'h00;
+            reg_stat[7:3] <= 5'h00; // bits 2:0 are driven by the mode and LYC logic
+            reg_scy       <= 8'h00;
+            reg_scx       <= 8'h00;
+            reg_lyc       <= 8'h00;
+            reg_dma       <= 8'h00;
+            reg_bgp       <= 8'hFC;
+            reg_obp0      <= 8'h00;
+            reg_obp1      <= 8'h00;
+            reg_wy        <= 8'h00;
+            reg_wx        <= 8'h00;
+        end
+        else if (mmio_wr) begin
+            // Register select by bus address
+            case (mmio_a)
+                16'hFF40: reg_lcdc      <= mmio_din;
+                16'hFF41: reg_stat[7:3] <= mmio_din[7:3];
+                16'hFF42: reg_scy       <= mmio_din;
+                16'hFF43: reg_scx       <= mmio_din;
+                // 16'hFF44 (reg_ly) is not writable from the bus
+                16'hFF45: reg_lyc       <= mmio_din;
+                16'hFF46: reg_dma       <= mmio_din;
+                16'hFF47: reg_bgp       <= mmio_din;
+                16'hFF48: reg_obp0      <= mmio_din;
+                16'hFF49: reg_obp1      <= mmio_din;
+                16'hFF4A: reg_wy        <= mmio_din;
+                16'hFF4B: reg_wx        <= mmio_din;
+                default: ; // any other address leaves these registers untouched
+            endcase
+        end
+    end
+
+ // Debug Outputs
+ assign scx = reg_scx; // Copy of the SCX register
+ assign scy = reg_scy; // Copy of the SCY register
+ assign state = r_state; // Current render state
+
+endmodule
diff --git a/verilog/rtl/regfile.v b/verilog/rtl/regfile.v
new file mode 100644
index 0000000..575070c
--- /dev/null
+++ b/verilog/rtl/regfile.v
@@ -0,0 +1,54 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Module Name: regfile
+// Project Name: VerilogBoy
+// Description:
+// The register file of Game Boy CPU.
+// Dependencies:
+//
+// Additional Comments:
+// Only BCDEHLSP are in the register file
+//////////////////////////////////////////////////////////////////////////////////
+
+module regfile(
+    input         clk,
+    input         rst,
+    input  [2:0]  rdn,   // 8-bit read port: register index
+    output [7:0]  rd,    // 8-bit read port: data
+    input  [1:0]  rdwn,  // 16-bit read port: pair index
+    output [15:0] rdw,   // 16-bit read port: {regs[2*rdwn], regs[2*rdwn+1]}
+    output [7:0]  h, // H, L output for 16bit addition
+    output [7:0]  l,
+    output [15:0] sp, // SP output for addressing
+    input  [2:0]  wrn,   // write port: register index
+    input  [7:0]  wr,    // write port: data
+    input         we     // write enable, sampled on the rising clock edge
+    );
+
+    // Eight 8-bit registers. Indices 4 and 5 drive h and l, indices 6 and 7
+    // form sp (high byte first); the file header suggests 0..3 hold B, C, D, E.
+    reg [7:0] regs [0:7];
+
+    // Asynchronous read ports
+    assign rd  = regs[rdn];
+    assign rdw = {regs[{rdwn, 1'b0}], regs[{rdwn, 1'b1}]};
+    assign h   = regs[3'd4];
+    assign l   = regs[3'd5];
+    assign sp  = {regs[3'd6], regs[3'd7]};
+
+    // Synchronous reset clears all registers; otherwise we writes one register
+    integer idx;
+    always @(posedge clk) begin
+        if (rst) begin
+            for (idx = 0; idx < 8; idx = idx + 1)
+                regs[idx] <= 8'b0;
+        end
+        else if (we)
+            regs[wrn] <= wr;
+    end
+
+endmodule
diff --git a/verilog/rtl/serial.v b/verilog/rtl/serial.v
new file mode 100644
index 0000000..30b03fa
--- /dev/null
+++ b/verilog/rtl/serial.v
@@ -0,0 +1,104 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 13:13:04 04/13/2018
+// Module Name: serial
+// Project Name: VerilogBoy
+// Description:
+// Dummy serial interface
+// Dependencies:
+//
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+module serial(
+    input clk,
+    input rst,
+    input wire [15:0] a,
+    output reg [7:0] dout,
+    input wire [7:0] din,
+    input wire rd,
+    input wire wr,
+    output reg int_serial_req,
+    input wire int_serial_ack
+    );
+
+    // Dummy serial port. Nothing is shifted in or out: SB (FF01) always reads
+    // 8'hFF. SC (FF02) keeps bit 7 (start flag) and bit 0 as written; a write
+    // with both bits set starts an 8 bit transfer timed by clk_spi. When the
+    // transfer finishes the interrupt is requested and the start flag is
+    // cleared, so software polling SC bit 7 sees the transfer end.
+    // The rd strobe is unused because reads are purely combinational.
+
+    reg clk_spi; //8kHz SPI Clock
+    reg [8:0] counter; // counts clk cycles within half a clk_spi period
+
+    // Clock divider: clk_spi toggles every 256 clk cycles (divide by 512)
+    always @(posedge clk)
+    begin
+        if (rst) begin
+            counter <= 9'h72;
+            clk_spi <= 1'b0;
+        end
+        else if (counter == (512 / 2 - 1)) begin
+            clk_spi <= ~clk_spi;
+            counter <= 0;
+        end
+        else begin
+            counter <= counter + 1'b1;
+        end
+    end
+
+    reg reg_sc_start; // SC bit 7: transfer requested / in progress
+    reg reg_sc_int;   // SC bit 0, as written by software
+
+    // Combinational read; everything except SC reads back as 8'hFF
+    always @(*) begin
+        if (a == 16'hff02)
+            dout = {reg_sc_start, 6'b111111, reg_sc_int};
+        else
+            dout = 8'hff;
+    end
+
+    reg [3:0] count; // clk_spi rising edges left in the running transfer
+    reg last_clk;    // clk_spi delayed by one clk, for edge detection
+
+    always @(posedge clk) begin
+        if (rst) begin
+            reg_sc_start <= 1'b0;
+            reg_sc_int <= 1'b0;
+            int_serial_req <= 1'b0;
+            count <= 4'd0;
+            last_clk <= 1'b0;
+        end
+        else begin
+            last_clk <= clk_spi;
+            if (wr && (a == 16'hff02)) begin
+                // A write to SC has priority and (re)starts or cancels a transfer
+                reg_sc_start <= din[7];
+                reg_sc_int <= din[0];
+                if (din[7] && din[0]) count <= 4'd8;
+                else count <= 4'd0;
+            end
+            else if (count != 4'd0) begin
+                // Transfer running: one bit per rising edge of clk_spi
+                if (!last_clk && clk_spi) begin
+                    count <= count - 4'd1;
+                    if (count == 4'd1) begin
+                        // Last bit done: raise the interrupt and drop the
+                        // start flag
+                        int_serial_req <= 1'b1;
+                        reg_sc_start <= 1'b0;
+                    end
+                end
+            end
+            else if ((int_serial_req)&&(int_serial_ack)) begin
+                // Idle: an acknowledged request is withdrawn
+                int_serial_req <= 1'b0;
+            end
+        end
+    end
+
+endmodule
diff --git a/verilog/rtl/singleport_ram.v b/verilog/rtl/singleport_ram.v
new file mode 100644
index 0000000..0c8d630
--- /dev/null
+++ b/verilog/rtl/singleport_ram.v
@@ -0,0 +1,24 @@
+`timescale 1ns / 1ps
+module singleport_ram #(
+    parameter integer WORDS = 8192,   // number of 8-bit words
+    parameter ABITS = 13              // address width in bits
+)(
+    input clka,
+    input wea,
+    input [ABITS - 1:0] addra,
+    input [7:0] dina,
+    output reg [7:0] douta
+);
+
+    reg [7:0] ram [0:WORDS-1];
+
+    // Synchronous single port. The read is registered and, because both
+    // assignments are non-blocking, a write cycle returns the previous
+    // content of the addressed word on douta.
+    always @(posedge clka) begin
+        douta <= ram[addra];
+        if (wea)
+            ram[addra] <= dina;
+    end
+
+endmodule
diff --git a/verilog/rtl/singlereg.v b/verilog/rtl/singlereg.v
new file mode 100644
index 0000000..783e04c
--- /dev/null
+++ b/verilog/rtl/singlereg.v
@@ -0,0 +1,38 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Module Name: singlereg
+// Project Name: VerilogBoy
+// Description:
+// A single register with synchronous reset and write enable.
+// Dependencies:
+//
+// Additional Comments:
+// Single 8-bit register
+//////////////////////////////////////////////////////////////////////////////////
+
+module singlereg #(
+    parameter WIDTH = 8               // register width in bits
+)(
+    input              clk,
+    input              rst,           // synchronous reset, clears the register
+    input  [WIDTH-1:0] wr,            // value to store
+    output [WIDTH-1:0] rd,            // current register value
+    input              we             // write enable
+);
+
+    reg [WIDTH-1:0] data;
+    assign rd = data;
+
+    // Reset wins over write enable; the value holds when neither is set
+    always @(posedge clk) begin
+        if (rst)
+            data <= {WIDTH{1'b0}};
+        else if (we)
+            data <= wr;
+    end
+
+endmodule
diff --git a/verilog/rtl/sound.v b/verilog/rtl/sound.v
new file mode 100644
index 0000000..2cabe88
--- /dev/null
+++ b/verilog/rtl/sound.v
@@ -0,0 +1,360 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 12:29:37 04/07/2018
+// Module Name: sound
+// Project Name: VerilogBoy
+// Description:
+// GameBoy sound unit main file
+// Dependencies:
+//
+// Additional Comments:
+// On a real Game Boy, audio mixing is done with an op-amp (I am not sure, but
+// this makes the most sense according to the documents we have). I am using
+// an adder here to make that happen. Also, audio volume control is probably
+// done with a PGA on a real Game Boy, and I am using multiplication to
+// implement that here. So this will synthesize some additional adders and
+// multipliers which are not part of a real Game Boy.
+//////////////////////////////////////////////////////////////////////////////////
+module sound(
+ input wire clk,
+ input wire rst,
+ input wire [15:0] a, // bus address
+ output reg [7:0] dout, // bus read data, 8'hFF outside FF10-FF3F
+ input wire [7:0] din, // bus write data
+ input wire rd,
+ input wire wr, // bus write strobe
+ output wire [15:0] left, // mixed output built from the s01 enables and level
+ output wire [15:0] right, // mixed output built from the s02 enables and level
+ // debug
+ output wire [3:0] ch1_level,
+ output wire [3:0] ch2_level,
+ output wire [3:0] ch3_level,
+ output wire [3:0] ch4_level
+ );
+
+ // Sound registers
+ reg [7:0] regs [0:31];
+
+ /* verilator lint_off UNUSED */
+ wire [7:0] reg_nr10 = regs[00]; // $FF10 Channel 1 Sweep register (RW)
+ wire [7:0] reg_nr11 = regs[01]; // $FF11 Channel 1 Sound length/wave pattern duty (RW)
+ wire [7:0] reg_nr12 = regs[02]; // $FF12 Channel 1 Volume envelope (RW)
+ wire [7:0] reg_nr13 = regs[03]; // $FF13 Channel 1 Frequency lo (W)
+ wire [7:0] reg_nr14 = regs[04]; // $FF14 Channel 1 Frequency hi (RW)
+ wire [7:0] reg_nr21 = regs[06]; // $FF16 Channel 2 Sound length/wave pattern duty (RW)
+ wire [7:0] reg_nr22 = regs[07]; // $FF17 Channel 2 Volume envelope (RW)
+ wire [7:0] reg_nr23 = regs[08]; // $FF18 Channel 2 Frequency lo (W)
+ wire [7:0] reg_nr24 = regs[09]; // $FF19 Channel 2 Frequency hi (RW)
+ wire [7:0] reg_nr30 = regs[10]; // $FF1A Channel 3 Sound on/off (RW)
+ wire [7:0] reg_nr31 = regs[11]; // $FF1B Channel 3 Sound length (?)
+ wire [7:0] reg_nr32 = regs[12]; // $FF1C Channel 3 Select output level (RW)
+ wire [7:0] reg_nr33 = regs[13]; // $FF1D Channel 3 Frequency lo (W)
+ wire [7:0] reg_nr34 = regs[14]; // $FF1E Channel 3 Frequency hi (RW)
+ wire [7:0] reg_nr41 = regs[16]; // $FF20 Channel 4 Sound length (RW)
+ wire [7:0] reg_nr42 = regs[17]; // $FF21 Channel 4 Volume envelope (RW)
+ wire [7:0] reg_nr43 = regs[18]; // $FF22 Channel 4 Polynomial counter (RW)
+ wire [7:0] reg_nr44 = regs[19]; // $FF23 Channel 4 Counter/consecutive; Initial(RW)
+ wire [7:0] reg_nr50 = regs[20]; // $FF24 Channel control / ON-OFF / Volume (RW)
+ wire [7:0] reg_nr51 = regs[21]; // $FF25 Selection of Sound output terminal (RW)
+ wire [7:0] reg_nr52 = regs[22]; // $FF26 Sound on/off
+ /* verilator lint_on UNUSED */
+ wire [4:0] reg_addr = {~a[4], a[3:0]}; // Convert address bits 10-2F to index 00-1F (FF10 -> 0, FF20 -> 16)
+
+ wire [2:0] ch1_sweep_time = reg_nr10[6:4];
+ wire ch1_sweep_decreasing = reg_nr10[3];
+ wire [2:0] ch1_num_sweep_shifts = reg_nr10[2:0];
+ wire [1:0] ch1_wave_duty = reg_nr11[7:6];
+ wire [5:0] ch1_length = reg_nr11[5:0];
+ wire [3:0] ch1_initial_volume = reg_nr12[7:4];
+ wire ch1_envelope_increasing = reg_nr12[3];
+ wire [2:0] ch1_num_envelope_sweeps = reg_nr12[2:0];
+ reg ch1_start; // Pulsed by the write logic: din[7] of a write to FF14, otherwise 0
+ wire ch1_single = reg_nr14[6];
+ wire [10:0] ch1_frequency = {reg_nr14[2:0], reg_nr13[7:0]};
+ wire [1:0] ch2_wave_duty = reg_nr21[7:6];
+ wire [5:0] ch2_length = reg_nr21[5:0];
+ wire [3:0] ch2_initial_volume = reg_nr22[7:4];
+ wire ch2_envelope_increasing = reg_nr22[3];
+ wire [2:0] ch2_num_envelope_sweeps = reg_nr22[2:0];
+ reg ch2_start; // Pulsed by the write logic: din[7] of a write to FF19, otherwise 0
+ wire ch2_single = reg_nr24[6];
+ wire [10:0] ch2_frequency = {reg_nr24[2:0], reg_nr23[7:0]};
+ wire [7:0] ch3_length = reg_nr31[7:0];
+ wire ch3_on = reg_nr30[7];
+ wire [1:0] ch3_volume = reg_nr32[6:5];
+ reg ch3_start; // Pulsed by the write logic: din[7] of a write to FF1E, otherwise 0
+ wire ch3_single = reg_nr34[6];
+ wire [10:0] ch3_frequency = {reg_nr34[2:0], reg_nr33[7:0]};
+ wire [5:0] ch4_length = reg_nr41[5:0];
+ wire [3:0] ch4_initial_volume = reg_nr42[7:4];
+ wire ch4_envelope_increasing = reg_nr42[3];
+ wire [2:0] ch4_num_envelope_sweeps = reg_nr42[2:0];
+ wire [3:0] ch4_shift_clock_freq = reg_nr43[7:4];
+ wire ch4_counter_width = reg_nr43[3]; // 0 = 15 bits, 1 = 7 bits
+ wire [2:0] ch4_freq_dividing_ratio = reg_nr43[2:0];
+ reg ch4_start; // Pulsed by the write logic: din[7] of a write to FF23, otherwise 0
+ wire ch4_single = reg_nr44[6];
+ wire s02_vin = reg_nr50[7];
+ wire [2:0] s02_output_level = reg_nr50[6:4];
+ wire s01_vin = reg_nr50[3];
+ wire [2:0] s01_output_level = reg_nr50[2:0];
+ wire s02_ch4_enable = reg_nr51[7];
+ wire s02_ch3_enable = reg_nr51[6];
+ wire s02_ch2_enable = reg_nr51[5];
+ wire s02_ch1_enable = reg_nr51[4];
+ wire s01_ch4_enable = reg_nr51[3];
+ wire s01_ch3_enable = reg_nr51[2];
+ wire s01_ch2_enable = reg_nr51[1];
+ wire s01_ch1_enable = reg_nr51[0];
+ wire sound_enable = reg_nr52[7]; // Master enable; its inverse resets all four channel modules
+ wire ch4_on_flag; // Driven by the enable output of each channel module, read back in FF26 bits 3:0
+ wire ch3_on_flag;
+ wire ch2_on_flag;
+ wire ch1_on_flag;
+
+ reg [7:0] wave [0:15]; // Wave RAM, mapped at FF30-FF3F
+ wire [3:0] wave_addr_ext = a[3:0]; // Address requested by the bus
+ wire [3:0] wave_addr_int; // Address requested by channel 3
+ wire [3:0] wave_addr = (ch3_on) ? (wave_addr_int) : (wave_addr_ext); // Channel 3 owns the wave RAM address while it is on
+ wire [7:0] wave_data = wave[wave_addr];
+
+ wire addr_in_regs = (a >= 16'hFF10 && a <= 16'hFF2F);
+ wire addr_in_wave = (a >= 16'hFF30 && a <= 16'hFF3F);
+
+    // Bus RW
+    // Bus RW - Combinational Read
+    // The register array is read through a wire (the wave RAM likewise through
+    // wave_data), so the always@(*) below contains no array reference. ISE XST
+    // rejects always@(*) over a reg array, and an address-only sensitivity list
+    // would let simulation miss register, wave RAM and channel flag updates.
+    wire [7:0] reg_rd_data = regs[reg_addr];
+    always @(*)
+    begin
+        dout = 8'hFF;
+        if (addr_in_regs) begin
+            if (a == 16'hFF26)
+                dout = {sound_enable, 3'b0, ch4_on_flag, ch3_on_flag, ch2_on_flag, ch1_on_flag};
+            else
+                dout = reg_rd_data;
+        end
+        else if (addr_in_wave) begin
+            dout = wave_data;
+        end
+    end
+
+ // Bus RW - Sequential Write
+ integer i;
+
+ always @(posedge clk)
+ begin
+ if (rst) begin
+ for (i = 0; i < 32; i = i+1) begin
+ regs[i] <= 8'b0;
+ end
+ // wave pattern should not be initialized
+ end
+ else begin
+ if (wr) begin
+ if (addr_in_regs) begin
+ if (a == 16'hFF26) begin // FF26 is writable even while sound is disabled
+ if (din[7] == 0) begin // Writing bit 7 as 0 clears every sound register
+ for (i = 0; i < 32; i = i+1) begin
+ regs[i] <= 8'b0;
+ end
+ end
+ else
+ regs[reg_addr] <= din;
+ end
+ else if (sound_enable) begin // Other registers only accept writes while sound is enabled
+ regs[reg_addr] <= din;
+ end
+ end
+ else if (addr_in_wave)
+ //wave[wave_addr_ext] <= din; //what if we allow Write any way?
+ wave[wave_addr] <= din; // This is what happens trying to write to wave sample while it is on
+ end
+ // Initialize signal, should be triggered whenever a 1 is written
+ if ((wr)&&(a == 16'hFF14)) ch1_start <= din[7]; // The start signals are not assigned while rst is high
+ else ch1_start <= 0;
+ if ((wr)&&(a == 16'hFF19)) ch2_start <= din[7];
+ else ch2_start <= 0;
+ if ((wr)&&(a == 16'hFF1E)) ch3_start <= din[7];
+ else ch3_start <= 0;
+ if ((wr)&&(a == 16'hFF23)) ch4_start <= din[7];
+ else ch4_start <= 0;
+ end
+ end
+
+ // Clocks
+ wire clk_frame; // 512Hz Base Clock
+ wire clk_length_ctr; // 256Hz Length Control Clock
+ wire clk_vol_env; // 64Hz Volume Envelope Clock
+ wire clk_sweep; // 128Hz Sweep Clock
+ wire clk_freq_div; // 1048576Hz Frequency Division Clock
+
+ clk_div #(.WIDTH(15), .DIV(8192)) frame_div(
+ .i(clk),
+ .o(clk_frame)
+ );
+
+ reg [2:0] sequencer_state = 3'b0; // 8-step sequencer, advanced by clk_frame (initial value only, no reset)
+ always@(posedge clk_frame)
+ begin
+ sequencer_state <= sequencer_state + 1'b1;
+ end
+
+ assign clk_length_ctr = (sequencer_state[0]) ? 1'b0 : 1'b1; // High on even steps
+ assign clk_vol_env = (sequencer_state == 3'd7) ? 1'b1 : 1'b0; // High on step 7 only
+ assign clk_sweep = ((sequencer_state == 3'd2) || (sequencer_state == 3'd6)) ? 1'b1 : 1'b0; // High on steps 2 and 6
+
+ clk_div #(.WIDTH(2), .DIV(2)) freq_div(
+ .i(clk),
+ .o(clk_freq_div)
+ );
+
+ // Channels
+ wire [3:0] ch1; // 4-bit output level of each channel, fed to the mixer
+ wire [3:0] ch2;
+ wire [3:0] ch3;
+ wire [3:0] ch4;
+
+ sound_square sound_ch1(
+ .rst(~sound_enable), // Channel is held in reset while the master enable is low
+ .clk(clk),
+ .clk_length_ctr(clk_length_ctr),
+ .clk_vol_env(clk_vol_env),
+ .clk_sweep(clk_sweep),
+ .clk_freq_div(clk_freq_div),
+ .sweep_time(ch1_sweep_time),
+ .sweep_decreasing(ch1_sweep_decreasing),
+ .num_sweep_shifts(ch1_num_sweep_shifts),
+ .wave_duty(ch1_wave_duty),
+ .length(ch1_length),
+ .initial_volume(ch1_initial_volume),
+ .envelope_increasing(ch1_envelope_increasing),
+ .num_envelope_sweeps(ch1_num_envelope_sweeps),
+ .start(ch1_start),
+ .single(ch1_single),
+ .frequency(ch1_frequency),
+ .level(ch1),
+ .enable(ch1_on_flag)
+ );
+
+ sound_square sound_ch2(
+ .rst(~sound_enable),
+ .clk(clk),
+ .clk_length_ctr(clk_length_ctr),
+ .clk_vol_env(clk_vol_env),
+ .clk_sweep(clk_sweep),
+ .clk_freq_div(clk_freq_div),
+ .sweep_time(3'b0), // Channel 2 reuses the square module with the sweep inputs tied to zero
+ .sweep_decreasing(1'b0),
+ .num_sweep_shifts(3'b0),
+ .wave_duty(ch2_wave_duty),
+ .length(ch2_length),
+ .initial_volume(ch2_initial_volume),
+ .envelope_increasing(ch2_envelope_increasing),
+ .num_envelope_sweeps(ch2_num_envelope_sweeps),
+ .start(ch2_start),
+ .single(ch2_single),
+ .frequency(ch2_frequency),
+ .level(ch2),
+ .enable(ch2_on_flag)
+ );
+
+ sound_wave sound_ch3(
+ .rst(~sound_enable),
+ .clk(clk),
+ .clk_length_ctr(clk_length_ctr),
+ .length(ch3_length),
+ .volume(ch3_volume),
+ .on(ch3_on),
+ .single(ch3_single),
+ .start(ch3_start),
+ .frequency(ch3_frequency),
+ .wave_a(wave_addr_int), // Wave RAM address requested by the channel
+ .wave_d(wave_data), // Wave RAM byte at the currently selected address
+ .level(ch3),
+ .enable(ch3_on_flag)
+ );
+
+ sound_noise sound_ch4(
+ .rst(~sound_enable),
+ .clk(clk),
+ .clk_length_ctr(clk_length_ctr),
+ .clk_vol_env(clk_vol_env),
+ .length(ch4_length),
+ .initial_volume(ch4_initial_volume),
+ .envelope_increasing(ch4_envelope_increasing),
+ .num_envelope_sweeps(ch4_num_envelope_sweeps),
+ .shift_clock_freq(ch4_shift_clock_freq),
+ .counter_width(ch4_counter_width),
+ .freq_dividing_ratio(ch4_freq_dividing_ratio),
+ .start(ch4_start),
+ .single(ch4_single),
+ .level(ch4),
+ .enable(ch4_on_flag)
+ );
+
+ // Mixer
+
+ /*
+ // Signed mixer
+ wire [5:0] sign_extend_ch1 = {{3{ch1[3]}}, ch1[2:0]};
+ wire [5:0] sign_extend_ch2 = {{3{ch2[3]}}, ch2[2:0]};
+ wire [5:0] sign_extend_ch3 = {{3{ch3[3]}}, ch3[2:0]};
+ wire [5:0] sign_extend_ch4 = {{3{ch4[3]}}, ch4[2:0]};
+ reg [5:0] mixed_s01;
+ reg [5:0] mixed_s02;
+
+ always @(*)
+ begin
+ mixed_s01 = 6'd0;
+ mixed_s02 = 6'd0;
+ if (s01_ch1_enable) mixed_s01 = mixed_s01 + sign_extend_ch1;
+ if (s01_ch2_enable) mixed_s01 = mixed_s01 + sign_extend_ch2;
+ if (s01_ch3_enable) mixed_s01 = mixed_s01 + sign_extend_ch3;
+ if (s01_ch4_enable) mixed_s01 = mixed_s01 + sign_extend_ch4;
+ if (s02_ch1_enable) mixed_s02 = mixed_s02 + sign_extend_ch1;
+ if (s02_ch2_enable) mixed_s02 = mixed_s02 + sign_extend_ch2;
+ if (s02_ch3_enable) mixed_s02 = mixed_s02 + sign_extend_ch3;
+ if (s02_ch4_enable) mixed_s02 = mixed_s02 + sign_extend_ch4;
+ end
+
+ assign left = (sound_enable) ? {mixed_s01[5:0], 14'b0} : 20'b0;
+ assign right = (sound_enable) ? {mixed_s02[5:0], 14'b0} : 20'b0;
+ */
+
+    // Unsigned mixer
+    reg [5:0] added_s01;
+    reg [5:0] added_s02;
+    always @(*)
+    begin
+        // Sum of the 4-bit channel levels routed to each terminal (at most 4 * 15)
+        added_s01 = (s01_ch1_enable ? {2'b0, ch1} : 6'd0)
+                  + (s01_ch2_enable ? {2'b0, ch2} : 6'd0)
+                  + (s01_ch3_enable ? {2'b0, ch3} : 6'd0)
+                  + (s01_ch4_enable ? {2'b0, ch4} : 6'd0);
+        added_s02 = (s02_ch1_enable ? {2'b0, ch1} : 6'd0)
+                  + (s02_ch2_enable ? {2'b0, ch2} : 6'd0)
+                  + (s02_ch3_enable ? {2'b0, ch3} : 6'd0)
+                  + (s02_ch4_enable ? {2'b0, ch4} : 6'd0);
+    end
+
+    // Terminal volume: scale by the 3-bit output level (at most 60 * 7, fits 9 bits)
+    wire [8:0] mixed_s01 = added_s01 * s01_output_level;
+    wire [8:0] mixed_s02 = added_s02 * s02_output_level;
+
+    assign left  = sound_enable ? {1'b0, mixed_s01, 6'b0} : 16'b0;
+    assign right = sound_enable ? {1'b0, mixed_s02, 6'b0} : 16'b0;
+
+ // Debug Output: per-channel levels before mixing
+ assign ch1_level = ch1;
+ assign ch2_level = ch2;
+ assign ch3_level = ch3;
+ assign ch4_level = ch4;
+
+endmodule
diff --git a/verilog/rtl/sound_channel_mix.v b/verilog/rtl/sound_channel_mix.v
new file mode 100644
index 0000000..a9b03ed
--- /dev/null
+++ b/verilog/rtl/sound_channel_mix.v
@@ -0,0 +1,58 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer:
+//
+// Create Date: 22:29:14 04/08/2018
+// Design Name:
+// Module Name: sound_channel_mix
+// Project Name:
+// Target Devices:
+// Tool versions:
+// Description:
+//
+// Dependencies:
+//
+// Revision:
+// Revision 0.01 - File Created
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+module sound_channel_mix(
+    input        enable,      // channel gate; the output is forced to zero while low
+    input        modulate,    // waveform bit: 1 passes target_vol, 0 outputs zero
+    input  [3:0] target_vol,  // volume to emit while the waveform bit is high
+    output [3:0] level        // unsigned 4-bit sample
+    );
+
+    /*// Converting volume envelope to a 2's complement number
+    reg [3:0] target_vol_low;
+    wire [3:0] target_vol_high;
+    always@(target_vol)
+    begin
+        case (target_vol)
+            4'b0000: target_vol_low = 4'b0000;
+            4'b0001: target_vol_low = 4'b1111;
+            4'b0010: target_vol_low = 4'b1111;
+            4'b0011: target_vol_low = 4'b1110;
+            4'b0100: target_vol_low = 4'b1110;
+            4'b0101: target_vol_low = 4'b1101;
+            4'b0110: target_vol_low = 4'b1101;
+            4'b0111: target_vol_low = 4'b1100;
+            4'b1000: target_vol_low = 4'b1100;
+            4'b1001: target_vol_low = 4'b1011;
+            4'b1010: target_vol_low = 4'b1011;
+            4'b1011: target_vol_low = 4'b1010;
+            4'b1100: target_vol_low = 4'b1010;
+            4'b1101: target_vol_low = 4'b1001;
+            4'b1110: target_vol_low = 4'b1001;
+            4'b1111: target_vol_low = 4'b1000;
+        endcase
+    end
+    assign target_vol_high = {1'b0, target_vol[3:1]};
+
+    assign level = (enable) ? ((modulate) ? (target_vol_high) : (target_vol_low)) : (4'b0000);*/
+    // Unsigned output: target_vol while enabled and the waveform bit is high, else 0.
+    assign level = (enable && modulate) ? target_vol : 4'b0000;
+
+endmodule
diff --git a/verilog/rtl/sound_length_ctr.v b/verilog/rtl/sound_length_ctr.v
new file mode 100644
index 0000000..3a1452a
--- /dev/null
+++ b/verilog/rtl/sound_length_ctr.v
@@ -0,0 +1,49 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 22:24:55 04/08/2018
+// Module Name: sound_length_ctr
+// Project Name: VerilogBoy
+// Description:
+// Sound length control for all channels
+// Dependencies:
+// none
+// Additional Comments:
+// Channel 3 has a different length
+//////////////////////////////////////////////////////////////////////////////////
+module sound_length_ctr(rst, clk_length_ctr, start, single, length, enable);
+    parameter WIDTH = 6; // 6 bits for channels 1/2/4, 8 bits for channel 3
+
+    input              rst;             // asynchronous reset
+    input              clk_length_ctr;  // length tick
+    input              start;           // asynchronous (re)trigger
+    input              single;          // 1 = stop once the counter saturates
+    input  [WIDTH-1:0] length;          // value loaded on start
+    output reg         enable = 0;      // set by start, cleared by rst or length expiry
+
+    reg [WIDTH-1:0] length_left = {WIDTH{1'b1}}; // up-counter from the loaded length to all-ones
+
+    // Length control. Priority: rst, then start, then counting. Counting only
+    // happens while `single` is set; once length_left is all-ones the next
+    // tick drops enable instead of incrementing.
+    always @(posedge clk_length_ctr, posedge start, posedge rst)
+    begin
+        if (rst) begin
+            length_left <= 0;
+            enable <= 1'b0;
+        end
+        else if (start) begin
+            length_left <= (length == 0) ? ({WIDTH{1'b1}}) : (length);
+            enable <= 1'b1;
+        end
+        else if (single) begin
+            if (length_left != {WIDTH{1'b1}})
+                length_left <= length_left + 1'b1;
+            else
+                enable <= 1'b0;
+        end
+    end
+
+endmodule
diff --git a/verilog/rtl/sound_noise.v b/verilog/rtl/sound_noise.v
new file mode 100644
index 0000000..a836bcf
--- /dev/null
+++ b/verilog/rtl/sound_noise.v
@@ -0,0 +1,112 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 21:19:04 04/08/2018
+// Module Name: sound_noise
+// Project Name: VerilogBoy
+// Description:
+//
+// Dependencies:
+//
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+module sound_noise( // LFSR-based channel: a two-stage clock divider shifts an LFSR whose inverted bit 0 gates the envelope volume
+ input rst, // Async reset
+ input clk, // CPU Clock
+ input clk_length_ctr, // Length control clock
+ input clk_vol_env, // Volume Envelope clock
+ input [5:0] length, // Length = (64-t1)*(1/256) second, used iff single is set
+ input [3:0] initial_volume, // Initial volume of envelope 0 = no sound
+ input envelope_increasing, // 0 = decrease, 1 = increase
+ input [2:0] num_envelope_sweeps, // number of envelope sweep 0 = stop
+ input [3:0] shift_clock_freq, // shift clock prescaler (s)
+ input counter_width, // 0 = 15 bits, 1 = 7 bits
+ input [2:0] freq_dividing_ratio, // shift clock divider 0 -> 1MHz, 1 -> 512kHz (r)
+ input start, // Restart sound
+ input single, // If set, output would stop upon reaching the length specified
+ output [3:0] level, // Channel sample produced by sound_channel_mix
+ output enable // Enable flag produced by sound_length_ctr
+ );
+
+ // Dividing ratio from 4MHz is (r * 8), for the divider to work, the comparator should
+ // compare with (dividing_factor / 2 - 1), so it becomes (r * 4 - 1)
+ reg [4:0] adjusted_freq_dividing_ratio; // latched on posedge start
+ reg [3:0] latched_shift_clock_freq; // latched on posedge start; selects the clk_shifter tap
+
+ wire [3:0] target_vol; // envelope volume from sound_vol_env
+
+ reg clk_div = 0; // output of the first divider stage
+ wire clk_shift; // LFSR shift clock (tap of the second stage)
+
+ reg [4:0] clk_divider = 5'b0; // First stage
+ always @(posedge clk)
+ begin
+ if (clk_divider == adjusted_freq_dividing_ratio) begin
+ clk_div <= ~clk_div; // toggle each time the compare value is reached
+ clk_divider <= 0;
+ end
+ else
+ clk_divider <= clk_divider + 1'b1;
+ end
+
+ reg [13:0] clk_shifter = 14'b0; // Second stage
+ always @(posedge clk_div)
+ begin
+ clk_shifter <= clk_shifter + 1'b1; // free-running binary counter, never reset
+ end
+ // NOTE(review): clk_shifter is 14 bits wide but the 4-bit select below can address bits 14 and 15 - confirm intended.
+ assign clk_shift = clk_shifter[latched_shift_clock_freq];
+
+ reg [14:0] lfsr = {15{1'b1}};
+ wire target_freq_out = ~lfsr[0]; // waveform bit handed to the mixer
+
+ wire [14:0] lfsr_next =
+ (counter_width == 0) ? ({(lfsr[0] ^ lfsr[1]), lfsr[14:1]}) : // 15-bit: shift right, feedback into bit 14
+ ({8'b0, (lfsr[0] ^ lfsr[1]), lfsr[6:1]}); // 7-bit: feedback into bit 6, upper bits cleared
+
+ always@(posedge start) // capture the divider settings at (re)start
+ begin
+ adjusted_freq_dividing_ratio <=
+ (freq_dividing_ratio == 3'b0) ? (5'd1) : ((freq_dividing_ratio * 4) - 1); // r = 0 is special-cased to 1
+ latched_shift_clock_freq <= shift_clock_freq;
+ end
+
+ always@(posedge clk_shift, posedge start) // start acts as an asynchronous preset of the LFSR
+ begin
+ if (start) begin
+ lfsr <= {15{1'b1}};
+ end
+ else begin
+ lfsr <= lfsr_next;
+ end
+ end
+
+ sound_vol_env sound_vol_env(
+ .clk_vol_env(clk_vol_env),
+ .start(start),
+ .initial_volume(initial_volume),
+ .envelope_increasing(envelope_increasing),
+ .num_envelope_sweeps(num_envelope_sweeps),
+ .target_vol(target_vol)
+ );
+
+ sound_length_ctr #(6) sound_length_ctr(
+ .rst(rst),
+ .clk_length_ctr(clk_length_ctr),
+ .start(start),
+ .single(single),
+ .length(length),
+ .enable(enable)
+ );
+
+ sound_channel_mix sound_channel_mix(
+ .enable(enable),
+ .modulate(target_freq_out),
+ .target_vol(target_vol),
+ .level(level)
+ );
+
+endmodule
diff --git a/verilog/rtl/sound_square.v b/verilog/rtl/sound_square.v
new file mode 100644
index 0000000..8f85838
--- /dev/null
+++ b/verilog/rtl/sound_square.v
@@ -0,0 +1,139 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 16:51:12 04/07/2018
+// Module Name: sound_square
+// Project Name: VerilogBoy
+// Description:
+// Square wave generator for channel 1 and 2
+// Dependencies:
+// sound_vol_env, sound_length_ctr, sound_channel_mix
+// Additional Comments:
+// First, synthesize a frequency with 8X of specified frequency with any percent
+// of duty cycle, then use a small FSM to synthesis it into desired duty cycle.
+//
+// Note: the original GameBoy process all the sound internally as unsigned
+// number, and use a bypass capacitor to remove all the DC component. One drawback
+// is that it do not have a constant "zero" reference: when a channel is off, the
+// voltage is, naturually 0V. But when it is working, it will alter between 0 and
+// Vmax(volume), means the zero becomes the half of current volume. This is also
+// the design I am using here.
+//////////////////////////////////////////////////////////////////////////////////
+module sound_square(
+    input rst, // Async reset
+    input clk, // CPU Clock
+    input clk_length_ctr, // Length control clock
+    input clk_vol_env, // Volume Envelope clock
+    input clk_sweep, // Sweep clock
+    input clk_freq_div, // Base frequency for divider (should be 16x131072=2097152Hz)
+    input [2:0] sweep_time, // From 0 to 7/128Hz
+    input sweep_decreasing, // 0: Addition (Freq+) 1: Subtraction (Freq-)
+    input [2:0] num_sweep_shifts, // Number of sweep shift (n=0-7)
+    input [1:0] wave_duty, // 00: 87.5% HIGH 01: 75% HIGH 10: 50% HIGH 11: 25% HIGH
+    input [5:0] length, // Length = (64-t1)*(1/256) second, used iff single is set
+    input [3:0] initial_volume, // Initial volume of envelope 0 = no sound
+    input envelope_increasing, // 0 = decrease, 1 = increase
+    input [2:0] num_envelope_sweeps, // number of envelope sweep 0 = stop
+    input start, // Restart sound
+    input single, // If set, output would stop upon reaching the length specified
+    input [10:0] frequency, // Output frequency = 131072/(2048-x) Hz
+    output [3:0] level, // Sound output
+    output enable // Internal enable flag
+    );
+
+    // Sweep: X(t) = X(t-1) +/- X(t-1)/2^n
+    // The division by 2^n is a right shift of the current frequency value by num_sweep_shifts.
+    reg [10:0] divider = 11'b0; // up-counter reloaded from target_freq when it reaches 2047
+    reg [10:0] target_freq; // frequency value currently played (after sweep)
+    reg octo_freq_out = 0; // 8 x target frequency with arbitrary duty cycle
+    wire target_freq_out; // Target frequency with specified duty cycle
+    wire [3:0] target_vol; // envelope volume from sound_vol_env
+    reg [2:0] sweep_left; // Number of sweeps need to be done
+
+    always @(posedge clk_freq_div, posedge start)
+    begin
+        if (start) begin
+            divider <= target_freq;
+        end
+        else begin
+            if (divider == 11'd2047) begin
+                octo_freq_out <= ~octo_freq_out;
+                divider <= target_freq;
+            end
+            else begin
+                divider <= divider + 1'b1;
+            end
+        end
+    end
+
+    reg [2:0] duty_counter = 3'b0; // position within the 8-step duty pattern
+    always @(posedge octo_freq_out)
+    begin
+        duty_counter <= duty_counter + 1'b1;
+    end
+
+    assign target_freq_out =
+        (wave_duty == 2'b00) ? ((duty_counter != 3'b111) ? 1'b1 : 1'b0) : ( // 87.5% HIGH
+        (wave_duty == 2'b01) ? ((duty_counter[2:1] != 2'b11) ? 1'b1 : 1'b0) : ( // 75% HIGH
+        (wave_duty == 2'b10) ? ((duty_counter[2]) ? 1'b1 : 1'b0) : ( // 50% HIGH
+        ((duty_counter[2:1] == 2'b00) ? 1'b1 : 1'b0)))); // 25% HIGH
+
+    // Frequency Sweep
+    reg overflow; // set when an upward sweep carries out of 11 bits; gates `enable`
+    always @(posedge clk_sweep, posedge start)
+    begin
+        if (start) begin
+            target_freq <= frequency;
+            sweep_left <= sweep_time;
+            overflow <= 0;
+        end
+        else begin
+            if (sweep_left != 3'b0) begin
+                sweep_left <= sweep_left - 1'b1;
+                if (sweep_decreasing)
+                    target_freq <= target_freq - (target_freq >> num_sweep_shifts); // X - X/2^n
+                else
+                    {overflow, target_freq} <= {1'b0, target_freq} + ({1'b0, target_freq} >> num_sweep_shifts); // X + X/2^n, carry -> overflow
+            end
+            else begin
+                target_freq <= frequency; // no sweeps left: follow the frequency input
+            end
+        end
+    end
+    /*always@(posedge start)
+    begin
+        target_freq <= frequency;
+    end*/
+
+    sound_vol_env sound_vol_env(
+        .clk_vol_env(clk_vol_env),
+        .start(start),
+        .initial_volume(initial_volume),
+        .envelope_increasing(envelope_increasing),
+        .num_envelope_sweeps(num_envelope_sweeps),
+        .target_vol(target_vol)
+    );
+
+    wire enable_length; // enable flag from the length counter alone
+
+    sound_length_ctr #(6) sound_length_ctr(
+        .rst(rst),
+        .clk_length_ctr(clk_length_ctr),
+        .start(start),
+        .single(single),
+        .length(length),
+        .enable(enable_length)
+    );
+    // The channel is silenced either by the length counter or by a sweep overflow.
+    assign enable = enable_length & ~overflow;
+
+    sound_channel_mix sound_channel_mix(
+        .enable(enable),
+        .modulate(target_freq_out),
+        .target_vol(target_vol),
+        .level(level)
+    );
+
+endmodule
diff --git a/verilog/rtl/sound_vol_env.v b/verilog/rtl/sound_vol_env.v
new file mode 100644
index 0000000..81cc07f
--- /dev/null
+++ b/verilog/rtl/sound_vol_env.v
@@ -0,0 +1,56 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 22:21:41 04/08/2018
+// Module Name: sound_vol_env
+// Project Name: VerilogBoy
+// Description:
+// Sound volume envelope control for channel 1 2 4
+// Dependencies:
+// none
+// Additional Comments:
+//
+//////////////////////////////////////////////////////////////////////////////////
+module sound_vol_env(
+    input            clk_vol_env,          // envelope tick
+    input            start,                // asynchronous (re)trigger
+    input      [3:0] initial_volume,       // volume loaded on start
+    input            envelope_increasing,  // 1 = step up, 0 = step down
+    input      [2:0] num_envelope_sweeps,  // reload value of the step counter; 0 disables stepping
+    output reg [3:0] target_vol            // current envelope volume
+    );
+
+    reg [2:0] enve_left; // ticks remaining before the next volume step
+    wire enve_enabled = (num_envelope_sweeps != 3'd0);
+
+    // Volume envelope.
+    //  - start reloads both the volume and the tick counter.
+    //  - otherwise enve_left counts down to zero; on the tick where it is
+    //    already zero the volume moves one step (saturating at 0 and 15)
+    //    and the counter is reloaded, provided the envelope is enabled.
+    always @(posedge clk_vol_env, posedge start)
+    begin
+        if (start) begin
+            enve_left <= num_envelope_sweeps;
+            target_vol <= initial_volume;
+        end
+        else if (enve_left != 3'b0) begin
+            enve_left <= enve_left - 1'b1;
+        end
+        else if (enve_enabled) begin
+            enve_left <= num_envelope_sweeps;
+            if (envelope_increasing) begin
+                if (target_vol != 4'b1111)
+                    target_vol <= target_vol + 1'b1;
+            end
+            else begin
+                if (target_vol != 4'b0000)
+                    target_vol <= target_vol - 1'b1;
+            end
+        end
+    end
+
+
+endmodule
diff --git a/verilog/rtl/sound_wave.v b/verilog/rtl/sound_wave.v
new file mode 100644
index 0000000..22bd632
--- /dev/null
+++ b/verilog/rtl/sound_wave.v
@@ -0,0 +1,96 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 15:06:53 04/09/2018
+// Module Name: sound_wave
+// Project Name: VerilogBoy
+// Description:
+// Sound wave player for channel 3
+// Dependencies:
+// clk_div, sound_length_ctr
+// Additional Comments:
+// If Ch3 bugs are to be implemented, they should be probably implemented
+// outside of this file. This file does not handle of RW to wave RAM
+//////////////////////////////////////////////////////////////////////////////////
+module sound_wave( // Wave channel: steps a 5-bit pointer through nibble samples fetched over wave_a/wave_d
+ input rst, // Async reset
+ input clk, // Main CPU clock
+ input clk_length_ctr, // Length control clock
+ input [7:0] length, // Length = (256-t1)*(1/256) second, used iff single is set
+ input [1:0] volume, // 00 = mute, 01 = sample as-is, 10 = sample >> 1, 11 = sample >> 2
+ input on, // gates both the pointer increment and the output level
+ input single, // passed to the length counter
+ input start, // asynchronous restart: reloads the divider and clears the pointer
+ input [10:0] frequency, // divider reload value
+ output [3:0] wave_a, // byte address into wave RAM (pointer bits 4:1)
+ input [7:0] wave_d, // byte read from wave RAM at wave_a
+ output [3:0] level, // output sample after volume shift
+ output enable // enable flag from the length counter
+ );
+
+ // Freq = 64kHz / (2048 - frequency)
+ // Why????????
+
+ wire [3:0] current_sample; // nibble selected from wave_d
+
+ reg [4:0] current_pointer = 5'b0; // index of the current nibble sample
+
+ assign wave_a[3:0] = current_pointer[4:1];
+ assign current_sample[3:0] = (current_pointer[0]) ?
+ (wave_d[3:0]) : (wave_d[7:4]); // even index -> high nibble, odd index -> low nibble
+
+ wire clk_wave_base = clk; // base clock
+ /*clk_div #(.WIDTH(6), .DIV(32)) freq_div(
+ .i(clk),
+ .o(clk_wave_base)
+ );*/
+
+ // Frequency divider: counts up from `frequency` to 2047, then toggles clk_pointer_inc.
+ reg clk_pointer_inc = 1'b0; // Clock for pointer to increment
+ reg [10:0] divider = 11'b0;
+ always @(posedge clk_wave_base, posedge start)
+ begin
+ if (start) begin
+ divider <= frequency;
+ end
+ else begin
+ if (divider == 11'd2047) begin
+ clk_pointer_inc <= ~clk_pointer_inc;
+ divider <= frequency;
+ end
+ else begin
+ divider <= divider + 1'b1;
+ end
+ end
+ end
+ // Sample pointer: cleared by start, advances on each rising edge of clk_pointer_inc while `on`.
+ always @(posedge clk_pointer_inc, posedge start)
+ begin
+ if (start) begin
+ current_pointer <= 5'b0;
+ end
+ else begin
+ if (on)
+ current_pointer <= current_pointer + 1'b1; // 5-bit counter wraps after 32 samples
+ end
+ end
+
+ sound_length_ctr #(8) sound_length_ctr(
+ .rst(rst),
+ .clk_length_ctr(clk_length_ctr),
+ .start(start),
+ .single(single),
+ .length(length),
+ .enable(enable)
+ );
+ // NOTE(review): level depends only on `on` and `volume`, not on `enable` - confirm the gating happens elsewhere.
+ assign level = (on) ? (
+ (volume == 2'b00) ? (4'b0000) : (
+ (volume == 2'b01) ? (current_sample[3:0]) : (
+ (volume == 2'b10) ? ({1'b0, current_sample[3:1]}) : (
+ ({2'b0, current_sample[3:2]}))))) : 4'b0000;
+
+
+endmodule
diff --git a/verilog/rtl/timer.v b/verilog/rtl/timer.v
new file mode 100644
index 0000000..9d72628
--- /dev/null
+++ b/verilog/rtl/timer.v
@@ -0,0 +1,125 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 17:12:01 04/13/2018
+// Module Name: timer
+// Project Name: VerilogBoy
+// Description:
+// GameBoy internal timer
+// Dependencies:
+//
+// Additional Comments:
+// This should probably run at 1MHz domain, but currently at 4MHz.
+//////////////////////////////////////////////////////////////////////////////////
+module timer( // Timer block with registers at FF04 (DIV), FF05 (TIMA), FF06 (TMA) and FF07 (TAC)
+ input wire clk,
+ input wire [1:0] ct, // certain things can only happen at 1MHz rate
+ input wire rst, // synchronous reset, sampled on posedge clk
+ input wire [15:0] a, // bus address
+ output reg [7:0] dout, // read data, decoded combinationally from a
+ input wire [7:0] din, // write data
+ input wire rd, // not referenced by the logic in this module
+ input wire wr, // write strobe
+ output reg int_tim_req, // interrupt request, raised when TIMA increments from FF
+ input wire int_tim_ack // clears int_tim_req
+ );
+
+ wire [7:0] reg_div; // Divider Register
+ reg [7:0] reg_tima; // Timer counter
+ reg [7:0] reg_tma; // Timer modulo
+ reg [7:0] reg_tac; // Timer control
+
+ wire addr_in_timer = ((a == 16'hFF04) || // not referenced elsewhere in this module
+ (a == 16'hFF05) ||
+ (a == 16'hFF06) ||
+ (a == 16'hFF07)) ? 1'b1 : 1'b0;
+
+ reg [15:0] div; // free-running 16-bit counter; its upper byte is the DIV register
+
+ wire reg_timer_enable = reg_tac[2];
+ wire [1:0] reg_clock_sel = reg_tac[1:0];
+
+ assign reg_div[7:0] = div[15:8];
+ wire clk_4khz = div[9];
+ wire clk_256khz = div[3];
+ wire clk_64khz = div[5];
+ wire clk_16khz = div[7];
+ wire clk_tim; // selected divider tap, forced low while the timer is disabled
+ assign clk_tim = (reg_timer_enable) ? (
+ (reg_clock_sel == 2'b00) ? (clk_4khz) : (
+ (reg_clock_sel == 2'b01) ? (clk_256khz) : (
+ (reg_clock_sel == 2'b10) ? (clk_64khz) :
+ (clk_16khz)))) : (1'b0);
+
+ reg last_clk_tim; // clk_tim delayed by one clk, used for falling-edge detection
+ reg write_block; // set in the cycle TIMA is reloaded from TMA; blocks bus writes to TIMA
+
+ // Bus RW
+ // Bus RW - Combinational Read
+ always @(*)
+ begin
+ dout = 8'hFF; // value returned for addresses outside FF04-FF07
+ if (a == 16'hFF04) dout = reg_div; else
+ if (a == 16'hFF05) dout = reg_tima; else
+ if (a == 16'hFF06) dout = reg_tma; else
+ if (a == 16'hFF07) dout = reg_tac;
+ end
+
+ // Previous-cycle sample of clk_tim (NOTE(review): not cleared by rst - confirm intended)
+ always @(posedge clk) begin
+ last_clk_tim <= clk_tim;
+ end
+ // Bus RW - Sequential Write, counting, reload and interrupt handling
+ always @(posedge clk) begin
+ if (rst) begin
+ //reg_div <= 0;
+ reg_tima <= 0;
+ reg_tma <= 0;
+ reg_tac <= 0;
+ div <= 0;
+ int_tim_req <= 0;
+ write_block <= 0;
+ end
+ else begin
+ div <= div + 1'b1; // default; overridden below by a write to FF04
+ if ((wr) && (a == 16'hFF04)) div <= 4; // compensate 1 cycle delay
+ else if ((wr) && (a == 16'hFF06)) begin
+ // test acceptance/timer/tma_write_reloading seems to imply
+ // that the reloading is done using a latch rather a FF
+ // writing to tma in the same cycle will fall through to tima
+ // as well.
+ reg_tma <= din;
+ if (write_block)
+ reg_tima <= din;
+ end
+ else if ((wr) && (a == 16'hFF07)) reg_tac <= din;
+ else if ((wr) && (a == 16'hFF05) && (!write_block)) reg_tima <= din;
+ else begin // no register write accepted this cycle
+ if ((last_clk_tim == 1'b1)&&(clk_tim == 1'b0)) begin // falling edge of the selected tap
+ reg_tima <= reg_tima + 1'b1;
+ if (reg_tima == 8'hFF) begin
+ int_tim_req <= 1'b1; // interrupt doesn't get delayed.
+ end
+ end
+ else begin
+ if ((int_tim_req)&&(int_tim_ack)) begin
+ int_tim_req <= 1'b0;
+ end
+ if ((ct == 2'b00)&&(reg_timer_enable)) begin
+ if (reg_tima == 8'd0) begin // TIMA reads zero at ct == 0: reload it from TMA
+ reg_tima <= reg_tma;
+ //int_tim_req <= 1'b1;
+ write_block <= 1'b1;
+ end
+ else begin
+ write_block <= 1'b0;
+ end
+ end
+ end
+ end
+ end
+ end
+
+endmodule
diff --git a/verilog/sim/Makefile b/verilog/sim/Makefile
new file mode 100644
index 0000000..415ae0a
--- /dev/null
+++ b/verilog/sim/Makefile
@@ -0,0 +1,88 @@
+TARGET := simulator
+OBJODIR := obj
+RTLDIR := .
+# Locate the Verilator installation unless the caller already provided it.
+ifeq ($(VERILATOR_ROOT),)
+VERILATOR_ROOT ?= $(shell bash -c 'verilator -V|grep VERILATOR_ROOT | head -1 | sed -e " s/^.*=\s*//"')
+endif
+VROOT := $(VERILATOR_ROOT)
+VINCD := $(VROOT)/include
+RTLOBJDIR := $(RTLDIR)/obj_dir
+RTLOBJ := $(RTLOBJDIR)/Vboy__ALL.a
+# The C compiler, C++ compiler and linker driver are all g++.
+CC = g++
+CXX = g++
+LD = g++
+
+LIBS := $(shell pkg-config sdl2 --cflags --libs) -lm
+
+COMMONFLAGS := \
+    -Wall -Og -g $(shell pkg-config sdl2 --cflags)
+
+CPPFLAGS := \
+    -faligned-new \
+    -std=c++17
+
+LDFLAGS :=
+
+INCLUDES += \
+    -I. \
+    -I$(RTLOBJDIR) \
+    -I$(RTLDIR) \
+    -I$(VINCD) \
+    -I$(VINCD)/vltstd
+
+CSRCS +=
+# Simulator sources plus the Verilator runtime files (found under $(VINCD)).
+CPPSRCS += \
+    ./main.cpp \
+    ./dispsim.cpp \
+    ./mbcsim.cpp \
+    ./memsim.cpp \
+    ./mmrprobe.cpp \
+    verilated.cpp \
+    verilated_vcd_c.cpp
+
+OBJS := $(CSRCS:%.c=$(OBJODIR)/%.o) \
+    $(CPPSRCS:%.cpp=$(OBJODIR)/%.o)
+
+OBJS += $(RTLOBJ)
+# Verilated model archive; use $(MAKE) so -j/-n and the jobserver reach the sub-make.
+$(RTLOBJ):
+	@echo Building RTL
+	$(MAKE) -f rtl.mk
+# Local sources depend on $(RTLOBJ) so the RTL build runs before they are compiled.
+$(OBJODIR)/%.o: %.c $(RTLOBJ)
+	@echo [CC] $<
+	@mkdir -p $(dir $@)
+	@$(CC) $(COMMONFLAGS) $(CCFLAGS) $(INCLUDES) -c -o $@ $<
+
+$(OBJODIR)/%.o: %.cpp $(RTLOBJ)
+	@echo [CXX] $<
+	@mkdir -p $(dir $@)
+	@$(CXX) $(COMMONFLAGS) $(CPPFLAGS) $(INCLUDES) -c -o $@ $<
+# Verilator runtime sources are compiled straight from the Verilator include directory.
+$(OBJODIR)/%.o: $(VINCD)/%.cpp
+	@echo [CXX] $<
+	@mkdir -p $(dir $@)
+	@$(CXX) $(COMMONFLAGS) $(CPPFLAGS) $(INCLUDES) -c -o $@ $<
+
+PHONY += all
+all: $(OBJS)
+	@$(LD) $(LDFLAGS) $(OBJS) $(LIBS) -o $(TARGET)
+	@echo 'all finish'
+
+PHONY += clean
+clean:
+	rm -rf $(OBJODIR)
+	rm -f $(TARGET)
+	$(MAKE) -f rtl.mk clean
+	@echo 'clean finish'
+
+# Declare the contents of the PHONY variable as phony targets. The names are
+# collected in a variable so each target can register itself next to its
+# rule with `PHONY += name`.
+.PHONY: $(PHONY)
+
+# Set default target
+.DEFAULT_GOAL:= all
+
diff --git a/verilog/sim/dispsim.cpp b/verilog/sim/dispsim.cpp
new file mode 100644
index 0000000..c796b5d
--- /dev/null
+++ b/verilog/sim/dispsim.cpp
@@ -0,0 +1,166 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// dispsim.cpp: Display simulation unit
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#include <SDL.h>
+#include "dispsim.h"
+
+DISPSIM::DISPSIM(void) {
+    // Put the sync/position state into a known state first: apply() reads
+    // last_hs/last_vs on its first call, and the error paths below return
+    // before the end of the constructor.
+    last_hs = last_vs = 0;
+    xCounter = yCounter = 0;
+    tick = 0;
+    textureRect.x = textureRect.y = 0;
+    textureRect.w = contentWidth;
+    textureRect.h = contentHeight;
+
+    window = SDL_CreateWindow("VerilogBoy Simulation",
+            SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
+            dispWidth, dispHeight, SDL_SWSURFACE);
+    if (window == NULL) {
+        fprintf(stderr, "Unable to create window\n");
+        return;
+    }
+
+    renderer = SDL_CreateRenderer(window, -1,
+            SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
+    if (renderer == NULL) {
+        fprintf(stderr, "Unable to create renderer\n");
+        return;
+    }
+
+    // The scale-quality hint is sampled when a texture is created, so it has
+    // to be set before SDL_CreateTexture() to select nearest-pixel scaling.
+    SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "0");
+    screen = SDL_CreateRGBSurface(SDL_SWSURFACE, contentWidth, contentHeight, 32,
+            0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000);
+    texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
+            SDL_TEXTUREACCESS_STREAMING, contentWidth, contentHeight);
+    if (screen == NULL || texture == NULL) {
+        fprintf(stderr, "Unable to allocate framebuffer or texture\n");
+        return;
+    }
+
+    // Fill the framebuffer with a solid colour and present it once.
+    SDL_FillRect(screen, &textureRect, 0xFF0000FF);
+    renderCopy();
+
+    tick = SDL_GetTicks();
+}
+
+DISPSIM::~DISPSIM(void) {
+    // Tear down the SDL objects. Any of them may still be NULL when the
+    // constructor returned early, so each handle is checked on its own.
+    // Release order: surface, texture, renderer, window.
+
+    // Software framebuffer
+    if (screen)
+        SDL_FreeSurface(screen);
+
+    // Streaming texture
+    if (texture != NULL)
+        SDL_DestroyTexture(texture);
+
+    // Renderer
+    if (renderer != NULL)
+        SDL_DestroyRenderer(renderer);
+
+    // Window
+    if (window != NULL)
+        SDL_DestroyWindow(window);
+}
+
+void DISPSIM::apply(const unsigned char lcd_data, const unsigned char lcd_hs,
+        const unsigned char lcd_vs, const unsigned char lcd_enable) {
+    // Rising edges of the sync signals, relative to the previous call.
+    const bool hsRise = lcd_hs && !last_hs;
+    const bool vsRise = lcd_vs && !last_vs;
+    last_hs = lcd_hs;
+    last_vs = lcd_vs;
+    if (hsRise) {
+        // Horizontal sync: restart the column count on the next row.
+        xCounter = 0;
+        yCounter += 1;
+    }
+    if (vsRise)
+        yCounter = 0;  // Vertical sync can happen at the same time; it wins.
+    if (lcd_enable) {
+        xCounter += 1;
+        setPixel(xCounter - HBP, yCounter - VBP, colorMap(lcd_data));
+    }
+    // Present the framebuffer once more than REFRESH_INTERVAL ticks have passed.
+    if ((SDL_GetTicks() - tick) > REFRESH_INTERVAL) {
+        renderCopy();
+        tick = SDL_GetTicks();
+    }
+}
+
+void DISPSIM::set_title(char *title) {  // Set the caption of the simulator window.
+    SDL_SetWindowTitle(this->window, title);
+}
+
+void DISPSIM::renderCopy(void) {
+    // Copy the software framebuffer into the streaming texture and present it.
+    // Skipped when construction failed or the lock fails: texturePixels is not valid then.
+    void *texturePixels = NULL;
+    int texturePitch = 0;
+    if (!screen || !texture || SDL_LockTexture(texture, NULL, &texturePixels, &texturePitch) != 0)
+        return;
+    memset(texturePixels, 0, textureRect.y * texturePitch);     // rows above the image
+    uint8_t *pixels = (uint8_t *)texturePixels + textureRect.y * texturePitch;
+    uint8_t *src = (uint8_t *)screen->pixels;
+    int leftPitch = textureRect.x << 2;                         // bytes left of the image, 4 per pixel
+    int rightPitch = texturePitch - ((textureRect.x + textureRect.w) << 2);
+    for (int y = 0; y < textureRect.h; y++, src += screen->pitch) {
+        memset(pixels, 0, leftPitch); pixels += leftPitch;
+        memcpy(pixels, src, contentWidth << 2); pixels += contentWidth << 2;
+        memset(pixels, 0, rightPitch); pixels += rightPitch;
+    }
+    memset(pixels, 0, textureRect.y * texturePitch);            // rows below the image
+    SDL_UnlockTexture(texture);
+    SDL_RenderClear(renderer);
+    SDL_RenderCopy(renderer, texture, NULL, NULL);
+    SDL_RenderPresent(renderer);
+}
+
+void DISPSIM::setPixel(int x, int y, unsigned long pixel) {
+    // Ignore the write when no framebuffer exists or (x, y) lies outside the content area.
+    if ((screen == NULL) || (x < 0) || (y < 0) || (x >= contentWidth) || (y >= contentHeight))
+        return;
+    ((uint32_t *)((uint8_t *)screen->pixels + y * screen->pitch))[x] = pixel; // rows are screen->pitch bytes apart
+}
+
+unsigned long DISPSIM::colorMap(unsigned char pixel) {
+    // Translate an LCD shade value (0-3) into the 32-bit colour written to
+    // the framebuffer; any other value maps to opaque white.
+    switch (pixel) {
+    case 3:  return 0xff212f25;
+    case 2:  return 0xff32513a;
+    case 1:  return 0xff658635;
+    case 0:  return 0xff8b9a26;
+    default:
+        // how???
+        return 0xffffffff;
+    }
+}
\ No newline at end of file
diff --git a/verilog/sim/dispsim.h b/verilog/sim/dispsim.h
new file mode 100644
index 0000000..e6ae898
--- /dev/null
+++ b/verilog/sim/dispsim.h
@@ -0,0 +1,55 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// dispsim.h: Display simulation unit
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#pragma once
+
+class DISPSIM {  // SDL-backed display model: plots the LCD pixel stream into a window
+public:
+    const int contentWidth = 160;   // framebuffer width in pixels
+    const int contentHeight = 144;  // framebuffer height in pixels
+    const int dispWidth = 320;      // window width
+    const int dispHeight = 288;     // window height
+    DISPSIM(void);
+    ~DISPSIM(void);
+    void apply(const unsigned char lcd_data, const unsigned char lcd_hs,
+            const unsigned char lcd_vs, const unsigned char lcd_enable);  // feed one sample of the LCD signals
+    void set_title(char *title);
+private:
+    static constexpr int HBP = 1;  // subtracted from xCounter before plotting (presumably horizontal back porch)
+    static constexpr int VBP = 2;  // subtracted from yCounter before plotting (presumably vertical back porch)
+    static constexpr int REFRESH_INTERVAL = 20;  // minimum SDL tick delta between presents
+    SDL_Surface *screen = NULL;
+    SDL_Window *window = NULL;
+    SDL_Renderer *renderer = NULL;
+    SDL_Texture *texture = NULL;
+    SDL_Rect textureRect = {0, 0, 0, 0};
+    unsigned char last_vs = 0;  // previous lcd_vs sample; defaulted because apply() reads it before writing
+    unsigned char last_hs = 0;  // previous lcd_hs sample; defaulted for the same reason
+    int xCounter = 0;           // column counter, also valid if the constructor bails out early
+    int yCounter = 0;           // row counter
+    int tick = 0;               // SDL tick count of the last present
+    void renderCopy(void);
+    void setPixel(int x, int y, unsigned long pixel);
+    unsigned long colorMap(unsigned char pixel);
+};
diff --git a/verilog/sim/main.cpp b/verilog/sim/main.cpp
new file mode 100644
index 0000000..c181510
--- /dev/null
+++ b/verilog/sim/main.cpp
@@ -0,0 +1,374 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// main.cpp: VerilogBoy main simulation unit
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+#include <time.h>
+
+#include <SDL.h>
+
+#include "verilated.h"
+#include "verilated_vcd_c.h"
+#include "Vboy.h"
+
+#include "memsim.h"
+#include "mbcsim.h"
+#include "dispsim.h"
+#include "mmrprobe.h"
+
+// Simulated clock period in picoseconds (250000 ps = 250 ns); used for the
+// simulation time stamp and for the VCD dump times.
+#define CLK_PERIOD_PS 250000
+
+// NOTE(review): RAM_BASE, RAM_SIZE and CON_BASE are not referenced anywhere
+// else in this file. RAM_SIZE is also not parenthesised, so it would be
+// unsafe inside a larger expression.
+#define RAM_BASE 0x80000000
+#define RAM_SIZE 1*1024*1024
+
+#define CON_BASE 0x20000000
+
+// Verilator related
+// The verilated model and the (optional) VCD writer.
+Vboy *core;
+VerilatedVcdC *trace;
+
+// SIGNAL(x) names an internal signal of the model: it pastes x onto
+// "core->boy__DOT__", so SIGNAL(cpu_a) is core->boy__DOT__cpu_a.
+#define CONCAT(a,b) a##b
+#define SIGNAL(x) CONCAT(core->boy__DOT__,x)
+
+// Maximum tick count before the run is aborted.
+// this only applies to quiet mode.
+const uint64_t CYCLE_LIMIT = 32768;
+
+// Command line switches, see the option parsing in main().
+static bool quiet = false;          // --testmode: no display, dump registers to a file
+static bool verbose = false;        // --verbose: enable the MMR probe
+static bool enable_trace = false;   // --trace: write trace.vcd
+static bool noboot = false;         // --noboot: skip the boot ROM
+static bool nostop = false;         // --nostop: keep running on done / cycle limit
+static bool itrace = false;         // --itrace: write itrace.txt
+static bool usembc = false;         // --mbc: use the MBC cartridge model
+// The main loop stops once the CPU's last_pc equals this address.
+static unsigned short breakpoint = 0xff7f;
+// Path of the register dump written in --testmode.
+static char result_file[127];
+
+// Software simulated peripherals
+MEMSIM *cartrom;
+MEMSIM *cartram;
+MBCSIM *mbc;
+DISPSIM *dispsim;
+MMRPROBE *mmrprobe;
+// Output stream of the instruction trace (only opened with --itrace).
+FILE *it;
+
+// State
+// Number of clock cycles simulated so far.
+uint64_t tickcount;
+
+// Current simulation time in picoseconds: the number of elapsed ticks times
+// the clock period CLK_PERIOD_PS (250000 ps, i.e. a 250 ns clock).
+double sc_time_stamp() {
+    const double elapsed_ticks = static_cast<double>(tickcount);
+    const double period_ps = static_cast<double>(CLK_PERIOD_PS);
+    return elapsed_ticks * period_ps;
+}
+
+// Advance the simulation by one clock cycle.
+//
+// The software peripherals are fed with the current bus / LCD outputs of the
+// core first, then the clock is pulsed high and low (with optional VCD dumps
+// around each evaluation). With --itrace, the register state is appended to
+// the instruction trace whenever an instruction has just finished.
+void tick() {
+    if (usembc) {
+        mbc->apply(
+            core->dout,
+            core->a,
+            core->wr,
+            core->rd,
+            core->din);
+    }
+    else {
+        // The plain ROM model never sees a write strobe (wr is forced to 0).
+        cartrom->apply(
+            core->dout,
+            core->a,
+            0,
+            core->rd,
+            core->din);
+
+        cartram->apply(
+            core->dout,
+            core->a,
+            core->wr,
+            core->rd,
+            core->din);
+    }
+
+    if (!quiet) {
+        dispsim->apply(
+            core->pixel,
+            core->hs,
+            core->vs,
+            core->valid);
+    }
+
+    if (verbose) {
+        mmrprobe->apply(
+            SIGNAL(cpu_dout),
+            SIGNAL(cpu_a),
+            SIGNAL(cpu_wr),
+            SIGNAL(cpu_rd),
+            SIGNAL(cpu_din),
+            SIGNAL(cpu__DOT__last_pc));
+    }
+
+    tickcount++;
+
+    // Settle inputs, rising edge, falling edge.
+    core->eval();
+    if (enable_trace) trace->dump(tickcount * CLK_PERIOD_PS - CLK_PERIOD_PS / 4);
+    core->clk = 1;
+    core->eval();
+    if (enable_trace) trace->dump(tickcount * CLK_PERIOD_PS);
+    core->clk = 0;
+    core->eval();
+    if (enable_trace) trace->dump(tickcount * CLK_PERIOD_PS + CLK_PERIOD_PS / 2);
+
+    if (itrace) {
+        // The comparison is kept outside SIGNAL(): the macro pastes tokens,
+        // so only the signal name belongs inside it.
+        if ((SIGNAL(cpu__DOT__ct_state) == 3) &&
+            (SIGNAL(cpu__DOT__next) == 0)) {
+            // Instruction just finished executing
+            // tickcount is a uint64_t, so it is printed through an explicit
+            // unsigned long long cast instead of "%ld".
+            // NOTE(review): the factor 10 does not match CLK_PERIOD_PS
+            // (250 ns); kept so existing traces stay comparable - confirm.
+            fprintf(it, "Time %llu\nPC = %04x, F = %c%c%c%c, A = %02x, SP = %02x%02x\nB = %02x, C = %02x, D = %02x, E = %02x, H = %02x, L = %02x\n",
+                (unsigned long long)(10 * tickcount),
+                SIGNAL(cpu__DOT__pc),
+                ((SIGNAL(cpu__DOT__flags)) & 0x8) ? 'Z' : '-',
+                ((SIGNAL(cpu__DOT__flags)) & 0x4) ? 'N' : '-',
+                ((SIGNAL(cpu__DOT__flags)) & 0x2) ? 'H' : '-',
+                ((SIGNAL(cpu__DOT__flags)) & 0x1) ? 'C' : '-',
+                SIGNAL(cpu__DOT__acc__DOT__data),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[6]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[7]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[0]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[1]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[2]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[3]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[4]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[5]));
+        }
+    }
+}
+
+// Reset the core: one tick with rst low, one tick with rst high, then rst is
+// released again. With --noboot the boot ROM is disabled afterwards.
+void reset() {
+    const unsigned char rst_levels[] = { 0, 1 };
+    for (unsigned char level : rst_levels) {
+        core->rst = level;
+        tick();
+    }
+    core->rst = 0;
+
+    if (!noboot)
+        return;
+    SIGNAL(brom_disable) = 1;
+}
+
+// Simulator entry point.
+//
+// argv[1] is the ROM image; the remaining arguments are option switches (see
+// the usage text). The function sets up the model and the software
+// peripherals, resets the core and runs the simulation until a stop
+// condition is met, then prints (and in --testmode also saves) the final
+// register state.
+int main(int argc, char *argv[]) {
+
+    // Initialize testbench
+    Verilated::commandArgs(argc, argv);
+
+    // Checked before anything is allocated so the early exit leaks nothing.
+    if (argc < 2) {
+        puts("USAGE: vb_sim <rom.gb> [--testmode] [--verbose] [--trace] [--noboot] "
+            "[--nostop] [--itrace] [--mbc] (verilator parameters...)\n");
+        exit(0);
+    }
+
+    core = new Vboy;
+    Verilated::traceEverOn(true);
+
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "--testmode") == 0) {
+            quiet = true;
+            noboot = true;
+
+            // Result file = ROM path with its extension replaced by
+            // ".actual". The copy is bounded so that the suffix always fits
+            // into result_file.
+            const size_t max_stem = sizeof(result_file) - sizeof(".actual");
+            if (strlen(argv[1]) > max_stem)
+                fprintf(stderr, "ROM path too long, result file name truncated.\n");
+            snprintf(result_file, max_stem + 1, "%s", argv[1]);
+
+            // Only a dot inside the file name counts as the extension, so
+            // paths such as "./rom.gb" or "../tests/rom" work as expected.
+            char *name = strrchr(result_file, '/');
+            name = (name == NULL) ? result_file : name + 1;
+            char *location = strrchr(name, '.');
+            if (location == NULL)
+                location = result_file + strlen(result_file);
+            strcpy(location, ".actual");
+        }
+        // Skip boot ROM
+        if (strcmp(argv[i], "--noboot") == 0) {
+            noboot = true;
+        }
+        // Enable MMR probe
+        if (strcmp(argv[i], "--verbose") == 0) {
+            verbose = true;
+        }
+        // Enable waveform trace
+        if (strcmp(argv[i], "--trace") == 0) {
+            enable_trace = true;
+        }
+        // Does not stop on STOP/HALT
+        if (strcmp(argv[i], "--nostop") == 0) {
+            nostop = true;
+        }
+        // Enable instruction level trace
+        if (strcmp(argv[i], "--itrace") == 0) {
+            itrace = true;
+        }
+        // Enable MBC emulation
+        if (strcmp(argv[i], "--mbc") == 0) {
+            usembc = true;
+        }
+    }
+
+    if (enable_trace) {
+        trace = new VerilatedVcdC;
+        core->trace(trace, 99);
+        trace->open("trace.vcd");
+    }
+
+    if (usembc) {
+        mbc = new MBCSIM();
+    }
+    else {
+        cartrom = new MEMSIM(0x0000, 32768, 0);
+        cartram = new MEMSIM(0xa000, 8192, 0);
+    }
+
+    if (!quiet) {
+        dispsim = new DISPSIM();
+    }
+    if (verbose) {
+        mmrprobe = new MMRPROBE();
+    }
+    if (itrace) {
+        it = fopen("itrace.txt", "w");
+        if (!it) {
+            itrace = false;
+            fprintf(stderr, "Fail to open output file for itrace.\n");
+        }
+    }
+
+    if (usembc)
+        mbc->load(argv[1]);
+    else
+        cartrom->load(argv[1]);
+
+    // Start simulation
+    if (verbose)
+        printf("Simulation start.\n");
+
+    reset();
+
+    uint32_t sim_tick = 0;
+    uint32_t ms_tick = SDL_GetTicks();
+    char window_title[63];
+    bool running = true;
+    while (running) {
+        tick();
+
+        sim_tick++;
+
+        // Check end condition
+        if (SIGNAL(cpu__DOT__last_pc) == breakpoint) {
+            printf("Hit breakpoint\n");
+            running = false;
+        }
+
+        if ((tickcount > CYCLE_LIMIT) && (quiet) && (!nostop)) {
+            printf("Time Limit Exceeded\n");
+            running = false;
+        }
+
+        if (core->fault) {
+            printf("Core fault condition\n");
+            running = false;
+        }
+
+        if (core->done && !nostop)
+            running = false;
+
+        // Every 4096 ticks: handle window events and refresh the title
+        if (!quiet && (sim_tick % 4096 == 0)) {
+            // Drain the whole queue so a quit request is never stuck behind
+            // other pending events.
+            SDL_Event event;
+            while (SDL_PollEvent(&event)) {
+                if (event.type == SDL_QUIT) {
+                    // Break out of the loop on quit
+                    running = false;
+                }
+            }
+            // Ticks per millisecond == kHz. When less than 1 ms has passed
+            // the measurement is carried over to the next round instead of
+            // dividing by zero.
+            uint32_t ms_delta = SDL_GetTicks() - ms_tick;
+            if (ms_delta > 0) {
+                int sim_freq = sim_tick / ms_delta;
+                sim_tick = 0;
+                snprintf(window_title, sizeof(window_title),
+                    "VerilogBoy Sim (%d kHz)", sim_freq);
+                dispsim->set_title(window_title);
+                ms_tick = SDL_GetTicks();
+            }
+        }
+    }
+
+    if (quiet) {
+        // output result to file
+        FILE *result = fopen(result_file, "w+");
+        if (result) {
+            fprintf(result, "AF %02x%02x\r\n",
+                SIGNAL(cpu__DOT__acc__DOT__data),
+                SIGNAL(cpu__DOT__flags) << 4);
+            fprintf(result, "BC %02x%02x\r\n",
+                SIGNAL(cpu__DOT__regfile__DOT__regs[0]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[1]));
+            fprintf(result, "DE %02x%02x\r\n",
+                SIGNAL(cpu__DOT__regfile__DOT__regs[2]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[3]));
+            fprintf(result, "HL %02x%02x\r\n",
+                SIGNAL(cpu__DOT__regfile__DOT__regs[4]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[5]));
+            fprintf(result, "SP %02x%02x\r\n",
+                SIGNAL(cpu__DOT__regfile__DOT__regs[6]),
+                SIGNAL(cpu__DOT__regfile__DOT__regs[7]));
+            fprintf(result, "PC %04x\r\n",
+                SIGNAL(cpu__DOT__pc));
+            fclose(result);
+        }
+        else {
+            // Reported explicitly: an assert would vanish in NDEBUG builds.
+            fprintf(stderr, "Fail to open result file %s.\n", result_file);
+        }
+    }
+    // print on screen
+    printf("PC = %04x, F = %c%c%c%c, A = %02x, SP = %02x%02x\nB = %02x, C = %02x, D = %02x, E = %02x, H = %02x, L = %02x\n",
+        SIGNAL(cpu__DOT__pc),
+        ((SIGNAL(cpu__DOT__flags)) & 0x8) ? 'Z' : '-',
+        ((SIGNAL(cpu__DOT__flags)) & 0x4) ? 'N' : '-',
+        ((SIGNAL(cpu__DOT__flags)) & 0x2) ? 'H' : '-',
+        ((SIGNAL(cpu__DOT__flags)) & 0x1) ? 'C' : '-',
+        SIGNAL(cpu__DOT__acc__DOT__data),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[6]),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[7]),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[0]),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[1]),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[2]),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[3]),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[4]),
+        SIGNAL(cpu__DOT__regfile__DOT__regs[5])
+        );
+
+    if (enable_trace) {
+        trace->close();
+        delete trace;
+    }
+
+    delete core;
+    if (!quiet) {
+        delete dispsim;
+    }
+    if (verbose) {
+        delete mmrprobe;
+    }
+    if (it) {
+        fclose(it);
+    }
+    if (usembc) {
+        delete mbc;
+    }
+    else {
+        delete cartrom;
+        delete cartram;
+    }
+
+    return 0;
+}
\ No newline at end of file
diff --git a/verilog/sim/mbcsim.cpp b/verilog/sim/mbcsim.cpp
new file mode 100644
index 0000000..69a836f
--- /dev/null
+++ b/verilog/sim/mbcsim.cpp
@@ -0,0 +1,214 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// mbcsim.cpp: Cartridge with memory bank controller (MBC) simulation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+#include "mbcsim.h"
+
+// Allocate the cartridge ROM / RAM images and put all banking registers into
+// their initial state.
+MBCSIM::MBCSIM(void) {
+    ram = new uint8_t[MBC_RAM_SIZE];
+    rom = new uint8_t[MBC_ROM_SIZE];
+    // Give both images defined contents so an access made before load()
+    // never returns uninitialised heap data (0xff matches the RAM fill
+    // value used by load()).
+    memset(ram, 0xff, MBC_RAM_SIZE);
+    memset(rom, 0xff, MBC_ROM_SIZE);
+    // apply() branches on mbc_type, so it must not stay indeterminate until
+    // load() is called.
+    mbc_type = MBCNONE;
+    ram_enable = 0; // Disable by default
+    mbc_mode = 0; // Banking mode for MBC1
+    rom_bank = 1;
+    ram_bank = 0;
+    last_wr = 0;
+    last_rd = 0;
+    last_data = 0;
+}
+
+// Release the ROM and RAM images allocated by the constructor.
+MBCSIM::~MBCSIM(void) {
+    delete[] rom;
+    delete[] ram;
+}
+
+// Load a ROM image from `fname` and decode its cartridge header.
+//
+// At most MBC_ROM_SIZE bytes are read; the rest of the ROM buffer is filled
+// with 0xff. The title (0x134), cartridge type (0x147), ROM size (0x148) and
+// RAM size (0x149) header fields are printed, mbc_type is set from the
+// cartridge type and the cartridge RAM is reset to 0xff.
+// Failing to open or read the file trips an assert.
+void MBCSIM::load(const char *fname) {
+    FILE *fp;
+
+    fp = fopen(fname, "rb");
+    assert(fp);
+    fseek(fp, 0, SEEK_END);
+    long fsize = ftell(fp);
+    assert(fsize >= 0);
+    fseek(fp, 0, SEEK_SET);
+
+    // Never read more than the ROM buffer can hold.
+    const size_t capacity = MBC_ROM_SIZE;
+    size_t to_read = (size_t)fsize;
+    if (to_read > capacity) {
+        fprintf(stderr, "ROM file is larger than %lu bytes, image truncated.\n",
+            (unsigned long)capacity);
+        to_read = capacity;
+    }
+    // Defined contents behind the image, including the header fields of a
+    // file that is too short to contain them.
+    memset(rom, 0xff, capacity);
+    size_t result = fread((void *)rom, 1, to_read, fp);
+    assert(result == to_read);
+    (void)result; // only used by the assert
+    fclose(fp);
+
+    char title[17];
+    title[16] = 0;
+    memcpy(title, rom + 0x134, 16);
+
+    printf("ROM Title: %s\n", title);
+
+    // Unsigned, so type codes >= 0x80 are neither compared nor printed as
+    // negative numbers.
+    uint8_t ctype = rom[0x147];
+    if ((ctype == 0x00) || (ctype == 0x08) || (ctype == 0x09)) {
+        mbc_type = MBCNONE;
+        printf("MBC Type: None\n");
+    }
+    else if ((ctype >= 0x01)&&(ctype <= 0x03)) {
+        mbc_type = MBC1;
+        printf("MBC Type: MBC1\n");
+    }
+    else if ((ctype >= 0x05)&&(ctype <= 0x06)) {
+        mbc_type = MBC2;
+        printf("MBC Type: MBC2\n");
+    }
+    else if ((ctype >= 0x0f)&&(ctype <= 0x13)) {
+        mbc_type = MBC3;
+        printf("MBC Type: MBC3\n");
+    }
+    else if ((ctype >= 0x19)&&(ctype <= 0x1e)) {
+        mbc_type = MBC5;
+        printf("MBC Type: MBC5\n");
+    }
+    else {
+        mbc_type = MBCUNKNOWN;
+        printf("Unsupported Cartridge Type: %d\n", ctype);
+    }
+
+    // ROM size code -> size in KB (informational only)
+    int rom_size = rom[0x148];
+    if (rom_size <= 0x08)
+        rom_size = (1 << rom_size) * 32;
+    else if (rom_size == 0x52)
+        rom_size = 72*16; // 72 banks
+    else if (rom_size == 0x53)
+        rom_size = 80*16;
+    else if (rom_size == 0x54)
+        rom_size = 96*16;
+    else
+        rom_size = 32; // Fallback to 32KB
+    printf("ROM Size: %d KB\n", rom_size);
+
+    // RAM size code -> size in KB (informational only)
+    int ram_size = rom[0x149];
+    if (ram_size == 0x00)
+        ram_size = 0;
+    else if (ram_size == 0x01)
+        ram_size = 2;
+    else if (ram_size == 0x02)
+        ram_size = 8;
+    else if (ram_size == 0x03)
+        ram_size = 32;
+    else if (ram_size == 0x04)
+        ram_size = 128;
+    else if (ram_size == 0x05)
+        ram_size = 64;
+    else
+        ram_size = 0;
+    printf("RAM Size: %d KB\n", ram_size);
+
+    memset(ram, 0xff, MBC_RAM_SIZE);
+}
+
+// Present one sample of the cartridge bus to the MBC model.
+//
+//   wr_data   - data driven by the core
+//   address   - bus address
+//   wr_enable - write strobe; a write is committed on its falling edge,
+//               using the data sampled on the previous call
+//   rd_enable - read strobe; a read is served on its rising edge
+//   rd_data   - receives the read data (left untouched otherwise)
+//
+// Addresses below 0x8000 are ROM (reads) / banking registers (writes),
+// 0xa000-0xbfff is the cartridge RAM window; everything else is ignored.
+void MBCSIM::apply(const uint8_t wr_data, const uint16_t address,
+        const uint8_t wr_enable, const uint8_t rd_enable, uint8_t &rd_data) {
+
+    // 0x8000 itself is not part of the ROM window. Letting it through would
+    // make a read fall into the RAM branch with a negative index.
+    const bool in_rom = (address < 0x8000);
+    const bool in_ram = (address >= 0xa000) && (address < 0xc000);
+
+    // Address within ROM window or RAM window
+    if (in_rom || in_ram) {
+        // Offset into the RAM image; MBC1 in mode 0 always uses bank 0.
+        // ram_bank is kept within 0..15 below, so this stays inside the
+        // MBC_RAM_SIZE (16 x 8 KB) buffer.
+        unsigned int ram_offset = 0;
+        if (in_ram) {
+            ram_offset = address - 0xa000;
+            if (!((mbc_type == MBC1) && (mbc_mode == 0)))
+                ram_offset += ram_bank * 0x2000;
+        }
+
+        if (last_wr && !wr_enable) {
+            if (in_ram) {
+                // Write to RAM
+                if (ram_enable == 0x0a) {
+                    ram[ram_offset] = last_data;
+                }
+            }
+            else if (address < 0x2000) {
+                // RAM Enable (MBC1/3/5): only the low nibble is significant
+                ram_enable = last_data & 0x0f;
+            }
+            else if (address < 0x4000) {
+                // ROM Bank (MBC1/3/5)
+                if (mbc_type == MBC1) {
+                    // Replace the low 5 bits only; the upper bits written
+                    // through 0x4000-0x5fff must survive.
+                    rom_bank &= ~0x1fu;
+                    rom_bank |= (unsigned int)last_data & 0x1f;
+                    // Bank 0 cannot be selected here
+                    if ((last_data & 0x1f) == 0)
+                        rom_bank |= 0x01;
+                }
+                else if (mbc_type == MBC3) {
+                    rom_bank &= ~0x7fu;
+                    rom_bank |= (unsigned int)last_data & 0x7f;
+                    if ((last_data & 0x7f) == 0)
+                        rom_bank |= 0x01;
+                }
+                else if (mbc_type == MBC5) {
+                    if (address < 0x3000) {
+                        rom_bank &= ~0xffu;
+                        rom_bank |= (unsigned int)last_data & 0xff;
+                    }
+                    else {
+                        rom_bank &= ~0x100u;
+                        rom_bank |= ((unsigned int)last_data & 0x01) << 8;
+                    }
+                }
+                //printf("[MBC] Rom bank %d (%04x=%02x)\n", rom_bank, address, last_data);
+            }
+            else if (address < 0x6000) {
+                if ((mbc_type == MBC1) && (mbc_mode == 0)) {
+                    // High ROM Bank (bits 5-6)
+                    rom_bank &= ~0x60u;
+                    rom_bank |= ((unsigned int)last_data & 0x03) << 5;
+                    //printf("[MBC] Rom bank %d (%04x=%02x)\n", rom_bank, address, last_data);
+                }
+                else {
+                    // RAM Bank, limited to the 16 banks the RAM image holds
+                    ram_bank = last_data & 0x0f;
+                    //printf("[MBC] Ram bank %d (%04x=%02x)\n", ram_bank, address, last_data);
+                }
+            }
+            else {
+                // 0x6000-0x7fff: banking mode select, a single bit
+                mbc_mode = last_data & 0x01;
+            }
+        }
+        else if (!last_rd && rd_enable) {
+            if (address < 0x4000) {
+                // LoROM
+                rd_data = rom[address];
+            }
+            else if (in_rom) {
+                // HiROM
+                rd_data = rom[address - 0x4000 + rom_bank * 0x4000];
+                //printf("[MBC] Read from bank %d, %04x (%06x) = %02x\n", rom_bank, address, address - 0x4000 + rom_bank * 0x4000, rd_data);
+            }
+            else {
+                rd_data = ram[ram_offset];
+            }
+        }
+    }
+    last_rd = rd_enable;
+    last_wr = wr_enable;
+    last_data = wr_data;
+}
+
+
diff --git a/verilog/sim/mbcsim.h b/verilog/sim/mbcsim.h
new file mode 100644
index 0000000..87c4f36
--- /dev/null
+++ b/verilog/sim/mbcsim.h
@@ -0,0 +1,57 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// mbcsim.h: Cartridge with memory bank controller (MBC) simulation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#pragma once
+
+// Sizes in bytes of the cartridge RAM and ROM images held by MBCSIM.
+#define MBC_RAM_SIZE (128*1024)
+#define MBC_ROM_SIZE (8*1024*1024)
+
+// Software model of a cartridge with a memory bank controller.
+class MBCSIM {
+public:
+    MBCSIM(void);
+    ~MBCSIM(void);
+    // Load the ROM image from `fname` and detect the MBC type from it.
+    void load(const char *fname);
+    // Present one sample of the cartridge bus (data out, address, write and
+    // read strobes); read data is returned through rd_data.
+    void apply(const uint8_t wr_data, const uint16_t address, const uint8_t wr_enable,
+        const uint8_t rd_enable, uint8_t &rd_data);
+private:
+    // Controller types distinguished by load().
+    typedef enum {
+        MBCNONE,
+        MBC1,
+        MBC2,
+        MBC3,
+        MBC5,
+        MBCUNKNOWN
+    } MBCTYPE;
+
+    // ROM / RAM images (MBC_ROM_SIZE / MBC_RAM_SIZE bytes).
+    uint8_t *rom;
+    uint8_t *ram;
+    // Strobe levels and write data seen on the previous apply() call.
+    uint8_t last_wr;
+    uint8_t last_rd;
+    uint8_t last_data;
+    // Controller type, set by load().
+    MBCTYPE mbc_type;
+    // Last value written to the RAM enable register (0x0a enables writes).
+    char ram_enable;
+    // Last value written to the mode register (0 = MBC1 banking mode).
+    char mbc_mode;
+    // Currently selected ROM / RAM bank numbers.
+    unsigned int rom_bank;
+    unsigned int ram_bank;
+};
diff --git a/verilog/sim/memsim.cpp b/verilog/sim/memsim.cpp
new file mode 100644
index 0000000..0cbf3b0
--- /dev/null
+++ b/verilog/sim/memsim.cpp
@@ -0,0 +1,93 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// memsim.cpp: A memory simulation model with simple delay control
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+#include "memsim.h"
+
+// Create a memory of `len` bytes mapped at bus address `base`.
+// After every access the model ignores the bus for `delay` further apply()
+// calls. The allocation is checked and the memory starts out filled with
+// 0xff, so reads of never-written locations are deterministic.
+MEMSIM::MEMSIM(uint16_t base, size_t len, size_t delay) {
+    this->base = base;
+    this->len = len;
+    this->delay = delay;
+    mem = (uint8_t *)malloc(len);
+    assert(mem);
+    if (mem)
+        memset(mem, 0xff, len);
+    delay_count = 0;
+    last_wr = 0;
+    last_rd = 0;
+    last_data = 0;
+}
+
+// Give the backing storage obtained with malloc() back to the heap.
+MEMSIM::~MEMSIM(void) { free(mem); }
+
+// Fill the memory from the start of file `fname`.
+//
+// At most `len` bytes (the size of the memory) are read; a larger file is
+// truncated with a warning instead of overflowing the buffer. Failing to
+// open or read the file trips an assert.
+void MEMSIM::load(char *fname) {
+    FILE *fp;
+
+    fp = fopen(fname, "rb");
+    assert(fp);
+    fseek(fp, 0, SEEK_END);
+    long fsize = ftell(fp);
+    assert(fsize >= 0);
+    fseek(fp, 0, SEEK_SET);
+
+    const size_t capacity = len;
+    size_t to_read = (size_t)fsize;
+    if (to_read > capacity) {
+        fprintf(stderr, "%s is larger than %lu bytes, image truncated.\n",
+            fname, (unsigned long)capacity);
+        to_read = capacity;
+    }
+    size_t result = fread((void *)mem, 1, to_read, fp);
+    assert(result == to_read);
+    (void)result; // only used by the assert
+    fclose(fp);
+}
+
+// Present one sample of the bus to the memory model.
+//
+// While a delay is pending the call only counts it down; the bus and the
+// remembered strobe levels are not looked at. Otherwise, for an address
+// inside [base, base + len), a write is committed on the falling edge of
+// wr_enable (with the data seen on the previous call) and a read is served
+// through rd_data on the rising edge of rd_enable. Either access re-arms the
+// delay counter.
+void MEMSIM::apply(uint8_t wr_data, uint16_t address,
+        uint8_t wr_enable, uint8_t rd_enable, uint8_t &rd_data) {
+
+    if (delay_count != 0) {
+        delay_count --;
+        return;
+    }
+
+    const bool selected = (address >= base) && (address < (base + len));
+    if (selected) {
+        const bool write_strobe_fell = last_wr && !wr_enable;
+        const bool read_strobe_rose = !last_rd && rd_enable;
+
+        if (write_strobe_fell) {
+            mem[address - base] = last_data;
+            delay_count = delay;
+#ifdef __DEBUG
+            printf("MEMBUS W[%04x] = %02x\n",
+                address,
+                last_data);
+#endif
+        }
+        else if (read_strobe_rose) {
+            rd_data = mem[address - base];
+            delay_count = delay;
+#ifdef __DEBUG
+            printf("MEMBUS R[%04x] = %02x\n",
+                address,
+                rd_data);
+#endif
+        }
+    }
+
+    // Remember this sample for the next edge detection
+    last_data = wr_data;
+    last_wr = wr_enable;
+    last_rd = rd_enable;
+}
+
+
diff --git a/verilog/sim/memsim.h b/verilog/sim/memsim.h
new file mode 100644
index 0000000..8ce05d2
--- /dev/null
+++ b/verilog/sim/memsim.h
@@ -0,0 +1,43 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// memsim.h: A memory simulation model with simple delay control
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#pragma once
+
+// Simple memory model with an optional access delay.
+class MEMSIM {
+public:
+    // Memory of `len` bytes at bus address `base`; `delay` is the number of
+    // apply() calls the model stays busy after an access.
+    MEMSIM(uint16_t base, size_t len, size_t delay);
+    ~MEMSIM(void);
+    // Fill the memory from the file `fname`.
+    void load(char *fname);
+    // Present one sample of the bus (data out, address, write and read
+    // strobes); read data is returned through rd_data.
+    void apply(uint8_t wr_data, uint16_t address, uint8_t wr_enable,
+        uint8_t rd_enable, uint8_t &rd_data);
+private:
+    // First bus address served by this memory.
+    uint16_t base;
+    // Backing storage, `len` bytes.
+    uint8_t *mem;
+    // Size in bytes. Stored as size_t like the constructor argument, so a
+    // full 64 KB window (65536) is not truncated to 0.
+    size_t len;
+    // Configured delay and the remaining busy count.
+    int delay;
+    int delay_count;
+    // Strobe levels and write data seen on the previous apply() call.
+    uint8_t last_wr;
+    uint8_t last_rd;
+    uint8_t last_data;
+};
diff --git a/verilog/sim/mmrprobe.cpp b/verilog/sim/mmrprobe.cpp
new file mode 100644
index 0000000..5ff5fc3
--- /dev/null
+++ b/verilog/sim/mmrprobe.cpp
@@ -0,0 +1,63 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// mmrprobe.cpp: A probe that prints out MMR access logs
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+#include "mmrprobe.h"
+
+// Start with no strobe active and no data remembered.
+MMRPROBE::MMRPROBE()
+    : last_wr(0), last_rd(0), last_data(0) {
+}
+
+// The probe owns no resources, so there is nothing to release.
+MMRPROBE::~MMRPROBE(void) {}
+
+// Present one sample of the CPU bus to the probe.
+//
+// A line is printed when a write strobe or a read strobe has just gone
+// inactive and the address lies in 0x8000-0xff7f; reads of 0xff44 are not
+// reported. Writes print the data seen on the previous call, reads print
+// the current rd_data. `pc` is only used for the log line.
+void MMRPROBE::apply(uint8_t wr_data, uint16_t address,
+        uint8_t wr_enable, uint8_t rd_enable, uint8_t &rd_data, uint16_t pc) {
+
+    const bool write_finished = last_wr && !wr_enable;
+    const bool read_finished = last_rd && !rd_enable;
+    // Ignore ROM and HRAM RW
+    const bool logged_range = (address >= 0x8000) && (address <= 0xff7f);
+
+    if (write_finished) {
+        if (logged_range)
+            printf("PC %04x: BUS W[%04x] = %02x\n", pc, address, last_data);
+    }
+    else if (read_finished) {
+        if (logged_range && (address != 0xff44))
+            printf("PC %04x: BUS R[%04x] = %02x\n", pc, address, rd_data);
+    }
+
+    // Remember this sample for the next edge detection
+    last_data = wr_data;
+    last_wr = wr_enable;
+    last_rd = rd_enable;
+}
+
+
diff --git a/verilog/sim/mmrprobe.h b/verilog/sim/mmrprobe.h
new file mode 100644
index 0000000..ceff7cd
--- /dev/null
+++ b/verilog/sim/mmrprobe.h
@@ -0,0 +1,37 @@
+//
+// VerilogBoy simulator
+// Copyright 2022 Wenting Zhang
+//
+// mmrprobe.h: A probe that prints out MMR access logs
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+#pragma once
+
+// Bus probe that prints an access log.
+class MMRPROBE {
+public:
+    MMRPROBE(void);
+    ~MMRPROBE(void);
+    // Present one sample of the bus (data out, address, write and read
+    // strobes, read data) together with the program counter to report.
+    void apply(uint8_t wr_data, uint16_t address, uint8_t wr_enable,
+        uint8_t rd_enable, uint8_t &rd_data, uint16_t pc);
+private:
+    // Strobe levels and write data seen on the previous apply() call.
+    uint8_t last_wr;
+    uint8_t last_rd;
+    uint8_t last_data;
+};
diff --git a/verilog/sim/rtl.mk b/verilog/sim/rtl.mk
new file mode 100644
index 0000000..fdc9c72
--- /dev/null
+++ b/verilog/sim/rtl.mk
@@ -0,0 +1,42 @@
+# Name of the top-level design; V$(TARGET)__ALL.a is what gets built.
+TARGET ?= boy
+all: $(TARGET)
+
+# Directory holding the Verilator output.
+VOBJ := obj_dir
+# NOTE(review): CXX is not referenced by any rule in this file.
+CXX := g++
+# Directory holding the Verilog sources.
+FBDIR := ../rtl
+# Job count handed to the sub-make via -j.
+CPUS ?= $(shell bash -c 'nproc --all')
+# Set to 1 to add +define+VERBOSE=1 to the Verilator flags.
+VERBOSE ?= 0
+
+.PHONY: all
+$(TARGET): $(VOBJ)/V$(TARGET)__ALL.a
+
+# Sub-make run inside $(VOBJ); the makefile name follows the trailing -f.
+SUBMAKE := $(MAKE) --no-print-directory --directory=$(VOBJ) -f
+# Use the verilator from VERILATOR_ROOT when set, else the one on PATH.
+ifeq ($(VERILATOR_ROOT),)
+VERILATOR := verilator
+else
+VERILATOR := $(VERILATOR_ROOT)/bin/verilator
+endif
+# NOTE(review): the include path repeats ../rtl instead of using $(FBDIR).
+VFLAGS := -Wall -Wno-fatal -MMD --trace -cc -I../rtl
+ifeq ($(VERBOSE), 1)
+VFLAGS += +define+VERBOSE=1
+endif
+
+# The library depends on the generated sources, header and sub-makefile.
+$(VOBJ)/V$(TARGET)__ALL.a: $(VOBJ)/V$(TARGET).cpp $(VOBJ)/V$(TARGET).h
+$(VOBJ)/V$(TARGET)__ALL.a: $(VOBJ)/V$(TARGET).mk
+
+# One Verilator run produces the .cpp, .h and .mk of a design together.
+# $*.v carries no directory - presumably resolved through the -I../rtl entry
+# in VFLAGS; confirm.
+$(VOBJ)/V%.cpp $(VOBJ)/V%.h $(VOBJ)/V%.mk: $(FBDIR)/%.v
+	$(VERILATOR) $(VFLAGS) $*.v
+
+# NOTE(review): these three pattern rules have no recipe. In GNU Make such a
+# rule does not add prerequisites (it only cancels an identical implicit
+# rule), so they are probably no-ops - confirm before relying on them.
+$(VOBJ)/V%.cpp: $(VOBJ)/V%.h
+$(VOBJ)/V%.mk: $(VOBJ)/V%.h
+$(VOBJ)/V%.h: $(FBDIR)/%.v
+
+# Build the static library with the makefile Verilator generated.
+$(VOBJ)/V%__ALL.a: $(VOBJ)/V%.mk
+	$(SUBMAKE) V$*.mk -j$(CPUS)
+
+.PHONY: clean
+# Remove the whole Verilator output directory. Deleting the directory covers
+# the generated .mk/.cpp/.h files too, so no per-extension rm is needed.
+# The guard stops an empty VOBJ (e.g. "make VOBJ= clean") from expanding the
+# command to "rm -rf /".
+clean:
+	$(if $(strip $(VOBJ)),,$(error VOBJ is empty, refusing to run rm -rf))
+	rm -rf $(VOBJ)/