Multiplex main RAM, cartridge, and video RAM access on the same bus
diff --git a/verilog/rtl/README.md b/verilog/rtl/README.md
new file mode 100644
index 0000000..09a7886
--- /dev/null
+++ b/verilog/rtl/README.md
@@ -0,0 +1,32 @@
+## Memory Access / Multiplexing
+
+Due to the limited pin count, cartridge/main memory access and VRAM access are multiplexed over a single shared bus.
+
+Each group of 4 T-cycles (CT 0-3) is divided as follows:
+
+- 0: Cartridge address setup (for external latching)
+- 1: VRAM access
+- 2: WRAM or cartridge RW
+- 3: VRAM access
+
+Diagram:
+
+```
+CT | 0 | 1 | 2 | 3 |
+ ___________ ___
+CLK ____| |___________|
+ _ __ __ __ __ __
+CK |__| |__| |__| |__| |__| |
+ ____ _____________________
+EALE |_____|
+ _____
+ECS ________________| |_________
+ ________________ _________
+WR |_____|
+ ____ _____ _____ _____ _____ ___
+ADDR ____X_WR__X_VR1_X_WR__X_VR2_X___
+ ____ _____ _____ _____
+DATA ____>-----<_VR1_X_WR__X_VR2_>---
+```
+
+Note that the VRAM/WRAM separation is based on the functional unit, not on the address: only the PPU accesses VRAM in the VRAM slots (CT 1 and 3). If the CPU accesses VRAM, the access still happens in the CPU slot (CT 2).
diff --git a/verilog/rtl/async_ram.v b/verilog/rtl/async_ram.v
new file mode 100644
index 0000000..89765e6
--- /dev/null
+++ b/verilog/rtl/async_ram.v
@@ -0,0 +1,23 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+module async_ram #( // Single-port 8-bit RAM: clocked write, combinational (asynchronous) read
+ parameter integer WORDS = 8192, // Depth of the memory array, in 8-bit words
+ parameter ABITS = 13 // Width of the address port; not derived from WORDS, set both together
+)(
+ input clka, // Write clock
+ input wea, // Write enable, checked on the rising edge of clka
+ input [ABITS - 1:0] addra, // Address shared by the write and read paths
+ input [7:0] dina, // Data to be written
+ output [7:0] douta // Read data, follows addra without waiting for a clock edge
+);
+
+ reg [7:0] ram [0:WORDS-1];
+
+ always@(posedge clka) begin // Synchronous write port
+ if (wea)
+ ram[addra] <= dina;
+ end
+
+ assign douta = ram[addra]; // Asynchronous read: no output register
+
+endmodule
diff --git a/verilog/rtl/boy.v b/verilog/rtl/boy.v
index 69c2d65..b437e09 100644
--- a/verilog/rtl/boy.v
+++ b/verilog/rtl/boy.v
@@ -21,7 +21,8 @@
input wire rst, // Async Reset Input
input wire clk, // 4.19MHz Clock Input
output wire phi, // 1.05MHz Reference Clock Output
- // Cartridge interface
+ output wire [1:0] ct, // 0-3T cycle number
+ // CPU/ DMA bus interface
output wire [15:0] a, // Address Bus
output wire [7:0] dout, // Data Bus
input wire [7:0] din,
@@ -38,6 +39,12 @@
// Sound output
output reg [15:0] left,
output reg [15:0] right,
+ // PPU bus interface
+ output wire [12:0] ppu_a,
+ output wire ppu_wr,
+ output wire ppu_rd,
+ output wire [7:0] ppu_dout,
+ input wire [7:0] ppu_din,
// Debug interface
output wire done,
output wire fault
@@ -49,17 +56,18 @@
reg [7:0] cpu_din; // CPU Data Bus, to CPU
wire [7:0] cpu_dout; // CPU Data Bus, from CPU
wire [15:0] cpu_a; // CPU Address Bus
+ wire [15:0] cpu_a_early; // CPU Address Unbuffered
wire [4:0] cpu_int_en; // CPU Interrupt Enable input
wire [4:0] cpu_int_flags_in; // CPU Interrupt Flags input
wire [4:0] cpu_int_flags_out; // CPU Interrupt Flags output
- wire [1:0] cpu_ct; // 0-3 T cycle number inside one M cycle
cpu cpu(
.clk(clk),
.rst(rst),
.phi(phi),
- .ct(cpu_ct),
+ .ct(ct),
.a(cpu_a),
+ .a_early(cpu_a_early),
.dout(cpu_dout),
.din(cpu_din),
.rd(cpu_rd),
@@ -89,16 +97,15 @@
wire dma_rd; // DMA Memory Write Enable
wire dma_wr; // DMA Memory Read Enable
wire [15:0] dma_a; // Main Address Bus
- reg [7:0] dma_din; // Main Data Bus
+ wire [7:0] dma_din; // Main Data Bus
wire [7:0] dma_dout;
wire [7:0] dma_mmio_dout;
reg dma_mmio_wr; // actually wire
- wire dma_occupy_extbus; // 0x0000 - 0x7FFF, 0xA000 - 0xFFFF
- wire dma_occupy_vidbus; // 0x8000 - 0x9FFF
- wire dma_occupy_oambus; // 0xFE00 - 0xFE9F
+ wire dma_occupy_bus;
dma dma(
.clk(clk),
.rst(rst),
+ .ct(ct),
.dma_rd(dma_rd),
.dma_wr(dma_wr),
.dma_a(dma_a),
@@ -107,10 +114,9 @@
.mmio_wr(dma_mmio_wr),
.mmio_din(cpu_dout),
.mmio_dout(dma_mmio_dout),
- .dma_occupy_extbus(dma_occupy_extbus),
- .dma_occupy_vidbus(dma_occupy_vidbus),
- .dma_occupy_oambus(dma_occupy_oambus)
+ .dma_occupy_bus(dma_occupy_bus)
);
+ assign dma_din = din;
// Interrupt
// int_req is the request signal from peripherals.
@@ -171,12 +177,6 @@
// PPU
wire [7:0] ppu_mmio_dout;
reg ppu_mmio_wr; // actually wire
- wire [15:0] vram_a;
- wire [7:0] vram_dout;
- //wire [7:0] vram_din;
- wire vram_rd;
- wire vram_wr;
- reg vram_cpu_wr;
wire [15:0] oam_a;
wire [7:0] oam_dout;
wire [7:0] oam_din;
@@ -184,28 +184,20 @@
wire oam_wr;
reg oam_cpu_wr;
- assign vram_a = (dma_occupy_vidbus) ? (dma_a) : (cpu_a);
- //assign vram_din = (dma_occupy_vidbus) ? (dma_dout) : (cpu_dout);
- assign vram_rd = (dma_occupy_vidbus) ? (dma_rd) : (cpu_rd);
- assign vram_wr = (dma_occupy_vidbus) ? (1'b0) : (vram_cpu_wr);
- assign oam_a = (dma_occupy_oambus) ? (dma_a) : (cpu_a);
- assign oam_din = (dma_occupy_oambus) ? (dma_dout) : (cpu_dout);
- assign oam_rd = (dma_occupy_oambus) ? (1'b0) : (cpu_rd);
- assign oam_wr = (dma_occupy_oambus) ? (dma_wr) : (oam_cpu_wr);
+ assign oam_a = (dma_occupy_bus) ? (dma_a) : (cpu_a);
+ assign oam_din = (dma_occupy_bus) ? (dma_dout) : (cpu_dout);
+ assign oam_rd = (dma_occupy_bus) ? (1'b0) : (cpu_rd);
+ assign oam_wr = (dma_occupy_bus) ? (dma_wr) : (oam_cpu_wr);
ppu ppu(
.clk(clk),
.rst(rst),
+ .ct(ct),
.mmio_a(cpu_a), // mmio bus is always accessable to CPU
.mmio_dout(ppu_mmio_dout),
.mmio_din(cpu_dout),
.mmio_rd(cpu_rd),
.mmio_wr(ppu_mmio_wr),
- .vram_a(vram_a),
- .vram_dout(vram_dout),
- .vram_din(cpu_dout), // DMA never writes to VRAM
- .vram_rd(vram_rd),
- .vram_wr(vram_wr),
.oam_a(oam_a),
.oam_dout(oam_dout),
.oam_din(oam_din),
@@ -220,6 +212,11 @@
.valid(valid),
.hs(hs), // Horizontal Sync, Low Active
.vs(vs), // Vertical Sync, Low Active
+ .vram_a(ppu_a),
+ .vram_dout(ppu_din),
+ .vram_din(ppu_dout),
+ .vram_rd(ppu_rd),
+ .vram_wr(ppu_wr),
// Ignore the debugging interface
/* verilator lint_off PINCONNECTEMPTY */
.scx(),
@@ -235,7 +232,7 @@
timer timer(
.clk(clk),
.rst(rst),
- .ct(cpu_ct),
+ .ct(ct),
.a(cpu_a),
.dout(timer_dout),
.din(cpu_dout),
@@ -308,25 +305,6 @@
.d(brom_dout)
);
- // Work RAM
- wire [7:0] wram_dout;
- wire [12:0] wram_a;
- wire wram_wr;
- reg wram_cpu_wr; // actually wire
-
- assign wram_a = (dma_occupy_extbus) ? (dma_a[12:0]) : (cpu_a[12:0]);
- assign wram_wr = (dma_occupy_extbus) ? (1'b0) : (wram_cpu_wr);
-
- singleport_ram #(
- .WORDS(8192)
- ) br_wram (
- .clka(clk),
- .wea(wram_wr),
- .addra(wram_a),
- .dina(cpu_dout), // DMA never writes to Work RAM
- .douta(wram_dout)
- );
-
// Keypad
wire [7:0] keypad_reg;
reg keypad_reg_wr; // actually wire
@@ -345,12 +323,12 @@
((keypad_high[0] == 1'b1) ? (key[3:0]) : 4'h0));
assign int_key_req = (keypad_reg[3:0] != 4'hf) ? (1'b1) : (1'b0);
- // External Bus
+ // External Bus (this includes CPU/DMA access to WRAM, VRAM, and cartridge)
reg ext_cpu_wr; // wire
- assign a = (dma_occupy_extbus) ? (dma_a) : (cpu_a);
+ assign a = (dma_occupy_bus) ? (dma_a) : (cpu_a_early);
assign dout = cpu_dout; // DMA never writes to external bus
- assign wr = (dma_occupy_extbus) ? (1'b0) : (ext_cpu_wr);
- assign rd = (dma_occupy_extbus) ? (dma_rd) : (cpu_rd);
+ assign wr = (dma_occupy_bus) ? (1'b0) : (ext_cpu_wr);
+ assign rd = (dma_occupy_bus) ? (dma_rd) : (cpu_rd);
// Bus Multiplexing, CPU
always @(*) begin
@@ -364,9 +342,7 @@
high_ram_wr = 1'b0;
sound_wr = 1'b0;
ppu_mmio_wr = 1'b0;
- vram_cpu_wr = 1'b0;
oam_cpu_wr = 1'b0;
- wram_cpu_wr = 1'b0;
ext_cpu_wr = 1'b0;
// -- These are exclusive to CPU --
if (cpu_a == 16'hffff) begin // 0xFFFF - IE
@@ -418,23 +394,9 @@
cpu_din = brom_dout;
end
// -- These are shared between CPU and DMA --
- else if (cpu_a >= 16'h8000 && cpu_a <= 16'h9fff) begin // VRAM
- vram_cpu_wr = cpu_wr;
- cpu_din = (dma_occupy_vidbus) ? (8'hff) : (vram_dout);
- end
- else if (cpu_a >= 16'hfe00 && cpu_a <= 16'hfe9f) begin // OAM
- oam_cpu_wr = cpu_wr;
- cpu_din = (dma_occupy_oambus) ? (8'hff) : (oam_dout);
- end
- else if ((cpu_a >= 16'hc000 && cpu_a <= 16'hdfff) ||
- (cpu_a >= 16'he000 && cpu_a <= 16'hfdff)) begin // WRAM
- wram_cpu_wr = cpu_wr;
- cpu_din = (dma_occupy_extbus) ? (8'hff) : (wram_dout);
- end
- else if ((cpu_a <= 16'h7fff) ||
- (cpu_a >= 16'ha000 && cpu_a <= 16'hbfff)) begin // External
+ else if (cpu_a <= 16'hfdff) begin // External/ Work RAM/ Video RAM
ext_cpu_wr = cpu_wr;
- cpu_din = (dma_occupy_extbus) ? (8'hff) : (din);
+ cpu_din = (dma_occupy_bus) ? (8'hff) : (din);
end
else begin
// Unmapped area
@@ -442,18 +404,4 @@
end
end
- // Bus Multiplexing, DMA
- always @(*) begin
- if (dma_a >= 16'h8000 && dma_a <= 16'h9fff) begin // VRAM
- dma_din = vram_dout;
- end
- else if ((dma_a >= 16'hc000 && dma_a <= 16'hdfff) ||
- (dma_a >= 16'he000 && dma_a <= 16'hfdff)) begin // WRAM
- dma_din = wram_dout;
- end
- else begin
- dma_din = din;
- end
- end
-
endmodule
diff --git a/verilog/rtl/chip.v b/verilog/rtl/chip.v
new file mode 100644
index 0000000..ffceb0c
--- /dev/null
+++ b/verilog/rtl/chip.v
@@ -0,0 +1,149 @@
+`timescale 1ns / 1ps
+`default_nettype none
+//////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 18:48:36 02/14/2018
+// Design Name:
+// Module Name: chip
+// Project Name:
+// Target Devices:
+// Tool versions:
+// Description:
+// Chip top level
+// Additional Comments:
+// Wraps up the VerilogBoy and expose signals to be connected to the pad frame
+//////////////////////////////////////////////////////////////////////////////////
+module chip(
+ input wire clk, // 4 MHz clock input
+ input wire rst, // Active high sync reset
+ output reg [15:0] a, // Address bus
+ output reg [7:0] dout, // Data bus to be written
+ input wire [7:0] din, // Data bus read
+ output reg doe, // Data bus output enable (high = dout is driven onto the bus)
+ output reg wr, // High active write enable
+ output reg cale, // Cartridge address latch enable
+ output reg cs, // Cartridge chip select
+ output wire hsync, // LCD horizontal sync
+ output wire vsync, // LCD vertical sync
+ output wire pvalid, // LCD pixel valid/ clock gate
+ output wire [1:0] pixel, // LCD pixel output
+ input wire skey, // Serial key input
+ output wire audiol, // Audio left output (NOTE(review): not driven in this module)
+ output wire audior, // Audio right output (NOTE(review): not driven in this module)
+ input wire mode, // Test mode
+ // For testbench only
+ output wire done,
+ output wire fault
+);
+ wire [1:0] ct; // T-cycle number from the core; selects the bus phase below
+ wire [15:0] cpu_a;
+ wire [7:0] cpu_dout;
+ reg [7:0] cpu_din;
+ wire cpu_wr;
+ wire cpu_rd;
+ wire [15:0] ppu_a;
+ wire [7:0] ppu_dout;
+ reg [7:0] ppu_din;
+ wire ppu_wr;
+ wire ppu_rd;
+ wire [15:0] left;
+ wire [15:0] right;
+
+ boy boy(
+ .rst(rst), // Async Reset Input
+ .clk(clk), // 4.19MHz Clock Input
+ .phi(), // 1.05MHz Reference Clock Output
+ .ct(ct), // 0-3T cycle number
+ // CPU/ DMA bus interface
+ .a(cpu_a), // Address Bus
+ .dout(cpu_dout), // Data Bus
+ .din(cpu_din),
+ .wr(cpu_wr), // Write Enable
+ .rd(cpu_rd), // Read Enable
+ // Keyboard input
+ .key(8'b0),
+ // LCD output
+ .hs(hsync), // Horizontal Sync Output
+ .vs(vsync), // Vertical Sync Output
+ .cpl(), // Pixel Data Latch
+ .pixel(pixel), // Pixel Data
+ .valid(pvalid),
+ // Sound output
+ .left(left),
+ .right(right),
+ // Video RAM interface
+ .ppu_a(ppu_a[12:0]),
+ .ppu_wr(ppu_wr),
+ .ppu_rd(ppu_rd),
+ .ppu_din(ppu_din),
+ .ppu_dout(ppu_dout),
+ // Debug interface
+ .done(done),
+ .fault(fault)
+ );
+
+ assign ppu_a[15:13] = 3'b100; // Extend the 13-bit PPU address into the 0x8000-0x9FFF window
+
+ // Internal SRAM (WRAM + VRAM, 16KB)
+ // Address
+ // 1000 xxxx xxxx xxxx VRAM
+ // 1001 xxxx xxxx xxxx VRAM
+ // 1100 xxxx xxxx xxxx WRAM
+ // 1101 xxxx xxxx xxxx WRAM
+
+ wire addr_is_ram = ((cpu_a >= 16'h8000) && (cpu_a <= 16'h9fff)) || // NOTE(review): not referenced elsewhere in this module
+ ((cpu_a >= 16'hc000) && (cpu_a <= 16'hdfff));
+ wire addr_is_cart = ((cpu_a <= 16'h7fff) || // Cart ROM
+ ((cpu_a >= 16'ha000) && (cpu_a <= 16'hbfff))); // Cart RAM
+
+ reg addr_is_cart_reg;
+ reg [15:0] cpu_a_reg;
+ always @(posedge clk) begin // Capture address and cartridge decode during phase 0 for use in phase 2
+ if (ct == 2'b00) begin
+ addr_is_cart_reg <= addr_is_cart;
+ cpu_a_reg <= cpu_a;
+ end
+ end
+
+ // Bus multiplexing
+ always @(*) begin
+ if (ct == 2'b00) begin
+ // CPU/ DMA address output
+ a = cpu_a;
+ dout = 8'hff;
+ doe = 1'b0;
+ wr = 1'b0;
+ cale = 1'b1; // Let the external latch capture the address presented in this phase
+ cs = 1'b0;
+ cpu_din = 8'hff;
+ ppu_din = 8'hff;
+ end
+ else if ((ct == 2'b01) || (ct == 2'b11)) begin
+ // VRAM access (read only)
+ a = ppu_a;
+ dout = 8'hff;
+ doe = 1'b0;
+ wr = 1'b0;
+ cale = 1'b0;
+ cs = 1'b0;
+ cpu_din = 8'hff;
+ ppu_din = din;
+ end
+ else begin
+ // CPU/ DMA access (RW)
+ a = cpu_a_reg;
+ dout = cpu_dout;
+ doe = cpu_wr; // Drive the data bus only while writing; release it so din can be read otherwise
+ wr = cpu_wr;
+ cale = 1'b0;
+ cs = addr_is_cart_reg; // Select the cartridge only for cart ROM/RAM addresses latched in phase 0
+ cpu_din = din;
+ ppu_din = 8'hff;
+ end
+ end
+
+
+endmodule
+`default_nettype wire
diff --git a/verilog/rtl/cpu.v b/verilog/rtl/cpu.v
index 6b59982..cd6df6b 100644
--- a/verilog/rtl/cpu.v
+++ b/verilog/rtl/cpu.v
@@ -21,6 +21,7 @@
output reg phi,
output wire [1:0] ct,
output reg [15:0] a,
+ output wire [15:0] a_early,
output reg [7:0] dout,
input [7:0] din,
output reg rd,
@@ -454,6 +455,12 @@
end
2'b01: begin
// Read in progress
+ if (bus_op == 2'b10) begin
+ // Write cycle
+ wr <= 1;
+ dout <= db_wr;
+ end
+ // Otherwise wait for next cycle for read
end
2'b10: begin
if (bus_op == 2'b10) begin
@@ -501,6 +508,8 @@
end
end
+ assign a_early = ab_wr; // For external latching
+
// CT - FSM / Instruction Execution
reg [1:0] alu_src_a_ct;
reg [2:0] alu_src_b_ct;
diff --git a/verilog/rtl/dma.v b/verilog/rtl/dma.v
index 5aaadaa..d384a53 100644
--- a/verilog/rtl/dma.v
+++ b/verilog/rtl/dma.v
@@ -25,6 +25,7 @@
input wire clk,
//input wire phi,
input wire rst,
+ input wire [1:0] ct,
output reg dma_rd,
output reg dma_wr,
//output wire dma_rd_comb,
@@ -35,9 +36,7 @@
input wire mmio_wr,
input wire [7:0] mmio_din,
output wire [7:0] mmio_dout,
- output wire dma_occupy_extbus,
- output wire dma_occupy_vidbus,
- output wire dma_occupy_oambus
+ output wire dma_occupy_bus
);
// DMA data blocks /////////////////////////////////////////////////////////
@@ -49,11 +48,7 @@
reg cpu_mem_disable;
- assign dma_occupy_extbus = cpu_mem_disable &
- ((dma_start_addr <= 8'h7f) || (dma_start_addr >= 8'ha0));
- assign dma_occupy_vidbus = cpu_mem_disable &
- ((dma_start_addr >= 8'h80) && (dma_start_addr <= 8'h9f));
- assign dma_occupy_oambus = cpu_mem_disable;
+ assign dma_occupy_bus = cpu_mem_disable;
// DMA transfer logic //////////////////////////////////////////////////////
@@ -95,16 +90,11 @@
if (mmio_wr) begin
// Transfer starts on next cycle
state <= DMA_DELAY;
- count <= 8'd3; // Delay before start
end
- else
- count <= 8'd0;
+ count <= 8'd0;
end
DMA_DELAY: begin
- if (count != 8'd0) begin
- count <= count - 1;
- end
- else begin
+ if (ct == 2'b11) begin
state <= DMA_TRANSFER_READ_ADDR;
end
end
@@ -116,7 +106,7 @@
dma_rd <= 1'b1;
if (mmio_wr) begin // Allow re-triggering
state <= DMA_DELAY;
- count <= 8'd3; // Delay before start
+ count <= 8'd0;
end
else
state <= DMA_TRANSFER_READ_DATA;
@@ -134,7 +124,7 @@
dma_wr <= 1'b1;
if (mmio_wr) begin // Allow re-triggering
state <= DMA_DELAY;
- count <= 8'd3; // Delay before start
+ count <= 8'd0;
end
else
state <= DMA_TRANSFER_WRITE_WAIT;
@@ -143,7 +133,7 @@
// Wait
if (mmio_wr) begin // Allow re-triggering
state <= DMA_DELAY;
- count <= 8'd3; // Delay before start
+ count <= 8'd0; // Delay before start
end
else
if (count == 8'h9f) begin
diff --git a/verilog/rtl/ppu.v b/verilog/rtl/ppu.v
index bf4e1e8..01ab72e 100644
--- a/verilog/rtl/ppu.v
+++ b/verilog/rtl/ppu.v
@@ -53,18 +53,13 @@
module ppu(
input clk,
input rst,
+ input wire [1:0] ct,
// MMIO Bus, 0xFF40 - 0xFF4B, always visible to CPU
input wire [15:0] mmio_a,
output reg [7:0] mmio_dout,
input wire [7:0] mmio_din,
input wire mmio_rd,
input wire mmio_wr,
- // VRAM Bus, 0x8000 - 0x9FFF
- input wire [15:0] vram_a,
- output wire [7:0] vram_dout,
- input wire [7:0] vram_din,
- input wire vram_rd,
- input wire vram_wr,
// OAM Bus, 0xFE00 - 0xFE9F
input wire [15:0] oam_a,
output wire [7:0] oam_dout,
@@ -82,6 +77,12 @@
output reg valid, // Pixel Valid
output reg hs, // Horizontal Sync, High Valid
output reg vs, // Vertical Sync, High Valid
+ // Video RAM interface
+ output wire [12:0] vram_a,
+ output wire vram_wr,
+ output wire vram_rd,
+ output wire [7:0] vram_din,
+ input wire [7:0] vram_dout,
//Debug output
output [7:0] scx,
output [7:0] scy,
@@ -140,11 +141,7 @@
wire vram_addr_int_sel; // 0 - BG, 1 - OBJ
assign vram_addr_int = (vram_addr_int_sel == 1'b1) ? (vram_addr_obj) : (vram_addr_bg);
-
- wire vram_access_ext = ((reg_mode == PPU_MODE_H_BLANK)||
- (reg_mode == PPU_MODE_V_BLANK)||
- (reg_mode == PPU_MODE_OAM_SEARCH));
- wire vram_access_int = ~vram_access_ext;
+
wire oam_access_ext = ((reg_mode == PPU_MODE_H_BLANK)||
(reg_mode == PPU_MODE_V_BLANK));
@@ -187,25 +184,12 @@
assign oam_dout = (oam_access_ext) ? (oam_data_out_byte) : (8'hFF);
// 8 bit WR, 8 bit RD, 8KB VRAM
- wire vram_we;
- wire [12:0] vram_addr;
- wire [7:0] vram_data_in;
wire [7:0] vram_data_out;
-
- singleport_ram #(
- .WORDS(8192)
- ) br_vram (
- .clka(~clk),
- .wea(vram_we),
- .addra(vram_addr[12:0]),
- .dina(vram_data_in),
- .douta(vram_data_out));
-
- assign vram_addr_ext = vram_a[12:0];
- assign vram_addr = (vram_access_ext) ? (vram_addr_ext) : (vram_addr_int);
- assign vram_data_in = vram_din;
- assign vram_we = (vram_wr)&(vram_access_ext);
- assign vram_dout = (vram_access_ext) ? (vram_data_out) : (8'hFF);
+
+ assign vram_a = vram_addr_int;
+ assign vram_wr = 1'b0; // PPU doesn't write to VRAM
+ assign vram_din = 8'd0;
+ assign vram_data_out = vram_dout;
// Pixel Pipeline
@@ -450,7 +434,8 @@
assign vram_addr_int_sel =
((r_state == S_OAMRDB) || (r_state == S_OFRD0A) || (r_state == S_OFRD0B)
|| (r_state == S_OFRD1A) || (r_state == S_OFRD1B)) ? 1'b1 : 1'b0;
-
+ assign vram_rd = (r_state == S_FTIDB) || (r_state == S_FRD0B) ||
+ (r_state == S_FRD1B) || (r_state == S_OFRD0B) || (r_state == S_OFRD1B);
// Current mode logic, based on current state
always @ (posedge clk)
@@ -712,6 +697,8 @@
end
end
end
+
+ wire ram_ready = ((ct == 2'b00) || (ct == 2'b10));
// Next State Logic
// Since new state get updated during posedge
@@ -733,20 +720,20 @@
) : (S_IDLE);
S_OAMX: r_next_state = (reg_lcd_en) ? (S_OAMY) : (S_IDLE);
S_OAMY: r_next_state = (reg_lcd_en) ? ((oam_search_count == (PPU_OAM_SEARCH_LENGTH - 1'b1)) ? (S_FTIDA) : (S_OAMX)) : (S_IDLE);
- S_FTIDA: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FTIDB))) : (S_IDLE);
+ S_FTIDA: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (ram_ready ? S_FTIDB : S_FTIDA))) : (S_IDLE);
S_FTIDB: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD0A))) : (S_IDLE);
- S_FRD0A: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD0B))) : (S_IDLE);
+ S_FRD0A: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (ram_ready ? S_FRD0B : S_FRD0A))) : (S_IDLE);
S_FRD0B: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD1A))) : (S_IDLE);
- S_FRD1A: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FRD1B))) : (S_IDLE);
+ S_FRD1A: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (ram_ready ? S_FRD1B : S_FRD1A))) : (S_IDLE);
S_FRD1B: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : ((pf_empty != PF_FULL) ? (S_FTIDA) : (S_FWAITA)))) : (S_IDLE); // If fifo not full, no wait state is needed
S_FWAITA: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FWAITB))) : (S_IDLE);
S_FWAITB: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : ((window_trigger) ? (S_SWW) : (S_FTIDA))) : (S_IDLE);
S_SWW: r_next_state = (reg_lcd_en) ? ((h_pix_output == (PPU_H_OUTPUT - 1'b1)) ? (S_BLANK) : (S_FTIDA)) : (S_IDLE);
S_OAMRDA: r_next_state = (reg_lcd_en) ? (S_OAMRDB) : (S_IDLE);
S_OAMRDB: r_next_state = (reg_lcd_en) ? (S_OFRD0A) : (S_IDLE);
- S_OFRD0A: r_next_state = (reg_lcd_en) ? (S_OFRD0B) : (S_IDLE);
+ S_OFRD0A: r_next_state = (reg_lcd_en) ? (ram_ready ? S_OFRD0B : S_OFRD0A) : (S_IDLE);
S_OFRD0B: r_next_state = (reg_lcd_en) ? (S_OFRD1A) : (S_IDLE);
- S_OFRD1A: r_next_state = (reg_lcd_en) ? (S_OFRD1B) : (S_IDLE);
+ S_OFRD1A: r_next_state = (reg_lcd_en) ? (ram_ready ? S_OFRD1B : S_OFRD1A) : (S_IDLE);
S_OFRD1B: r_next_state = (reg_lcd_en) ? (S_OWB) : (S_IDLE);
S_OWB: r_next_state = (reg_lcd_en) ? (r_next_backup) : (S_IDLE);
default: r_next_state = S_IDLE;
diff --git a/verilog/rtl/simtop.v b/verilog/rtl/simtop.v
new file mode 100644
index 0000000..64a5070
--- /dev/null
+++ b/verilog/rtl/simtop.v
@@ -0,0 +1,90 @@
+`timescale 1ns / 1ps
+`default_nettype wire
+////////////////////////////////////////////////////////////////////////////////
+// Company:
+// Engineer: Wenting Zhang
+//
+// Create Date: 17:30:26 02/08/2018
+// Module Name: simtop
+// Project Name: VerilogBoy
+// Description:
+// Top-level wrapper for RTL simulation
+////////////////////////////////////////////////////////////////////////////////
+module simtop( // Simulation wrapper: chip plus an on-board SRAM model and the cartridge address latch
+ input wire clk,
+ input wire rst,
+ // Cartridge interface
+ output reg [15:0] a, // Latched cartridge address (captured while bus_cale is high)
+ output wire [7:0] dout,
+ input wire [7:0] din,
+ output wire wr,
+ output wire rd,
+ // Keyboard input
+ input wire [7:0] key, // NOTE(review): not referenced inside this module
+ // LCD output
+ output wire hs,
+ output wire vs,
+ output wire [1:0] pixel,
+ output wire valid,
+ // For testbench only
+ output wire done,
+ output wire fault
+ );
+
+ wire [15:0] bus_a; // Multiplexed address bus coming out of the chip
+ wire [7:0] bus_dout; // Data driven by the chip
+ wire [7:0] bus_din; // Data returned to the chip (cartridge or SRAM)
+ wire bus_doe; // NOTE(review): connected to the chip but otherwise unused here
+ wire bus_wr;
+ wire bus_cale;
+ wire bus_cs;
+ wire skey;
+
+ chip chip(
+ .clk(clk),
+ .rst(rst),
+ .a(bus_a),
+ .dout(bus_dout),
+ .din(bus_din),
+ .doe(bus_doe),
+ .wr(bus_wr),
+ .cale(bus_cale),
+ .cs(bus_cs),
+ .hsync(hs),
+ .vsync(vs),
+ .pvalid(valid),
+ .pixel(pixel),
+ .skey(skey),
+ .audiol(),
+ .audior(),
+ .mode(1'b0),
+ .done(done),
+ .fault(fault)
+ );
+
+ wire sram_we;
+ wire [7:0] sram_dout;
+ async_ram #(.WORDS(16384), .ABITS(14)) sram( // 16K x 8 model shared by the two 8 KB RAM regions
+ .clka(clk),
+ .wea(sram_we),
+ .addra({bus_a[14], bus_a[12:0]}), // Bit 14 picks the 8 KB half, bits 12:0 the offset within it
+ .dina(bus_dout),
+ .douta(sram_dout)
+ );
+ assign sram_we = bus_wr & !bus_cs; // Write the SRAM only when the cartridge is not selected
+
+ // OR use transparent latch
+ always @(posedge clk) begin
+ if (bus_cale)
+ a <= bus_a;
+ end
+
+ assign dout = bus_dout;
+ assign bus_din = bus_cs ? din : sram_dout; // Cartridge data when selected, SRAM data otherwise
+ assign wr = bus_cs & bus_wr; // Forward writes to the cartridge only when it is selected
+ assign rd = ~bus_wr; // Always enable output
+
+ // Key parallel to serial
+ assign skey = 1'b0; // Placeholder: the serial key line is tied low
+
+endmodule
diff --git a/verilog/sim/Makefile b/verilog/sim/Makefile
index 415ae0a..eb56330 100644
--- a/verilog/sim/Makefile
+++ b/verilog/sim/Makefile
@@ -8,7 +8,7 @@
VROOT := $(VERILATOR_ROOT)
VINCD := $(VROOT)/include
RTLOBJDIR := $(RTLDIR)/obj_dir
-RTLOBJ := $(RTLOBJDIR)/Vboy__ALL.a
+RTLOBJ := $(RTLOBJDIR)/Vsimtop__ALL.a
CC = g++
CXX = g++
diff --git a/verilog/sim/bootrom.mif b/verilog/sim/bootrom.mif
new file mode 120000
index 0000000..e4e13d4
--- /dev/null
+++ b/verilog/sim/bootrom.mif
@@ -0,0 +1 @@
+../rtl/bootrom.mif
\ No newline at end of file
diff --git a/verilog/sim/main.cpp b/verilog/sim/main.cpp
index c181510..a7d33bd 100644
--- a/verilog/sim/main.cpp
+++ b/verilog/sim/main.cpp
@@ -31,7 +31,7 @@
#include "verilated.h"
#include "verilated_vcd_c.h"
-#include "Vboy.h"
+#include "Vsimtop.h"
#include "memsim.h"
#include "mbcsim.h"
@@ -46,11 +46,11 @@
#define CON_BASE 0x20000000
// Verilator related
-Vboy *core;
+Vsimtop *core;
VerilatedVcdC *trace;
#define CONCAT(a,b) a##b
-#define SIGNAL(x) CONCAT(core->boy__DOT__,x)
+#define SIGNAL(x) CONCAT(core->simtop__DOT__chip__DOT__boy__DOT__,x)
// this only applies to quiet mode.
const uint64_t CYCLE_LIMIT = 32768;
@@ -141,7 +141,7 @@
(SIGNAL(cpu__DOT__next == 0))) {
// Instruction just finished executing
fprintf(it, "Time %ld\nPC = %04x, F = %c%c%c%c, A = %02x, SP = %02x%02x\nB = %02x, C = %02x, D = %02x, E = %02x, H = %02x, L = %02x\n",
- 10 * tickcount,
+ 10 * (tickcount - 1), // Make timing compatible with old traces
SIGNAL(cpu__DOT__pc),
((SIGNAL(cpu__DOT__flags)) & 0x8) ? 'Z' : '-',
((SIGNAL(cpu__DOT__flags)) & 0x4) ? 'N' : '-',
@@ -176,7 +176,7 @@
// Initialize testbench
Verilated::commandArgs(argc, argv);
- core = new Vboy;
+ core = new Vsimtop;
Verilated::traceEverOn(true);
if (argc < 2) {
@@ -231,8 +231,8 @@
mbc = new MBCSIM();
}
else {
- cartrom = new MEMSIM(0x0000, 32768, 0);
- cartram = new MEMSIM(0xa000, 8192, 0);
+ cartrom = new MEMSIM(0x0000, 32768);
+ cartram = new MEMSIM(0xa000, 8192);
}
if (!quiet) {
diff --git a/verilog/sim/memsim.cpp b/verilog/sim/memsim.cpp
index 0cbf3b0..bf79380 100644
--- a/verilog/sim/memsim.cpp
+++ b/verilog/sim/memsim.cpp
@@ -2,7 +2,7 @@
// VerilogBoy simulator
// Copyright 2022 Wenting Zhang
//
-// memsim.cpp: A memory simulation model with simple delay control
+// memsim.cpp: An async memory simulation model
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@@ -29,15 +29,10 @@
#include <assert.h>
#include "memsim.h"
-MEMSIM::MEMSIM(uint16_t base, size_t len, size_t delay) {
+MEMSIM::MEMSIM(uint16_t base, size_t len) {
this->base = base;
this->len = len;
- this->delay = delay;
mem = (uint8_t *)malloc(len);
- delay_count = 0;
- last_wr = 0;
- last_rd = 0;
- last_data = 0;
}
MEMSIM::~MEMSIM(void) {
@@ -58,35 +53,25 @@
}
void MEMSIM::apply(uint8_t wr_data, uint16_t address,
- uint8_t wr_enable, uint8_t rd_enable, uint8_t &rd_data) {
+ uint8_t wr, uint8_t rd, uint8_t &rd_data) {
- if (delay_count == 0) {
- if ((address >= base) && (address < (base + len))) {
- if (last_wr && !wr_enable) {
- mem[address - base] = last_data;
- delay_count = delay;
+ if ((address >= base) && (address < (base + len))) {
+ if (wr) {
+ mem[address - base] = wr_data;
#ifdef __DEBUG
- printf("MEMBUS W[%04x] = %02x\n",
- address,
- last_data);
+ printf("MEMBUS W[%04x] = %02x\n",
+ address,
+ wr_data);
#endif
- }
- else if (!last_rd && rd_enable) {
- rd_data = mem[address - base];
- delay_count = delay;
-#ifdef __DEBUG
- printf("MEMBUS R[%04x] = %02x\n",
- address,
- rd_data);
-#endif
- }
}
- last_rd = rd_enable;
- last_wr = wr_enable;
- last_data = wr_data;
- }
- else {
- delay_count --;
+ else if (rd) {
+ rd_data = mem[address - base];
+#ifdef __DEBUG
+ printf("MEMBUS R[%04x] = %02x\n",
+ address,
+ rd_data);
+#endif
+ }
}
}
diff --git a/verilog/sim/memsim.h b/verilog/sim/memsim.h
index 8ce05d2..8457d32 100644
--- a/verilog/sim/memsim.h
+++ b/verilog/sim/memsim.h
@@ -26,18 +26,13 @@
class MEMSIM {
public:
- MEMSIM(uint16_t base, size_t len, size_t delay);
+ MEMSIM(uint16_t base, size_t len);
~MEMSIM(void);
void load(char *fname);
- void apply(uint8_t wr_data, uint16_t address, uint8_t wr_enable,
- uint8_t rd_enable, uint8_t &rd_data);
+ void apply(uint8_t wr_data, uint16_t address, uint8_t wr,
+ uint8_t rd, uint8_t &rd_data);
private:
uint16_t base;
uint8_t *mem;
uint16_t len;
- int delay;
- int delay_count;
- uint8_t last_wr;
- uint8_t last_rd;
- uint8_t last_data;
};
diff --git a/verilog/sim/rtl.mk b/verilog/sim/rtl.mk
index fdc9c72..024e74e 100644
--- a/verilog/sim/rtl.mk
+++ b/verilog/sim/rtl.mk
@@ -1,4 +1,4 @@
-TARGET ?= boy
+TARGET ?= simtop
all: $(TARGET)
VOBJ := obj_dir