Refactor out the final module parameter by pooling the instr_mem instances
diff --git a/verilog/rtl/instr_mem.v b/verilog/rtl/instr_mem.v
index cd78fcc..50830df 100644
--- a/verilog/rtl/instr_mem.v
+++ b/verilog/rtl/instr_mem.v
@@ -7,30 +7,51 @@
Instruction memory
*/
-module instr_mem #(parameter DEPTH=128) (
+module instr_mem (
input clk,
input rst_n,
- input [`PC_WIDTH-1:0] raddr,
- output [`INSTR_WIDTH-1:0] rdata,
- input we,
- input [`PC_WIDTH-1:0] waddr,
- input [`INSTR_WIDTH-1:0] wdata
+ input [`CORES*`PC_WIDTH-1:0] raddr,
+ output [`CORES*`INSTR_WIDTH-1:0] rdata,
+ input [`CORES-1:0] we,
+ input [`CORES*`PC_WIDTH-1:0] waddr,
+ input [`CORES*`INSTR_WIDTH-1:0] wdata
);
-reg [`INSTR_WIDTH-1:0] mem[DEPTH-1:0];
+localparam CORES_RNDUP = 1 << `LOG_CORES;
-assign rdata = mem[raddr];
+generate genvar core;
+for(core=0; core<`CORES; core=core+1) begin:g_core
-integer i;
-always @ (posedge clk) begin
- if (!rst_n) begin
- for (i=0; i<DEPTH; i=i+1) begin
- mem[i] <= {(`INSTR_WIDTH){1'b0}};
+ localparam DEPTH_MULT = (core + CORES_RNDUP) & ~(core + CORES_RNDUP-1);
+ // DEPTH_MULT is the lowest set bit of (core + CORES_RNDUP); e.g. for 8 cores,
+ // depths are multiplied by 8, 1, 2, 1, 4, 1, 2, 1, so that a few cores accept
+ // longer programs while the total memory required is still kept reasonably low
+
+ localparam DEPTH = `INSTR_DEPTH * DEPTH_MULT;
+
+ wire [`PC_WIDTH-1:0] craddr = raddr[core*`PC_WIDTH +: `PC_WIDTH];
+ wire cwe = we[core];
+ wire [`PC_WIDTH-1:0] cwaddr = waddr[core*`PC_WIDTH +: `PC_WIDTH];
+ wire [`INSTR_WIDTH-1:0] cwdata = wdata[core*`INSTR_WIDTH +: `INSTR_WIDTH];
+
+ reg [`INSTR_WIDTH-1:0] mem[DEPTH-1:0];
+
+ wire [`INSTR_WIDTH-1:0] crdata = mem[craddr];
+ assign rdata[core*`INSTR_WIDTH +: `INSTR_WIDTH] = crdata;
+
+ integer i;
+ always @ (posedge clk) begin
+ if (!rst_n) begin
+ for (i=0; i<DEPTH; i=i+1) begin
+ mem[i] <= {(`INSTR_WIDTH){1'b0}};
+ end
+ end else begin
+ if (cwe) mem[cwaddr] <= cwdata;
end
- end else begin
- if (we) mem[waddr] <= wdata;
end
+
end
+endgenerate
endmodule
diff --git a/verilog/rtl/mcu.v b/verilog/rtl/mcu.v
index 941bc3d..0f65f18 100644
--- a/verilog/rtl/mcu.v
+++ b/verilog/rtl/mcu.v
@@ -20,13 +20,13 @@
|||| ||| +----------+ +=| prng |=+ |||
+------+ +-----------+ ||+=| cpu core |=+ +------+ | +-----------+
| |==| instr mem |=====| w/alu |===============| |
- | | +-----------+ || +----------+ +------+ | | |
- | | || +----------+ +=| prng |=+ | |
- | prog | +-----------+ |+==| cpu core |=+ +------+ | | |
+ | | | | || +----------+ +------+ | | |
+ | | | - - - - - | || +----------+ +=| prng |=+ | |
+ | prog | | | |+==| cpu core |=+ +------+ | | |
| mux |==| instr mem |=====| w/alu |===============| mem mesh |
- | | +-----------+ | +----------+ +------+ | | |
- | | | +----------+ +=| prng |=+ | |
- | | +-----------+ +===| cpu core |=+ +------+ | |
+ | | | | | +----------+ +------+ | | |
+ | | | - - - - - | | +----------+ +=| prng |=+ | |
+ | | | | +===| cpu core |=+ +------+ | |
| |==| instr mem |=====| w/alu |===============| |
+------+ +-----------+ +----------+ +-----------+
@@ -66,81 +66,80 @@
wire [`IO_PINS-1:0] pin_data_in; // pads > iof
wire [`IO_PINS-1:0] pin_data_out; // pads < iof
-// between cpu core and corresponding instruction memory
-wire [`INSTR_WIDTH-1:0] opcode[`CORES-1:0]; // cpu < im
-wire [`PC_WIDTH-1:0] progctr[`CORES-1:0]; // cpu > im
+// between cpu core and instruction memory (unpacked version for cpu core)
+wire [`PC_WIDTH-1:0] im_raddr[`CORES-1:0]; // cpu > im
+wire [`INSTR_WIDTH-1:0] im_rdata[`CORES-1:0]; // cpu < im
+
+// between cpu core and instruction memory (packed version for instruction memory)
+wire [`CORES*`PC_WIDTH-1:0] im_raddr_raw; // cpu > im
+wire [`CORES*`INSTR_WIDTH-1:0] im_rdata_raw; // cpu < im
// between cpu core and memory mesh (unpacked versions for cpu cores)
-wire [`DATA_WIDTH-1:0] mem_rdata[`CORES-1:0]; // cpu < mesh
+wire [`DATA_WIDTH-1:0] mem_rdata[`CORES-1:0]; // cpu < mesh
wire mem_we[`CORES-1:0]; // cpu > mesh
-wire [`ADDR_WIDTH-1:0] mem_waddr[`CORES-1:0]; // cpu > mesh
+wire [`ADDR_WIDTH-1:0] mem_waddr[`CORES-1:0]; // cpu > mesh
wire [SPREAD_WIDTH-1:0] mem_wspread[`CORES-1:0]; // cpu > mesh
-wire [`DATA_WIDTH-1:0] mem_wdata[`CORES-1:0]; // cpu > mesh
-wire [`ADDR_WIDTH-1:0] mem_raddr[`CORES-1:0]; // cpu > mesh
+wire [`DATA_WIDTH-1:0] mem_wdata[`CORES-1:0]; // cpu > mesh
+wire [`ADDR_WIDTH-1:0] mem_raddr[`CORES-1:0]; // cpu > mesh
// between cpu core and memory mesh (packed versions for memory mesh)
-wire [`CORES*`DATA_WIDTH-1:0] mem_rdata_raw; // cpu < mesh
+wire [`CORES*`DATA_WIDTH-1:0] mem_rdata_raw; // cpu < mesh
wire [`CORES-1:0] mem_we_raw; // cpu > mesh
-wire [`CORES*`ADDR_WIDTH-1:0] mem_waddr_raw; // cpu > mesh
+wire [`CORES*`ADDR_WIDTH-1:0] mem_waddr_raw; // cpu > mesh
wire [`CORES*SPREAD_WIDTH-1:0] mem_wspread_raw; // cpu > mesh
-wire [`CORES*`DATA_WIDTH-1:0] mem_wdata_raw; // cpu > mesh
-wire [`CORES*`ADDR_WIDTH-1:0] mem_raddr_raw; // cpu > mesh
+wire [`CORES*`DATA_WIDTH-1:0] mem_wdata_raw; // cpu > mesh
+wire [`CORES*`ADDR_WIDTH-1:0] mem_raddr_raw; // cpu > mesh
// between cpu core and corresponding prng
-wire [`DATA_WIDTH-1:0] prng_random[`CORES-1:0]; // cpu < prng
+wire [`DATA_WIDTH-1:0] prng_random[`CORES-1:0]; // cpu < prng
-// between instruction memory and programming multiplexer (unpacked versions for instruction memory)
-wire im_we[`CORES-1:0]; // im < pmux
-wire [`PC_WIDTH-1:0] im_waddr[`CORES-1:0]; // im < pmux
-wire [`INSTR_WIDTH-1:0] im_wdata[`CORES-1:0]; // im < pmux
-
-// between instruction memory and programming multiplexer (packed versions for programming multiplexer)
+// between instruction memory and programming multiplexer
wire [`CORES-1:0] im_we_raw; // im < pmux
-wire [`CORES*`PC_WIDTH-1:0] im_waddr_raw; // im < pmux
-wire [`CORES*`INSTR_WIDTH-1:0] im_wdata_raw; // im < pmux
+wire [`CORES*`PC_WIDTH-1:0] im_waddr_raw; // im < pmux
+wire [`CORES*`INSTR_WIDTH-1:0] im_wdata_raw; // im < pmux
// between memory mesh and io filter
wire [MEM_IO_PORTS-1:0] mem_io_active_in; // mesh < iof
wire [MEM_IO_PORTS-1:0] mem_io_active_out; // mesh > iof
-wire [MEM_IO_PORTS*`DATA_WIDTH-1:0] mem_io_data_in; // mesh < iof
-wire [MEM_IO_PORTS*`DATA_WIDTH-1:0] mem_io_data_out; // mesh > iof
+wire [MEM_IO_PORTS*`DATA_WIDTH-1:0] mem_io_data_in; // mesh < iof
+wire [MEM_IO_PORTS*`DATA_WIDTH-1:0] mem_io_data_out; // mesh > iof
// between debugging multiplexer and cpu core (unpacked versions for cpu core)
wire [1:0] debug_cpu_mode[`CORES-1:0]; // dmux > cpu
wire [3:0] debug_reg_sel[`CORES-1:0]; // dmux > cpu
wire debug_reg_we[`CORES-1:0]; // dmux > cpu
-wire [`DATA_WIDTH-1:0] debug_reg_wdata[`CORES-1:0]; // dmux > cpu
+wire [`DATA_WIDTH-1:0] debug_reg_wdata[`CORES-1:0]; // dmux > cpu
wire debug_reg_stopped[`CORES-1:0]; // dmux < cpu
-wire [`DATA_WIDTH-1:0] debug_reg_rdata[`CORES-1:0]; // dmux < cpu
+wire [`DATA_WIDTH-1:0] debug_reg_rdata[`CORES-1:0]; // dmux < cpu
// between debugging multiplexer and cpu core (packed versions for debugging multiplexer)
wire [`CORES*2-1:0] debug_cpu_mode_raw; // dmux > cpu
wire [`CORES*4-1:0] debug_reg_sel_raw; // dmux > cpu
wire [`CORES-1:0] debug_reg_we_raw; // dmux > cpu
-wire [`CORES*`DATA_WIDTH-1:0] debug_reg_wdata_raw; // dmux > cpu
+wire [`CORES*`DATA_WIDTH-1:0] debug_reg_wdata_raw; // dmux > cpu
wire [`CORES-1:0] debug_reg_stopped_raw; // dmux < cpu
-wire [`CORES*`DATA_WIDTH-1:0] debug_reg_rdata_raw; // dmux < cpu
+wire [`CORES*`DATA_WIDTH-1:0] debug_reg_rdata_raw; // dmux < cpu
// between wishbone multiplexer and programming multiplexer
wire prog_we; // wbmux > pmux
-wire [`LOG_CORES-1:0] prog_sel; // wbmux > pmux
-wire [`PC_WIDTH-1:0] prog_waddr; // wbmux > pmux
-wire [`INSTR_WIDTH-1:0] prog_wdata; // wbmux > pmux
+wire [`LOG_CORES-1:0] prog_sel; // wbmux > pmux
+wire [`PC_WIDTH-1:0] prog_waddr; // wbmux > pmux
+wire [`INSTR_WIDTH-1:0] prog_wdata; // wbmux > pmux
// between wishbone multiplexer and io pads
wire pads_we; // wbmux > pads
wire pads_waddr; // wbmux > pads
-wire [`IO_PINS-1:0] pads_wdata; // wbmux > pads
+wire [`IO_PINS-1:0] pads_wdata; // wbmux > pads
// between wishbone multiplexer and debugging multiplexer
-wire [`LOG_CORES-1:0] debug_sel; // wbmux > dmux
+wire [`LOG_CORES-1:0] debug_sel; // wbmux > dmux
wire [4:0] debug_addr; // wbmux > dmux
wire debug_we; // wbmux > dmux
-wire [`DATA_WIDTH-1:0] debug_wdata; // wbmux > dmux
-wire [`DATA_WIDTH-1:0] debug_rdata; // wbmux < dmux
+wire [`DATA_WIDTH-1:0] debug_wdata; // wbmux > dmux
+wire [`DATA_WIDTH-1:0] debug_rdata; // wbmux < dmux
// between wishbone multiplexer and entropy pool
-wire [`WB_WIDTH-1:0] entropy_word; // wbmux > ep
+wire [`WB_WIDTH-1:0] entropy_word; // wbmux > ep
// between entropy pool and prng's
wire entropy_bit; // ep > prng
@@ -154,7 +153,7 @@
cpu_core cpu_core_inst (
.clk(clk),
.rst_n(rst_soft_n),
- .opcode(opcode[core]),
+ .opcode(im_rdata[core]),
.mem_rdata(mem_rdata[core]),
.cpu_num(cpu_num),
.prng_in(prng_random[core]),
@@ -162,7 +161,7 @@
.debug_sel(debug_reg_sel[core]),
.debug_we(debug_reg_we[core]),
.debug_wdata(debug_reg_wdata[core]),
- .progctr(progctr[core]),
+ .progctr(im_raddr[core]),
.mem_we(mem_we[core]),
.mem_waddr(mem_waddr[core]),
.mem_wspread(mem_wspread[core]),
@@ -172,24 +171,6 @@
.debug_rdata(debug_reg_rdata[core])
);
- // add corresponding instruction memory
- localparam CORES_RNDUP = 1 << `LOG_CORES;
- localparam DEPTH_MULT = (core + CORES_RNDUP) & ~(core + CORES_RNDUP-1);
- // e.g. for 8 cores, depths are multiplied by 8, 1, 2, 1, 4, 1, 2, 1
- // so that we have a few cores that accept longer programs but the total
- // memory required is still kept reasonably low
- instr_mem #(
- .DEPTH(`INSTR_DEPTH * DEPTH_MULT)
- ) instr_mem_inst (
- .clk(clk),
- .rst_n(rst_hard_n),
- .raddr(progctr[core]),
- .rdata(opcode[core]),
- .we(im_we[core]),
- .waddr(im_waddr[core]),
- .wdata(im_wdata[core])
- );
-
// add its own pseudorandom number generator
wire [`PRNG_STATE_BITS-1:0] index = core;
prng_wrap prng_inst (
@@ -210,10 +191,11 @@
// convert memory mesh outputs: packed to unpacked
assign mem_rdata[core] = mem_rdata_raw[core*`DATA_WIDTH +: `DATA_WIDTH];
- // convert programming multiplexer outputs: packed to unpacked
- assign im_we[core] = im_we_raw[core];
- assign im_waddr[core] = im_waddr_raw[core*`PC_WIDTH +: `PC_WIDTH];
- assign im_wdata[core] = im_wdata_raw[core*`INSTR_WIDTH +: `INSTR_WIDTH];
+ // convert instruction memory inputs: unpacked to packed
+ assign im_raddr_raw[core*`PC_WIDTH +: `PC_WIDTH] = im_raddr[core];
+
+ // convert instruction memory outputs: packed to unpacked
+ assign im_rdata[core] = im_rdata_raw[core*`INSTR_WIDTH +: `INSTR_WIDTH];
// convert debugging multiplexer inputs: unpacked to packed
assign debug_reg_stopped_raw[core] = debug_reg_stopped[core];
@@ -257,7 +239,18 @@
.port_data_out(mem_io_data_out)
);
-// add the programming multiplexer, with a packed bus towards instruction memories
+// add instruction memory blocks
+instr_mem instr_mem_inst (
+ .clk(clk),
+ .rst_n(rst_hard_n),
+ .raddr(im_raddr_raw),
+ .rdata(im_rdata_raw),
+ .we(im_we_raw),
+ .waddr(im_waddr_raw),
+ .wdata(im_wdata_raw)
+);
+
+// add the programming multiplexer
prog_mux prog_mux_inst (
.we(prog_we),
.sel(prog_sel),