Refactor some parameters to defines
diff --git a/openlane/user_project/config.tcl b/openlane/user_project/config.tcl
index ddd965f..057b0b0 100755
--- a/openlane/user_project/config.tcl
+++ b/openlane/user_project/config.tcl
@@ -19,6 +19,7 @@
set ::env(VERILOG_FILES) "\
$::env(CARAVEL_ROOT)/verilog/rtl/defines.v \
+ $script_dir/../../verilog/rtl/defines.v \
$script_dir/../../verilog/rtl/user_project.v \
$script_dir/../../verilog/rtl/mcu.v \
$script_dir/../../verilog/rtl/cpu_core.v \
diff --git a/verilog/rtl/alu.v b/verilog/rtl/alu.v
index 0d7c26c..9a72dee 100644
--- a/verilog/rtl/alu.v
+++ b/verilog/rtl/alu.v
@@ -28,84 +28,84 @@
Division by a constant can be compiled to a `muld` followed by an `rcr`.
*/
-module alu #(parameter DATA_WIDTH=16) (
+module alu (
input [3:0] opcode,
- input [DATA_WIDTH-1:0] in1,
- input [DATA_WIDTH-1:0] in2,
+ input [`DATA_WIDTH-1:0] in1,
+ input [`DATA_WIDTH-1:0] in2,
input carry,
- output [DATA_WIDTH-1:0] out,
+ output [`DATA_WIDTH-1:0] out,
output carry_out
);
- wire [DATA_WIDTH-1:0] op_out[15:0];
+ wire [`DATA_WIDTH-1:0] op_out[15:0];
wire op_carry[15:0];
- wire [DATA_WIDTH-1:0] and_out = in1 & in2;
+ wire [`DATA_WIDTH-1:0] and_out = in1 & in2;
wire and_carry = |and_out;
assign op_out[0] = and_out;
assign op_carry[0] = and_carry;
- wire [DATA_WIDTH-1:0] or_out = in1 | in2;
+ wire [`DATA_WIDTH-1:0] or_out = in1 | in2;
wire or_carry = &or_out;
assign op_out[1] = or_out;
assign op_carry[1] = or_carry;
- wire [DATA_WIDTH-1:0] xor_out = in1 ^ in2;
+ wire [`DATA_WIDTH-1:0] xor_out = in1 ^ in2;
wire xor_carry = ^xor_out;
assign op_out[2] = xor_out;
assign op_carry[2] = xor_carry;
- wire [DATA_WIDTH-1:0] mux_out = carry ? in2 : in1;
- wire mux_carry = mux_out[DATA_WIDTH-1];
+ wire [`DATA_WIDTH-1:0] mux_out = carry ? in2 : in1;
+ wire mux_carry = mux_out[`DATA_WIDTH-1];
assign op_out[3] = mux_out;
assign op_carry[3] = mux_carry;
- wire [DATA_WIDTH-1:0] nand_out = ~and_out;
+ wire [`DATA_WIDTH-1:0] nand_out = ~and_out;
wire nand_carry = ~and_carry;
assign op_out[4] = nand_out;
assign op_carry[4] = nand_carry;
- wire [DATA_WIDTH-1:0] nor_out = ~or_out;
+ wire [`DATA_WIDTH-1:0] nor_out = ~or_out;
wire nor_carry = ~or_carry;
assign op_out[5] = nor_out;
assign op_carry[5] = nor_carry;
- wire [DATA_WIDTH-1:0] nxor_out = ~xor_out;
+ wire [`DATA_WIDTH-1:0] nxor_out = ~xor_out;
wire nxor_carry = ~xor_carry;
assign op_out[6] = nxor_out;
assign op_carry[6] = nxor_carry;
- wire [DATA_WIDTH-1:0] nmux_out = ~mux_out;
+ wire [`DATA_WIDTH-1:0] nmux_out = ~mux_out;
wire nmux_carry = ~mux_carry;
assign op_out[7] = nmux_out;
assign op_carry[7] = nmux_carry;
- wire [DATA_WIDTH-1:0] rcl_out;
+ wire [`DATA_WIDTH-1:0] rcl_out;
wire rcl_carry, rcl_ignore;
assign {rcl_carry, rcl_out, rcl_ignore} = {1'b0, in1, carry} << in2;
assign op_out[8] = rcl_out;
assign op_carry[8] = rcl_carry;
- wire [DATA_WIDTH-1:0] rcr_out;
+ wire [`DATA_WIDTH-1:0] rcr_out;
wire rcr_carry, rcr_ignore;
assign {rcr_ignore, rcr_out, rcr_carry} = {carry, in1, 1'b0} >> in2;
assign op_out[9] = rcr_out;
assign op_carry[9] = rcr_carry;
- wire [DATA_WIDTH-1:0] add_out;
+ wire [`DATA_WIDTH-1:0] add_out;
wire add_carry;
assign {add_carry, add_out} = in1 + in2 + carry;
assign op_out[10] = add_out;
assign op_carry[10] = add_carry;
- wire [DATA_WIDTH-1:0] sub_out;
+ wire [`DATA_WIDTH-1:0] sub_out;
wire sub_carry;
assign {sub_carry, sub_out} = in1 - in2 - carry;
assign op_out[11] = sub_out;
assign op_carry[11] = sub_carry;
- wire [DATA_WIDTH-1:0] mulh_out;
- wire [DATA_WIDTH-1:0] mul_out;
+ wire [`DATA_WIDTH-1:0] mulh_out;
+ wire [`DATA_WIDTH-1:0] mul_out;
assign {mulh_out, mul_out} = in1 * in2;
wire mul_carry = |mulh_out;
wire mulh_carry = mul_carry;
@@ -114,18 +114,18 @@
assign op_out[13] = mulh_out;
assign op_carry[13] = mulh_carry;
- wire [DATA_WIDTH-1:0] muld_out;
- wire [DATA_WIDTH-1:0] muld_ignore;
+ wire [`DATA_WIDTH-1:0] muld_out;
+ wire [`DATA_WIDTH-1:0] muld_ignore;
wire muld_carry;
assign {muld_carry, muld_out, muld_ignore} = in1 * {1'b1, in2};
assign op_out[14] = muld_out;
assign op_carry[14] = muld_carry;
- wire [DATA_WIDTH-1:0] in1c = in1 + carry;
- wire [DATA_WIDTH-1:0] in1d = in1 - (!carry);
- wire [DATA_WIDTH-1:0] log_bits;
- localparam LOG_WIDTH = $clog2(DATA_WIDTH);
- assign log_bits[DATA_WIDTH-1:LOG_WIDTH] = 0;
+ wire [`DATA_WIDTH-1:0] in1c = in1 + carry;
+ wire [`DATA_WIDTH-1:0] in1d = in1 - (!carry);
+ wire [`DATA_WIDTH-1:0] log_bits;
+ localparam LOG_WIDTH = $clog2(`DATA_WIDTH);
+ assign log_bits[`DATA_WIDTH-1:LOG_WIDTH] = 0;
generate genvar i;
for (i=LOG_WIDTH-1; i>=0; i=i-1) begin:g_bit
wire [(1<<(i+1))-1:0] subseq;
@@ -140,7 +140,7 @@
endgenerate
wire in1nz = in1c || carry;
wire in1no = |in1d;
- wire [DATA_WIDTH-1:0] log_out = in1nz ? (log_bits + in1no) : -1;
+ wire [`DATA_WIDTH-1:0] log_out = in1nz ? (log_bits + in1no) : -1;
wire log_carry = in1nz && !(in1c & in1d);
assign op_out[15] = log_out;
assign op_carry[15] = log_carry;
diff --git a/verilog/rtl/cpu_core.v b/verilog/rtl/cpu_core.v
index 350b22a..fb3c2b1 100644
--- a/verilog/rtl/cpu_core.v
+++ b/verilog/rtl/cpu_core.v
@@ -11,7 +11,7 @@
take two values from registers, memory or other sources, feed them through the ALU and put the
results in a register or memory cell or use it as a jump target.
-Opcode structure assumes INSTR_WIDTH=32. Changing it requires substantial edits to the code below.
+Opcode structure assumes the `INSTR_WIDTH macro (see defines.v) is 32. Changing it requires substantial edits to the code below.
Opcodes have 32 bits and use the following format:
AAA BBB C DD EEEE FFF GGGGGGGGGGGGGGGG
@@ -101,43 +101,43 @@
Parameters:
DATA_WIDTH = processor word size
-PC_WIDTH = size of program counter, should be <= DATA_WIDTH
-ADDR_WIDTH = size of mem_mesh addresses, should be <= DATA_WIDTH
+PC_WIDTH = size of program counter, should be <= `DATA_WIDTH
+ADDR_WIDTH = size of mem_mesh addresses, should be <= `DATA_WIDTH
SPREAD_WIDTH = size of mem_mesh spread value
INSTR_WIDTH = combined size of opcode & immediate, should be kept at 32
CPU_NUM = id number to differentiate cpu cores, can be queried by code running on the processor
*/
-module cpu_core #(parameter DATA_WIDTH=16, PC_WIDTH=8, ADDR_WIDTH=8, SPREAD_WIDTH=3, INSTR_WIDTH=32, CPU_NUM=0) (
+module cpu_core #(parameter CPU_NUM=0) (
input clk, // clock signal
input rst_n, // reset, active low
- input [INSTR_WIDTH-1:0] opcode, // opcode to be executed & immediate args
- input [DATA_WIDTH-1:0] mem_rdata, // connected to 'rdata' of memory module
- input [DATA_WIDTH-1:0] prng_in, // random number from prng
+ input [`INSTR_WIDTH-1:0] opcode, // opcode to be executed & immediate args
+ input [`DATA_WIDTH-1:0] mem_rdata, // connected to 'rdata' of memory module
+ input [`DATA_WIDTH-1:0] prng_in, // random number from prng
input [1:0] debug_mode, // debug: 00 = no change, 01 = single step, 10 = run, 11 = stop
input [3:0] debug_sel, // debug: cpu status register to query or modify
input debug_we, // debug: modify selected status register
- input [DATA_WIDTH-1:0] debug_wdata, // debug: new value of selected status register
- output [PC_WIDTH-1:0] progctr, // program counter
+ input [`DATA_WIDTH-1:0] debug_wdata, // debug: new value of selected status register
+ output [`PC_WIDTH-1:0] progctr, // program counter
output mem_we, // +-
- output [ADDR_WIDTH-1:0] mem_waddr, // | connected to
- output [SPREAD_WIDTH-1:0] mem_wspread, // | corresponding ports
- output [DATA_WIDTH-1:0] mem_wdata, // | of memory module
- output [ADDR_WIDTH-1:0] mem_raddr, // +-
+ output [`ADDR_WIDTH-1:0] mem_waddr, // | connected to
+ output [`SPREAD_WIDTH-1:0] mem_wspread, // | corresponding ports
+ output [`DATA_WIDTH-1:0] mem_wdata, // | of memory module
+ output [`ADDR_WIDTH-1:0] mem_raddr, // +-
output debug_stopped, // debug: read back whether core is stopped
- output [DATA_WIDTH-1:0] debug_rdata // debug: current value of selected status register
+ output [`DATA_WIDTH-1:0] debug_rdata // debug: current value of selected status register
);
-reg [DATA_WIDTH-1:0] reg1; // general-purpose registers
-reg [DATA_WIDTH-1:0] reg2;
+reg [`DATA_WIDTH-1:0] reg1; // general-purpose registers
+reg [`DATA_WIDTH-1:0] reg2;
reg carry; // carry flag
-reg [DATA_WIDTH-1:0] pc; // register for program counter
-reg [DATA_WIDTH-1:0] timer; // clock ticks since last reset
-reg [ADDR_WIDTH-1:0] raddr; // next read address
+reg [`DATA_WIDTH-1:0] pc; // register for program counter
+reg [`DATA_WIDTH-1:0] timer; // clock ticks since last reset
+reg [`ADDR_WIDTH-1:0] raddr; // next read address
reg we; // write to memory on next cycle
-reg [ADDR_WIDTH-1:0] waddr; // next write address
-reg [SPREAD_WIDTH-1:0] wspread; // next write spread
-reg [DATA_WIDTH-1:0] wdata; // next write data
+reg [`ADDR_WIDTH-1:0] waddr; // next write address
+reg [`SPREAD_WIDTH-1:0] wspread; // next write spread
+reg [`DATA_WIDTH-1:0] wdata; // next write data
reg stopped; // cpu core is stopped
assign progctr = pc;
@@ -161,9 +161,9 @@
wire op_extra_rdata = op_extra == 2; // copy rdata to reg1 (or reg2 if reg1 is the target)
wire op_extra_waddr = op_extra == 3; // fill waddr & wspread from immediate
-wire [DATA_WIDTH-1:0] next_pc = pc + 1;
+wire [`DATA_WIDTH-1:0] next_pc = pc + 1;
-wire [DATA_WIDTH-1:0] sources1[7:0];
+wire [`DATA_WIDTH-1:0] sources1[7:0];
assign sources1[0] = reg1;
assign sources1[1] = reg2;
assign sources1[2] = next_pc;
@@ -173,7 +173,7 @@
assign sources1[6] = timer;
assign sources1[7] = CPU_NUM;
-wire [DATA_WIDTH-1:0] sources2[7:0];
+wire [`DATA_WIDTH-1:0] sources2[7:0];
assign sources2[0] = reg1;
assign sources2[1] = reg2;
assign sources2[2] = next_pc;
@@ -183,20 +183,18 @@
assign sources2[6] = prng_in;
assign sources2[7] = 1;
-wire [DATA_WIDTH-1:0] in1_orig = sources1[op_in1]; // data to use as alu input 1, unless overridden by op_extra_carry
-wire in1_oh = in1_orig[DATA_WIDTH-1]; // highest bit of in1_orig
-wire [DATA_WIDTH-1:0] in1 = op_extra_carry ? op_immed : in1_orig; // data to use as alu input 1
-wire [DATA_WIDTH-1:0] in2 = sources2[op_in2]; // data to use as alu input 2
+wire [`DATA_WIDTH-1:0] in1_orig = sources1[op_in1]; // data to use as alu input 1, unless overridden by op_extra_carry
+wire in1_oh = in1_orig[`DATA_WIDTH-1]; // highest bit of in1_orig
+wire [`DATA_WIDTH-1:0] in1 = op_extra_carry ? op_immed : in1_orig; // data to use as alu input 1
+wire [`DATA_WIDTH-1:0] in2 = sources2[op_in2]; // data to use as alu input 2
wire carry_def = op_rst_carry ? 0 : carry; // carry to use as alu input, unless overridden by op_extra_carry
wire carry_ovr = op_rst_carry ? ~in1_oh : in1_oh; // override value if op_extra_carry is set
wire alu_cin = op_extra_carry ? carry_ovr : carry_def; // consolidated carry input for alu
-wire [DATA_WIDTH-1:0] alu_out; // data output from alu
+wire [`DATA_WIDTH-1:0] alu_out; // data output from alu
wire alu_cout; // carry output from alu
-alu #(
- .DATA_WIDTH(DATA_WIDTH)
-) alu_inst (
+alu alu_inst (
.opcode(op_alu),
.in1(in1),
.in2(in2),
@@ -215,30 +213,30 @@
// extract values from immediate to prepare for op_extra_waddr case
wire immed_ovr = op_immed[15];
-wire [DATA_WIDTH-1:0] s_hi4 = immed_ovr ? op_immed[14:0] : op_immed[14:11];
-wire [DATA_WIDTH-1:0] d_lo11 = immed_ovr ? reg1 : op_immed[10:0];
-wire [DATA_WIDTH-1:0] a_hi8 = immed_ovr ? op_immed[14:0] : op_immed[14:7];
-wire [DATA_WIDTH-1:0] d_lo7 = immed_ovr ? reg1 : op_immed[6:0];
-wire [DATA_WIDTH-1:0] a_hi11 = immed_ovr ? reg1 : op_immed[14:4];
-wire [DATA_WIDTH-1:0] s_lo4 = immed_ovr ? op_immed[14:0] : op_immed[3:0];
+wire [`DATA_WIDTH-1:0] s_hi4 = immed_ovr ? op_immed[14:0] : op_immed[14:11];
+wire [`DATA_WIDTH-1:0] d_lo11 = immed_ovr ? reg1 : op_immed[10:0];
+wire [`DATA_WIDTH-1:0] a_hi8 = immed_ovr ? op_immed[14:0] : op_immed[14:7];
+wire [`DATA_WIDTH-1:0] d_lo7 = immed_ovr ? reg1 : op_immed[6:0];
+wire [`DATA_WIDTH-1:0] a_hi11 = immed_ovr ? reg1 : op_immed[14:4];
+wire [`DATA_WIDTH-1:0] s_lo4 = immed_ovr ? op_immed[14:0] : op_immed[3:0];
// update target with alu output
// if op_extra_rdata is set, also write mem_rdata to reg1 (if target is reg1, use reg2 instead)
// if op_extra_waddr is set, also fill waddr & wspread with immediate (if target is waddr/wspread, replace with wdata)
-wire [DATA_WIDTH-1:0] reg1_mod = op_target_reg1 ? alu_out : (op_extra_rdata ? mem_rdata : reg1);
-wire [DATA_WIDTH-1:0] reg2_mod = op_target_reg2 ? alu_out : ((op_extra_rdata && op_target_reg1) ? mem_rdata : reg2);
-wire [DATA_WIDTH-1:0] pc_mod = op_target_pc ? alu_out : next_pc;
-wire [DATA_WIDTH-1:0] raddr_mod = op_target_raddr ? alu_out : raddr;
-wire [DATA_WIDTH-1:0] waddr_mod = op_target_waddr ? alu_out :
+wire [`DATA_WIDTH-1:0] reg1_mod = op_target_reg1 ? alu_out : (op_extra_rdata ? mem_rdata : reg1);
+wire [`DATA_WIDTH-1:0] reg2_mod = op_target_reg2 ? alu_out : ((op_extra_rdata && op_target_reg1) ? mem_rdata : reg2);
+wire [`DATA_WIDTH-1:0] pc_mod = op_target_pc ? alu_out : next_pc;
+wire [`DATA_WIDTH-1:0] raddr_mod = op_target_raddr ? alu_out : raddr;
+wire [`DATA_WIDTH-1:0] waddr_mod = op_target_waddr ? alu_out :
(op_extra_waddr ? (op_target_wspread ? a_hi8 : a_hi11) : waddr);
-wire [DATA_WIDTH-1:0] wspread_mod = op_target_wspread ? alu_out :
+wire [`DATA_WIDTH-1:0] wspread_mod = op_target_wspread ? alu_out :
(op_extra_waddr ? (op_target_waddr ? s_hi4 : s_lo4) : wspread);
-wire [DATA_WIDTH-1:0] wdata_mod = op_target_wdata ? alu_out :
+wire [`DATA_WIDTH-1:0] wdata_mod = op_target_wdata ? alu_out :
(op_extra_waddr ? (op_target_wspread ? d_lo7 : (op_target_waddr ? d_lo11 : wdata)) : wdata);
wire we_mod = op_target_wdata || (op_extra_waddr && (op_target_waddr || op_target_wspread));
// debug interface
-wire [DATA_WIDTH-1:0] debug_reg[15:0];
+wire [`DATA_WIDTH-1:0] debug_reg[15:0];
assign debug_reg[0] = pc;
assign debug_reg[1] = opcode[31:16];
assign debug_reg[2] = opcode[15:0];
diff --git a/verilog/rtl/debug_mux.v b/verilog/rtl/debug_mux.v
index 68717cf..a4b5149 100644
--- a/verilog/rtl/debug_mux.v
+++ b/verilog/rtl/debug_mux.v
@@ -10,26 +10,26 @@
Queries or modifies registers and status flags.
*/
-module debug_mux #(parameter CORES=8, LOG_CORES=3, DATA_WIDTH=16) (
- input [LOG_CORES-1:0] sel, // controller interface
+module debug_mux (
+ input [`LOG_CORES-1:0] sel, // controller interface
input [4:0] addr, // 0xxxx affects status register xxxx, 10000 affects running/stopped state
input we,
- input [DATA_WIDTH-1:0] wdata,
- output [DATA_WIDTH-1:0] rdata,
- input [CORES-1:0] reg_stopped, // interface towards cpu cores
- input [CORES*DATA_WIDTH-1:0] reg_rdata,
- output [CORES*2-1:0] cpu_mode,
- output [CORES*4-1:0] reg_sel,
- output [CORES-1:0] reg_we,
- output [CORES*DATA_WIDTH-1:0] reg_wdata
+ input [`DATA_WIDTH-1:0] wdata,
+ output [`DATA_WIDTH-1:0] rdata,
+ input [`CORES-1:0] reg_stopped, // interface towards cpu cores
+ input [`CORES*`DATA_WIDTH-1:0] reg_rdata,
+ output [`CORES*2-1:0] cpu_mode,
+ output [`CORES*4-1:0] reg_sel,
+ output [`CORES-1:0] reg_we,
+ output [`CORES*`DATA_WIDTH-1:0] reg_wdata
);
-wire reg_stopped_i[CORES-1:0];
-wire [DATA_WIDTH-1:0] reg_rdata_i[CORES-1:0];
-wire [1:0] cpu_mode_i[CORES-1:0];
-wire [3:0] reg_sel_i[CORES-1:0];
-wire reg_we_i[CORES-1:0];
-wire [DATA_WIDTH-1:0] reg_wdata_i[CORES-1:0];
+wire reg_stopped_i[`CORES-1:0];
+wire [`DATA_WIDTH-1:0] reg_rdata_i[`CORES-1:0];
+wire [1:0] cpu_mode_i[`CORES-1:0];
+wire [3:0] reg_sel_i[`CORES-1:0];
+wire reg_we_i[`CORES-1:0];
+wire [`DATA_WIDTH-1:0] reg_wdata_i[`CORES-1:0];
wire cc_mode;
wire [3:0] cc_sel;
@@ -37,13 +37,13 @@
assign rdata = cc_mode ? reg_stopped_i[sel] : reg_rdata_i[sel];
generate genvar core;
-for(core=0; core<CORES; core=core+1) begin:g_core
+for(core=0; core<`CORES; core=core+1) begin:g_core
assign reg_stopped_i[core] = reg_stopped[core];
- assign reg_rdata_i[core] = reg_rdata[core*DATA_WIDTH +: DATA_WIDTH];
+ assign reg_rdata_i[core] = reg_rdata[core*`DATA_WIDTH +: `DATA_WIDTH];
assign cpu_mode[core*2 +: 2] = cpu_mode_i[core];
assign reg_sel[core*4 +: 4] = reg_sel_i[core];
assign reg_we[core] = reg_we_i[core];
- assign reg_wdata[core*DATA_WIDTH +: DATA_WIDTH] = reg_wdata_i[core];
+ assign reg_wdata[core*`DATA_WIDTH +: `DATA_WIDTH] = reg_wdata_i[core];
wire cur = sel == core;
assign cpu_mode_i[core] = (cur && we && cc_mode) ? wdata : 2'b00;
diff --git a/verilog/rtl/defines.v b/verilog/rtl/defines.v
new file mode 100644
index 0000000..d3a87e7
--- /dev/null
+++ b/verilog/rtl/defines.v
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2021 Tamas Hubai
+
+`default_nettype none
+
+// number of cpu cores
+`define CORES 4
+
+// number of memory mesh cells per cpu core
+`define MEM_DEPTH 32
+
+// machine word size
+`define DATA_WIDTH 16
+
+// minimum number of instructions in program memory (some cores will have a multiple of it)
+`define INSTR_DEPTH 16
+
+// number of io pins usable by code on cpu cores
+`define IO_PINS 16
+
+// map io pin 0 to caravel io pad `FIRST_PAD
+`define FIRST_PAD 12
+
+// wishbone bus width, fixed to 32
+`define WB_WIDTH 32
+
+// number of caravel logic analyzer probes
+`define LOGIC_PROBES 128
+
+// number of caravel io pads; `MPRJ_IO_PADS is not defined in this file and must be defined before `IO_PADS is first used (presumably by caravel's defines.v - confirm)
+`define IO_PADS `MPRJ_IO_PADS
+
+// opcode width including args, should be fixed at 32 or opcode handling needs to be changed
+`define INSTR_WIDTH 32
+
+// size of lfsr for prng, should be fixed at 32 or polynomials need to be updated
+`define PRNG_STATE_BITS 32
+
+`define LOG_CORES $clog2(`CORES) // derived: bits needed to select one of `CORES cpu cores
+`define SPREAD_WIDTH $clog2(2 + `LOG_CORES) // derived: size of mem_mesh spread value
+`define ADDR_WIDTH $clog2(`MEM_DEPTH) // derived: size of mem_mesh addresses
+`define PC_WIDTH ($clog2(`INSTR_DEPTH) + $clog2(`CORES)) // derived: size of program counter
+`define MEM_IO_PORTS (2 + `IO_PINS) // derived: one port per io pin plus two more (presumably the joined ports of io_filter - confirm)
+`define MEM_IO_FIRST (`MEM_DEPTH - `MEM_IO_PORTS) // derived: presumably the first mem_mesh address mapped to an io port - confirm
+`define MEM_IO_LAST1 `MEM_DEPTH // derived: presumably one past the last io port address - confirm
+
+`default_nettype wire
+
diff --git a/verilog/rtl/entropy_pool.v b/verilog/rtl/entropy_pool.v
index c9e984c..8a6713d 100644
--- a/verilog/rtl/entropy_pool.v
+++ b/verilog/rtl/entropy_pool.v
@@ -7,15 +7,15 @@
Simple entropy pool, shifting a single bit into prng's in each clock cycle
*/
-module entropy_pool #(parameter WIDTH=16) (
+module entropy_pool (
input clk,
input rst_n,
- input[WIDTH-1:0] e_word,
+ input[`WB_WIDTH-1:0] e_word,
output e_bit
);
-reg[WIDTH-1:0] e_pool;
-wire[WIDTH-1:0] e_pool_mod;
+reg[`WB_WIDTH-1:0] e_pool;
+wire[`WB_WIDTH-1:0] e_pool_mod;
assign {e_pool_mod, e_bit} = {1'b0, e_pool} ^ {e_word, 1'b0};
always @(posedge clk) begin
diff --git a/verilog/rtl/instr_mem.v b/verilog/rtl/instr_mem.v
index 61638a9..cd78fcc 100644
--- a/verilog/rtl/instr_mem.v
+++ b/verilog/rtl/instr_mem.v
@@ -7,17 +7,17 @@
Instruction memory
*/
-module instr_mem #(parameter PC_WIDTH=8, INSTR_WIDTH=32, DEPTH=128) (
+module instr_mem #(parameter DEPTH=128) (
input clk,
input rst_n,
- input [PC_WIDTH-1:0] raddr,
- output [INSTR_WIDTH-1:0] rdata,
+ input [`PC_WIDTH-1:0] raddr,
+ output [`INSTR_WIDTH-1:0] rdata,
input we,
- input [PC_WIDTH-1:0] waddr,
- input [INSTR_WIDTH-1:0] wdata
+ input [`PC_WIDTH-1:0] waddr,
+ input [`INSTR_WIDTH-1:0] wdata
);
-reg [INSTR_WIDTH-1:0] mem[DEPTH-1:0];
+reg [`INSTR_WIDTH-1:0] mem[DEPTH-1:0];
assign rdata = mem[raddr];
@@ -25,7 +25,7 @@
always @ (posedge clk) begin
if (!rst_n) begin
for (i=0; i<DEPTH; i=i+1) begin
- mem[i] <= {(INSTR_WIDTH){1'b0}};
+ mem[i] <= {(`INSTR_WIDTH){1'b0}};
end
end else begin
if (we) mem[waddr] <= wdata;
diff --git a/verilog/rtl/io_filter.v b/verilog/rtl/io_filter.v
index 91d670c..13ac03f 100644
--- a/verilog/rtl/io_filter.v
+++ b/verilog/rtl/io_filter.v
@@ -17,71 +17,67 @@
Pins send and receive continuous streams of bits while io ports only fire on changes.
Writing ports corresponding to individual pins override bits of the joined output port.
-We assume IO_PINS <= DATA_WIDTH. Alternatively we could modify the code to use more than one joined
+We assume `IO_PINS <= `DATA_WIDTH. Alternatively we could modify the code to use more than one joined
port per direction.
*/
-module io_filter #(parameter IO_PINS=16, DATA_WIDTH=16) (
+module io_filter (
input clk,
input rst_n,
- input [IO_PINS-1:0] pin_dir, // 0=input, 1=output
- input [IO_PINS-1:0] pin_data_in, // input for both mem_mesh & io_filter
- output [IO_PINS-1:0] pin_data_out, // output for both mem_mesh & io_filter
- output [IO_PINS+2-1:0] port_active_in, // input for mem_mesh, output for io_filter
- input [IO_PINS+2-1:0] port_active_out, // output for mem_mesh, input for io_filter
- output [(IO_PINS+2)*DATA_WIDTH-1:0] port_data_in,
- input [(IO_PINS+2)*DATA_WIDTH-1:0] port_data_out
+ input [`IO_PINS-1:0] pin_dir, // 0=input, 1=output
+ input [`IO_PINS-1:0] pin_data_in, // input for both mem_mesh & io_filter
+ output [`IO_PINS-1:0] pin_data_out, // output for both mem_mesh & io_filter
+ output [`IO_PINS+2-1:0] port_active_in, // input for mem_mesh, output for io_filter
+ input [`IO_PINS+2-1:0] port_active_out, // output for mem_mesh, input for io_filter
+ output [(`IO_PINS+2)*`DATA_WIDTH-1:0] port_data_in,
+ input [(`IO_PINS+2)*`DATA_WIDTH-1:0] port_data_out
);
-reg [IO_PINS-1:0] saved_in;
-reg [IO_PINS-1:0] saved_out;
+reg [`IO_PINS-1:0] saved_in;
+reg [`IO_PINS-1:0] saved_out;
// input
-wire [IO_PINS-1:0] input_indiv = pin_data_in; // select input pins
-wire [IO_PINS-1:0] input_indiv_active = pin_data_in ^ saved_in; // a pin is active if it changed from the last state
+wire [`IO_PINS-1:0] input_indiv = pin_data_in; // select input pins
+wire [`IO_PINS-1:0] input_indiv_active = pin_data_in ^ saved_in; // a pin is active if it changed from the last state
wire input_joined_active = |input_indiv_active; // update the joined port if any of the pins changed
-wire [IO_PINS-1:0] input_joined;
-pin_compress #( // compress input bits together
- .WIDTH(IO_PINS)
-) comp (
+wire [`IO_PINS-1:0] input_joined;
+pin_compress comp ( // compress input bits together
.data(input_indiv),
.mask(~pin_dir),
.result(input_joined)
);
// input
-assign port_active_in[IO_PINS +: 2] = {input_joined_active, 1'b0}; // assign the joined ports & their active states
-assign port_data_in[IO_PINS*DATA_WIDTH +: 2*DATA_WIDTH] = {input_joined, {(DATA_WIDTH){1'b0}}};
+assign port_active_in[`IO_PINS +: 2] = {input_joined_active, 1'b0}; // assign the joined ports & their active states
+assign port_data_in[`IO_PINS*`DATA_WIDTH +: 2*`DATA_WIDTH] = {input_joined, {(`DATA_WIDTH){1'b0}}};
// output
-wire [IO_PINS-1:0] output_indiv;
-wire [IO_PINS-1:0] output_indiv_active;
+wire [`IO_PINS-1:0] output_indiv;
+wire [`IO_PINS-1:0] output_indiv_active;
generate genvar pin;
- for (pin=0; pin<IO_PINS; pin=pin+1) begin:g_pin
+ for (pin=0; pin<`IO_PINS; pin=pin+1) begin:g_pin
// input
assign port_active_in[pin] = input_indiv_active[pin]; // assign the individual ports & their active states
- assign port_data_in[pin*DATA_WIDTH +: DATA_WIDTH] = {(DATA_WIDTH){input_indiv[pin]}};
+ assign port_data_in[pin*`DATA_WIDTH +: `DATA_WIDTH] = {(`DATA_WIDTH){input_indiv[pin]}};
// output
assign pin_data_out[pin] = saved_out[pin]; // output pins keep their state between writes
assign output_indiv_active[pin] = port_active_out[pin]; // get pins & their active states from the individual output ports
- assign output_indiv[pin] = port_data_out[pin*DATA_WIDTH];
+ assign output_indiv[pin] = port_data_out[pin*`DATA_WIDTH];
end
endgenerate
// output
-wire [IO_PINS-1:0] output_joined = port_data_out[IO_PINS*DATA_WIDTH +: DATA_WIDTH]; // get pins & their active state from the joined output port
-wire output_joined_active = port_active_out[IO_PINS];
-wire [IO_PINS-1:0] output_decomp;
-pin_decompress #( // decompress output pins to their respective bit positions
- .WIDTH(IO_PINS)
-) decomp (
+wire [`IO_PINS-1:0] output_joined = port_data_out[`IO_PINS*`DATA_WIDTH +: `DATA_WIDTH]; // get pins & their active state from the joined output port
+wire output_joined_active = port_active_out[`IO_PINS];
+wire [`IO_PINS-1:0] output_decomp;
+pin_decompress decomp ( // decompress output pins to their respective bit positions
.data(output_joined),
.mask(pin_dir),
.result(output_decomp)
);
// consolidate pins set through joined & individual ports (individual ports have priority)
-wire [IO_PINS-1:0] output_mixed = (output_indiv_active & output_indiv) | (~output_indiv_active & output_decomp);
-wire [IO_PINS-1:0] output_mixed_active = output_indiv_active | {(IO_PINS){output_joined_active}};
+wire [`IO_PINS-1:0] output_mixed = (output_indiv_active & output_indiv) | (~output_indiv_active & output_decomp);
+wire [`IO_PINS-1:0] output_mixed_active = output_indiv_active | {(`IO_PINS){output_joined_active}};
integer i;
always @(posedge clk) begin
@@ -89,7 +85,7 @@
saved_in <= 0;
saved_out <= 0;
end else begin
- for (i=0; i<IO_PINS; i=i+1) begin
+ for (i=0; i<`IO_PINS; i=i+1) begin
// active outputs change the saved state in order to keep being sent
if (output_mixed_active[i]) saved_out[i] <= output_mixed[i];
// inputs are only active for a single cycle while they differ from their saved state
diff --git a/verilog/rtl/io_filter_rev.v b/verilog/rtl/io_filter_rev.v
index 67a0fc4..ab592e4 100644
--- a/verilog/rtl/io_filter_rev.v
+++ b/verilog/rtl/io_filter_rev.v
@@ -7,26 +7,23 @@
IO filter with reversed pin order
*/
-module io_filter_rev #(parameter IO_PINS=16, DATA_WIDTH=16) (
+module io_filter_rev (
input clk,
input rst_n,
- input [IO_PINS-1:0] pin_dir, // 0=input, 1=output
- input [IO_PINS-1:0] pin_data_in, // input for both mem_mesh & io_filter
- output [IO_PINS-1:0] pin_data_out, // output for both mem_mesh & io_filter
- output [IO_PINS+2-1:0] port_active_in, // input for mem_mesh, output for io_filter
- input [IO_PINS+2-1:0] port_active_out, // output for mem_mesh, input for io_filter
- output [(IO_PINS+2)*DATA_WIDTH-1:0] port_data_in,
- input [(IO_PINS+2)*DATA_WIDTH-1:0] port_data_out
+ input [`IO_PINS-1:0] pin_dir, // 0=input, 1=output
+ input [`IO_PINS-1:0] pin_data_in, // input for both mem_mesh & io_filter
+ output [`IO_PINS-1:0] pin_data_out, // output for both mem_mesh & io_filter
+ output [`IO_PINS+2-1:0] port_active_in, // input for mem_mesh, output for io_filter
+ input [`IO_PINS+2-1:0] port_active_out, // output for mem_mesh, input for io_filter
+ output [(`IO_PINS+2)*`DATA_WIDTH-1:0] port_data_in,
+ input [(`IO_PINS+2)*`DATA_WIDTH-1:0] port_data_out
);
-wire [IO_PINS-1:0] pin_dir_rev;
-wire [IO_PINS-1:0] pin_data_in_rev;
-wire [IO_PINS-1:0] pin_data_out_rev;
+wire [`IO_PINS-1:0] pin_dir_rev;
+wire [`IO_PINS-1:0] pin_data_in_rev;
+wire [`IO_PINS-1:0] pin_data_out_rev;
-io_filter #(
- .IO_PINS(IO_PINS),
- .DATA_WIDTH(DATA_WIDTH)
-) io_filter_inst (
+io_filter io_filter_inst (
.clk(clk),
.rst_n(rst_n),
.pin_dir(pin_dir_rev),
@@ -39,8 +36,8 @@
);
generate genvar pin;
- for (pin=0; pin<IO_PINS; pin=pin+1) begin:g_pin
- localparam rpin = IO_PINS-1-pin;
+ for (pin=0; pin<`IO_PINS; pin=pin+1) begin:g_pin
+ localparam rpin = `IO_PINS-1-pin;
assign pin_dir_rev[pin] = pin_dir[rpin];
assign pin_data_in_rev[pin] = pin_data_in[rpin];
assign pin_data_out[pin] = pin_data_out_rev[rpin];
diff --git a/verilog/rtl/io_pads.v b/verilog/rtl/io_pads.v
index 4089b74..8fd7714 100644
--- a/verilog/rtl/io_pads.v
+++ b/verilog/rtl/io_pads.v
@@ -9,36 +9,36 @@
IO_PINS = logical pins accessible for the program running on the cpu cores
IO_PADS = pads made available by Caravel for user projects (maps to MPRJ_IO_PADS)
LOGIC_PROBES = logic analyzer probes
-FIRST_PAD = map pin 0 to pad FIRST_PAD, pin 1 to pad FIRST_PAD+1 etc.
+FIRST_PAD = map pin 0 to pad `FIRST_PAD, pin 1 to pad `FIRST_PAD+1 etc.
*/
-module io_pads #(parameter IO_PINS=16, IO_PADS=38, LOGIC_PROBES=128, FIRST_PAD=12) (
+module io_pads (
// Caravel interface
input wb_clk_i,
input wb_rst_i,
- input [LOGIC_PROBES-1:0] la_data_in,
- output [LOGIC_PROBES-1:0] la_data_out,
- input [LOGIC_PROBES-1:0] la_oenb,
- input [IO_PADS-1:0] io_in,
- output [IO_PADS-1:0] io_out,
- output [IO_PADS-1:0] io_oeb,
+ input [`LOGIC_PROBES-1:0] la_data_in,
+ output [`LOGIC_PROBES-1:0] la_data_out,
+ input [`LOGIC_PROBES-1:0] la_oenb,
+ input [`IO_PADS-1:0] io_in,
+ output [`IO_PADS-1:0] io_out,
+ output [`IO_PADS-1:0] io_oeb,
// MCU interface
output clk,
output rst_hard_n,
output rst_soft_n,
output rst_prng_n,
// IO filter interface
- output [IO_PINS-1:0] pin_dir,
- output [IO_PINS-1:0] pin_data_in,
- input [IO_PINS-1:0] pin_data_out,
+ output [`IO_PINS-1:0] pin_dir,
+ output [`IO_PINS-1:0] pin_data_in,
+ input [`IO_PINS-1:0] pin_data_out,
// Wishbone multiplexer interface
input cfg_we,
input cfg_addr,
- input [IO_PINS-1:0] cfg_wdata
+ input [`IO_PINS-1:0] cfg_wdata
);
reg programming;
-reg [IO_PINS-1:0] saved_dir;
+reg [`IO_PINS-1:0] saved_dir;
// allow logic analyzer probes to override clock & reset signals
assign clk = la_oenb[0] ? wb_clk_i : la_data_in[0];
@@ -47,45 +47,45 @@
assign rst_prng_n = la_oenb[3] ? !wb_rst_i : la_data_in[3];
localparam LA_DIR = 4; // index of logic analyzer probes for pin directions
-localparam LA_PIN = LA_DIR + IO_PINS; // index of logic analyzer probes for pin values
-localparam LA_PAD = LA_PIN + IO_PINS; // index of logic analyzer probes for pad values
-localparam LA_END = LA_PAD + IO_PADS; // index of first unused logic analyzer probe
-localparam LA_REM = LOGIC_PROBES - LA_END; // unused logic analyzer probes
+localparam LA_PIN = LA_DIR + `IO_PINS; // index of logic analyzer probes for pin values
+localparam LA_PAD = LA_PIN + `IO_PINS; // index of logic analyzer probes for pad values
+localparam LA_END = LA_PAD + `IO_PADS; // index of first unused logic analyzer probe
+localparam LA_REM = `LOGIC_PROBES - LA_END; // unused logic analyzer probes
-localparam PAD_REM = IO_PADS - IO_PINS - FIRST_PAD; // unused pads remaining after the last io pin
+localparam PAD_REM = `IO_PADS - `IO_PINS - `FIRST_PAD; // unused pads remaining after the last io pin
// while programming, all pins are inputs, otherwise they follow the saved_dir array
// the logic analyzer can override everything
-assign pin_dir = (la_oenb[LA_DIR +: IO_PINS] & (rst_soft_n ? saved_dir : 0)) |
- (~la_oenb[LA_DIR +: IO_PINS] & la_data_in[LA_DIR +: IO_PINS]);
+assign pin_dir = (la_oenb[LA_DIR +: `IO_PINS] & (rst_soft_n ? saved_dir : 0)) |
+ (~la_oenb[LA_DIR +: `IO_PINS] & la_data_in[LA_DIR +: `IO_PINS]);
// pin values are read from corresponding pads as long as the pin direction is set to input
-assign pin_data_in = (la_oenb[LA_PIN +: IO_PINS] & ~pin_dir & io_in[FIRST_PAD +: IO_PINS]) |
- (~la_oenb[LA_PIN +: IO_PINS] & la_data_in[LA_PIN +: IO_PINS]);
+assign pin_data_in = (la_oenb[LA_PIN +: `IO_PINS] & ~pin_dir & io_in[`FIRST_PAD +: `IO_PINS]) |
+ (~la_oenb[LA_PIN +: `IO_PINS] & la_data_in[LA_PIN +: `IO_PINS]);
// configure pad directions according to pin directions, pads not matched to pins are marked as inputs
-assign io_oeb = (la_oenb[LA_PAD +: IO_PADS] & {{(PAD_REM){1'b1}}, ~pin_dir, {(FIRST_PAD){1'b1}}}) |
- (~la_oenb[LA_PAD +: IO_PADS] & {(IO_PADS){1'b0}});
+assign io_oeb = (la_oenb[LA_PAD +: `IO_PADS] & {{(PAD_REM){1'b1}}, ~pin_dir, {(`FIRST_PAD){1'b1}}}) |
+ (~la_oenb[LA_PAD +: `IO_PADS] & {(`IO_PADS){1'b0}});
// pin values are written to corresponding pads, zeroes are written to unassigned pads (they are inputs anyway)
-assign io_out = (la_oenb[LA_PAD +: IO_PADS] & {{(PAD_REM){1'b0}}, pin_dir & pin_data_out, {(FIRST_PAD){1'b0}}}) |
- (~la_oenb[LA_PAD +: IO_PADS] & la_data_in[LA_PAD +: IO_PADS]);
+assign io_out = (la_oenb[LA_PAD +: `IO_PADS] & {{(PAD_REM){1'b0}}, pin_dir & pin_data_out, {(`FIRST_PAD){1'b0}}}) |
+ (~la_oenb[LA_PAD +: `IO_PADS] & la_data_in[LA_PAD +: `IO_PADS]);
// logic analyzer probes can also read back the same signals and values
assign la_data_out[0] = clk;
assign la_data_out[1] = rst_hard_n;
assign la_data_out[2] = rst_soft_n;
assign la_data_out[3] = rst_prng_n;
-assign la_data_out[LA_DIR +: IO_PINS] = pin_dir;
-assign la_data_out[LA_PIN +: IO_PINS] = pin_data_out;
-assign la_data_out[LA_PAD +: IO_PADS] = io_in;
+assign la_data_out[LA_DIR +: `IO_PINS] = pin_dir;
+assign la_data_out[LA_PIN +: `IO_PINS] = pin_data_out;
+assign la_data_out[LA_PAD +: `IO_PADS] = io_in;
assign la_data_out[LA_END +: LA_REM] = {(LA_REM){1'b0}};
// change programming mode & pin directions from the wishbone multiplexer
always @(posedge clk) begin
if (!rst_hard_n) begin
programming <= 0;
- saved_dir <= {(IO_PINS){1'b0}};
+ saved_dir <= {(`IO_PINS){1'b0}};
end else begin
if (cfg_we) begin
case (cfg_addr)
diff --git a/verilog/rtl/mcu.v b/verilog/rtl/mcu.v
index f5d3288..b726176 100644
--- a/verilog/rtl/mcu.v
+++ b/verilog/rtl/mcu.v
@@ -32,42 +32,28 @@
*/
-module mcu #(parameter
- CORES = 8, // number of cpu cores
- LOG_CORES = 3, // clog2(CORES)
- MEM_DEPTH = 256, // number of memory mesh cells per cpu core
- DATA_WIDTH = 16, // machine word size
- PC_WIDTH = 8, // program counter size, should be at least clog2(INSTR_DEPTH)+clog2(CORES)
- ADDR_WIDTH = 8, // memory mesh address width, should be at least clog2(MEM_DEPTH)
- INSTR_WIDTH = 32, // opcode width including args, should be fixed at 32 or opcode handling needs to be changed
- INSTR_DEPTH = 32, // minimum number of instructions in program memory (some cores will have a multiple of it)
- IO_PINS = 16, // number of io pins usable by code on cpu cores
- IO_PADS = 38, // number of caravel io pads
- FIRST_PAD = 12, // map io pin 0 to caravel io pad FIRST_PAD
- LOGIC_PROBES = 128, // number of caravel logic analyzer probes
- WB_WIDTH = 32 // wishbone bus width, fixed to 32
-)(
+module mcu (
input wb_clk_i, // wishbone clock
input wb_rst_i, // wb reset, active high
input wbs_stb_i, // wb strobe
input wbs_cyc_i, // wb cycle
input wbs_we_i, // wb write enable
- input [WB_WIDTH-1:0] wbs_adr_i, // wb address
- input [WB_WIDTH-1:0] wbs_dat_i, // wb input data
+ input [`WB_WIDTH-1:0] wbs_adr_i, // wb address
+ input [`WB_WIDTH-1:0] wbs_dat_i, // wb input data
output wbs_ack_o, // wb acknowledge
- output [WB_WIDTH-1:0] wbs_dat_o, // wb output data
- input [LOGIC_PROBES-1:0] la_data_in, // logic analyzer probes input
- output [LOGIC_PROBES-1:0] la_data_out, // la probes output
- input [LOGIC_PROBES-1:0] la_oenb, // la probes direction, 0=input (write by la), 1=output (read by la)
- input [IO_PADS-1:0] io_in, // io pads input
- output [IO_PADS-1:0] io_out, // io pads output
- output [IO_PADS-1:0] io_oeb // io pads direction, 0=output (write by mcu), 1=input (read by mcu)
+ output [`WB_WIDTH-1:0] wbs_dat_o, // wb output data
+ input [`LOGIC_PROBES-1:0] la_data_in, // logic analyzer probes input
+ output [`LOGIC_PROBES-1:0] la_data_out, // la probes output
+ input [`LOGIC_PROBES-1:0] la_oenb, // la probes direction, 0=input (write by la), 1=output (read by la)
+ input [`IO_PADS-1:0] io_in, // io pads input
+ output [`IO_PADS-1:0] io_out, // io pads output
+ output [`IO_PADS-1:0] io_oeb // io pads direction, 0=output (write by mcu), 1=input (read by mcu)
);
-localparam SPREAD_LAYERS = LOG_CORES;
+localparam SPREAD_LAYERS = `LOG_CORES;
localparam SPREAD_WIDTH = $clog2(2 + SPREAD_LAYERS);
-localparam MEM_IO_PORTS = 2 + IO_PINS;
-localparam MEM_IO_FIRST = MEM_DEPTH - MEM_IO_PORTS;
+localparam MEM_IO_PORTS = 2 + `IO_PINS;
+localparam MEM_IO_FIRST = `MEM_DEPTH - MEM_IO_PORTS;
// clock and reset signals, set by io_pads using wb_clk_i, wb_rst_i and logic probes
wire clk;
@@ -76,100 +62,95 @@
wire rst_prng_n;
// between io pads and io filter
-wire [IO_PINS-1:0] pin_dir; // pads > iof
-wire [IO_PINS-1:0] pin_data_in; // pads > iof
-wire [IO_PINS-1:0] pin_data_out; // pads < iof
+wire [`IO_PINS-1:0] pin_dir; // pads > iof
+wire [`IO_PINS-1:0] pin_data_in; // pads > iof
+wire [`IO_PINS-1:0] pin_data_out; // pads < iof
// between cpu core and corresponding instruction memory
-wire [INSTR_WIDTH-1:0] opcode[CORES-1:0]; // cpu < im
-wire [PC_WIDTH-1:0] progctr[CORES-1:0]; // cpu > im
+wire [`INSTR_WIDTH-1:0] opcode[`CORES-1:0]; // cpu < im
+wire [`PC_WIDTH-1:0] progctr[`CORES-1:0]; // cpu > im
// between cpu core and memory mesh (unpacked versions for cpu cores)
-wire [DATA_WIDTH-1:0] mem_rdata[CORES-1:0]; // cpu < mesh
-wire mem_we[CORES-1:0]; // cpu > mesh
-wire [ADDR_WIDTH-1:0] mem_waddr[CORES-1:0]; // cpu > mesh
-wire [SPREAD_WIDTH-1:0] mem_wspread[CORES-1:0]; // cpu > mesh
-wire [DATA_WIDTH-1:0] mem_wdata[CORES-1:0]; // cpu > mesh
-wire [ADDR_WIDTH-1:0] mem_raddr[CORES-1:0]; // cpu > mesh
+wire [`DATA_WIDTH-1:0] mem_rdata[`CORES-1:0]; // cpu < mesh
+wire mem_we[`CORES-1:0]; // cpu > mesh
+wire [`ADDR_WIDTH-1:0] mem_waddr[`CORES-1:0]; // cpu > mesh
+wire [SPREAD_WIDTH-1:0] mem_wspread[`CORES-1:0]; // cpu > mesh
+wire [`DATA_WIDTH-1:0] mem_wdata[`CORES-1:0]; // cpu > mesh
+wire [`ADDR_WIDTH-1:0] mem_raddr[`CORES-1:0]; // cpu > mesh
// between cpu core and memory mesh (packed versions for memory mesh)
-wire [CORES*DATA_WIDTH-1:0] mem_rdata_raw; // cpu < mesh
-wire [CORES-1:0] mem_we_raw; // cpu > mesh
-wire [CORES*ADDR_WIDTH-1:0] mem_waddr_raw; // cpu > mesh
-wire [CORES*SPREAD_WIDTH-1:0] mem_wspread_raw; // cpu > mesh
-wire [CORES*DATA_WIDTH-1:0] mem_wdata_raw; // cpu > mesh
-wire [CORES*ADDR_WIDTH-1:0] mem_raddr_raw; // cpu > mesh
+wire [`CORES*`DATA_WIDTH-1:0] mem_rdata_raw; // cpu < mesh
+wire [`CORES-1:0] mem_we_raw; // cpu > mesh
+wire [`CORES*`ADDR_WIDTH-1:0] mem_waddr_raw; // cpu > mesh
+wire [`CORES*SPREAD_WIDTH-1:0] mem_wspread_raw; // cpu > mesh
+wire [`CORES*`DATA_WIDTH-1:0] mem_wdata_raw; // cpu > mesh
+wire [`CORES*`ADDR_WIDTH-1:0] mem_raddr_raw; // cpu > mesh
// between cpu core and corresponding prng
-wire [DATA_WIDTH-1:0] prng_random[CORES-1:0]; // cpu < prng
+wire [`DATA_WIDTH-1:0] prng_random[`CORES-1:0]; // cpu < prng
// between instruction memory and programming multiplexer (unpacked versions for instruction memory)
-wire im_we[CORES-1:0]; // im < pmux
-wire [PC_WIDTH-1:0] im_waddr[CORES-1:0]; // im < pmux
-wire [INSTR_WIDTH-1:0] im_wdata[CORES-1:0]; // im < pmux
+wire im_we[`CORES-1:0]; // im < pmux
+wire [`PC_WIDTH-1:0] im_waddr[`CORES-1:0]; // im < pmux
+wire [`INSTR_WIDTH-1:0] im_wdata[`CORES-1:0]; // im < pmux
// between instruction memory and programming multiplexer (packed versions for programming multiplexer)
-wire [CORES-1:0] im_we_raw; // im < pmux
-wire [CORES*PC_WIDTH-1:0] im_waddr_raw; // im < pmux
-wire [CORES*INSTR_WIDTH-1:0] im_wdata_raw; // im < pmux
+wire [`CORES-1:0] im_we_raw; // im < pmux
+wire [`CORES*`PC_WIDTH-1:0] im_waddr_raw; // im < pmux
+wire [`CORES*`INSTR_WIDTH-1:0] im_wdata_raw; // im < pmux
// between memory mesh and io filter
wire [MEM_IO_PORTS-1:0] mem_io_active_in; // mesh < iof
wire [MEM_IO_PORTS-1:0] mem_io_active_out; // mesh > iof
-wire [MEM_IO_PORTS*DATA_WIDTH-1:0] mem_io_data_in; // mesh < iof
-wire [MEM_IO_PORTS*DATA_WIDTH-1:0] mem_io_data_out; // mesh > iof
+wire [MEM_IO_PORTS*`DATA_WIDTH-1:0] mem_io_data_in; // mesh < iof
+wire [MEM_IO_PORTS*`DATA_WIDTH-1:0] mem_io_data_out; // mesh > iof
// between debugging multiplexer and cpu core (unpacked versions for cpu core)
-wire [1:0] debug_cpu_mode[CORES-1:0]; // dmux > cpu
-wire [3:0] debug_reg_sel[CORES-1:0]; // dmux > cpu
-wire debug_reg_we[CORES-1:0]; // dmux > cpu
-wire [DATA_WIDTH-1:0] debug_reg_wdata[CORES-1:0]; // dmux > cpu
-wire debug_reg_stopped[CORES-1:0]; // dmux < cpu
-wire [DATA_WIDTH-1:0] debug_reg_rdata[CORES-1:0]; // dmux < cpu
+wire [1:0] debug_cpu_mode[`CORES-1:0]; // dmux > cpu
+wire [3:0] debug_reg_sel[`CORES-1:0]; // dmux > cpu
+wire debug_reg_we[`CORES-1:0]; // dmux > cpu
+wire [`DATA_WIDTH-1:0] debug_reg_wdata[`CORES-1:0]; // dmux > cpu
+wire debug_reg_stopped[`CORES-1:0]; // dmux < cpu
+wire [`DATA_WIDTH-1:0] debug_reg_rdata[`CORES-1:0]; // dmux < cpu
// between debugging multiplexer and cpu core (packed versions for debugging multiplexer)
-wire [CORES*2-1:0] debug_cpu_mode_raw; // dmux > cpu
-wire [CORES*4-1:0] debug_reg_sel_raw; // dmux > cpu
-wire [CORES-1:0] debug_reg_we_raw; // dmux > cpu
-wire [CORES*DATA_WIDTH-1:0] debug_reg_wdata_raw; // dmux > cpu
-wire [CORES-1:0] debug_reg_stopped_raw; // dmux < cpu
-wire [CORES*DATA_WIDTH-1:0] debug_reg_rdata_raw; // dmux < cpu
+wire [`CORES*2-1:0] debug_cpu_mode_raw; // dmux > cpu
+wire [`CORES*4-1:0] debug_reg_sel_raw; // dmux > cpu
+wire [`CORES-1:0] debug_reg_we_raw; // dmux > cpu
+wire [`CORES*`DATA_WIDTH-1:0] debug_reg_wdata_raw; // dmux > cpu
+wire [`CORES-1:0] debug_reg_stopped_raw; // dmux < cpu
+wire [`CORES*`DATA_WIDTH-1:0] debug_reg_rdata_raw; // dmux < cpu
// between wishbone multiplexer and programming multiplexer
wire prog_we; // wbmux > pmux
-wire [LOG_CORES-1:0] prog_sel; // wbmux > pmux
-wire [PC_WIDTH-1:0] prog_waddr; // wbmux > pmux
-wire [INSTR_WIDTH-1:0] prog_wdata; // wbmux > pmux
+wire [`LOG_CORES-1:0] prog_sel; // wbmux > pmux
+wire [`PC_WIDTH-1:0] prog_waddr; // wbmux > pmux
+wire [`INSTR_WIDTH-1:0] prog_wdata; // wbmux > pmux
// between wishbone multiplexer and io pads
wire pads_we; // wbmux > pads
wire pads_waddr; // wbmux > pads
-wire [IO_PINS-1:0] pads_wdata; // wbmux > pads
+wire [`IO_PINS-1:0] pads_wdata; // wbmux > pads
// between wishbone multiplexer and debugging multiplexer
-wire [LOG_CORES-1:0] debug_sel; // wbmux > dmux
+wire [`LOG_CORES-1:0] debug_sel; // wbmux > dmux
wire [4:0] debug_addr; // wbmux > dmux
wire debug_we; // wbmux > dmux
-wire [DATA_WIDTH-1:0] debug_wdata; // wbmux > dmux
-wire [DATA_WIDTH-1:0] debug_rdata; // wbmux < dmux
+wire [`DATA_WIDTH-1:0] debug_wdata; // wbmux > dmux
+wire [`DATA_WIDTH-1:0] debug_rdata; // wbmux < dmux
// between wishbone multiplexer and entropy pool
-wire [WB_WIDTH-1:0] entropy_word; // wbmux > ep
+wire [`WB_WIDTH-1:0] entropy_word; // wbmux > ep
// between entropy pool and prng's
wire entropy_bit; // ep > prng
// repeat for each cpu core
generate genvar core;
-for(core=0; core<CORES; core=core+1) begin:g_core
+for(core=0; core<`CORES; core=core+1) begin:g_core
// add the cpu core itself
cpu_core #(
- .DATA_WIDTH(DATA_WIDTH),
- .PC_WIDTH(PC_WIDTH),
- .ADDR_WIDTH(ADDR_WIDTH),
- .SPREAD_WIDTH(SPREAD_WIDTH),
- .INSTR_WIDTH(INSTR_WIDTH),
.CPU_NUM(core)
) cpu_core_inst (
.clk(clk),
@@ -192,15 +173,13 @@
);
// add corresponding instruction memory
- localparam CORES_RNDUP = 1 << LOG_CORES;
+ localparam CORES_RNDUP = 1 << `LOG_CORES;
localparam DEPTH_MULT = (core + CORES_RNDUP) & ~(core + CORES_RNDUP-1);
// e.g. for 8 cores, depths are multiplied by 8, 1, 2, 1, 4, 1, 2, 1
// so that we have a few cores that accept longer programs but the total
// memory required is still kept reasonably low
instr_mem #(
- .PC_WIDTH(PC_WIDTH),
- .INSTR_WIDTH(INSTR_WIDTH),
- .DEPTH(INSTR_DEPTH * DEPTH_MULT)
+ .DEPTH(`INSTR_DEPTH * DEPTH_MULT)
) instr_mem_inst (
.clk(clk),
.rst_n(rst_hard_n),
@@ -213,8 +192,7 @@
// add its own pseudorandom number generator
prng_wrap #(
- .INDEX(core),
- .OUTPUT_BITS(DATA_WIDTH)
+ .INDEX(core)
) prng_inst (
.clk(clk),
.rst_n(rst_prng_n),
@@ -224,44 +202,34 @@
// convert memory mesh inputs: unpacked to packed
assign mem_we_raw[core] = mem_we[core];
- assign mem_waddr_raw[core*ADDR_WIDTH +: ADDR_WIDTH] = mem_waddr[core];
+ assign mem_waddr_raw[core*`ADDR_WIDTH +: `ADDR_WIDTH] = mem_waddr[core];
assign mem_wspread_raw[core*SPREAD_WIDTH +: SPREAD_WIDTH] = mem_wspread[core];
- assign mem_wdata_raw[core*DATA_WIDTH +: DATA_WIDTH] = mem_wdata[core];
- assign mem_raddr_raw[core*ADDR_WIDTH +: ADDR_WIDTH] = mem_raddr[core];
+ assign mem_wdata_raw[core*`DATA_WIDTH +: `DATA_WIDTH] = mem_wdata[core];
+ assign mem_raddr_raw[core*`ADDR_WIDTH +: `ADDR_WIDTH] = mem_raddr[core];
// convert memory mesh outputs: packed to unpacked
- assign mem_rdata[core] = mem_rdata_raw[core*DATA_WIDTH +: DATA_WIDTH];
+ assign mem_rdata[core] = mem_rdata_raw[core*`DATA_WIDTH +: `DATA_WIDTH];
// convert programming multiplexer outputs: packed to unpacked
assign im_we[core] = im_we_raw[core];
- assign im_waddr[core] = im_waddr_raw[core*PC_WIDTH +: PC_WIDTH];
- assign im_wdata[core] = im_wdata_raw[core*INSTR_WIDTH +: INSTR_WIDTH];
+ assign im_waddr[core] = im_waddr_raw[core*`PC_WIDTH +: `PC_WIDTH];
+ assign im_wdata[core] = im_wdata_raw[core*`INSTR_WIDTH +: `INSTR_WIDTH];
// convert debugging multiplexer inputs: unpacked to packed
assign debug_reg_stopped_raw[core] = debug_reg_stopped[core];
- assign debug_reg_rdata_raw[core*DATA_WIDTH +: DATA_WIDTH] = debug_reg_rdata[core];
+ assign debug_reg_rdata_raw[core*`DATA_WIDTH +: `DATA_WIDTH] = debug_reg_rdata[core];
// convert debugging multiplexer outputs: packed to unpacked
assign debug_cpu_mode[core] = debug_cpu_mode_raw[core*2 +: 2];
assign debug_reg_sel[core] = debug_reg_sel_raw[core*4 +: 4];
assign debug_reg_we[core] = debug_reg_we_raw[core];
- assign debug_reg_wdata[core] = debug_reg_wdata_raw[core*DATA_WIDTH +: DATA_WIDTH];
+ assign debug_reg_wdata[core] = debug_reg_wdata_raw[core*`DATA_WIDTH +: `DATA_WIDTH];
end
endgenerate
// add the memory mesh, with a packed bus towards the cpu cores
-mem_mesh #(
- .CORES(CORES),
- .DEPTH(MEM_DEPTH),
- .DATA_WIDTH(DATA_WIDTH),
- .ADDR_WIDTH(ADDR_WIDTH),
- .SPREAD_LAYERS(SPREAD_LAYERS),
- .SPREAD_WIDTH(SPREAD_WIDTH),
- .USE_IO(1),
- .IO_PORTS(MEM_IO_PORTS),
- .IO_FIRST(MEM_IO_FIRST)
-) mem_mesh_inst (
+mem_mesh mem_mesh_inst (
.clk(clk),
.rst_n(rst_soft_n),
.we(mem_we_raw),
@@ -277,10 +245,7 @@
);
// add the io filter connected to the memory mesh
-io_filter_rev #(
- .IO_PINS(IO_PINS),
- .DATA_WIDTH(DATA_WIDTH)
-) io_filter_inst (
+io_filter_rev io_filter_inst (
.clk(clk),
.rst_n(rst_soft_n),
.pin_dir(pin_dir),
@@ -293,12 +258,7 @@
);
// add the programming multiplexer, with a packed bus towards instruction memories
-prog_mux #(
- .CORES(CORES),
- .LOG_CORES(LOG_CORES),
- .PC_WIDTH(PC_WIDTH),
- .INSTR_WIDTH(INSTR_WIDTH)
-) prog_mux_inst (
+prog_mux prog_mux_inst (
.we(prog_we),
.sel(prog_sel),
.waddr(prog_waddr),
@@ -309,11 +269,7 @@
);
// add the debugging multiplexer, with a packed bus towards cpu cores
-debug_mux #(
- .CORES(CORES),
- .LOG_CORES(LOG_CORES),
- .DATA_WIDTH(DATA_WIDTH)
-) debug_mux_inst (
+debug_mux debug_mux_inst (
.sel(debug_sel),
.addr(debug_addr),
.we(debug_we),
@@ -328,9 +284,7 @@
);
// add the entropy pool
-entropy_pool #(
- .WIDTH(WB_WIDTH)
-) entropy_pool_inst (
+entropy_pool entropy_pool_inst (
.clk(clk),
.rst_n(rst_prng_n),
.e_word(entropy_word),
@@ -338,14 +292,7 @@
);
// add the wishbone multiplexer
-wb_mux #(
- .LOG_CORES(LOG_CORES),
- .PC_WIDTH(PC_WIDTH),
- .INSTR_WIDTH(INSTR_WIDTH),
- .DATA_WIDTH(DATA_WIDTH),
- .IO_PINS(IO_PINS),
- .WB_WIDTH(WB_WIDTH)
-) wb_mux_inst (
+wb_mux wb_mux_inst (
.wbs_stb_i(wbs_stb_i),
.wbs_cyc_i(wbs_cyc_i),
.wbs_we_i(wbs_we_i),
@@ -370,12 +317,7 @@
// add the io pads & logic analyzer probes
// (this includes some reset & clock logic as well)
-io_pads #(
- .IO_PINS(IO_PINS),
- .IO_PADS(IO_PADS),
- .LOGIC_PROBES(LOGIC_PROBES),
- .FIRST_PAD(FIRST_PAD)
-) io_pads_inst (
+io_pads io_pads_inst (
.wb_clk_i(wb_clk_i),
.wb_rst_i(wb_rst_i),
.la_data_in(la_data_in),
diff --git a/verilog/rtl/mem_mesh.v b/verilog/rtl/mem_mesh.v
index 0afa424..25a219c 100644
--- a/verilog/rtl/mem_mesh.v
+++ b/verilog/rtl/mem_mesh.v
@@ -7,72 +7,73 @@
Generates a DFF RAM block for each core with a tree-like interconnect mesh between them
Parameters:
-CORES = number of cpu cores, also specifies the number of ram blocks
-DEPTH = number of words per ram block
-DATA_WIDTH = word size, number of bits per memory cell
-ADDR_WIDTH = address bus width, should be clog2(DEPTH)
-SPREAD_LAYERS = number of spread layers, should be clog2(CORES)
-SPREAD_WIDTH = spread bus width, should be clog2(2+SPREAD_LAYERS)
-IO_PORTS = number of io ports, should be <= DEPTH
-IO_FIRST = memory cell mapped to the first io port, should be <= DEPTH - IO_PORTS
+`CORES = number of cpu cores, also specifies the number of ram blocks
+`MEM_DEPTH = number of words per ram block
+`DATA_WIDTH = word size, number of bits per memory cell
+`ADDR_WIDTH = address bus width, should be clog2(`MEM_DEPTH)
+`LOG_CORES = number of spread layers, should be clog2(`CORES)
+`SPREAD_WIDTH = spread bus width, should be clog2(2+`LOG_CORES)
+`MEM_IO_PORTS = number of io ports, should be <= `MEM_DEPTH
+`MEM_IO_FIRST = memory cell mapped to the first io port
+`MEM_IO_LAST1 = memory cell mapped to the last io port + 1
A value of wspread > 0 on write operations specifies that the same address should also be written in some
other memory blocks. In particular, blocks whose number only differ in the lowest wspread bits are affected.
If several simultaneous write operations affect the same memory cell, writes with higher wspread have
priority. For writes having equal wspread the core with the lowest number wins.
-If addresses < IO_BUS_WIDTH are written with wspread > SPREAD_LAYERS, wdata is also sent to the io bus.
+If addresses in the range `MEM_IO_FIRST .. `MEM_IO_LAST1-1 are written with wspread > `LOG_CORES, wdata is also sent to the io bus.
Incoming data on the io bus is written to the respective cells with maximal spread (affecting all cores).
*/
-module mem_mesh #(parameter CORES=8, DEPTH=256, DATA_WIDTH=16, ADDR_WIDTH=8, SPREAD_LAYERS=3, SPREAD_WIDTH=3, USE_IO=1, IO_PORTS=16, IO_FIRST=240) (
+module mem_mesh (
input clk, // clock signal
input rst_n, // reset, active low
- input [CORES-1:0] we, // write enable
- input [CORES*ADDR_WIDTH-1:0] waddr, // write address
- input [CORES*SPREAD_WIDTH-1:0] wspread, // write spread
- input [CORES*DATA_WIDTH-1:0] wdata, // write data
- input [CORES*ADDR_WIDTH-1:0] raddr, // read address
- output [CORES*DATA_WIDTH-1:0] rdata, // read data
- input [IO_PORTS-1:0] io_active_in, // is receiving data on io bus
- output [IO_PORTS-1:0] io_active_out, // is sending data on io bus
- input [IO_PORTS*DATA_WIDTH-1:0] io_data_in, // io bus input
- output [IO_PORTS*DATA_WIDTH-1:0] io_data_out // io bus output
+ input [`CORES-1:0] we, // write enable
+ input [`CORES*`ADDR_WIDTH-1:0] waddr, // write address
+ input [`CORES*`SPREAD_WIDTH-1:0] wspread, // write spread
+ input [`CORES*`DATA_WIDTH-1:0] wdata, // write data
+ input [`CORES*`ADDR_WIDTH-1:0] raddr, // read address
+ output [`CORES*`DATA_WIDTH-1:0] rdata, // read data
+ input [`MEM_IO_PORTS-1:0] io_active_in, // is receiving data on io bus
+ output [`MEM_IO_PORTS-1:0] io_active_out, // is sending data on io bus
+ input [`MEM_IO_PORTS*`DATA_WIDTH-1:0] io_data_in, // io bus input
+ output [`MEM_IO_PORTS*`DATA_WIDTH-1:0] io_data_out // io bus output
);
-reg [DATA_WIDTH-1:0] mem[CORES-1:0][DEPTH-1:0]; // memory cells
-wire presel[CORES-1:0][DEPTH-1:0]; // is address selected before spreading
-wire uspread[CORES-1:0][SPREAD_LAYERS+1-1:0]; // is spreading to layer
-wire postsel[CORES-1:0][DEPTH-1:0]; // is address selected after spreading
-wire [DATA_WIDTH-1:0] postdata[CORES-1:0][DEPTH-1:0]; // data to be written after spreading
+reg [`DATA_WIDTH-1:0] mem[`CORES-1:0][`MEM_DEPTH-1:0]; // memory cells
+wire presel[`CORES-1:0][`MEM_DEPTH-1:0]; // is address selected before spreading
+wire uspread[`CORES-1:0][`LOG_CORES+1-1:0]; // is spreading to layer
+wire postsel[`CORES-1:0][`MEM_DEPTH-1:0]; // is address selected after spreading
+wire [`DATA_WIDTH-1:0] postdata[`CORES-1:0][`MEM_DEPTH-1:0]; // data to be written after spreading
generate genvar core, addr, layer, group, spl;
// convert spread to unary
-for (core=0; core<CORES; core=core+1) begin:g_core
- for(layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:g_layer
- assign uspread[core][layer] = we[core] & wspread[core*SPREAD_WIDTH +: SPREAD_WIDTH] > layer;
+for (core=0; core<`CORES; core=core+1) begin:g_core
+ for(layer=0; layer<=`LOG_CORES; layer=layer+1) begin:g_layer
+ assign uspread[core][layer] = we[core] & wspread[core*`SPREAD_WIDTH +: `SPREAD_WIDTH] > layer;
end
end
-for (addr=0; addr<DEPTH; addr=addr+1) begin:g_cell
+for (addr=0; addr<`MEM_DEPTH; addr=addr+1) begin:g_cell
// convert write address to one-hot encoding
- for (core=0; core<CORES; core=core+1) begin:g_core_m
- assign presel[core][addr] = we[core] & (waddr[core*ADDR_WIDTH +: ADDR_WIDTH] == addr);
+ for (core=0; core<`CORES; core=core+1) begin:g_core_m
+ assign presel[core][addr] = we[core] & (waddr[core*`ADDR_WIDTH +: `ADDR_WIDTH] == addr);
end
// calculate spreading from individual cores to groups of cores
- for (layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:spread
- localparam GROUPS = CORES >> layer;
+ for (layer=0; layer<=`LOG_CORES; layer=layer+1) begin:spread
+ localparam GROUPS = `CORES >> layer;
wire gsel[GROUPS-1:0];
- wire [DATA_WIDTH-1:0] gdata[GROUPS-1:0];
- wire gspread[GROUPS-1:0][SPREAD_LAYERS+1-layer-1:0];
+ wire [`DATA_WIDTH-1:0] gdata[GROUPS-1:0];
+ wire gspread[GROUPS-1:0][`LOG_CORES+1-layer-1:0];
if (layer == 0) begin:i_layerz
for (group=0; group<GROUPS; group=group+1) begin:g_group
assign gsel[group] = presel[group][addr];
- assign gdata[group] = {(DATA_WIDTH){we[group]}} & wdata[group*DATA_WIDTH +: DATA_WIDTH];
- for (spl=0; spl<=SPREAD_LAYERS; spl=spl+1) begin:cspread
+ assign gdata[group] = {(`DATA_WIDTH){we[group]}} & wdata[group*`DATA_WIDTH +: `DATA_WIDTH];
+ for (spl=0; spl<=`LOG_CORES; spl=spl+1) begin:cspread
assign gspread[group][spl] = uspread[group][spl];
end
end
@@ -80,11 +81,11 @@
for (group=0; group<GROUPS; group=group+1) begin:g_group
wire gs1 = spread[layer-1].gsel[group*2] & spread[layer-1].gspread[group*2][0];
wire gs2 = spread[layer-1].gsel[group*2+1] & spread[layer-1].gspread[group*2+1][0];
- wire [DATA_WIDTH-1:0] gd1 = spread[layer-1].gdata[group*2];
- wire [DATA_WIDTH-1:0] gd2 = spread[layer-1].gdata[group*2+1];
+ wire [`DATA_WIDTH-1:0] gd1 = spread[layer-1].gdata[group*2];
+ wire [`DATA_WIDTH-1:0] gd2 = spread[layer-1].gdata[group*2+1];
assign gsel[group] = gs1 | gs2;
assign gdata[group] = gs1 ? gd1 : gd2;
- for (spl=0; spl<=SPREAD_LAYERS-layer; spl=spl+1) begin:g_spread
+ for (spl=0; spl<=`LOG_CORES-layer; spl=spl+1) begin:g_spread
wire gsp1 = spread[layer-1].gspread[group*2][spl+1];
wire gsp2 = spread[layer-1].gspread[group*2+1][spl+1];
assign gspread[group][spl] = gs1 ? gsp1 : gsp2;
@@ -95,26 +96,26 @@
// mix in io logic at the highest spreading level
wire gs_i;
- wire [DATA_WIDTH-1:0] gd_i;
- if (USE_IO && IO_FIRST <= addr && addr < IO_FIRST + IO_PORTS) begin:i_io
- localparam io = addr - IO_FIRST;
- wire gs_o = spread[SPREAD_LAYERS].gsel[0] & spread[SPREAD_LAYERS].gspread[0][0];
- wire [DATA_WIDTH-1:0] gd_o = {(DATA_WIDTH){gs_o}} & spread[SPREAD_LAYERS].gdata[0];
+ wire [`DATA_WIDTH-1:0] gd_i;
+ if (`MEM_IO_FIRST <= addr && addr < `MEM_IO_LAST1) begin:i_io
+ localparam io = addr - `MEM_IO_FIRST;
+ wire gs_o = spread[`LOG_CORES].gsel[0] & spread[`LOG_CORES].gspread[0][0];
+ wire [`DATA_WIDTH-1:0] gd_o = {(`DATA_WIDTH){gs_o}} & spread[`LOG_CORES].gdata[0];
assign io_active_out[io] = gs_o;
- assign io_data_out[io*DATA_WIDTH +: DATA_WIDTH] = gd_o;
- assign gs_i = io_active_in[io] ? 1'b1 : spread[SPREAD_LAYERS].gsel[0];
- assign gd_i = io_active_in[io] ? io_data_in[io*DATA_WIDTH +: DATA_WIDTH] : spread[SPREAD_LAYERS].gdata[0];
+ assign io_data_out[io*`DATA_WIDTH +: `DATA_WIDTH] = gd_o;
+ assign gs_i = io_active_in[io] ? 1'b1 : spread[`LOG_CORES].gsel[0];
+ assign gd_i = io_active_in[io] ? io_data_in[io*`DATA_WIDTH +: `DATA_WIDTH] : spread[`LOG_CORES].gdata[0];
end else begin:i_nio
- assign gs_i = spread[SPREAD_LAYERS].gsel[0];
- assign gd_i = spread[SPREAD_LAYERS].gdata[0];
+ assign gs_i = spread[`LOG_CORES].gsel[0];
+ assign gd_i = spread[`LOG_CORES].gdata[0];
end
// calculate spreading back from groups of cores to individual cores
- for (layer=SPREAD_LAYERS; layer>=0; layer=layer-1) begin:collect
- localparam GROUPS = CORES >> layer;
+ for (layer=`LOG_CORES; layer>=0; layer=layer-1) begin:collect
+ localparam GROUPS = `CORES >> layer;
wire pgsel[GROUPS-1:0];
- wire [DATA_WIDTH-1:0] pgdata[GROUPS-1:0];
- if (layer == SPREAD_LAYERS) begin:i_layerl
+ wire [`DATA_WIDTH-1:0] pgdata[GROUPS-1:0];
+ if (layer == `LOG_CORES) begin:i_layerl
assign pgsel[0] = gs_i;
assign pgdata[0] = gd_i;
for (group=1; group<GROUPS; group=group+1) begin:g_group
@@ -124,21 +125,21 @@
end else begin:i_layernl
for (group=0; group<GROUPS; group=group+1) begin:g_group
wire gs = spread[layer].gsel[group];
- wire [DATA_WIDTH-1:0] gd = spread[layer].gdata[group];
+ wire [`DATA_WIDTH-1:0] gd = spread[layer].gdata[group];
wire cgs = collect[layer+1].pgsel[group/2];
- wire [DATA_WIDTH-1:0] cgd = collect[layer+1].pgdata[group/2];
+ wire [`DATA_WIDTH-1:0] cgd = collect[layer+1].pgdata[group/2];
assign pgsel[group] = cgs | gs;
assign pgdata[group] = cgs ? cgd : gd;
end
end
end
- for (core=0; core<CORES; core=core+1) begin:g_core_c
+ for (core=0; core<`CORES; core=core+1) begin:g_core_c
assign postsel[core][addr] = collect[0].pgsel[core];
assign postdata[core][addr] = collect[0].pgdata[core];
end
// sequential write logic
- for (core=0; core<CORES; core=core+1) begin:g_core_w
+ for (core=0; core<`CORES; core=core+1) begin:g_core_w
always @(posedge clk) begin
if (!rst_n) begin
mem[core][addr] <= 0;
@@ -153,9 +154,9 @@
end
// read logic
-for (core=0; core<CORES; core=core+1) begin:g_core_r
- wire [ADDR_WIDTH-1:0] craddr = raddr[core*ADDR_WIDTH +: ADDR_WIDTH];
- assign rdata[core*DATA_WIDTH +: DATA_WIDTH] = mem[core][craddr];
+for (core=0; core<`CORES; core=core+1) begin:g_core_r
+ wire [`ADDR_WIDTH-1:0] craddr = raddr[core*`ADDR_WIDTH +: `ADDR_WIDTH];
+ assign rdata[core*`DATA_WIDTH +: `DATA_WIDTH] = mem[core][craddr];
end
endgenerate
diff --git a/verilog/rtl/pin_compress.v b/verilog/rtl/pin_compress.v
index b508527..e202070 100644
--- a/verilog/rtl/pin_compress.v
+++ b/verilog/rtl/pin_compress.v
@@ -13,23 +13,23 @@
result = 0000000000011011
*/
-module pin_compress #(parameter WIDTH=16) (
- input [WIDTH-1:0] data,
- input [WIDTH-1:0] mask,
- output [WIDTH-1:0] result
+module pin_compress (
+ input [`IO_PINS-1:0] data,
+ input [`IO_PINS-1:0] mask,
+ output [`IO_PINS-1:0] result
);
generate genvar layer;
- for (layer=0; layer<WIDTH; layer=layer+1) begin:comp
- wire [WIDTH-1:0] sd;
+ for (layer=0; layer<`IO_PINS; layer=layer+1) begin:comp
+ wire [`IO_PINS-1:0] sd;
if (layer == 0) begin:i_first
- assign sd = {{(WIDTH-1){1'b0}}, data[WIDTH-1] & mask[WIDTH-1]};
+ assign sd = {{(`IO_PINS-1){1'b0}}, data[`IO_PINS-1] & mask[`IO_PINS-1]};
end else begin:i_nfirst
- wire [WIDTH-1:0] sdp = comp[layer-1].sd;
- assign sd = mask[WIDTH-1-layer] ? {sdp[WIDTH-2:0], data[WIDTH-1-layer]} : sdp;
+ wire [`IO_PINS-1:0] sdp = comp[layer-1].sd;
+ assign sd = mask[`IO_PINS-1-layer] ? {sdp[`IO_PINS-2:0], data[`IO_PINS-1-layer]} : sdp;
end
end
- assign result = comp[WIDTH-1].sd;
+ assign result = comp[`IO_PINS-1].sd;
endgenerate
endmodule
diff --git a/verilog/rtl/pin_decompress.v b/verilog/rtl/pin_decompress.v
index bea24eb..1d81b8f 100644
--- a/verilog/rtl/pin_decompress.v
+++ b/verilog/rtl/pin_decompress.v
@@ -13,19 +13,19 @@
result = 0000000100000101
*/
-module pin_decompress #(parameter WIDTH=16) (
- input [WIDTH-1:0] data,
- input [WIDTH-1:0] mask,
- output [WIDTH-1:0] result
+module pin_decompress (
+ input [`IO_PINS-1:0] data,
+ input [`IO_PINS-1:0] mask,
+ output [`IO_PINS-1:0] result
);
generate genvar layer;
- for (layer=0; layer<WIDTH; layer=layer+1) begin:decomp
- wire [WIDTH-1:0] sd;
+ for (layer=0; layer<`IO_PINS; layer=layer+1) begin:decomp
+ wire [`IO_PINS-1:0] sd;
if (layer == 0) begin:i_first
assign sd = data;
end else begin:i_nfirst
- wire [WIDTH-1:0] sdp = decomp[layer-1].sd;
+ wire [`IO_PINS-1:0] sdp = decomp[layer-1].sd;
assign sd = mask[layer-1] ? sdp >> 1 : sdp;
end
assign result[layer] = mask[layer] & sd[0];
diff --git a/verilog/rtl/prng.v b/verilog/rtl/prng.v
index 30d647a..0406b46 100644
--- a/verilog/rtl/prng.v
+++ b/verilog/rtl/prng.v
@@ -6,27 +6,27 @@
/*
Pseudorandom number generator using a Fibonacci-style XNOR linear feedback shift register
-STATE_BITS = number of bits for prng state
-POLYNOMIAL = bit mask used for feedback, should be chosen so that the prng repeats ifself after 2^(STATE_BITS-1) cycles
+`PRNG_STATE_BITS = number of bits for prng state
+POLYNOMIAL = bit mask used for feedback, should be chosen so that the prng repeats itself after 2^(`PRNG_STATE_BITS-1) cycles
STATE_INIT = used to seed the prng on reset
-OUTPUT_BITS = number of bits shifted out every clock cycle
+`DATA_WIDTH = number of bits shifted out every clock cycle
*/
-module prng #(parameter STATE_BITS = 4, POLYNOMIAL = 4'b1001, STATE_INIT = 4'b0000, OUTPUT_BITS = 2) (
+module prng #(parameter POLYNOMIAL = 4'b1001, STATE_INIT = 4'b0000) (
input clk,
input rst_n,
input entropy, // optional external entropy for more randomness
- output [OUTPUT_BITS-1:0] random
+ output [`DATA_WIDTH-1:0] random
);
-localparam SCRAMBLE_CYCLES = STATE_BITS;
-reg [STATE_BITS-1:0] state;
+localparam SCRAMBLE_CYCLES = `PRNG_STATE_BITS;
+reg [`PRNG_STATE_BITS-1:0] state;
generate genvar shift;
-// shift register for generating next OUTPUT_BITS states
-for (shift=0; shift<OUTPUT_BITS; shift=shift+1) begin:g_shift
- wire [STATE_BITS-1:0] prev_state;
+// shift register for generating next `DATA_WIDTH states
+for (shift=0; shift<`DATA_WIDTH; shift=shift+1) begin:g_shift
+ wire [`PRNG_STATE_BITS-1:0] prev_state;
wire feedback;
if (shift == 0) begin:i_first
assign prev_state = state;
@@ -35,25 +35,25 @@
assign prev_state = g_shift[shift-1].new_state;
assign feedback = ^(prev_state & POLYNOMIAL);
end
- wire [STATE_BITS-1:0] new_state = {prev_state[STATE_BITS-2:0], ~feedback};
- assign random[OUTPUT_BITS-shift-1] = prev_state[STATE_BITS-1];
+ wire [`PRNG_STATE_BITS-1:0] new_state = {prev_state[`PRNG_STATE_BITS-2:0], ~feedback};
+ assign random[`DATA_WIDTH-shift-1] = prev_state[`PRNG_STATE_BITS-1];
end
-wire [STATE_BITS-1:0] final_state = g_shift[OUTPUT_BITS-1].new_state;
+wire [`PRNG_STATE_BITS-1:0] final_state = g_shift[`DATA_WIDTH-1].new_state;
// reuse the same shift register to shift out a couple of bits in the beginning so that
// we can use a very simple seed without affecting the quality of the first few cycles
// (this happens at synth time, so it's practically free)
for (shift=0; shift<SCRAMBLE_CYCLES; shift=shift+1) begin:g_scramble
- wire [STATE_BITS-1:0] prev_state;
+ wire [`PRNG_STATE_BITS-1:0] prev_state;
if (shift == 0) begin:i_first
assign prev_state = STATE_INIT;
end else begin:i_nfirst
assign prev_state = g_scramble[shift-1].new_state;
end
wire feedback = ^(prev_state & POLYNOMIAL);
- wire [STATE_BITS-1:0] new_state = {prev_state[STATE_BITS-2:0], ~feedback};
+ wire [`PRNG_STATE_BITS-1:0] new_state = {prev_state[`PRNG_STATE_BITS-2:0], ~feedback};
end
-wire [STATE_BITS-1:0] scrambled_init = g_scramble[SCRAMBLE_CYCLES-1].new_state;
+wire [`PRNG_STATE_BITS-1:0] scrambled_init = g_scramble[SCRAMBLE_CYCLES-1].new_state;
endgenerate
diff --git a/verilog/rtl/prng_wrap.v b/verilog/rtl/prng_wrap.v
index 9a635d3..45f1543 100644
--- a/verilog/rtl/prng_wrap.v
+++ b/verilog/rtl/prng_wrap.v
@@ -9,14 +9,13 @@
Different choices of 0 <= INDEX < 256 generate independent prng's. For even more, the table below should be extended.
*/
-module prng_wrap #(parameter INDEX = 0, OUTPUT_BITS = 16) (
+module prng_wrap #(parameter INDEX = 0) (
input clk,
input rst_n,
input entropy,
- output [OUTPUT_BITS-1:0] random
+ output [`DATA_WIDTH-1:0] random
);
-localparam STATE_BITS = 32;
localparam POLY_ARRAY_LEN = 256;
localparam POLY_ARRAY = {
32'h80000062, 32'h80000092, 32'h80000106, 32'h80000114, 32'h80000412, 32'h80000414, 32'h80000806, 32'h80000850,
@@ -54,10 +53,8 @@
};
prng #(
- .STATE_BITS(STATE_BITS),
- .POLYNOMIAL(POLY_ARRAY[(POLY_ARRAY_LEN-1-(INDEX % POLY_ARRAY_LEN))*STATE_BITS +: STATE_BITS]),
- .STATE_INIT(INDEX),
- .OUTPUT_BITS(OUTPUT_BITS)
+ .POLYNOMIAL(POLY_ARRAY[(POLY_ARRAY_LEN-1-(INDEX % POLY_ARRAY_LEN))*`PRNG_STATE_BITS +: `PRNG_STATE_BITS]),
+ .STATE_INIT(INDEX)
) prng_inst (
.clk(clk),
.rst_n(rst_n),
diff --git a/verilog/rtl/prog_mux.v b/verilog/rtl/prog_mux.v
index b358bca..a013a11 100644
--- a/verilog/rtl/prog_mux.v
+++ b/verilog/rtl/prog_mux.v
@@ -7,22 +7,22 @@
Fully combinatorial programming multiplexer
*/
-module prog_mux #(parameter CORES=8, LOG_CORES=3, PC_WIDTH=8, INSTR_WIDTH=32) (
+module prog_mux (
input we,
- input [LOG_CORES-1:0] sel,
- input [PC_WIDTH-1:0] waddr,
- input [INSTR_WIDTH-1:0] wdata,
- output [CORES-1:0] cwe,
- output [CORES*PC_WIDTH-1:0] cwaddr,
- output [CORES*INSTR_WIDTH-1:0] cwdata
+ input [`LOG_CORES-1:0] sel,
+ input [`PC_WIDTH-1:0] waddr,
+ input [`INSTR_WIDTH-1:0] wdata,
+ output [`CORES-1:0] cwe,
+ output [`CORES*`PC_WIDTH-1:0] cwaddr,
+ output [`CORES*`INSTR_WIDTH-1:0] cwdata
);
generate genvar core;
-for (core=0; core<CORES; core=core+1) begin:g_core
+for (core=0; core<`CORES; core=core+1) begin:g_core
wire active = we && sel==core;
assign cwe[core] = active;
- assign cwaddr[core*PC_WIDTH +: PC_WIDTH] = {(PC_WIDTH){active}} & waddr;
- assign cwdata[core*INSTR_WIDTH +: INSTR_WIDTH] = {(INSTR_WIDTH){active}} & wdata;
+ assign cwaddr[core*`PC_WIDTH +: `PC_WIDTH] = {(`PC_WIDTH){active}} & waddr;
+ assign cwdata[core*`INSTR_WIDTH +: `INSTR_WIDTH] = {(`INSTR_WIDTH){active}} & wdata;
end
endgenerate
diff --git a/verilog/rtl/user_project.v b/verilog/rtl/user_project.v
index 81e5d2e..5763c7f 100644
--- a/verilog/rtl/user_project.v
+++ b/verilog/rtl/user_project.v
@@ -35,21 +35,7 @@
output [2:0] irq
);
-mcu #(
- .CORES(4),
- .LOG_CORES(2),
- .MEM_DEPTH(32),
- .DATA_WIDTH(16),
- .PC_WIDTH(6),
- .ADDR_WIDTH(5),
- .INSTR_WIDTH(32),
- .INSTR_DEPTH(16),
- .IO_PINS(16),
- .IO_PADS(`MPRJ_IO_PADS),
- .FIRST_PAD(12),
- .LOGIC_PROBES(128),
- .WB_WIDTH(32)
-) mcu_inst (
+mcu mcu_inst (
.wb_clk_i(wb_clk_i),
.wb_rst_i(wb_rst_i),
.wbs_stb_i(wbs_stb_i),
diff --git a/verilog/rtl/wb_mux.v b/verilog/rtl/wb_mux.v
index ccf755b..f014583 100644
--- a/verilog/rtl/wb_mux.v
+++ b/verilog/rtl/wb_mux.v
@@ -14,9 +14,6 @@
- if WE_I is negated, a read operation is performed using ADR_I with the result in DAT_O
- all other ports are unused
-The wishbone bus width (WB_WIDTH below) is fixed to 32 by the platform and our code
-assumes that all other widths fit into it.
-
This module (like other muxes in this project) is fully combinatorial.
Registered logic happens in connected cpu cores, instruction memories and the entropy pool.
Therefore CLK_I and RST_I are not directly used here. However, it is used in the
@@ -24,41 +21,34 @@
connected to the other interfaces.
*/
-module wb_mux #(parameter
- LOG_CORES=3,
- PC_WIDTH=8,
- INSTR_WIDTH=32,
- DATA_WIDTH=16,
- IO_PINS=16,
- WB_WIDTH=32
-) (
+module wb_mux (
// wishbone interface
//input wb_clk_i, // wb clock
//input wb_rst_i, // wb reset, active high
input wbs_stb_i, // wb strobe signal
input wbs_cyc_i, // wb cycle signal, sending on the bus requires wbs_stb_i && wbs_cyc_i
input wbs_we_i, // wb write enable signal, 0=input 1=output
- input [WB_WIDTH-1:0] wbs_adr_i, // wb address
- input [WB_WIDTH-1:0] wbs_dat_i, // wb input data
+ input [`WB_WIDTH-1:0] wbs_adr_i, // wb address
+ input [`WB_WIDTH-1:0] wbs_dat_i, // wb input data
output wbs_ack_o, // wb acknowledge
- output [WB_WIDTH-1:0] wbs_dat_o, // wb output data
+ output [`WB_WIDTH-1:0] wbs_dat_o, // wb output data
// programmer interface
output prog_we,
- output [LOG_CORES-1:0] prog_sel,
- output [PC_WIDTH-1:0] prog_waddr,
- output [INSTR_WIDTH-1:0] prog_wdata,
+ output [`LOG_CORES-1:0] prog_sel,
+ output [`PC_WIDTH-1:0] prog_waddr,
+ output [`INSTR_WIDTH-1:0] prog_wdata,
// pads & soft reset interface
output pads_we,
output pads_waddr,
- output [IO_PINS-1:0] pads_wdata,
+ output [`IO_PINS-1:0] pads_wdata,
// debugger interface
- output [LOG_CORES-1:0] debug_sel,
+ output [`LOG_CORES-1:0] debug_sel,
output [4:0] debug_addr,
output debug_we,
- output [DATA_WIDTH-1:0] debug_wdata,
- input [DATA_WIDTH-1:0] debug_rdata,
+ output [`DATA_WIDTH-1:0] debug_wdata,
+ input [`DATA_WIDTH-1:0] debug_rdata,
// entropy pool interface
- output[WB_WIDTH-1:0] entropy_word
+ output[`WB_WIDTH-1:0] entropy_word
);
// minimal wishbone logic
@@ -66,7 +56,7 @@
assign wbs_ack_o = valid;
// interface selection
-wire[1:0] interface = wbs_adr_i[WB_WIDTH-2 +: 2];
+wire[1:0] interface = wbs_adr_i[`WB_WIDTH-2 +: 2];
wire if_prog = valid && interface == 2'b00;
wire if_pads = valid && interface == 2'b01;
wire if_debug = valid && interface == 2'b10;
@@ -74,16 +64,16 @@
// programmer interface
assign prog_we = if_prog && wbs_we_i;
-assign {prog_sel, prog_waddr} = prog_we ? wbs_adr_i[WB_WIDTH-3:0] : 0;
+assign {prog_sel, prog_waddr} = prog_we ? wbs_adr_i[`WB_WIDTH-3:0] : 0;
assign prog_wdata = prog_we ? wbs_dat_i : 0;
// pads interface
assign pads_we = if_pads && wbs_we_i;
-assign pads_waddr = pads_we ? wbs_adr_i[WB_WIDTH-3:0] : 0;
+assign pads_waddr = pads_we ? wbs_adr_i[`WB_WIDTH-3:0] : 0;
assign pads_wdata = pads_we ? wbs_dat_i : 0;
// debugger interface, input
-assign {debug_sel, debug_addr} = if_debug ? wbs_adr_i[WB_WIDTH-3:0] : 0;
+assign {debug_sel, debug_addr} = if_debug ? wbs_adr_i[`WB_WIDTH-3:0] : 0;
assign debug_we = if_debug && wbs_we_i;
assign debug_wdata = debug_we ? wbs_dat_i : 0;