Downgrade IP to Verilog 2005 to work with Icarus and Yosys

Signed-off-by: Matthew Ballance <matt.ballance@gmail.com>
diff --git a/dv/bringup/Makefile b/dv/bringup/Makefile
index ad7dbab..edbc363 100644
--- a/dv/bringup/Makefile
+++ b/dv/bringup/Makefile
@@ -30,8 +30,6 @@
 export PYTHONPATH
 PATH := $(PACKAGES_DIR)/python/bin:$(PATH)
 export PATH
-COCOTB_PREFIX := $(shell $(PACKAGES_DIR)/python/bin/cocotb-config --prefix)
-VPI_LIBS += $(COCOTB_PREFIX)/cocotb/libs/libcocotbvpi_modelsim.so
 
 
 VLSIM_CLKSPEC += -clkspec clk=10ns
@@ -47,6 +45,9 @@
 
 clean ::
 	echo "TODO"
+	for sim in $(BRINGUP_DIR)/../common/*.mk; do \
+		$(MAKE) -f $$sim clean; \
+	done
 	
 
 include $(BRINGUP_DIR)/../common/$(SIM).mk
diff --git a/dv/bringup/bringup_tb.sv b/dv/bringup/bringup_tb.sv
index d28b0ee..39e33f6 100644
--- a/dv/bringup/bringup_tb.sv
+++ b/dv/bringup/bringup_tb.sv
@@ -1,6 +1,9 @@
 /****************************************************************************
  * bringup_tb.sv
  ****************************************************************************/
+`ifdef IVERILOG
+`timescale 1ns/1ns
+`endif
 
 `ifndef MPRJ_IO_PADS
 	`define MPRJ_IO_PADS 38
@@ -14,13 +17,37 @@
 	
 `ifdef HAVE_HDL_CLOCKGEN
 	reg clk_r = 0;
-	assign clk_r = #5ns ~clk_r;
+	initial begin
+		forever begin
+			#10;
+			clk_r <= ~clk_r;
+		end
+	end
+	assign clk = clk_r;
 `endif
 
+`ifdef IVERILOG
+		// Icarus requires help with timeout and wave capture:
+		// +dumpvars enables the VCD dump, +timeout=<N> (default 1000) ends the run
+		reg[31:0]               timeout;
+		initial begin
+			if ($test$plusargs("dumpvars")) begin
+				$dumpfile("simx.vcd");
+				$dumpvars(0, bringup_tb);
+			end
+			if (!$value$plusargs("timeout=%d", timeout)) begin
+				timeout=1000;
+			end
+			$display("--> Wait for timeout");
+			# timeout;
+			$display("<-- Wait for timeout");
+			$finish();
+		end		
+`endif
 	
 	wire clock = clk; 
 	reg[15:0]			reset_cnt;
-	reg[15:0]			reset_key /*verilator public*/;
+	reg[15:0]			reset_key /*verilator public*/ = 0;
 	
 	always @(posedge clock) begin
 		if (reset_key != 16'ha520) begin
diff --git a/dv/bringup/python/bringup_tests/__pycache__/mgmt_mem_access.cpython-36.pyc b/dv/bringup/python/bringup_tests/__pycache__/mgmt_mem_access.cpython-36.pyc
index 3ed6cd8..ca45710 100644
--- a/dv/bringup/python/bringup_tests/__pycache__/mgmt_mem_access.cpython-36.pyc
+++ b/dv/bringup/python/bringup_tests/__pycache__/mgmt_mem_access.cpython-36.pyc
Binary files differ
diff --git a/dv/bringup/python/bringup_tests/la_utils.py b/dv/bringup/python/bringup_tests/la_utils.py
index 90976e8..e3c63b3 100644
--- a/dv/bringup/python/bringup_tests/la_utils.py
+++ b/dv/bringup/python/bringup_tests/la_utils.py
@@ -7,13 +7,14 @@
 class LaUtils(object):
     CLOCK_IDX = 127
     RESET_IDX = 126
+    CORE_RESET_IDX = 125
     
     def __init__(self, la_bfm):
         self.la_bfm = la_bfm
         
     async def set_dut_clock_control(self, en):
         # First, set reset high and clock low
-        await self.la_bfm.set_bits(LaUtils.RESET_IDX, 1, 1)
+        await self.la_bfm.set_bits(LaUtils.RESET_IDX, 0, 1)
         await self.la_bfm.set_bits(LaUtils.CLOCK_IDX, 0, 1)
         
         if en:
@@ -25,10 +26,15 @@
             await self.la_bfm.set_oen(LaUtils.RESET_IDX, 1, 1)
             await self.la_bfm.set_oen(LaUtils.CLOCK_IDX, 1, 1)
         
+    async def set_core_reset(self, en):
+        """Drive the core-reset LA bit: 0 when `en` is truthy, otherwise 1."""
+        # fwpayload.v inverts la_data_in[125], so writing 0 asserts core_rst
+        level = 0 if en else 1
+        await self.la_bfm.set_bits(LaUtils.CORE_RESET_IDX, level, 1)
         
     async def reset_cycle_dut(self, cycles=10):
         # Set reset high
-        await self.la_bfm.set_bits(LaUtils.RESET_IDX, 1, 1)
+        await self.la_bfm.set_bits(LaUtils.RESET_IDX, 0, 1)
         await self.la_bfm.propagate()
         
         # Clock 
@@ -36,7 +42,7 @@
             await self.clock_dut()
             
         # Set reset low
-        await self.la_bfm.set_bits(LaUtils.RESET_IDX, 0, 1)
+        await self.la_bfm.set_bits(LaUtils.RESET_IDX, 1, 1)
         await self.clock_dut()
         
         pass
@@ -46,5 +52,6 @@
         await self.la_bfm.propagate()
         await self.la_bfm.set_bits(LaUtils.CLOCK_IDX, 0, 1)
         await self.la_bfm.propagate()
+
         
         
\ No newline at end of file
diff --git a/dv/bringup/python/bringup_tests/mgmt_mem_access.py b/dv/bringup/python/bringup_tests/mgmt_mem_access.py
index fbf7a38..1937cc8 100644
--- a/dv/bringup/python/bringup_tests/mgmt_mem_access.py
+++ b/dv/bringup/python/bringup_tests/mgmt_mem_access.py
@@ -8,6 +8,7 @@
 from wishbone_bfms.wb_initiator_bfm import WbInitiatorBfm
 from logic_analyzer_bfms.la_initiator_bfm import LaInitiatorBfm
 from random import Random
+from bringup_tests.la_utils import LaUtils
 
 
 @cocotb.test()
@@ -16,10 +17,22 @@
     Hold the payload DUT in reset via the logic analyzer
     Meanwhile, test that the management interface can access memory
     """
+    print("--> pybfms.init()")
     await pybfms.init()
+    print("<-- pybfms.init()")
     u_wb : WbInitiatorBfm = pybfms.find_bfm(".*u_wb")
     u_la : LaInitiatorBfm = pybfms.find_bfm(".*u_la")
     
+    print("u_wb=" + str(u_wb))
+    print("u_la=" + str(u_la))
+    
+    # Bring the system out of reset
+    la_utils = LaUtils(u_la)
+    print("--> reset_cycle_dut")
+    await la_utils.reset_cycle_dut(100)
+    print("<-- reset_cycle_dut")
+    await la_utils.set_dut_clock_control(False)
+    
     # Test that we can write and read dut 'ROM'
     wr_data = []
     r = Random(0)
@@ -36,4 +49,13 @@
             print("PASS: " + hex(0x80000000+4*i))
         else:
             print("FAIL: " + hex(0x80000000+4*i) + " expect " + hex(wr_data[i]) + " receive " + hex(data))
-            
+
+    # Release the processor from reset
+    await la_utils.set_core_reset(True)
+    for i in range(10):
+        await u_la.propagate()
+    await la_utils.set_core_reset(False)
+
+    for i in range(1000):
+        await u_la.propagate()
+        
\ No newline at end of file
diff --git a/dv/common/icarus.mk b/dv/common/icarus.mk
index f0fc7c5..d9b319c 100644
--- a/dv/common/icarus.mk
+++ b/dv/common/icarus.mk
@@ -17,25 +17,42 @@
 PACKAGES_DIR := $(abspath $(COMMON_DIR)/../../packages)
 VLSIM := $(PACKAGES_DIR)/python/bin/vlsim
 PYBFMS_VPI_LIB := $(shell $(PACKAGES_DIR)/python/bin/pybfms lib)
+COCOTB_PREFIX := $(shell $(PACKAGES_DIR)/python/bin/cocotb-config --prefix)
+TIMEOUT?=1ms
+
+DEFINES += IVERILOG HAVE_HDL_CLOCKGEN
+
+ifeq (ms,$(findstring ms,$(TIMEOUT)))
+  timeout := $(shell expr $(subst ms,,$(TIMEOUT)) '*' 1000000)
+else
+  ifeq (us,$(findstring us,$(TIMEOUT)))
+    timeout := $(shell expr $(subst us,,$(TIMEOUT)) '*' 1000)
+  else
+    ifeq (ns,$(findstring ns,$(TIMEOUT)))
+      timeout := $(shell expr $(subst ns,,$(TIMEOUT)) '*' 1)
+    else
+      ifeq (s,$(findstring s,$(TIMEOUT)))
+        timeout := $(shell expr $(subst s,,$(TIMEOUT)) '*' 1000000000)
+      else
+        $(error Unsupported TIMEOUT '$(TIMEOUT)' - expected a number followed by ns/us/ms/s)
+      endif # s
+    endif # ns
+  endif # us
+endif # ms - timeout now holds TIMEOUT scaled to ns (computed once, at parse time)
+
+SIMV_ARGS += +timeout=$(timeout)
 
 SIMV=simv.vvp
 ifneq (,$(DEBUG))
-VLSIM_OPTIONS += --trace-fst
-SIMV_ARGS += +vlsim.trace
-SIMV := simv.debug
-else
-SIMV := simv.ndebug
+SIMV_ARGS += +dumpvars
 endif
 
-# Enable VPI for Verilator
-VLSIM_OPTIONS += --vpi
-VLSIM_OPTIONS += --top-module $(TOP_MODULE)
-
 IVERILOG_OPTIONS += $(foreach inc,$(INCDIRS),-I $(inc))
 IVERILOG_OPTIONS += $(foreach def,$(DEFINES),-D $(def))
-VVP_ARGS += $(foreach vpi,$(VPI_LIBS),-m $(vpi))
+VVP_OPTIONS += $(foreach vpi,$(VPI_LIBS),-m $(vpi))
 
-VPI_LIBS += $(PYBFMS_DPI_LIB)
+VPI_LIBS += $(PYBFMS_VPI_LIB)
+VPI_LIBS += $(COCOTB_PREFIX)/cocotb/libs/libcocotbvpi_icarus.vpl
 
 build : $(SIMV)
 
@@ -43,7 +60,7 @@
 	iverilog -o $@ $(IVERILOG_OPTIONS) $(SRCS) pybfms_gen.v 
 
 run : $(SIMV)
-	vvp $(SIMV) $(VVP_ARGS)
+	vvp $(VVP_OPTIONS) $(SIMV) $(SIMV_ARGS)
 	
 pybfms_gen.v :
 	$(PACKAGES_DIR)/python/bin/pybfms generate \
diff --git a/dv/common/vlsim.mk b/dv/common/vlsim.mk
index 184767b..20c6e77 100644
--- a/dv/common/vlsim.mk
+++ b/dv/common/vlsim.mk
@@ -19,6 +19,7 @@
 PACKAGES_DIR := $(abspath $(COMMON_DIR)/../../packages)
 VLSIM := $(PACKAGES_DIR)/python/bin/vlsim
 PYBFMS_DPI_LIB := $(shell $(PACKAGES_DIR)/python/bin/pybfms lib)
+COCOTB_PREFIX := $(shell $(PACKAGES_DIR)/python/bin/cocotb-config --prefix)
 
 ifneq (,$(DEBUG))
 VLSIM_OPTIONS += --trace-fst
@@ -37,6 +38,7 @@
 SIMV_ARGS += $(foreach vpi,$(VPI_LIBS),+vpi=$(vpi))
 
 DPI_LIBS += $(PYBFMS_DPI_LIB)
+VPI_LIBS += $(COCOTB_PREFIX)/cocotb/libs/libcocotbvpi_verilator.so
 
 build : $(SIMV)
 
diff --git a/verilog/rtl/fwpayload.v b/verilog/rtl/fwpayload.v
index f585e22..da198ef 100644
--- a/verilog/rtl/fwpayload.v
+++ b/verilog/rtl/fwpayload.v
@@ -57,29 +57,35 @@
 	
 	localparam N_TARGETS = 1;
 	localparam TGT_ID_SRAM = 0;
-	wire[31:0]		IC_I_ADR[N_INITIATORS-1:0];
-	wire[31:0]		IC_I_DAT_W[N_INITIATORS-1:0];
-	wire[31:0]		IC_I_DAT_R[N_INITIATORS-1:0];
-	wire			IC_I_CYC[N_INITIATORS-1:0];
-	wire			IC_I_ERR[N_INITIATORS-1:0];
-	wire[3:0]		IC_I_SEL[N_INITIATORS-1:0];
-	wire			IC_I_STB[N_INITIATORS-1:0];
-	wire			IC_I_ACK[N_INITIATORS-1:0];
-	wire			IC_I_WE[N_INITIATORS-1:0];
+	// TBD
+	localparam TGT_ID_SPI  = 1;
+	localparam TGT_ID_UART = 2;
+	localparam TGT_ID_GPIO = 3;
+	wire[32*N_INITIATORS-1:0]		IC_I_ADR;
+	wire[32*N_INITIATORS-1:0]		IC_I_DAT_W;
+	wire[32*N_INITIATORS-1:0]		IC_I_DAT_R;
+	wire[N_INITIATORS-1:0]			IC_I_CYC;
+	wire[N_INITIATORS-1:0]			IC_I_ERR;
+	wire[4*N_INITIATORS-1:0]		IC_I_SEL;
+	wire[N_INITIATORS-1:0]			IC_I_STB;
+	wire[N_INITIATORS-1:0]			IC_I_ACK;
+	wire[N_INITIATORS-1:0]			IC_I_WE;
 	
-	wire[31:0]		IC_T_ADR[N_TARGETS:0];
-	wire[31:0]		IC_T_DAT_W[N_TARGETS:0];
-	wire[31:0]		IC_T_DAT_R[N_TARGETS:0];
-	wire			IC_T_CYC[N_TARGETS:0];
-	wire			IC_T_ERR[N_TARGETS:0];
-	wire[3:0]		IC_T_SEL[N_TARGETS:0];
-	wire			IC_T_STB[N_TARGETS:0];
-	wire			IC_T_ACK[N_TARGETS:0];
-	wire			IC_T_WE[N_TARGETS:0];
-	
+	wire[32*(N_TARGETS+1)-1:0]		IC_T_ADR;
+	wire[32*(N_TARGETS+1)-1:0]		IC_T_DAT_W;
+	wire[32*(N_TARGETS+1)-1:0]		IC_T_DAT_R;
+	wire[N_TARGETS:0]				IC_T_CYC;
+	wire[N_TARGETS:0]				IC_T_ERR;
+	wire[4*(N_TARGETS+1)-1:0]		IC_T_SEL;
+	wire[N_TARGETS:0]				IC_T_STB;
+	wire[N_TARGETS:0]				IC_T_ACK;
+	wire[N_TARGETS:0]				IC_T_WE;
+
+	// Interconnect has a default target to
+	// which unmapped accesses are directed
 	assign IC_T_ACK[N_TARGETS] = 1;
 	assign IC_T_ERR[N_TARGETS] = 1;
-	assign IC_T_DAT_R[N_TARGETS] = 0;
+	assign IC_T_DAT_R[32*N_TARGETS+:32] = 0;
 	
 	// Interconnect
 	wb_interconnect_NxN #(
@@ -120,12 +126,12 @@
 	/****************************************************************
 	 * Connect management interface to port 1 on the interconnect
 	 ****************************************************************/
-	assign IC_I_ADR[INIT_ID_MGMT] = wbs_adr_i;
-	assign IC_I_DAT_W[INIT_ID_MGMT] = wbs_dat_i;
-	assign wbs_dat_o = IC_I_DAT_R[INIT_ID_MGMT];
+	assign IC_I_ADR[32*INIT_ID_MGMT+:32] = wbs_adr_i;
+	assign IC_I_DAT_W[32*INIT_ID_MGMT+:32] = wbs_dat_i;
+	assign wbs_dat_o = IC_I_DAT_R[32*INIT_ID_MGMT+:32];
 	assign IC_I_CYC[INIT_ID_MGMT] = wbs_cyc_i;
 //	assign IC_I_ERR[INIT_ID_MGMT] = //wbs_cyc_i;
-	assign IC_I_SEL[INIT_ID_MGMT] = wbs_sel_i;
+	assign IC_I_SEL[4*INIT_ID_MGMT+:4] = wbs_sel_i;
 	assign IC_I_STB[INIT_ID_MGMT] = wbs_stb_i;
 	assign wbs_ack_o = IC_I_ACK[INIT_ID_MGMT];
 	assign IC_I_WE[INIT_ID_MGMT] = wbs_we_i;
@@ -133,42 +139,38 @@
 	// Clock/reset control
 	// Allow the logic analyzer to take control of clock/reset
 	// Default to using the caravel clock/reset
-//	assign clk = (~la_oen[127]) ? la_data_in[127]: wb_clk_i;
-//	assign rst = (~la_oen[126]) ? la_data_in[126]: wb_rst_i;
-	assign clk = wb_clk_i;
-	assign rst = wb_rst_i;
-	
-	wire[31:0]			iaddr;
-	reg[31:0]			idata;
-	wire				ivalid;
-	wire				iready;
-	wire[31:0]			daddr;
-	wire[31:0]			dwdata;
-	wire[31:0]			dwstb;
-	wire				dwrite;
-	reg[31:0]			drdata;
-	wire				dvalid;
-	wire				dready;
+	assign clk = (~la_oen[127]) ? la_data_in[127]: wb_clk_i;
+	assign rst = (~la_oen[126]) ? ~la_data_in[126]: wb_rst_i;
+	assign core_rst = (~la_oen[125]) ? ~la_data_in[125]: wb_rst_i;
+//	assign clk = wb_clk_i;
+//	assign rst = wb_rst_i;
 	
 	localparam RAM_BITS = 8;
 	localparam ROM_BITS = 8;
 
-	fwrisc_rv32i u_core (
+	fwrisc_rv32i_wb u_core (
 				.clock(clk),
-				.reset(rst),
-		
-				.iaddr(iaddr),
-				.idata(idata),
-				.ivalid(ivalid),
-				.iready(iready),
-		
-				.dvalid(dvalid),
-				.daddr(daddr),
-				.dwdata(dwdata),
-				.dwstb(dwstb),
-				.dwrite(dwrite),
-				.drdata(drdata),
-				.dready(dready)
+				.reset(core_rst),
+
+				.wbi_adr_o(IC_I_ADR[32*INIT_ID_CORE_I+:32]),
+				.wbi_dat_o(IC_I_DAT_W[32*INIT_ID_CORE_I+:32]),
+				.wbi_dat_i(IC_I_DAT_R[32*INIT_ID_CORE_I+:32]),
+				.wbi_cyc_o(IC_I_CYC[INIT_ID_CORE_I]),
+				.wbi_err_i(IC_I_ERR[INIT_ID_CORE_I]),
+				.wbi_sel_o(IC_I_SEL[4*INIT_ID_CORE_I+:4]),
+				.wbi_stb_o(IC_I_STB[INIT_ID_CORE_I]),
+				.wbi_ack_i(IC_I_ACK[INIT_ID_CORE_I]),
+				.wbi_we_o(IC_I_WE[INIT_ID_CORE_I]),
+				
+				.wbd_adr_o(IC_I_ADR[32*INIT_ID_CORE_D+:32]),
+				.wbd_dat_o(IC_I_DAT_W[32*INIT_ID_CORE_D+:32]),
+				.wbd_dat_i(IC_I_DAT_R[32*INIT_ID_CORE_D+:32]),
+				.wbd_cyc_o(IC_I_CYC[INIT_ID_CORE_D]),
+				.wbd_err_i(IC_I_ERR[INIT_ID_CORE_D]),
+				.wbd_sel_o(IC_I_SEL[4*INIT_ID_CORE_D+:4]),
+				.wbd_stb_o(IC_I_STB[INIT_ID_CORE_D]),
+				.wbd_ack_i(IC_I_ACK[INIT_ID_CORE_D]),
+				.wbd_we_o(IC_I_WE[INIT_ID_CORE_D])
 			);
 
 	
@@ -192,7 +194,7 @@
 	
 	assign la_data_out[REG_PROBE_OFF+31:REG_PROBE_OFF] = reg_probe;
 	assign la_data_out[PC_PROBE_OFF+31:PC_PROBE_OFF] = pc_probe;
-	assign la_data_out[IVALID_OFF] = u_core.u_core.instr_complete;
+//	assign la_data_out[IVALID_OFF] = u_core.u_core.instr_complete;
 
 	// 640 pixels
 	// 16x16?
@@ -212,75 +214,22 @@
 	// ROM: 'h8000_0000
 	// RAM: 'h8000_8000
 	// LED: 'hC000_0000
-	reg[7:0]			ram_0[(1 << RAM_BITS)-1:0]; // 16k ram
-	reg[7:0]			ram_1[(1 << RAM_BITS)-1:0]; // 16k ram
-	reg[7:0]			ram_2[(1 << RAM_BITS)-1:0]; // 16k ram
-	reg[7:0]			ram_3[(1 << RAM_BITS)-1:0]; // 16k ram
-	reg[31:0]			rom[(1 << ROM_BITS)-1:0];   // 16k rom
-//	reg[31:0]			led;
-	reg[31:0]			tx_r;
-	reg					iready_r, dready_r;
 	
-	assign iready = iready_r;
-	assign dready = dready_r;
-
 //	initial begin
 //		$readmemh("rom.hex", rom);
 //	end
 	
-	reg[31:0]			addr_d;
-	reg[31:0]			addr_i;
-	reg[31:0]			led;
-
-	assign io_out[31:0] = led;
-	
-	always @(posedge clk) begin
-		addr_d <= daddr;
-		addr_i <= iaddr;
-
-		if (dvalid && dready && dwrite) begin
-			if (daddr[31:28] == 4'h8 && 
-					daddr[15:12] == 4'h8) begin
-				//				$display("Write to RAM: 'h%08h", daddr[13:2]);
-				if (dwstb[0]) ram_0[daddr[RAM_BITS+1:2]] <= dwdata[7:0];
-				if (dwstb[1]) ram_1[daddr[RAM_BITS+1:2]] <= dwdata[15:8];
-				if (dwstb[2]) ram_2[daddr[RAM_BITS+1:2]] <= dwdata[23:16];
-				if (dwstb[3]) ram_3[daddr[RAM_BITS+1:2]] <= dwdata[31:24];
-			end else if (daddr[31:28] == 4'hc) begin
-				if (daddr[3:2] == 4'h0) begin
-					led <= dwdata;
-				end else if (daddr[3:2] == 4'h1) begin
-					tx_r <= dwdata;
-				end
-			end
-		end
-	end
-	
-	always @(posedge clk) begin
-		// Prefer data access
-		if (dvalid) begin
-			dready_r <= 1;
-			iready_r <= 0;
-		end else if (ivalid) begin
-			iready_r <= 1;
-			dready_r <= 0;
-		end else begin
-			iready_r <= 0;
-			dready_r <= 0;
-		end
-	end
-
 	/****************************************************************
 	 * Simple WB to SRAM bridge
 	 ****************************************************************/
 	reg[1:0] wb_bridge_state = 0;
-	wire[31:0] sram_adr_i = IC_T_ADR[TGT_ID_SRAM];
-	wire[31:0] sram_dat_w = IC_T_DAT_W[TGT_ID_SRAM];
+	wire[31:0] sram_adr_i = IC_T_ADR[32*TGT_ID_SRAM+:32];
+	wire[31:0] sram_dat_w = IC_T_DAT_W[32*TGT_ID_SRAM+:32];
 	wire[31:0] sram_dat_r;
-	assign IC_T_DAT_R[TGT_ID_SRAM] = sram_dat_r;
+	assign IC_T_DAT_R[32*TGT_ID_SRAM+:32] = sram_dat_r;
 	wire       sram_cyc_i = IC_T_CYC[TGT_ID_SRAM];
 	assign     IC_T_ERR[TGT_ID_SRAM] = 0;
-	wire[3:0]  sram_sel_i = IC_T_SEL[TGT_ID_SRAM];
+	wire[3:0]  sram_sel_i = IC_T_SEL[4*TGT_ID_SRAM+:4];
 	wire       sram_stb_i = IC_T_STB[TGT_ID_SRAM];
 	wire       sram_ack_o;
 	assign     IC_T_ACK[TGT_ID_SRAM] = sram_ack_o;
@@ -319,58 +268,13 @@
 			.a_sel(sram_sel_i));
 	assign sram_ack_o = (wb_bridge_state == 3);
 	
-
-	wire [31:0] storage_mgmt_addr    = wbs_adr_i; // [ADDRESS_WIDTH+(DATA_WIDTH/32):(DATA_WIDTH/32)+1];
-	wire storage_mgmt_rd_en          = (wbs_cyc_i & wbs_stb_i & !wbs_we_i);
-	wire storage_mgmt_wr_en          = (wbs_cyc_i & wbs_stb_i & wbs_we_i);
-	wire [3:0] storage_mgmt_byte_en  = wbs_sel_i;
-	wire [31:0] storage_mgmt_wr_dat  = wbs_dat_i;
-	wire [31:0] storage_mgmt_rd_dat;
-	
-	assign wbs_dat_o = storage_mgmt_rd_dat;
-	
-	assign wbs_ack_o = (wb_bridge_state == 2);
-
-	// TODO: allow to read 'ram' too
-	assign storage_mgmt_rd_dat = rom[wbs_adr_i[ROM_BITS+1:2]];
-	
-	always @(posedge wb_clk_i) begin
-		if (storage_mgmt_wr_en) begin
-			rom[storage_mgmt_addr[13:2]] <= storage_mgmt_wr_dat;
-		end
-	end	
-	
-	always @* begin
-		if (addr_d[31:28] == 4'h8 && addr_d[15:12] == 4'h8) begin 
-			drdata = {
-					ram_3[addr_d[RAM_BITS+1:2]],
-					ram_2[addr_d[RAM_BITS+1:2]],
-					ram_1[addr_d[RAM_BITS+1:2]],
-					ram_0[addr_d[RAM_BITS+1:2]]
-				};
-		end else begin
-			drdata = rom[addr_d[13:2]];
-		end
-		
-		if (addr_i[31:28] == 4'h8 && addr_i[15:12] == 4'h8) begin
-			idata = {
-					ram_3[addr_i[RAM_BITS+1:2]],
-					ram_2[addr_i[RAM_BITS+1:2]],
-					ram_1[addr_i[RAM_BITS+1:2]],
-					ram_0[addr_i[RAM_BITS+1:2]]
-				};
-		end else begin
-			idata = rom[addr_i[ROM_BITS+2:2]];
-		end
-	end	
-	
 	// Some form of general I/O
 	// - GPIO?
 	// - 
 	
 	// Some form of specific I/O
-	// - UART?
-	// - SPI?
+	// - UART
+	// - SPI
 	
 	
 endmodule
diff --git a/verilog/rtl/wb_interconnect_NxN.sv b/verilog/rtl/wb_interconnect_NxN.sv
index b87cf56..f3b0ce3 100644
--- a/verilog/rtl/wb_interconnect_NxN.sv
+++ b/verilog/rtl/wb_interconnect_NxN.sv
@@ -8,55 +8,58 @@
  * TODO: Add module documentation
  */
 module wb_interconnect_NxN #(
-		parameter int 									WB_ADDR_WIDTH=32,
-		parameter int unsigned							WB_DATA_WIDTH=32,
-		parameter int unsigned							N_INITIATORS=1,
-		parameter int unsigned							N_TARGETS=1,
-		parameter bit [N_INITIATORS*WB_ADDR_WIDTH-1:0] 	I_ADR_MASK = {
+		parameter 									WB_ADDR_WIDTH=32,
+		parameter 									WB_DATA_WIDTH=32,
+		parameter 									N_INITIATORS=1,
+		parameter 									N_TARGETS=1,
+		parameter [N_INITIATORS*WB_ADDR_WIDTH-1:0] 	I_ADR_MASK = {
 			{8'hFF, {24{1'b0}} }
 		},
 		parameter [N_TARGETS*WB_ADDR_WIDTH-1:0] 		T_ADR = {
 			{ 32'h2800_0000 }
 		}
 		) (
-		input							clk,
-		input							rst,
-		input[WB_ADDR_WIDTH-1:0]		ADR[N_INITIATORS-1:0],
-		input[WB_DATA_WIDTH-1:0]		DAT_W[N_INITIATORS-1:0],
-		output[WB_DATA_WIDTH-1:0]		DAT_R[N_INITIATORS-1:0],
-		input							CYC[N_INITIATORS-1:0],
-		output							ERR[N_INITIATORS-1:0],
-		input[(WB_DATA_WIDTH/8)-1:0]	SEL[N_INITIATORS-1:0],
-		input							STB[N_INITIATORS-1:0],
-		output							ACK[N_INITIATORS-1:0],
-		input							WE[N_INITIATORS-1:0],
+		input										clk,
+		input										rst,
+		input[WB_ADDR_WIDTH*N_INITIATORS-1:0]		ADR,
+		input[WB_DATA_WIDTH*N_INITIATORS-1:0]		DAT_W,
+		output[WB_DATA_WIDTH*N_INITIATORS-1:0]		DAT_R,
+		input[N_INITIATORS-1:0]						CYC,
+		output[N_INITIATORS-1:0]					ERR,
+		input[(WB_DATA_WIDTH/8)*N_INITIATORS-1:0]	SEL,
+		input[N_INITIATORS-1:0]						STB,
+		output[N_INITIATORS-1:0]					ACK,
+		input[N_INITIATORS-1:0]						WE,
 
-		output[WB_ADDR_WIDTH-1:0]		TADR[N_TARGETS:0],
-		output[WB_DATA_WIDTH-1:0]		TDAT_W[N_TARGETS:0],
-		input[WB_DATA_WIDTH-1:0]		TDAT_R[N_TARGETS:0],
-		output							TCYC[N_TARGETS:0],
-		input							TERR[N_TARGETS:0],
-		output[(WB_DATA_WIDTH/8)-1:0]	TSEL[N_TARGETS:0],
-		output							TSTB[N_TARGETS:0],
-		input							TACK[N_TARGETS:0],
-		output							TWE[N_TARGETS:0]
+		output[WB_ADDR_WIDTH*(N_TARGETS+1)-1:0]		TADR,
+		output[WB_DATA_WIDTH*(N_TARGETS+1)-1:0]		TDAT_W,
+		input[WB_DATA_WIDTH*(N_TARGETS+1)-1:0]		TDAT_R,
+		output[N_TARGETS:0]							TCYC,
+		input[N_TARGETS:0]							TERR,
+		output[(WB_DATA_WIDTH/8)*(N_TARGETS+1)-1:0]	TSEL,
+		output[N_TARGETS:0]							TSTB,
+		input[N_TARGETS:0]							TACK,
+		output[N_TARGETS:0]							TWE
 		);
 	
-	localparam int WB_DATA_MSB = (WB_DATA_WIDTH-1);
-	localparam int N_INIT_ID_BITS = (N_INITIATORS>1)?$clog2(N_INITIATORS):1;
-	localparam int N_TARG_ID_BITS = $clog2(N_TARGETS+1);
-	localparam bit[N_TARG_ID_BITS:0]		NO_TARGET  = {(N_TARG_ID_BITS+1){1'b1}};
-	localparam bit[N_INIT_ID_BITS:0]		NO_INITIATOR = {(N_INIT_ID_BITS+1){1'b1}};
+	localparam WB_DATA_MSB = (WB_DATA_WIDTH-1);
+	localparam N_INIT_ID_BITS = (N_INITIATORS>1)?$clog2(N_INITIATORS):1;
+	localparam N_TARG_ID_BITS = $clog2(N_TARGETS+1);
+	localparam NO_TARGET  = {(N_TARG_ID_BITS+1){1'b1}};
+	localparam NO_INITIATOR = {(N_INIT_ID_BITS+1){1'b1}};
 	
 	// Interface to the decode-fail target
 //	wb_if				TERR();
 
 	function reg[N_TARG_ID_BITS:0] addr2targ_id(
-		reg[N_INIT_ID_BITS-1:0]		initiator,
-		reg[WB_ADDR_WIDTH-1:0] 		addr
+		input reg[N_INIT_ID_BITS-1:0]		initiator,
+		input reg[WB_ADDR_WIDTH-1:0] 		addr
 		);
+		integer i;
+		begin
+		addr2targ_id = N_TARGETS;
 //		$display("addr2targ_id: 'h%08h 'h%08h", addr, ADDR_RANGES);
-		for (int i=0; i<N_TARGETS; i+=1) begin
+		for (i=0; i<N_TARGETS; i+=1) begin
 //			$display("Address Range: %0d 'h%08h..'h%08h", i, 
 //					ADDR_RANGES[(WB_ADDR_WIDTH*(i+2)-1)-:WB_ADDR_WIDTH],
 //					ADDR_RANGES[(WB_ADDR_WIDTH*(i+1)-1)-:WB_ADDR_WIDTH]);
@@ -65,16 +68,15 @@
 					(addr&I_ADR_MASK[(WB_ADDR_WIDTH*(i+1))-1-:WB_ADDR_WIDTH]) == 
 					(T_ADR[(WB_ADDR_WIDTH*(i+1))-1-:WB_ADDR_WIDTH])) begin
 				$display("Address 'h%08h: range=%0d", addr, N_TARGETS-1);
-				return N_TARGETS-1;
+				addr2targ_id = N_TARGETS-1;
 			end
 		end
-		$display("%t: Address 'h%08h - decode fail", $time, addr);
-		return (N_TARGETS);
+		end
 	endfunction
 	
 // Read request state machine
 
-	// Master state machine
+	// Initiator state machine
 	reg[2:0]							initiator_state[N_INITIATORS-1:0];
 	reg[N_TARG_ID_BITS:0]				initiator_selected_target[N_INITIATORS-1:0];
 	wire								initiator_gnt[N_TARGETS:0];
@@ -83,7 +85,7 @@
 	
 	generate
 		genvar m_i;
-		for (m_i=0; m_i<N_INITIATORS; m_i++) begin : block_m_i
+		for (m_i=0; m_i<N_INITIATORS; m_i=m_i+1) begin : block_m_i
 			always @(posedge clk) begin
 				if (rst == 1) begin
 					initiator_state[m_i] <= 0;
@@ -95,7 +97,7 @@
 								initiator_state[m_i] <= 1;
 								initiator_selected_target[m_i] <= addr2targ_id(
 										m_i, 
-										ADR[m_i]
+										ADR[WB_ADDR_WIDTH*m_i+:WB_ADDR_WIDTH]
 										);
 //								$display("Master %0d => Slave %0d", m_i, addr2targ_id(m_i, ADR[m_i]));
 							end
@@ -118,8 +120,8 @@
 	generate
 		genvar m_req_i, m_req_j;
 
-		for (m_req_i=0; m_req_i <(N_TARGETS+1); m_req_i++) begin : block_m_req_i
-			for (m_req_j=0; m_req_j < N_INITIATORS; m_req_j++) begin : block_m_req_j
+		for (m_req_i=0; m_req_i <(N_TARGETS+1); m_req_i=m_req_i+1) begin : block_m_req_i
+			for (m_req_j=0; m_req_j < N_INITIATORS; m_req_j=m_req_j+1) begin : block_m_req_j
 				assign initiator_target_req[m_req_i][m_req_j] = (initiator_selected_target[m_req_j] == m_req_i);
 			end
 		end
@@ -128,7 +130,7 @@
 	generate
 		genvar s_arb_i;
 		
-		for (s_arb_i=0; s_arb_i<(N_TARGETS+1); s_arb_i++) begin : s_arb
+		for (s_arb_i=0; s_arb_i<(N_TARGETS+1); s_arb_i=s_arb_i+1) begin : s_arb
 			wb_NxN_arbiter #(
 				.N_REQ  (N_INITIATORS)
 				) 
@@ -147,7 +149,7 @@
 	generate
 		genvar s_am_i;
 		
-		for (s_am_i=0; s_am_i<(N_TARGETS+1); s_am_i++) begin : block_s_am_i
+		for (s_am_i=0; s_am_i<(N_TARGETS+1); s_am_i=s_am_i+1) begin : block_s_am_i
 			assign target_active_initiator[s_am_i] =
 				(initiator_gnt[s_am_i])?initiator_gnt_id[s_am_i]:NO_INITIATOR;
 		end
@@ -157,11 +159,12 @@
 	generate
 		genvar s2m_i;
 		
-		for (s2m_i=0; s2m_i<N_INITIATORS; s2m_i++) begin : block_s2m_i
-			assign DAT_R[s2m_i] = (initiator_selected_target[s2m_i] != NO_TARGET && 
-										initiator_gnt[initiator_selected_target[s2m_i]] && 
-										initiator_gnt_id[initiator_selected_target[s2m_i]] == s2m_i)?
-										TDAT_R[initiator_selected_target[s2m_i]]:0;
+		for (s2m_i=0; s2m_i<N_INITIATORS; s2m_i=s2m_i+1) begin : block_s2m_i
+			assign DAT_R[WB_DATA_WIDTH*s2m_i+:WB_DATA_WIDTH] = 
+				(initiator_selected_target[s2m_i] != NO_TARGET && 
+						initiator_gnt[initiator_selected_target[s2m_i]] && 
+						initiator_gnt_id[initiator_selected_target[s2m_i]] == s2m_i)?
+					TDAT_R[WB_DATA_WIDTH*initiator_selected_target[s2m_i]+:WB_DATA_WIDTH]:0;
 			assign ERR[s2m_i] = (initiator_selected_target[s2m_i] != NO_TARGET && 
 										initiator_gnt[initiator_selected_target[s2m_i]] && 
 										initiator_gnt_id[initiator_selected_target[s2m_i]] == s2m_i)?
@@ -176,11 +179,15 @@
 	// WB signals to target mux
 	generate
 		genvar m2s_i;
-		for(m2s_i=0; m2s_i<(N_TARGETS+1); m2s_i++) begin : WB_M2S_assign
-			assign TADR[m2s_i] = (target_active_initiator[m2s_i] == NO_INITIATOR)?0:ADR[target_active_initiator[m2s_i]];
-			assign TDAT_W[m2s_i] = (target_active_initiator[m2s_i] == NO_INITIATOR)?0:DAT_W[target_active_initiator[m2s_i]];
+		for(m2s_i=0; m2s_i<(N_TARGETS+1); m2s_i=m2s_i+1) begin : WB_M2S_assign
+			assign TADR[WB_ADDR_WIDTH*m2s_i+:WB_ADDR_WIDTH] = 
+				(target_active_initiator[m2s_i] == NO_INITIATOR)?0:ADR[WB_ADDR_WIDTH*target_active_initiator[m2s_i]+:WB_ADDR_WIDTH];
+			assign TDAT_W[WB_DATA_WIDTH*m2s_i+:WB_DATA_WIDTH] = 
+				(target_active_initiator[m2s_i] == NO_INITIATOR)?0:DAT_W[WB_DATA_WIDTH*target_active_initiator[m2s_i]+:WB_DATA_WIDTH];
 			assign TCYC[m2s_i] = (target_active_initiator[m2s_i] == NO_INITIATOR)?0:CYC[target_active_initiator[m2s_i]];
-			assign TSEL[m2s_i] = (target_active_initiator[m2s_i] == NO_INITIATOR)?0:SEL[target_active_initiator[m2s_i]];
+			assign TSEL[(WB_DATA_WIDTH/8)*m2s_i+:(WB_DATA_WIDTH/8)] = 
+				(target_active_initiator[m2s_i] == NO_INITIATOR)?0:
+				SEL[(WB_DATA_WIDTH/8)*target_active_initiator[m2s_i]+:(WB_DATA_WIDTH/8)];
 			assign TSTB[m2s_i] = (target_active_initiator[m2s_i] == NO_INITIATOR)?0:STB[target_active_initiator[m2s_i]];
 			assign TWE[m2s_i] = (target_active_initiator[m2s_i] == NO_INITIATOR)?0:WE[target_active_initiator[m2s_i]];
 		end
@@ -202,7 +209,7 @@
 endmodule
 
 module wb_NxN_arbiter #(
-		parameter int			N_REQ=2
+		parameter 					N_REQ=2
 		) (
 		input						clk,
 		input						rst,
@@ -233,7 +240,7 @@
 	generate
 		genvar gnt_ppc_i;
 		
-	for (gnt_ppc_i=N_REQ-1; gnt_ppc_i>=0; gnt_ppc_i--) begin : block_gnt_ppc_i
+	for (gnt_ppc_i=N_REQ-1; gnt_ppc_i>=0; gnt_ppc_i=gnt_ppc_i-1) begin : block_gnt_ppc_i
 		if (gnt_ppc_i == 0) begin
 			assign gnt_ppc[gnt_ppc_i] = last_gnt[0];
 		end else begin
@@ -246,7 +253,7 @@
 	generate
 		genvar unmasked_gnt_i;
 		
-	for (unmasked_gnt_i=0; unmasked_gnt_i<N_REQ; unmasked_gnt_i++) begin : block_unmasked_gnt_i
+	for (unmasked_gnt_i=0; unmasked_gnt_i<N_REQ; unmasked_gnt_i=unmasked_gnt_i+1) begin : block_unmasked_gnt_i
 		// Prioritized unmasked grant vector. Grant to the lowest active grant
 		if (unmasked_gnt_i == 0) begin
 			assign unmasked_gnt[unmasked_gnt_i] = req[unmasked_gnt_i];
@@ -260,7 +267,7 @@
 	generate
 		genvar masked_gnt_i;
 		
-	for (masked_gnt_i=0; masked_gnt_i<N_REQ; masked_gnt_i++) begin : block_masked_gnt_i
+	for (masked_gnt_i=0; masked_gnt_i<N_REQ; masked_gnt_i=masked_gnt_i+1) begin : block_masked_gnt_i
 		if (masked_gnt_i == 0) begin
 			assign masked_gnt[masked_gnt_i] = (gnt_ppc_next[masked_gnt_i] & req[masked_gnt_i]);
 		end else begin
@@ -303,19 +310,20 @@
 		end
 	end
 
-	function reg[$clog2(N_REQ)-1:0] gnt2id(reg[N_REQ-1:0] gnt);
-		automatic int i;
-		reg[$clog2(N_REQ)-1:0] result;
+	function [$clog2(N_REQ)-1:0] gnt2id(input reg[N_REQ-1:0] gnt);
+		integer i;
+		begin
+		// Return the OR of the indices of all set bits in gnt (0 if none are set)
 		
-		result = 0;
+		gnt2id = 0;
 		
-		for (i=0; i<N_REQ; i++) begin
+		for (i=0; i<N_REQ; i=i+1) begin
 			if (gnt[i]) begin
-				result |= i;
+				gnt2id = gnt2id | i;
 			end
 		end
 	
-		return result;
+		end
 	endfunction
 
 endmodule