added azadi_soc design files
diff --git a/verilog/rtl/.azadi_soc_top.sv.swp b/verilog/rtl/.azadi_soc_top.sv.swp
new file mode 100644
index 0000000..cb81277
--- /dev/null
+++ b/verilog/rtl/.azadi_soc_top.sv.swp
Binary files differ
diff --git a/verilog/rtl/.user_project_wrapper.v.swp b/verilog/rtl/.user_project_wrapper.v.swp
new file mode 100644
index 0000000..ca62df7
--- /dev/null
+++ b/verilog/rtl/.user_project_wrapper.v.swp
Binary files differ
diff --git a/verilog/rtl/PWM.v b/verilog/rtl/PWM.v
new file mode 100644
index 0000000..44078a0
--- /dev/null
+++ b/verilog/rtl/PWM.v
@@ -0,0 +1,251 @@
+/* PWM: two-channel pulse-width modulator with a simple register read/write port.

+control register [7:0]ctrl (channel 1; ctrl_2 uses the same layout for channel 2):

+bit 0:	When set, external clock is chosen for PWM/timer. When cleared, wb clock is used for PWM/timer. (Stored only: in this module clk_source is tied to clk_i and i_extclk is never referenced.)

+bit 1:	When set,  PWM is enabled. When cleared,  timer is enabled. (Not writable from the register port: this module forces the bit to 1 on every clk_i edge.)

+bit 2:	When set,  PWM/timer starts. When cleared, PWM/timer stops.

+bit 3:	When set, timer runs continuously. When cleared, timer runs one time. (Stored only: no logic in this module reads it.)

+bit 4:	When set, o_pwm enabled. When cleared, o_pwm is driven to 0.

+bit 5:	Timer interrupt bit. When it is written with 0, interrupt request is cleared. (Never assigned by this module; it only appears in the ctrl read-back.)

+bit 6:	When set, a 16-bit external signal i_DC is used as duty cycle. When cleared, register DC is used.

+bit 7:	When set, the counter for PWM/timer is reset; its output and bit 5 will also be cleared (this module clears the counter, the PWM signal and the captured i_DC; it does not touch bit 5). When changing from PWM mode to timer mode reset is needed before timer starts.

+*/

+module	PWM(

+// register access port: plain re/we/addr/data signals (presumably driven by a TL-UL adapter outside this module -- TODO confirm)

+	input 			clk_i,												// clock of the register file; also the source clock of both dividers

+	input 			rst_ni,												// active-low reset, sampled synchronously

+	// re_i is only used to qualify writes: write = we_i & ~re_i

+	input 			re_i,											

+	input 			we_i,											

+	input  [7:0]    addr_i,											// register offset, compared against the adr_* parameters

+	input  [31:0]   wdata_i,											// only bits [15:0] (or the ctrl bits) are stored

+	input  [3:0]	be_i,										// not referenced anywhere in this module

+	output [31:0]   rdata_o,												// combinational read-back selected by addr_i

+	//output          error_o,												

+	// PWM-side signals

+	input			i_extclk,	// not referenced anywhere in this module

+	input   [15:0]	i_DC,	// external duty-cycle value, shared by both channels

+	input			i_valid_DC,	// when high, i_DC is captured into extDC and extDC_2

+	output			o_pwm,	// channel 1 PWM output, gated by ctrl[4]

+	output          o_pwm_2,	// channel 2 PWM output, gated by ctrl_2[4]

+	output  reg     oe_pwm1,	// channel 1 output enable: set while the channel runs, cleared when ctrl[2] is 0

+	output  reg     oe_pwm2	// channel 2 output enable: set while the channel runs, cleared when ctrl_2[2] is 0

+

+);

+

+////////////////////control logic////////////////////////////

+parameter  adr_ctrl_1	=	0,	// channel 1 register offsets

+		   adr_divisor_1=	4,

+		   adr_period_1	=	8,

+		   adr_DC_1		=	12;

+

+parameter  adr_ctrl_2	=	16,	// channel 2 register offsets

+		   adr_divisor_2=	20,

+		   adr_period_2	=	24,

+		   adr_DC_2		=	28;

+

+

+

+	// channel 1 registers

+					 

+reg	[7:0]  ctrl;	// control bits, see the header comment

+reg	[15:0] period;		// PWM period in divided-clock cycles (counter wraps at period-1)

+reg	[15:0] DC;		// duty cycle: output is high while the counter is below this value

+reg	[15:0] divisor;	// clock divisor handed to the down_clocking_* instances

+	// channel 2 registers, same meaning as above

+reg	[7:0]  ctrl_2;

+reg	[15:0] period_2;		

+reg	[15:0] DC_2;		

+reg	[15:0] divisor_2;	

+

+wire	   write;

+	// a write is accepted only when we_i is high and re_i is low

+assign	   write = we_i & ~re_i;

+	// Register file: synchronous reset, then address-decoded writes. Bits 1 and 5 of ctrl/ctrl_2 are never touched here.

+always@(posedge clk_i)

+	if(~rst_ni)begin

+		ctrl[4:2]	<=	0;

+		ctrl[0]  	<=	0;

+		ctrl[7:6]	<=	0;

+		DC			<=	0;

+		period		<=	0;

+		divisor		<=	0;

+

+		

+		ctrl_2[4:2]	<=	0;

+		ctrl_2[0]  	<=	0;

+		ctrl_2[7:6]	<=	0;

+		DC_2		<=	0;

+		period_2	<=	0;

+		divisor_2	<=	0;

+	end

+	else if(write)begin

+		case(addr_i)	// no default: writes to other offsets are ignored

+			adr_ctrl_1:begin

+				ctrl[0]	<=	wdata_i[0];

+				ctrl[4:2]	<=	wdata_i[4:2];

+				ctrl[7:6]	<=	wdata_i[7:6];

+			end

+

+			adr_ctrl_2:begin

+				ctrl_2[0]	<=	wdata_i[0];

+				ctrl_2[4:2]	<=	wdata_i[4:2];

+				ctrl_2[7:6]	<=	wdata_i[7:6];

+			end

+

+			adr_divisor_1	:  divisor	<=	wdata_i[15:0];

+			adr_period_1	:  period   <=	wdata_i[15:0];

+			adr_DC_1		:  DC		<=	wdata_i[15:0];

+

+			adr_divisor_2	:  divisor_2	<=	wdata_i[15:0];

+			adr_period_2	:  period_2		<=	wdata_i[15:0];

+			adr_DC_2		:  DC_2			<=	wdata_i[15:0];

+		endcase

+	end

+

+	// ctrl[1] (PWM mode select) is forced to 1 on every clk_i edge, with no reset term.

+	// NOTE(review): ctrl[5] / ctrl_2[5] have no driver in this module, so they read back undefined in simulation.

+wire	pwm;

+always @(posedge clk_i) begin

+  	ctrl[1] <= 1'b1;

+end

+

+

+

+assign	pwm = ctrl[1];

+	// same forcing for channel 2

+wire    pwm_1;

+always @(posedge clk_i) begin

+    ctrl_2[1]   <= 1'b1;

+end

+

+assign		pwm_1 = ctrl_2[1];

+wire	    eclk_2,oclk_2;	// even/odd divided clocks for channel 2

+///////////////////////////////////////////////////////////

+

+//////down clocking for pwm///////////////////

+wire	clk_source;

+wire	eclk,oclk;	// even/odd divided clocks for channel 1

+assign	clk_source = clk_i;	// always clk_i: ctrl[0] and i_extclk do not take part in the selection

+down_clocking_even	clock_div_ev(

+	.clk_i			(clk_source) ,

+	.rst_ni			(rst_ni),

+	.i_divisor	    ({1'b0,divisor[15:1]}),	// divisor >> 1

+	.o_clk			(eclk)

+);

+down_clocking_odd	clock_div_od(

+	.clk_i			(clk_source),

+	.rst_ni			(rst_ni),

+	.i_divisor	    ({1'b0,divisor[15:1]}),	// divisor >> 1

+	.o_clk			(oclk)

+);

+wire	clk;

+assign	clk = divisor[0]? oclk: eclk;	// odd divisor -> odd divider output, even divisor -> even divider output

+

+

+

+	// channel 2 dividers, same structure driven by divisor_2

+down_clocking_even	clock_div_ev_2(

+	.clk_i			(clk_source) ,

+	.rst_ni			(rst_ni),

+	.i_divisor	    ({1'b0,divisor_2[15:1]}),

+	.o_clk			(eclk_2)

+);

+down_clocking_odd	clock_div_od_2(

+	.clk_i			(clk_source),

+	.rst_ni			(rst_ni),

+	.i_divisor	    ({1'b0,divisor_2[15:1]}),

+	.o_clk			(oclk_2)

+);

+wire	clk_2;

+assign	clk_2 = divisor_2[0]? oclk_2: eclk_2;

+

+///////////////////////////////////////////////////////

+

+/////////////////main counter //////////////////////////

+reg		[15:0]   		ct;	// channel 1 period counter

+reg						pts;	//PWM signal 

+reg		[15:0]			extDC;	// last i_DC value captured while i_valid_DC was high

+wire	[15:0]			DC_1;

+assign					DC_1 =	ctrl[6]?	extDC:	DC;	//external or internal duty cycle toggle

+wire	[15:0]			period_1;

+assign					period_1 = (period==0)?	0:	(period-1);	// last counter value of a period; guarded so period==0 does not wrap to 16'hFFFF

+

+wire					rst_ct;

+assign					rst_ct	=	~rst_ni|ctrl[7];	// counter reset: global reset or software reset bit

+

+	// channel 2 equivalents of the signals above

+reg		[15:0]   		ct_2;

+reg						pts_2;	//PWM signal 

+reg		[15:0]			extDC_2;

+wire	[15:0]			DCw_2;

+assign					DCw_2 =	ctrl_2[6]?	extDC_2:	DC_2;	//external or internal duty cycle toggle

+wire	[15:0]			period_P2;

+assign					period_P2	=	(period_2==0)?	0:	(period_2-1);

+

+wire					rst_ct_2;

+assign					rst_ct_2	=	~rst_ni|ctrl_2[7];

+	// Channel 1 counter, clocked by the divided clock. NOTE(review): ctrl/period/DC are written on clk_i and sampled here on clk -- confirm the clock relationship is constrained.

+always@(posedge clk )

+	if(rst_ct)begin	// NOTE(review): oe_pwm1 is not assigned in this branch, so it keeps its previous (initially undefined) value during reset

+		pts   <= 0;

+		ct    <= 0;

+		extDC <= 0;

+	end

+	else begin

+	if(i_valid_DC)	extDC	<=	i_DC;

+	if(ctrl[2])begin	// channel started

+		if(pwm) begin

+		    oe_pwm1 <= 1'b1;

+			if(ct	>=	period_1) ct <=	0;	// wrap at the end of the period

+			else ct	<=	ct+1;

+

+			if(ct	<	DC_1)	pts	<=	1'b1;	// high for the first DC_1 counts of each period

+			else pts	<=	1'b0;

+		end

+	end

+	else begin	// channel stopped: hold everything at 0 and release the output enable

+			pts	    <= 1'b0;

+			ct	    <= 0;

+			oe_pwm1 <= 0;

+	end

+end

+

+

+	// Channel 2 counter: same behaviour as channel 1, using clk_2 and the *_2 registers (oe_pwm2 is likewise not assigned in the reset branch).

+always@(posedge clk_2 )

+	if(rst_ct_2)begin

+		pts_2	<=	0;

+		ct_2	<=	0;

+		extDC_2	<=	0;

+	end

+	else begin

+	if(i_valid_DC)	extDC_2	<=	i_DC;

+	if(ctrl_2[2])begin

+		if(pwm_1) begin

+		     oe_pwm2 <= 1'b1;

+			if(ct_2	>=	period_P2) ct_2	<=	0;

+			else ct_2	<=	ct_2+1;

+

+			if(ct_2	<	DCw_2)	pts_2	<=	1'b1;

+			else pts_2	<=	1'b0;

+		end

+	end

+	else begin

+			pts_2	<=	1'b0;

+			ct_2	<=	0;

+			oe_pwm2 <=  1'b0;

+	end

+end

+//////////////////////////////////////////////////////////

+	// Outputs are forced low unless bit 4 of the channel's control register is set.

+assign	o_pwm   = ctrl[4]? pts: 0;

+assign	o_pwm_2 = ctrl_2[4]? pts_2: 0;

+assign	rdata_o = (addr_i == adr_ctrl_1)   ? {8'h0,ctrl}  :	// read mux: not qualified by re_i; narrower values are zero-extended to 32 bits

+			  	  (addr_i == adr_divisor_1)? divisor	  :

+			  	  (addr_i == adr_period_1) ? period		  :

+			  	  (addr_i == adr_DC_1)	   ? DC			  : 

+				  (addr_i == adr_DC_2)	   ? DC_2		  :

+				  (addr_i == adr_period_2) ? period_2	  :

+				  (addr_i == adr_divisor_2)? divisor_2    :

+				  (addr_i == adr_ctrl_2)   ? {8'h0,ctrl_2}:0;	// any other offset reads as 0

+

+

+

+endmodule

diff --git a/verilog/rtl/azadi_soc_top.sv b/verilog/rtl/azadi_soc_top.sv
new file mode 100644
index 0000000..da32e03
--- /dev/null
+++ b/verilog/rtl/azadi_soc_top.sv
@@ -0,0 +1,520 @@
+// azadi_soc_top: SoC top level. Wires the brq core, debug module, TL-UL crossbar, peripherals, reset manager, UART program loader and the two SRAMs together.
+module azadi_soc_top (
+  input clk_i,
+  input rst_ni,   // external reset; used directly by the debug module, reset manager and program loader
+  input prog,     // forwarded to iccm_controller.prog_i
+  //output system_rst_ni,
+  //output prog_rst_ni,
+  input  logic [31:0] gpio_i,
+  output logic [31:0] gpio_o,
+  output logic [31:0] gpio_oe,
+
+  // jtag interface 
+  input  logic       jtag_tck_i,
+  input  logic       jtag_tms_i,
+  input  logic       jtag_trst_ni,
+  input  logic       jtag_tdi_i,
+  output logic       jtag_tdo_o,
+  output logic       jtag_tdo_oe_o,
+
+  // uart-periph interface
+  output logic       uart_tx,
+  output logic       tx_en_o,
+  input  logic       uart_rx,   // shared by u_uart0 and the program loader u_uart_rx_prog
+
+  // PWM interface  
+
+  output logic       pwm_o,
+  output logic       pwm_o_2,
+  output logic       pwm1_oe,
+  output logic       pwm2_oe,
+
+  // SPI interface
+  // `SPI_SS_NB is a macro that must be defined before this file is compiled (not defined in this file)
+  output logic    [`SPI_SS_NB-1:0] ss_o,        
+  output logic                     sclk_o,      
+  output logic                     sd_o,
+  output logic                     sd_oe,       
+  input  logic                     sd_i
+);
+
+localparam logic [31:0] JTAG_ID = {   // IDCODE passed to rv_dm; NOTE(review): values look inherited from another project -- confirm they are intended
+ 4'h0,     // Version
+ 16'h4F54, // Part Number: "OT"
+ 11'h426,  // Manufacturer Identity: Google
+ 1'b1      // (fixed)
+};
+
+
+ logic prog_rst_n;      // NOTE(review): never used; the connections below use the undeclared name prog_rst_ni (implicit 1-bit net)
+ logic system_rst_ni;   // reset produced by rstmgr, used by the core, crossbar and peripherals
+  logic [31:0] gpio_in;
+  logic [31:0] gpio_out;
+  
+  assign gpio_in = gpio_i;
+  assign gpio_o  = gpio_out; 
+
+  logic         instr_valid;   // declared but not referenced in this module
+  logic [11:0]  tlul_addr;     // declared but not referenced in this module
+  logic         req_i;         // declared but not referenced in this module
+  logic [31:0]  tlul_data;     // declared but not referenced in this module
+  logic dbg_req;               // debug request: rv_dm -> core
+  logic dbg_rst;               // ndmreset: rv_dm -> rstmgr
+
+
+ // instruction sram interface 
+  logic        instr_csb;
+  logic [11:0] instr_addr;
+  logic [31:0] instr_wdata;
+  logic [3:0]  instr_wmask;
+  logic        instr_we;
+  logic [31:0] instr_rdata;
+  
+   // data sram interface
+  logic        data_csb;
+  logic [11:0] data_addr;
+  logic [31:0] data_wdata;
+  logic [3:0]  data_wmask;
+  logic        data_we;
+  logic [31:0] data_rdata;
+  // program-loader write port: iccm_controller -> instr_mem_top
+  logic [31:0] iccm_ctrl_data;
+  logic        iccm_ctrl_we;
+  logic [11:0] iccm_ctrl_addr_o;
+
+        
+  tlul_pkg::tl_h2d_t ifu_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_ifu;
+  tlul_pkg::tl_h2d_t xbar_to_iccm;
+  tlul_pkg::tl_d2h_t iccm_to_xbar;
+
+  tlul_pkg::tl_h2d_t lsu_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_lsu;
+
+  tlul_pkg::tl_h2d_t xbar_to_dccm;
+  tlul_pkg::tl_d2h_t dccm_to_xbar;
+
+  tlul_pkg::tl_h2d_t xbarp_to_gpio;
+  tlul_pkg::tl_d2h_t gpio_to_xbarp;
+
+  tlul_pkg::tl_h2d_t dm_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_dm;
+  // NOTE: despite the names, dbgrom_to_xbar is driven by the crossbar (tl_debug_rom_o) and xbar_to_dbgrom by rv_dm (tl_d_o)
+  tlul_pkg::tl_h2d_t dbgrom_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_dbgrom;
+
+  tlul_pkg::tl_h2d_t plic_req;
+  tlul_pkg::tl_d2h_t plic_resp;
+
+  tlul_pkg::tl_h2d_t xbar_to_uart;
+  tlul_pkg::tl_d2h_t uart_to_xbar;
+
+  tlul_pkg::tl_h2d_t xbar_to_timer;
+  tlul_pkg::tl_d2h_t timer_to_xbar;
+
+  tlul_pkg::tl_h2d_t xbar_to_pwm;
+  tlul_pkg::tl_d2h_t pwm_to_xbar;
+
+  tlul_pkg::tl_h2d_t xbar_to_spi;
+  tlul_pkg::tl_d2h_t spi_to_xbar;
+
+  // interrupt vector
+  logic [43:0] intr_vector;   // NOTE(review): 44 bits wide, but the concatenation below is 43 bits, so bit 43 is always 0
+
+  // Interrupt source list 
+  logic [31:0] intr_gpio;
+  logic        intr_uart0_tx_watermark;
+  logic        intr_uart0_rx_watermark;
+  logic        intr_uart0_tx_empty;
+  logic        intr_uart0_rx_overflow;
+  logic        intr_uart0_rx_frame_err;
+  logic        intr_uart0_rx_break_err;
+  logic        intr_uart0_rx_timeout;
+  logic        intr_uart0_rx_parity_err;
+  logic        intr_req;     // rv_plic irq_o -> core irq_external_i
+  logic        intr_srx;
+  logic        intr_stx;
+  logic        intr_timer;   // rv_timer -> core irq_timer_i (does not go through the PLIC)
+
+  assign intr_vector = {     // bit 0 tied low, [32:1] gpio, [40:33] uart, [41] spi tx, [42] spi rx
+      intr_srx,
+      intr_stx,
+      intr_uart0_rx_parity_err,
+      intr_uart0_rx_timeout,
+      intr_uart0_rx_break_err,
+      intr_uart0_rx_frame_err,
+      intr_uart0_rx_overflow,
+      intr_uart0_tx_empty,
+      intr_uart0_rx_watermark,
+      intr_uart0_tx_watermark,
+      intr_gpio,
+      1'b0
+  };
+
+// jtag interface 
+
+  jtag_pkg::jtag_req_t jtag_req;
+  jtag_pkg::jtag_rsp_t jtag_rsp;
+  
+
+  assign jtag_req.tck    = jtag_tck_i;
+  assign jtag_req.tms    = jtag_tms_i;
+  assign jtag_req.trst_n = jtag_trst_ni;
+  assign jtag_req.tdi    = jtag_tdi_i;
+  assign jtag_tdo_o      = jtag_rsp.tdo;
+  assign jtag_tdo_oe_o = jtag_rsp.tdo_oe;
+
+// CPU core
+brq_core_top #(
+    .PMPEnable        (1'b0),
+    .PMPGranularity   (0), 
+    .PMPNumRegions    (4), 
+    .MHPMCounterNum   (0), 
+    .MHPMCounterWidth (40), 
+    .RV32E            (1'b0), 
+    .RV32M            (brq_pkg::RV32MSlow), 
+    .RV32B            (brq_pkg::RV32BNone), 
+    .RegFile          (brq_pkg::RegFileFF), 
+    .BranchTargetALU  (1'b0), 
+    .WritebackStage   (1'b1), 
+    .ICache           (1'b0), 
+    .ICacheECC        (1'b0), 
+    .BranchPredictor  (1'b0), 
+    .DbgTriggerEn     (1'b1), 
+    .DbgHwBreakNum    (1), 
+    .Securebrq        (1'b0),
+    .DmHaltAddr       (tl_main_pkg::ADDR_SPACE_DEBUG_ROM + 32'h 800), 
+    .DmExceptionAddr  (tl_main_pkg::ADDR_SPACE_DEBUG_ROM + dm::ExceptionAddress) 
+) u_top (
+    .clk_i (clk_i),
+    .rst_ni (system_rst_ni),
+
+  // instruction memory interface 
+    .tl_i_i (xbar_to_ifu),
+    .tl_i_o (ifu_to_xbar),
+
+  // data memory interface 
+    .tl_d_i (xbar_to_lsu),
+    .tl_d_o (lsu_to_xbar),
+
+    .test_en_i   (1'b0),     // enable all clk_i gates for testing
+
+    .hart_id_i   (32'b0), 
+    .boot_addr_i (32'h20000000),
+
+        // Interrupt inputs
+    .irq_software_i (1'b0),
+    .irq_timer_i    (intr_timer),
+    .irq_external_i (intr_req),
+    .irq_fast_i     ('0),
+    .irq_nm_i       (1'b0),       // non-maskable interrupt
+
+    // Debug Interface
+    .debug_req_i    (dbg_req),
+        // CPU Control Signals
+    .fetch_enable_i (1'b1),
+    .alert_minor_o  (),
+    .alert_major_o  (),
+    .core_sleep_o   ()
+);
+
+// Debug module
+rv_dm #(
+  .NrHarts(1),
+  .IdcodeValue(JTAG_ID)
+ // .DirectDmiTap (DirectDmiTap)
+) debug_module (
+  .clk_i(clk_i),       // clk_i
+  .rst_ni(rst_ni),      // asynchronous reset active low, connect PoR
+                                          // here, not the system reset
+  .testmode_i('0),
+  .ndmreset_o(dbg_rst),  // non-debug module reset
+  .dmactive_o(),  // debug module is active
+  .debug_req_o(dbg_req), // async debug request
+  .unavailable_i(1'b0), // communicate whether the hart is unavailable
+                                            // (e.g.: power down)
+
+  // bus device with debug memory, for an execution based technique
+  .tl_d_i(dbgrom_to_xbar),
+  .tl_d_o(xbar_to_dbgrom),
+
+  // bus host, for system bus accesses
+  .tl_h_o(dm_to_xbar),
+  .tl_h_i(xbar_to_dm),
+
+  .jtag_req_i(jtag_req),
+  .jtag_rsp_o(jtag_rsp)
+);
+
+
+
+// main xbar module
+  tl_xbar_main main_swith (
+  .clk_i         (clk_i),
+  .rst_ni        (system_rst_ni),
+
+  // Host interfaces
+  .tl_brqif_i         (ifu_to_xbar),
+  .tl_brqif_o         (xbar_to_ifu),
+  .tl_brqlsu_i        (lsu_to_xbar),
+  .tl_brqlsu_o        (xbar_to_lsu),
+  .tl_dm_sba_i        (dm_to_xbar),
+  .tl_dm_sba_o        (xbar_to_dm),
+
+  // Device interfaces
+  .tl_iccm_o          (xbar_to_iccm),
+  .tl_iccm_i          (iccm_to_xbar),
+  .tl_debug_rom_o     (dbgrom_to_xbar),
+  .tl_debug_rom_i     (xbar_to_dbgrom),
+  .tl_dccm_o          (xbar_to_dccm),
+  .tl_dccm_i          (dccm_to_xbar),
+  .tl_timer0_o        (xbar_to_timer),
+  .tl_timer0_i        (timer_to_xbar),
+  .tl_uart_o          (xbar_to_uart),
+  .tl_uart_i          (uart_to_xbar),
+  .tl_spi_o           (xbar_to_spi),
+  .tl_spi_i           (spi_to_xbar),
+  .tl_pwm_o           (xbar_to_pwm),
+  .tl_pwm_i           (pwm_to_xbar),
+  .tl_gpio_o          (xbarp_to_gpio),
+  .tl_gpio_i          (gpio_to_xbarp),
+  .tl_plic_o          (plic_req),
+  .tl_plic_i          (plic_resp)
+);
+
+
+// timer
+rv_timer timer0( 
+  .clk_i  (clk_i),
+  .rst_ni (system_rst_ni),
+
+  .tl_i   (xbar_to_timer),
+  .tl_o   (timer_to_xbar),
+
+  .intr_timer_expired_0_0_o (intr_timer)
+);
+
+// PWM module
+
+pwm_top u_pwm(
+
+  .clk_i   (clk_i),
+  .rst_ni  (system_rst_ni),
+
+  .tl_i    (xbar_to_pwm),
+  .tl_o    (pwm_to_xbar),
+
+
+  .pwm_o   (pwm_o),
+  .pwm_o_2 (pwm_o_2),
+  .pwm1_oe (pwm1_oe),
+  .pwm2_oe (pwm2_oe)
+);
+
+
+// spi module 
+
+spi_top u_spi_host(
+
+  .clk_i       (clk_i),
+  .rst_ni      (system_rst_ni),
+
+  .tl_i        (xbar_to_spi),
+  .tl_o        (spi_to_xbar),
+
+  // SPI signals                  
+  .intr_rx_o   (intr_srx),
+  .intr_tx_o   (intr_stx),                   
+  .ss_o        (ss_o),         
+  .sclk_o      (sclk_o),       
+  .sd_o        (sd_o),
+  .sd_oe       (sd_oe),       
+  .sd_i        (sd_i)
+);
+
+
+//GPIO module
+gpio GPIO (
+  .clk_i          (clk_i),
+  .rst_ni         (system_rst_ni),
+
+  // Below Register interface can be changed
+  .tl_i           (xbarp_to_gpio),
+  .tl_o           (gpio_to_xbarp),
+
+  .cio_gpio_i     (gpio_in),
+  .cio_gpio_o     (gpio_out),
+  .cio_gpio_en_o  (gpio_oe),
+
+  .intr_gpio_o    (intr_gpio )  
+);
+
+// Reset manager: produces system_rst_ni. NOTE(review): prog_rst_ni is also connected to iccm_controller.reset_o below; port directions of rstmgr are not visible here -- confirm it has a single driver.
+rstmgr reset_manager(
+  .clk_i(clk_i),
+  .rst_ni(rst_ni),
+  .ndmreset (dbg_rst),
+  .prog_rst_ni(prog_rst_ni),
+  .sys_rst_ni(system_rst_ni)
+);
+// Interrupt controller: collects intr_vector and raises intr_req to the core
+rv_plic intr_controller (
+  .clk_i(clk_i),
+  .rst_ni(system_rst_ni),
+
+  // Bus Interface (device)
+  .tl_i (plic_req),
+  .tl_o (plic_resp),
+
+  // Interrupt Sources
+  .intr_src_i (intr_vector),
+
+  // Interrupt notification to targets
+  .irq_o (intr_req),
+  .msip_o()
+);
+// UART peripheral
+uart u_uart0(
+  .clk_i                   (clk_i             ),
+  .rst_ni                  (system_rst_ni     ),
+
+  // Bus Interface
+  .tl_i                    (xbar_to_uart      ),
+  .tl_o                    (uart_to_xbar      ),
+
+  // Generic IO
+  .cio_rx_i                (uart_rx           ),
+  .cio_tx_o                (uart_tx           ),
+  .cio_tx_en_o             (tx_en_o           ),
+
+  // Interrupts
+  .intr_tx_watermark_o     (intr_uart0_tx_watermark ),
+  .intr_rx_watermark_o     (intr_uart0_rx_watermark ),
+  .intr_tx_empty_o         (intr_uart0_tx_empty     ),
+  .intr_rx_overflow_o      (intr_uart0_rx_overflow  ),
+  .intr_rx_frame_err_o     (intr_uart0_rx_frame_err ),
+  .intr_rx_break_err_o     (intr_uart0_rx_break_err ),
+  .intr_rx_timeout_o       (intr_uart0_rx_timeout   ),
+  .intr_rx_parity_err_o    (intr_uart0_rx_parity_err) 
+);
+// Program loader: uart_rx_prog turns uart_rx into bytes, iccm_controller turns bytes into instruction-memory writes. Both run on the inverted clock and the external reset.
+logic rx_dv_i;
+logic [7:0] rx_byte_i;
+	
+iccm_controller u_dut(
+    .clk_i      (~clk_i),
+	.rst_ni     (rst_ni),
+	.prog_i     (prog),
+	.rx_dv_i    (rx_dv_i),
+	.rx_byte_i  (rx_byte_i),
+	.we_o       (iccm_ctrl_we),
+	.addr_o     (iccm_ctrl_addr_o),
+	.wdata_o    (iccm_ctrl_data),
+	.reset_o    (prog_rst_ni)
+);
+	
+uart_rx_prog u_uart_rx_prog(
+	.clk_i         (~clk_i),
+	.rst_ni        (rst_ni),
+	.i_Rx_Serial   (uart_rx),
+	.CLKS_PER_BIT  (16'd1667),   // presumably clock cycles per UART bit; the resulting baud rate depends on the clk_i frequency -- TODO confirm
+	.o_Rx_DV       (rx_dv_i),
+	.o_Rx_Byte     (rx_byte_i)
+);
+
+
+// dummy instruction memory
+instr_mem_top iccm_adapter(
+  .clk_i            (clk_i),
+  .rst_ni           (system_rst_ni),
+  
+  .tl_i             (xbar_to_iccm),
+  .tl_o             (iccm_to_xbar),
+// iccm controller interface 
+  .iccm_ctrl_addr   (iccm_ctrl_addr_o),
+  .iccm_ctrl_wdata  (iccm_ctrl_data),
+  .iccm_ctrl_we     (iccm_ctrl_we),
+  .prog_rst_ni      (prog_rst_ni),
+    
+
+// instruction sram interface 
+  .csb              (instr_csb),
+  .addr_o           (instr_addr),
+  .wdata_o          (instr_wdata),
+  .wmask_o          (instr_wmask),
+  .we_o             (instr_we),
+  .rdata_i          (instr_rdata)
+);
+
+// Instruction SRAM: port 0 is used on the inverted clock, port 1 is tied off
+  sram #(  
+     .NUM_WMASKS  (4),
+//     .MEMD        (4096),
+     .DATA_WIDTH  (32), // data width
+//     .nRPORTS     (1) , // number of reading ports
+//     .nWPORTS     (1), // number of write ports
+     .IZERO       (0) , // binary / Initial RAM with zeros (has priority over IFILE)
+//     .BASIC_MODEL (1024),
+     .ADDR_WIDTH  (11)
+    ) u_iccm ( /*`ifdef USE_POWER_PINS
+    inout vdd;
+    inout gnd;
+  `endif*/
+    .clk0      (~clk_i), // clock
+    .csb0      (instr_csb), // active low chip select
+    .web0      (instr_we), // active low write control
+    .wmask0    (instr_wmask), // write mask
+    .addr0     (instr_addr[10:0]),   // instr_addr[11] is dropped to match ADDR_WIDTH = 11
+    .din0      (instr_wdata),
+    .dout0     (instr_rdata),
+    .clk1     (1'b0),
+    .csb1     (1'b1),
+    .addr1    ('0),
+    .dout1    ()
+    );
+// dummy data memory
+
+data_mem_top dccm_adapter(
+  .clk_i    (clk_i),
+  .rst_ni    (system_rst_ni),
+
+// tl-ul interface
+  .tl_d_i   (xbar_to_dccm),
+  .tl_d_o   (dccm_to_xbar),
+  
+  // sram interface
+   .csb     (data_csb),
+   .addr_o  (data_addr),
+   .wdata_o (data_wdata),
+   .wmask_o (data_wmask),
+   .we_o    (data_we),
+   .rdata_i (data_rdata)
+);
+
+// Data SRAM: same configuration as the instruction SRAM
+sram #(  
+   .NUM_WMASKS  (4),
+//   .MEMD        (4096),
+   .DATA_WIDTH  (32), // data width
+//   .nRPORTS     (1) , // number of reading ports
+//   .nWPORTS     (1), // number of write ports
+   .IZERO       (0) , // binary / Initial RAM with zeros (has priority over IFILE)
+//   .BASIC_MODEL (1024),
+   .ADDR_WIDTH  (11)
+  ) u_dccm ( /*`ifdef USE_POWER_PINS
+  inout vdd;
+  inout gnd;
+`endif*/
+  .clk0      (~clk_i), // clock
+  .csb0      (data_csb), // active low chip select
+  .web0      (data_we), // active low write control
+  .wmask0    (data_wmask), // write mask
+  .addr0     (data_addr[10:0]),   // data_addr[11] is dropped to match ADDR_WIDTH = 11
+  .din0      (data_wdata),
+  .dout0     (data_rdata),
+  .clk1      (1'b0),
+  .csb1      (1'b1),
+  .addr1     ('0),
+  .dout1     ()
+  );
+endmodule
diff --git a/verilog/rtl/brq_core.sv b/verilog/rtl/brq_core.sv
new file mode 100644
index 0000000..1e83243
--- /dev/null
+++ b/verilog/rtl/brq_core.sv
@@ -0,0 +1,1592 @@
+`ifdef RISCV_FORMAL
+  `define RVFI
+`endif
+
+
+/**
+ * Top level module of the brq RISC-V core
+ */
+module brq_core #(
+    parameter bit                 PMPEnable        = 1'b0,
+    parameter int unsigned        PMPGranularity   = 0,
+    parameter int unsigned        PMPNumRegions    = 0,
+    parameter int unsigned        MHPMCounterNum   = 0,
+    parameter int unsigned        MHPMCounterWidth = 40,
+    parameter bit                 RV32E            = 1'b0,
+    parameter brq_pkg::rv32m_e    RV32M            = brq_pkg::RV32MFast,
+    parameter brq_pkg::rv32b_e    RV32B            = brq_pkg::RV32BNone,
+    parameter brq_pkg::regfile_e  RegFile          = brq_pkg::RegFileFF,
+    parameter brq_pkg::rvfloat_e  RVF              = brq_pkg::RV32FSingle, // for floating point
+    parameter int unsigned        FloatingPoint    = 1'b1,
+    parameter bit                 BranchTargetALU  = 1'b0,
+    parameter bit                 WritebackStage   = 1'b1,
+    parameter bit                 ICache           = 1'b0,
+    parameter bit                 ICacheECC        = 1'b0,
+    parameter bit                 BranchPredictor  = 1'b0,
+    parameter bit                 DbgTriggerEn     = 1'b0,
+    parameter int unsigned        DbgHwBreakNum    = 1,
+    parameter bit                 Securebrq        = 1'b0,
+    parameter int unsigned        DmHaltAddr       = 32'h1A110800,
+    parameter int unsigned        DmExceptionAddr  = 32'h1A110808
+) (
+    // Clock and Reset
+    input  logic        clk_i,
+    input  logic        rst_ni,
+
+    input  logic        test_en_i,     // enable all clock gates for testing
+
+    input  logic [31:0] hart_id_i,
+    input  logic [31:0] boot_addr_i,
+
+    // Instruction memory interface
+    output logic        instr_req_o,
+    input  logic        instr_gnt_i,
+    input  logic        instr_rvalid_i,
+    output logic [31:0] instr_addr_o,
+    input  logic [31:0] instr_rdata_i,
+    input  logic        instr_err_i,
+
+    // Data memory interface
+    output logic        data_req_o,
+    input  logic        data_gnt_i,
+    input  logic        data_rvalid_i,
+    output logic        data_we_o,
+    output logic [3:0]  data_be_o,
+    output logic [31:0] data_addr_o,
+    output logic [31:0] data_wdata_o,
+    input  logic [31:0] data_rdata_i,
+    input  logic        data_err_i,
+
+    // Interrupt inputs
+    input  logic        irq_software_i,
+    input  logic        irq_timer_i,
+    input  logic        irq_external_i,
+    input  logic [14:0] irq_fast_i,
+    input  logic        irq_nm_i,       // non-maskeable interrupt
+
+    // Debug Interface
+    input  logic        debug_req_i,
+
+    // RISC-V Formal Interface
+    // Does not comply with the coding standards of _i/_o suffixes, but follows
+    // the convention of RISC-V Formal Interface Specification.
+`ifdef RVFI
+    output logic        rvfi_valid,
+    output logic [63:0] rvfi_order,
+    output logic [31:0] rvfi_insn,
+    output logic        rvfi_trap,
+    output logic        rvfi_halt,
+    output logic        rvfi_intr,
+    output logic [ 1:0] rvfi_mode,
+    output logic [ 1:0] rvfi_ixl,
+    output logic [ 4:0] rvfi_rs1_addr,
+    output logic [ 4:0] rvfi_rs2_addr,
+    output logic [ 4:0] rvfi_rs3_addr,
+    output logic [31:0] rvfi_rs1_rdata,
+    output logic [31:0] rvfi_rs2_rdata,
+    output logic [31:0] rvfi_rs3_rdata,
+    output logic [ 4:0] rvfi_rd_addr,
+    output logic [31:0] rvfi_rd_wdata,
+    output logic [31:0] rvfi_pc_rdata,
+    output logic [31:0] rvfi_pc_wdata,
+    output logic [31:0] rvfi_mem_addr,
+    output logic [ 3:0] rvfi_mem_rmask,
+    output logic [ 3:0] rvfi_mem_wmask,
+    output logic [31:0] rvfi_mem_rdata,
+    output logic [31:0] rvfi_mem_wdata,
+`endif
+
+    // CPU Control Signals
+    input  logic        fetch_enable_i,
+    output logic        alert_minor_o,
+    output logic        alert_major_o,
+    output logic        core_sleep_o
+);
+
+  import brq_pkg::*;
+  
+  // floating point 
+  localparam int unsigned W = 32;
+  logic                   fp_flush;
+  logic                   in_ready_c2fpu;   // ready - from core to FPU 
+  logic                   in_valid_c2fpu;   // valid - from FPU to core 
+  logic                   out_ready_fpu2c;  // ready - from FPU to core
+  logic                   out_valid_fpu2c;  // valid - from core to FPU
+  logic                   valid_id_fpu;     // select which valid signal will go to dec
+  logic                   fp_rm_dynamic;
+  logic                   fp_alu_op_mod;  
+  logic [4:0]             fp_rf_raddr_a;
+  logic [4:0]             fp_rf_raddr_b;
+  logic [4:0]             fp_rf_raddr_c;
+  logic [W-1:0]           fp_rf_rdata_a;
+  logic [W-1:0]           fp_rf_rdata_b;
+  logic [W-1:0]           fp_rf_rdata_c;
+  logic                   fp_rf_wen_id;
+  logic                   is_fp_instr;
+  logic [2:0][W-1:0]      fp_operands;   // three operands in fpu   
+  logic                   fp_busy;
+  logic                   fpu_busy_idu;
+  logic [W-1:0]           fp_result;
+  logic [ 31:0]           data_wb;
+  logic [4:0]             fp_rf_waddr_id;
+  logic [4:0]             fp_rf_waddr_wb;
+  logic                   fp_rf_we;
+  logic                   fp_rf_wen_wb;
+  logic                   use_fp_rs1;
+  logic                   use_fp_rs2;
+  logic                   use_fp_rd;
+  logic                   fp_rf_write_wb;
+  logic [31:0]            rf_int_fp_lsu;
+  logic                   fp_swap_oprnds;
+  logic                   fpu_is_busy;
+  logic                   fp_load;
+  logic [31:0]            fp_rf_wdata_wb;
+  fpnew_pkg::status_t     fp_status;
+  fpnew_pkg::operation_e  fp_operation;
+  fpnew_pkg::roundmode_e  fp_rounding_mode;
+  fpnew_pkg::roundmode_e  fp_frm_csr;
+  fpnew_pkg::roundmode_e  fp_frm_fpnew;
+  fpnew_pkg::operation_e  fp_alu_operator;
+  fpnew_pkg::fp_format_e  fp_src_fmt;
+  fpnew_pkg::fp_format_e  fp_dst_fmt;
+
+  // brq                 
+  localparam int unsigned PMP_NUM_CHAN      = 2;
+  localparam bit          DataIndTiming     = Securebrq;
+  localparam bit          DummyInstructions = Securebrq;
+  localparam bit          PCIncrCheck       = Securebrq;
+  localparam bit          ShadowCSR         = Securebrq;
+  // Speculative branch option, trades-off performance against timing.
+  // Setting this to 1 eases branch target critical paths significantly but reduces performance
+  // by ~3% (based on CoreMark/MHz score).
+  // Set by default in the max PMP config which has the tightest budget for branch target timing.
+  localparam bit          SpecBranch        = PMPEnable & (PMPNumRegions == 16);
+  localparam bit          RegFileECC        = Securebrq;
+  localparam int unsigned RegFileDataWidth  = RegFileECC ? 32 + 7 : 32;
+
+  // IF/ID signals
+  logic        dummy_instr_id;
+  logic        instr_valid_id;
+  logic        instr_new_id;
+  logic [31:0] instr_rdata_id;                 // Instruction sampled inside IF stage
+  logic [31:0] instr_rdata_alu_id;             // Instruction sampled inside IF stage (replicated to
+                                               // ease fan-out)
+  logic [15:0] instr_rdata_c_id;               // Compressed instruction sampled inside IF stage
+  logic        instr_is_compressed_id;
+  logic        instr_perf_count_id;
+  logic        instr_bp_taken_id;
+  logic        instr_fetch_err;                // Bus error on instr fetch
+  logic        instr_fetch_err_plus2;          // Instruction error is misaligned
+  logic        illegal_c_insn_id;              // Illegal compressed instruction sent to ID stage
+  logic [31:0] pc_if;                          // Program counter in IF stage
+  logic [31:0] pc_id;                          // Program counter in ID stage
+  logic [31:0] pc_wb;                          // Program counter in WB stage
+  logic [33:0] imd_val_d_ex[2];                // Intermediate register for multicycle Ops
+  logic [33:0] imd_val_q_ex[2];                // Intermediate register for multicycle Ops
+  logic [1:0]  imd_val_we_ex;
+
+  logic        data_ind_timing;
+  logic        dummy_instr_en;
+  logic [2:0]  dummy_instr_mask;
+  logic        dummy_instr_seed_en;
+  logic [31:0] dummy_instr_seed;
+  logic        icache_enable;
+  logic        icache_inval;
+  logic        pc_mismatch_alert;
+  logic        csr_shadow_err;
+
+  logic        instr_first_cycle_id;
+  logic        instr_valid_clear;
+  logic        pc_set;
+  logic        pc_set_spec;
+  logic        nt_branch_mispredict;
+  pc_sel_e     pc_mux_id;                      // Mux selector for next PC
+  exc_pc_sel_e exc_pc_mux_id;                  // Mux selector for exception PC
+  exc_cause_e  exc_cause;                      // Exception cause
+
+  logic        lsu_load_err;
+  logic        lsu_store_err;
+
+  // LSU signals
+  logic        lsu_addr_incr_req;
+  logic [31:0] lsu_addr_last;
+
+  // Jump and branch target and decision (EX->IF)
+  logic [31:0] branch_target_ex;
+  logic        branch_decision;
+
+  // Core busy signals
+  logic        ctrl_busy;
+  logic        if_busy;
+  logic        lsu_busy;
+  logic        core_busy_d, core_busy_q;
+
+  // Register File
+  logic [4:0]  rf_raddr_a;
+  logic [31:0] rf_rdata_a;
+  logic [4:0]  rf_raddr_b;
+  logic [31:0] rf_rdata_b;
+  logic        rf_ren_a;
+  logic        rf_ren_b;
+  logic [4:0]  rf_waddr_wb;
+  logic [31:0] rf_wdata_wb;
+  // Writeback register write data that can be used on the forwarding path (doesn't factor in memory
+  // read data as this is too late for the forwarding path)
+  logic [31:0] rf_wdata_fwd_wb;
+  logic [31:0] rf_wdata_lsu;
+  logic        rf_we_wb;
+  logic        rf_we_lsu;
+
+  logic [4:0]  rf_waddr_id;
+  logic [31:0] rf_wdata_id;
+  logic        rf_we_id;
+  logic        rf_rd_a_wb_match;
+  logic        rf_rd_b_wb_match;
+
+  // ALU Control
+  alu_op_e     alu_operator_ex;
+  logic [31:0] alu_operand_a_ex;
+  logic [31:0] alu_operand_b_ex;
+
+  logic [31:0] bt_a_operand;
+  logic [31:0] bt_b_operand;
+
+  logic [31:0] alu_adder_result_ex;    // Used to forward computed address to LSU
+  logic [31:0] result_ex;
+
+  // Multiplier Control
+  logic        mult_en_ex;
+  logic        div_en_ex;
+  logic        mult_sel_ex;
+  logic        div_sel_ex;
+  md_op_e      multdiv_operator_ex;
+  logic [1:0]  multdiv_signed_mode_ex;
+  logic [31:0] multdiv_operand_a_ex;
+  logic [31:0] multdiv_operand_b_ex;
+  logic        multdiv_ready_id;
+
+  // CSR control
+  logic        csr_access;
+  csr_op_e     csr_op;
+  logic        csr_op_en;
+  csr_num_e    csr_addr;
+  logic [31:0] csr_rdata;
+  logic [31:0] csr_wdata;
+  logic        illegal_csr_insn_id;    // CSR access to non-existent register,
+                                       // with wrong priviledge level,
+                                       // or missing write permissions
+
+  // Data Memory Control
+  logic        lsu_we;
+  logic [1:0]  lsu_type;
+  logic        lsu_sign_ext;
+  logic        lsu_req;
+  logic [31:0] lsu_wdata;
+  logic        lsu_req_done;
+
+  // stall control
+  logic        id_in_ready;
+  logic        ex_valid;
+
+  logic        lsu_resp_valid;
+  logic        lsu_resp_err;
+
+  // Signals between instruction core interface and pipe (if and id stages)
+  logic        instr_req_int;          // Id stage asserts a req to instruction core interface
+
+  // Writeback stage
+  logic           en_wb;
+  wb_instr_type_e instr_type_wb;
+  logic           ready_wb;
+  logic           rf_write_wb;
+  logic           outstanding_load_wb;
+  logic           outstanding_store_wb;
+
+  // Interrupts
+  logic        irq_pending;            // to id_stage_i; also one of the clock_en wake-up terms
+  logic        nmi_mode;               // from id_stage_i
+  irqs_t       irqs;                   // to id_stage_i
+  logic        csr_mstatus_mie;        // to id_stage_i
+  logic [31:0] csr_mepc, csr_depc;     // exception / debug return addresses for if_stage_i
+
+  // PMP signals
+  logic [33:0] csr_pmp_addr [PMPNumRegions];
+  pmp_cfg_t    csr_pmp_cfg  [PMPNumRegions];
+  logic        pmp_req_err  [PMP_NUM_CHAN];  // indexed with PMP_I and PMP_D in this file
+  logic        instr_req_out;          // if_stage_i request before PMP qualification
+  logic        data_req_out;           // load_store_unit_i request before PMP qualification
+
+  logic        csr_save_if;            // csr_save_* / csr_restore_*: from id_stage_i
+  logic        csr_save_id;
+  logic        csr_save_wb;
+  logic        csr_restore_mret_id;
+  logic        csr_restore_dret_id;
+  logic        csr_save_cause;
+  logic        csr_mtvec_init;         // if_stage_i -> cs_registers_i
+  logic [31:0] csr_mtvec;              // cs_registers_i -> if_stage_i
+  logic [31:0] csr_mtval;              // from id_stage_i
+  logic        csr_mstatus_tw;         // to id_stage_i
+  priv_lvl_e   priv_mode_id;           // priv_mode_*: from cs_registers_i
+  priv_lvl_e   priv_mode_if;
+  priv_lvl_e   priv_mode_lsu;
+
+  // debug mode and dcsr configuration
+  logic        debug_mode;             // from id_stage_i
+  dbg_cause_e  debug_cause;            // from id_stage_i
+  logic        debug_csr_save;         // from id_stage_i
+  logic        debug_single_step;      // to id_stage_i
+  logic        debug_ebreakm;          // to id_stage_i
+  logic        debug_ebreaku;          // to id_stage_i
+  logic        trigger_match;          // to id_stage_i
+
+  // signals relating to instruction movements between pipeline stages
+  // used by performance counters and RVFI
+  logic        instr_id_done;          // from id_stage_i
+  logic        instr_done_wb;          // from wb_stage_i
+
+  logic        perf_instr_ret_wb;             // from wb_stage_i
+  logic        perf_instr_ret_compressed_wb;  // from wb_stage_i
+  logic        perf_iside_wait;        // id_in_ready & ~instr_valid_id
+  logic        perf_dside_wait;        // from id_stage_i
+  logic        perf_mul_wait;          // from id_stage_i
+  logic        perf_div_wait;          // from id_stage_i
+  logic        perf_jump;              // from id_stage_i
+  logic        perf_branch;            // from id_stage_i
+  logic        perf_tbranch;           // from id_stage_i
+  logic        perf_load;              // from load_store_unit_i
+  logic        perf_store;             // from load_store_unit_i
+
+  // for RVFI (unused_illegal_insn_id is a plain copy that sinks illegal_insn_id)
+  logic        illegal_insn_id, unused_illegal_insn_id; // ID stage sees an illegal instruction
+
+  // RISC-V Formal Interface signals (declared only when RVFI is defined)
+`ifdef RVFI
+  logic        rvfi_instr_new_wb;
+  logic        rvfi_intr_d;
+  logic        rvfi_intr_q;
+  logic        rvfi_set_trap_pc_d;
+  logic        rvfi_set_trap_pc_q;
+  logic [31:0] rvfi_insn_id;
+  logic [4:0]  rvfi_rs1_addr_d;
+  logic [4:0]  rvfi_rs1_addr_q;
+  logic [4:0]  rvfi_rs2_addr_d;
+  logic [4:0]  rvfi_rs2_addr_q;
+  logic [4:0]  rvfi_rs3_addr_d;
+  logic [31:0] rvfi_rs1_data_d;
+  logic [31:0] rvfi_rs1_data_q;
+  logic [31:0] rvfi_rs2_data_d;
+  logic [31:0] rvfi_rs2_data_q;
+  logic [31:0] rvfi_rs3_data_d;
+  logic [4:0]  rvfi_rd_addr_wb;        // = rf_waddr_wb
+  logic [4:0]  rvfi_rd_addr_q;
+  logic [4:0]  rvfi_rd_addr_d;
+  logic [31:0] rvfi_rd_wdata_wb;       // rf_wdata_wb when rf_we_wb, else rf_wdata_lsu
+  logic [31:0] rvfi_rd_wdata_d;
+  logic [31:0] rvfi_rd_wdata_q;
+  logic        rvfi_rd_we_wb;          // = rf_we_wb | rf_we_lsu
+  logic [3:0]  rvfi_mem_mask_int;
+  logic [31:0] rvfi_mem_rdata_d;
+  logic [31:0] rvfi_mem_rdata_q;
+  logic [31:0] rvfi_mem_wdata_d;
+  logic [31:0] rvfi_mem_wdata_q;
+  logic [31:0] rvfi_mem_addr_d;
+  logic [31:0] rvfi_mem_addr_q;
+`endif
+
+  //////////////////////
+  // Clock management //
+  //////////////////////
+
+  logic        clock_en;   // enable of core_clock_gate_i; core_sleep_o is its inverse
+
+  logic        clk;        // gated copy of clk_i, used to clock the pipeline stages
+
+  // The core may only go to sleep once the controller, the fetch side,
+  // the LSU and the FPU have all finished their ongoing operations.
+  assign core_busy_d = fp_busy | lsu_busy | if_busy | ctrl_busy;
+
+  // The busy flag is registered on clk_i (the input of the clock gate, not
+  // its output clk) because it is itself one of the terms of clock_en.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    // Asynchronous active-low reset: come up as "not busy".
+    if (!rst_ni) core_busy_q <= 1'b0;
+    else         core_busy_q <= core_busy_d;
+  end
+  // Sticky copy of fetch_enable_i: set by the first cycle in which it is high
+  logic fetch_enable_q;
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    // Reset is the only way to clear the flag again.
+    if (!rst_ni)             fetch_enable_q <= 1'b0;
+    // No branch writes 0 outside reset, so one pulse latches it for good.
+    else if (fetch_enable_i) fetch_enable_q <= 1'b1;
+    // Otherwise the register holds its value.
+  end
+
+  assign core_sleep_o = ~clock_en;
+  assign clock_en     = (core_busy_q | irq_pending | irq_nm_i | debug_req_i) & fetch_enable_q;
+
+  // Main clock gate of the core. It produces clk, which clocks the pipeline
+  // stages instantiated below. The clock of the debug unit is independent
+  // and is not generated here.
+  prim_clock_gating core_clock_gate_i (
+      .clk_o     ( clk       ),
+      .clk_i     ( clk_i     ),
+      .en_i      ( clock_en  ),
+      .test_en_i ( test_en_i )
+  );
+
+  //////////////
+  // IF stage //
+  //////////////
+
+  brq_ifu #(
+      .DmHaltAddr        ( DmHaltAddr        ),
+      .DmExceptionAddr   ( DmExceptionAddr   ),
+      .DummyInstructions ( DummyInstructions ),
+      .ICache            ( ICache            ),
+      .ICacheECC         ( ICacheECC         ),
+      .PCIncrCheck       ( PCIncrCheck       ),
+      .BranchPredictor   ( BranchPredictor   )
+  ) if_stage_i (
+      .clk_i                    ( clk                    ), // gated core clock
+      .rst_ni                   ( rst_ni                 ),
+
+      .boot_addr_i              ( boot_addr_i            ),
+      .req_i                    ( instr_req_int          ), // instruction request control
+
+      // instruction cache interface
+      .instr_req_o              ( instr_req_out          ), // masked by the PMP error to form instr_req_o
+      .instr_addr_o             ( instr_addr_o           ),
+      .instr_gnt_i              ( instr_gnt_i            ),
+      .instr_rvalid_i           ( instr_rvalid_i         ),
+      .instr_rdata_i            ( instr_rdata_i          ),
+      .instr_err_i              ( instr_err_i            ),
+      .instr_pmp_err_i          ( pmp_req_err[PMP_I]     ),
+
+      // outputs to ID stage
+      .instr_valid_id_o         ( instr_valid_id         ),
+      .instr_new_id_o           ( instr_new_id           ),
+      .instr_rdata_id_o         ( instr_rdata_id         ),
+      .instr_rdata_alu_id_o     ( instr_rdata_alu_id     ),
+      .instr_rdata_c_id_o       ( instr_rdata_c_id       ),
+      .instr_is_compressed_id_o ( instr_is_compressed_id ),
+      .instr_bp_taken_o         ( instr_bp_taken_id      ),
+      .instr_fetch_err_o        ( instr_fetch_err        ),
+      .instr_fetch_err_plus2_o  ( instr_fetch_err_plus2  ),
+      .illegal_c_insn_id_o      ( illegal_c_insn_id      ),
+      .dummy_instr_id_o         ( dummy_instr_id         ),
+      .pc_if_o                  ( pc_if                  ),
+      .pc_id_o                  ( pc_id                  ),
+
+      // control signals (the pc_*, exc_* and icache_inval inputs come from id_stage_i)
+      .instr_valid_clear_i      ( instr_valid_clear      ),
+      .pc_set_i                 ( pc_set                 ),
+      .pc_set_spec_i            ( pc_set_spec            ),
+      .pc_mux_i                 ( pc_mux_id              ),
+      .nt_branch_mispredict_i   ( nt_branch_mispredict   ),
+      .exc_pc_mux_i             ( exc_pc_mux_id          ),
+      .exc_cause                ( exc_cause              ),
+      .dummy_instr_en_i         ( dummy_instr_en         ),
+      .dummy_instr_mask_i       ( dummy_instr_mask       ),
+      .dummy_instr_seed_en_i    ( dummy_instr_seed_en    ),
+      .dummy_instr_seed_i       ( dummy_instr_seed       ),
+      .icache_enable_i          ( icache_enable          ),
+      .icache_inval_i           ( icache_inval           ),
+
+      // branch targets
+      .branch_target_ex_i       ( branch_target_ex       ), // from ex_block_i
+
+      // CSRs
+      .csr_mepc_i               ( csr_mepc               ), // exception return address
+      .csr_depc_i               ( csr_depc               ), // debug return address
+      .csr_mtvec_i              ( csr_mtvec              ), // trap-vector base address
+      .csr_mtvec_init_o         ( csr_mtvec_init         ), // to cs_registers_i
+
+      // pipeline stalls
+      .id_in_ready_i            ( id_in_ready            ),
+      .pc_mismatch_alert_o      ( pc_mismatch_alert      ), // one of the alert_major_o terms
+      .if_busy_o                ( if_busy                )  // one of the core_busy_d terms
+  );
+
+  // ISide wait: the ID/EX stage is ready to take a new instruction, but the
+  // IF stage is not presenting a valid one.
+  assign perf_iside_wait = ~instr_valid_id & id_in_ready;
+
+  // The instruction request only leaves the core if the PMP raises no error for it
+  assign instr_req_o = ~pmp_req_err[PMP_I] & instr_req_out;
+
+  //////////////
+  // ID stage //
+  //////////////
+
+  brq_idu #(
+      .RV32E           ( RV32E           ),
+      .RV32M           ( RV32M           ),
+      .RV32B           ( RV32B           ),
+      .BranchTargetALU ( BranchTargetALU ),
+      .DataIndTiming   ( DataIndTiming   ),
+      .SpecBranch      ( SpecBranch      ),
+      .WritebackStage  ( WritebackStage  ),
+      .BranchPredictor ( BranchPredictor )
+  ) id_stage_i (
+      .clk_i                        ( clk                      ), // gated core clock
+      .rst_ni                       ( rst_ni                   ),
+
+      // Processor Enable
+      .ctrl_busy_o                  ( ctrl_busy                ), // one of the core_busy_d terms
+      .illegal_insn_o               ( illegal_insn_id          ),
+
+      // from/to IF-ID pipeline register
+      .instr_valid_i                ( instr_valid_id           ),
+      .instr_rdata_i                ( instr_rdata_id           ),
+      .instr_rdata_alu_i            ( instr_rdata_alu_id       ),
+      .instr_rdata_c_i              ( instr_rdata_c_id         ),
+      .instr_is_compressed_i        ( instr_is_compressed_id   ),
+      .instr_bp_taken_i             ( instr_bp_taken_id        ),
+
+      // Jumps and branches
+      .branch_decision_i            ( branch_decision          ), // from ex_block_i
+
+      // IF and ID control signals
+      .instr_first_cycle_id_o       ( instr_first_cycle_id     ),
+      .instr_valid_clear_o          ( instr_valid_clear        ),
+      .id_in_ready_o                ( id_in_ready              ),
+      .instr_req_o                  ( instr_req_int            ),
+      .pc_set_o                     ( pc_set                   ),
+      .pc_set_spec_o                ( pc_set_spec              ),
+      .pc_mux_o                     ( pc_mux_id                ),
+      .nt_branch_mispredict_o       ( nt_branch_mispredict     ),
+      .exc_pc_mux_o                 ( exc_pc_mux_id            ),
+      .exc_cause_o                  ( exc_cause                ),
+      .icache_inval_o               ( icache_inval             ),
+
+      .instr_fetch_err_i            ( instr_fetch_err          ),
+      .instr_fetch_err_plus2_i      ( instr_fetch_err_plus2    ),
+      .illegal_c_insn_i             ( illegal_c_insn_id        ),
+
+      .pc_id_i                      ( pc_id                    ),
+
+      // Stalls
+      .ex_valid_i                   ( valid_id_fpu             ), // changed by zeeshan from
+                                                                  // ex_valid to valid_id_fpu
+                                                                  // for ready selection
+      .lsu_resp_valid_i             ( lsu_resp_valid           ),
+
+      .alu_operator_ex_o            ( alu_operator_ex          ),
+      .alu_operand_a_ex_o           ( alu_operand_a_ex         ),
+      .alu_operand_b_ex_o           ( alu_operand_b_ex         ),
+
+      .imd_val_q_ex_o               ( imd_val_q_ex             ),
+      .imd_val_d_ex_i               ( imd_val_d_ex             ),
+      .imd_val_we_ex_i              ( imd_val_we_ex            ),
+
+      .bt_a_operand_o               ( bt_a_operand             ),
+      .bt_b_operand_o               ( bt_b_operand             ),
+
+      .mult_en_ex_o                 ( mult_en_ex               ),
+      .div_en_ex_o                  ( div_en_ex                ),
+      .mult_sel_ex_o                ( mult_sel_ex              ),
+      .div_sel_ex_o                 ( div_sel_ex               ),
+      .multdiv_operator_ex_o        ( multdiv_operator_ex      ),
+      .multdiv_signed_mode_ex_o     ( multdiv_signed_mode_ex   ),
+      .multdiv_operand_a_ex_o       ( multdiv_operand_a_ex     ),
+      .multdiv_operand_b_ex_o       ( multdiv_operand_b_ex     ),
+      .multdiv_ready_id_o           ( multdiv_ready_id         ),
+
+      // CSR ID/EX
+      .csr_access_o                 ( csr_access               ),
+      .csr_op_o                     ( csr_op                   ),
+      .csr_op_en_o                  ( csr_op_en                ),
+      .csr_save_if_o                ( csr_save_if              ), // control signal to save PC
+      .csr_save_id_o                ( csr_save_id              ), // control signal to save PC
+      .csr_save_wb_o                ( csr_save_wb              ), // control signal to save PC
+      .csr_restore_mret_id_o        ( csr_restore_mret_id      ), // restore mstatus upon MRET
+      .csr_restore_dret_id_o        ( csr_restore_dret_id      ), // restore mstatus upon DRET
+      .csr_save_cause_o             ( csr_save_cause           ),
+      .csr_mtval_o                  ( csr_mtval                ),
+      .priv_mode_i                  ( priv_mode_id             ),
+      .csr_mstatus_tw_i             ( csr_mstatus_tw           ),
+      .illegal_csr_insn_i           ( illegal_csr_insn_id      ),
+      .data_ind_timing_i            ( data_ind_timing          ),
+
+      // LSU
+      .lsu_req_o                    ( lsu_req                  ), // to load store unit
+      .lsu_we_o                     ( lsu_we                   ), // to load store unit
+      .lsu_type_o                   ( lsu_type                 ), // to load store unit
+      .lsu_sign_ext_o               ( lsu_sign_ext             ), // to load store unit
+      .lsu_wdata_o                  ( lsu_wdata                ), // to load store unit
+      .lsu_req_done_i               ( lsu_req_done             ), // from load store unit
+
+      .lsu_addr_incr_req_i          ( lsu_addr_incr_req        ),
+      .lsu_addr_last_i              ( lsu_addr_last            ),
+
+      .lsu_load_err_i               ( lsu_load_err             ),
+      .lsu_store_err_i              ( lsu_store_err            ),
+
+      // Interrupt Signals
+      .csr_mstatus_mie_i            ( csr_mstatus_mie          ),
+      .irq_pending_i                ( irq_pending              ),
+      .irqs_i                       ( irqs                     ),
+      .irq_nm_i                     ( irq_nm_i                 ),
+      .nmi_mode_o                   ( nmi_mode                 ),
+
+      // Debug Signal
+      .debug_mode_o                 ( debug_mode               ),
+      .debug_cause_o                ( debug_cause              ),
+      .debug_csr_save_o             ( debug_csr_save           ),
+      .debug_req_i                  ( debug_req_i              ),
+      .debug_single_step_i          ( debug_single_step        ),
+      .debug_ebreakm_i              ( debug_ebreakm            ),
+      .debug_ebreaku_i              ( debug_ebreaku            ),
+      .trigger_match_i              ( trigger_match            ),
+
+      // write data to commit in the register file
+      .result_ex_i                  ( data_wb                  ), // changed by zeeshan from result_ex
+                                                                  // to data_wb for FCVT, FMV.WX ins
+      .csr_rdata_i                  ( csr_rdata                ),
+
+      .rf_raddr_a_o                 ( rf_raddr_a               ),
+      .rf_rdata_a_i                 ( rf_rdata_a               ),
+      .rf_raddr_b_o                 ( rf_raddr_b               ),
+      .rf_rdata_b_i                 ( rf_int_fp_lsu            ), // integer or FP port B, see rf_int_fp_lsu
+      .rf_ren_a_o                   ( rf_ren_a                 ),
+      .rf_ren_b_o                   ( rf_ren_b                 ),
+      .rf_waddr_id_o                ( rf_waddr_id              ),
+      .rf_wdata_id_o                ( rf_wdata_id              ),
+      .rf_we_id_o                   ( rf_we_id                 ),
+      .rf_rd_a_wb_match_o           ( rf_rd_a_wb_match         ),
+      .rf_rd_b_wb_match_o           ( rf_rd_b_wb_match         ),
+
+      .rf_waddr_wb_i                ( rf_waddr_wb              ),
+      .rf_wdata_fwd_wb_i            ( rf_wdata_fwd_wb          ),
+      .rf_write_wb_i                ( rf_write_wb              ),
+
+      .en_wb_o                      ( en_wb                    ),
+      .instr_type_wb_o              ( instr_type_wb            ),
+      .instr_perf_count_id_o        ( instr_perf_count_id      ),
+      .ready_wb_i                   ( ready_wb                 ),
+      .outstanding_load_wb_i        ( outstanding_load_wb      ),
+      .outstanding_store_wb_i       ( outstanding_store_wb     ),
+
+      // Performance Counters
+      .perf_jump_o                  ( perf_jump                ),
+      .perf_branch_o                ( perf_branch              ),
+      .perf_tbranch_o               ( perf_tbranch             ),
+      .perf_dside_wait_o            ( perf_dside_wait          ),
+      .perf_mul_wait_o              ( perf_mul_wait            ),
+      .perf_div_wait_o              ( perf_div_wait            ),
+      .instr_id_done_o              ( instr_id_done            ),
+
+      // Floating point extensions IO
+      .fp_rounding_mode_o              ( fp_rounding_mode      ),   // defines the rounding mode
+      .fp_rf_rdata_a_i                 ( fp_rf_rdata_a         ),
+      .fp_rf_rdata_b_i                 ( fp_rf_rdata_b         ),
+      .fp_rf_rdata_c_i                 ( fp_rf_rdata_c         ),
+      .fp_rf_raddr_a_o                 ( fp_rf_raddr_a         ),
+      .fp_rf_raddr_b_o                 ( fp_rf_raddr_b         ),
+      .fp_rf_raddr_c_o                 ( fp_rf_raddr_c         ),
+      .fp_rf_waddr_o                   ( fp_rf_waddr_id        ),
+      .fp_rf_we_o                      ( fp_rf_wen_id          ),
+      .fp_alu_operator_o               ( fp_alu_operator       ),
+      .fp_alu_op_mod_o                 ( fp_alu_op_mod         ),
+      .fp_src_fmt_o                    ( fp_src_fmt            ),
+      .fp_dst_fmt_o                    ( fp_dst_fmt            ),
+      .fp_rm_dynamic_o                 ( fp_rm_dynamic         ),
+      .fp_flush_o                      ( fp_flush              ),
+      .is_fp_instr_o                   ( is_fp_instr           ),
+      .use_fp_rs1_o                    ( use_fp_rs1            ),
+      .use_fp_rs2_o                    ( use_fp_rs2            ),
+      .use_fp_rs3_o                    ( use_fp_rs3            ),
+      .use_fp_rd_o                     ( use_fp_rd             ),
+      .fpu_busy_i                      ( fpu_busy_idu          ),
+      .fp_rf_write_wb_i                ( fp_rf_write_wb        ),
+      .fp_rf_wdata_fwd_wb_i            ( fp_rf_wdata_wb        ),
+      .fp_operands_o                   ( fp_operands           ),
+      .fp_load_o                       ( fp_load               )
+  );
+
+  // for RVFI only
+  assign unused_illegal_insn_id = illegal_insn_id;
+
+  brq_exu #(
+      .RV32M                    ( RV32M                    ),
+      .RV32B                    ( RV32B                    ),
+      .BranchTargetALU          ( BranchTargetALU          )
+  ) ex_block_i (
+      .clk_i                    ( clk                      ), // gated core clock
+      .rst_ni                   ( rst_ni                   ),
+
+      // ALU signal from ID stage
+      .alu_operator_i           ( alu_operator_ex          ),
+      .alu_operand_a_i          ( alu_operand_a_ex         ),
+      .alu_operand_b_i          ( alu_operand_b_ex         ),
+      .alu_instr_first_cycle_i  ( instr_first_cycle_id     ),
+
+      // Branch target ALU signal from ID stage
+      .bt_a_operand_i           ( bt_a_operand             ),
+      .bt_b_operand_i           ( bt_b_operand             ),
+
+      // Multiplier/Divider signal from ID stage
+      .multdiv_operator_i       ( multdiv_operator_ex      ),
+      .mult_en_i                ( mult_en_ex               ),
+      .div_en_i                 ( div_en_ex                ),
+      .mult_sel_i               ( mult_sel_ex              ),
+      .div_sel_i                ( div_sel_ex               ),
+      .multdiv_signed_mode_i    ( multdiv_signed_mode_ex   ),
+      .multdiv_operand_a_i      ( multdiv_operand_a_ex     ),
+      .multdiv_operand_b_i      ( multdiv_operand_b_ex     ),
+      .multdiv_ready_id_i       ( multdiv_ready_id         ),
+      .data_ind_timing_i        ( data_ind_timing          ),
+
+      // Intermediate value register (exchanged with id_stage_i via the imd_val_*_ex ports)
+      .imd_val_we_o             ( imd_val_we_ex            ),
+      .imd_val_d_o              ( imd_val_d_ex             ),
+      .imd_val_q_i              ( imd_val_q_ex             ),
+
+      // Outputs
+      .alu_adder_result_ex_o    ( alu_adder_result_ex      ), // to LSU
+      .result_ex_o              ( result_ex                ), // id_stage_i.result_ex_i is fed from data_wb instead
+
+      .branch_target_o          ( branch_target_ex         ), // to IF
+      .branch_decision_o        ( branch_decision          ), // to ID
+
+      .ex_valid_o               ( ex_valid                 )  // id_stage_i.ex_valid_i is fed from valid_id_fpu instead
+  );
+
+  /////////////////////
+  // Load/store unit //
+  /////////////////////
+
+  assign lsu_resp_err = lsu_store_err | lsu_load_err;        // either kind of access failed
+  assign data_req_o   = ~pmp_req_err[PMP_D] & data_req_out;  // a PMP error masks the request
+
+  brq_lsu load_store_unit_i (
+      .clk_i                 ( clk                 ), // gated core clock
+      .rst_ni                ( rst_ni              ),
+
+      // data interface
+      .data_req_o            ( data_req_out        ), // masked by the PMP error to form data_req_o
+      .data_gnt_i            ( data_gnt_i          ),
+      .data_rvalid_i         ( data_rvalid_i       ),
+      .data_err_i            ( data_err_i          ),
+      .data_pmp_err_i        ( pmp_req_err[PMP_D]  ),
+
+      .data_addr_o           ( data_addr_o         ),
+      .data_we_o             ( data_we_o           ),
+      .data_be_o             ( data_be_o           ),
+      .data_wdata_o          ( data_wdata_o        ),
+      .data_rdata_i          ( data_rdata_i        ),
+
+      // signals to/from ID/EX stage
+      .lsu_we_i              ( lsu_we              ),
+      .lsu_type_i            ( lsu_type            ),
+      .lsu_wdata_i           ( lsu_wdata           ),
+      .lsu_sign_ext_i        ( lsu_sign_ext        ),
+
+      .lsu_rdata_o           ( rf_wdata_lsu        ), // load data, to wb_stage_i
+      .lsu_rdata_valid_o     ( rf_we_lsu           ), // load data valid, to wb_stage_i
+      .lsu_req_i             ( lsu_req             ),
+      .lsu_req_done_o        ( lsu_req_done        ),
+
+      .adder_result_ex_i     ( alu_adder_result_ex ), // from ex_block_i
+
+      .addr_incr_req_o       ( lsu_addr_incr_req   ), // to id_stage_i
+      .addr_last_o           ( lsu_addr_last       ), // to id_stage_i
+
+
+      .lsu_resp_valid_o      ( lsu_resp_valid      ), // to id_stage_i and wb_stage_i
+
+      // exception signals
+      .load_err_o            ( lsu_load_err        ),
+      .store_err_o           ( lsu_store_err       ),
+
+      .busy_o                ( lsu_busy            ), // one of the core_busy_d terms
+
+      .perf_load_o           ( perf_load           ),
+      .perf_store_o          ( perf_store          )
+  );
+
+  brq_wbu #(
+    .WritebackStage ( WritebackStage )
+  ) wb_stage_i (
+    .clk_i                          ( clk                          ), // gated core clock
+    .rst_ni                         ( rst_ni                       ),
+    .en_wb_i                        ( en_wb                        ),
+    .instr_type_wb_i                ( instr_type_wb                ),
+    .pc_id_i                        ( pc_id                        ),
+    .instr_is_compressed_id_i       ( instr_is_compressed_id       ),
+    .instr_perf_count_id_i          ( instr_perf_count_id          ),
+
+    .ready_wb_o                     ( ready_wb                     ),
+    .rf_write_wb_o                  ( rf_write_wb                  ),
+    .outstanding_load_wb_o          ( outstanding_load_wb          ),
+    .outstanding_store_wb_o         ( outstanding_store_wb         ),
+    .pc_wb_o                        ( pc_wb                        ),
+    .perf_instr_ret_wb_o            ( perf_instr_ret_wb            ),
+    .perf_instr_ret_compressed_wb_o ( perf_instr_ret_compressed_wb ),
+
+    // write request coming from the ID stage
+    .rf_waddr_id_i                  ( rf_waddr_id                  ),
+    .rf_wdata_id_i                  ( rf_wdata_id                  ),
+    .rf_we_id_i                     ( rf_we_id                     ),
+
+    // load data coming from the LSU
+    .rf_wdata_lsu_i                 ( rf_wdata_lsu                 ),
+    .rf_we_lsu_i                    ( rf_we_lsu                    ),
+
+    .rf_wdata_fwd_wb_o              ( rf_wdata_fwd_wb              ), // to id_stage_i
+
+    // integer register file write port
+    .rf_waddr_wb_o                  ( rf_waddr_wb                  ),
+    .rf_wdata_wb_o                  ( rf_wdata_wb                  ),
+    .rf_we_wb_o                     ( rf_we_wb                     ),
+
+    .lsu_resp_valid_i               ( lsu_resp_valid               ),
+    .lsu_resp_err_i                 ( lsu_resp_err                 ),
+
+    .instr_done_wb_o                ( instr_done_wb                ),
+
+    // floating point (wen/waddr/wdata_wb drive the FP register file write port)
+    .fp_rf_write_wb_o               ( fp_rf_write_wb               ),
+    .fp_rf_wen_wb_o                 ( fp_rf_wen_wb                 ),
+    .fp_rf_waddr_wb_o               ( fp_rf_waddr_wb               ),
+    .fp_rf_wen_id_i                 ( fp_rf_wen_id                 ),
+    .fp_rf_waddr_id_i               ( fp_rf_waddr_id               ),
+    .fp_rf_wdata_wb_o               ( fp_rf_wdata_wb               ),
+    .fp_load_i                      ( fp_load                      )
+  );
+
+  ///////////////////////
+  // Register file ECC //
+  ///////////////////////
+
+  logic                        rf_ecc_err_comb;  // qualified ECC error of either read port
+  logic [RegFileDataWidth-1:0] rf_rdata_a_ecc;   // raw register file read data, port A
+  logic [RegFileDataWidth-1:0] rf_rdata_b_ecc;   // raw register file read data, port B
+  logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc;  // write data as presented to the register file
+
+  if (RegFileECC) begin : gen_regfile_ecc
+    // No error correction is performed here: an ECC error only triggers an alert.
+    logic [1:0] rf_ecc_err_a;
+    logic [1:0] rf_ecc_err_b;
+    logic       rf_ecc_err_a_id;
+    logic       rf_ecc_err_b_id;
+
+    // Write path: ECC checkbit generation for register file wdata
+    prim_secded_39_32_enc regfile_ecc_enc (
+      .out (rf_wdata_wb_ecc),
+      .in  (rf_wdata_wb)
+    );
+    // Read port A: check only. The error is qualified with the read enable and with
+    // WB forwarding to avoid xprop into the alert signal.
+    prim_secded_39_32_dec regfile_ecc_dec_a (
+      .err_o      (rf_ecc_err_a),
+      .syndrome_o (),
+      .d_o        (),
+      .in         (rf_rdata_a_ecc)
+    );
+    assign rf_rdata_a      = rf_rdata_a_ecc[31:0];
+    assign rf_ecc_err_a_id = rf_ren_a & ~rf_rd_a_wb_match & (|rf_ecc_err_a);
+
+    // Read port B: same check and qualification as port A
+    prim_secded_39_32_dec regfile_ecc_dec_b (
+      .err_o      (rf_ecc_err_b),
+      .syndrome_o (),
+      .d_o        (),
+      .in         (rf_rdata_b_ecc)
+    );
+    assign rf_rdata_b      = rf_rdata_b_ecc[31:0];
+    assign rf_ecc_err_b_id = rf_ren_b & ~rf_rd_b_wb_match & (|rf_ecc_err_b);
+
+    // Combined error: only reported while the instruction in ID is valid
+    assign rf_ecc_err_comb = (rf_ecc_err_a_id | rf_ecc_err_b_id) & instr_valid_id;
+
+  end else begin : gen_no_regfile_ecc
+    // No ECC: data passes straight through; the ECC-only inputs go to unused_* sinks.
+    logic unused_rf_ren_a, unused_rf_rd_a_wb_match;
+    logic unused_rf_ren_b, unused_rf_rd_b_wb_match;
+    assign unused_rf_rd_a_wb_match = rf_rd_a_wb_match;
+    assign unused_rf_rd_b_wb_match = rf_rd_b_wb_match;
+    assign unused_rf_ren_a         = rf_ren_a;
+    assign unused_rf_ren_b         = rf_ren_b;
+    assign rf_ecc_err_comb         = 1'b0;
+    assign rf_rdata_a              = rf_rdata_a_ecc;
+    assign rf_rdata_b              = rf_rdata_b_ecc;
+    assign rf_wdata_wb_ecc         = rf_wdata_wb;
+  end
+
+  // id_stage_i.rf_rdata_b_i: FP regfile port B if is_fp_instr & use_fp_rs2, else integer port B
+  assign rf_int_fp_lsu = (use_fp_rs2 & is_fp_instr) ? fp_rf_rdata_b : rf_rdata_b;
+
+  // Integer register file: RegFile selects at most one of three implementations.
+  // All of them are clocked from the ungated clk_i and share the same port hookup.
+  if (RegFile == RegFileFF) begin : gen_regfile_ff
+    brq_register_file_ff #(
+        .DataWidth         ( RegFileDataWidth  ),
+        .DummyInstructions ( DummyInstructions ),
+        .RV32E             ( RV32E             )
+    ) register_file_i (
+        .clk_i            ( clk_i           ),
+        .rst_ni           ( rst_ni          ),
+        .test_en_i        ( test_en_i       ),
+        .dummy_instr_id_i ( dummy_instr_id  ),
+        // two read ports, then the single write port fed by the writeback stage
+        .raddr_a_i        ( rf_raddr_a      ),
+        .raddr_b_i        ( rf_raddr_b      ),
+        .rdata_a_o        ( rf_rdata_a_ecc  ),
+        .rdata_b_o        ( rf_rdata_b_ecc  ),
+        .we_a_i           ( rf_we_wb        ),
+        .waddr_a_i        ( rf_waddr_wb     ),
+        .wdata_a_i        ( rf_wdata_wb_ecc )
+    );
+  end else if (RegFile == RegFileFPGA) begin : gen_regfile_fpga
+    brq_register_file_fpga #(
+        .DataWidth         ( RegFileDataWidth  ),
+        .DummyInstructions ( DummyInstructions ),
+        .RV32E             ( RV32E             )
+    ) register_file_i (
+        .clk_i            ( clk_i           ),
+        .rst_ni           ( rst_ni          ),
+        .test_en_i        ( test_en_i       ),
+        .dummy_instr_id_i ( dummy_instr_id  ),
+        // two read ports, then the single write port fed by the writeback stage
+        .raddr_a_i        ( rf_raddr_a      ),
+        .raddr_b_i        ( rf_raddr_b      ),
+        .rdata_a_o        ( rf_rdata_a_ecc  ),
+        .rdata_b_o        ( rf_rdata_b_ecc  ),
+        .we_a_i           ( rf_we_wb        ),
+        .waddr_a_i        ( rf_waddr_wb     ),
+        .wdata_a_i        ( rf_wdata_wb_ecc )
+    );
+  end else if (RegFile == RegFileLatch) begin : gen_regfile_latch
+    brq_register_file_latch #(
+        .DataWidth         ( RegFileDataWidth  ),
+        .DummyInstructions ( DummyInstructions ),
+        .RV32E             ( RV32E             )
+    ) register_file_i (
+        .clk_i            ( clk_i           ),
+        .rst_ni           ( rst_ni          ),
+        .test_en_i        ( test_en_i       ),
+        .dummy_instr_id_i ( dummy_instr_id  ),
+        // two read ports, then the single write port fed by the writeback stage
+        .raddr_a_i        ( rf_raddr_a      ),
+        .raddr_b_i        ( rf_raddr_b      ),
+        .rdata_a_o        ( rf_rdata_a_ecc  ),
+        .rdata_b_o        ( rf_rdata_b_ecc  ),
+        .we_a_i           ( rf_we_wb        ),
+        .waddr_a_i        ( rf_waddr_wb     ),
+        .wdata_a_i        ( rf_wdata_wb_ecc )
+    );
+  end
+
+  if (FloatingPoint) begin : gen_fp_regfile
+    // Floating-point register file, elaborated only when FloatingPoint is set.
+    // Like the integer register file it runs from the ungated clk_i.
+    brq_fp_register_file_ff #(
+      .DataWidth ( W   ),
+      .RVF       ( RVF )
+    ) fp_register_file (
+      .clk_i     ( clk_i          ),
+      .rst_ni    ( rst_ni         ),
+      // three read ports, addressed by id_stage_i
+      .raddr_a_i ( fp_rf_raddr_a  ),
+      .raddr_b_i ( fp_rf_raddr_b  ),
+      .raddr_c_i ( fp_rf_raddr_c  ),
+      .rdata_a_o ( fp_rf_rdata_a  ),
+      .rdata_b_o ( fp_rf_rdata_b  ),
+      .rdata_c_o ( fp_rf_rdata_c  ),
+      // single write port, driven by wb_stage_i
+      .we_a_i    ( fp_rf_wen_wb   ),
+      .waddr_a_i ( fp_rf_waddr_wb ),
+      .wdata_a_i ( fp_rf_wdata_wb )
+    );
+  end
+
+  ///////////////////
+  // Alert outputs //
+  ///////////////////
+
+  // Major alert - core is unrecoverable: shadow CSR error, PC mismatch
+  // reported by the IF stage, or a register file ECC error
+  assign alert_major_o = csr_shadow_err | pc_mismatch_alert | rf_ecc_err_comb;
+
+  // Minor alert - core is in a recoverable state; never raised yet (TODO add I$ ECC errors here)
+  assign alert_minor_o = 1'b0;
+
+
+
+  // Explicit INC_ASSERT block to avoid unused signal lint warnings where asserts are not included
+  `ifdef INC_ASSERT
+  // Assertion-only signals: is a load/store in ID, and is an LSU response expected
+  logic outstanding_load_id;
+  logic outstanding_store_id;
+
+  logic outstanding_load_resp;
+  logic outstanding_store_resp;
+  // An LSU request is decoded and executing in ID; lsu_we separates stores from loads.
+  assign outstanding_store_id = id_stage_i.lsu_req_dec & id_stage_i.instr_executing &
+                                id_stage_i.lsu_we;
+  assign outstanding_load_id  = id_stage_i.lsu_req_dec & id_stage_i.instr_executing &
+                                ~id_stage_i.lsu_we;
+
+  if (WritebackStage) begin : gen_wb_stage
+    // With a writeback stage the load/store may sit in ID or in WB. A response normally
+    // arrives once it is in WB; only an unaligned (split) access can see a response
+    // while the load/store is still in ID.
+    assign outstanding_store_resp =
+      (load_store_unit_i.split_misaligned_access & outstanding_store_id) | outstanding_store_wb;
+
+    assign outstanding_load_resp  =
+      (load_store_unit_i.split_misaligned_access & outstanding_load_id)  | outstanding_load_wb;
+
+    // Intended check (no assertion present here): a load must have reached writeback
+
+  end else begin : gen_no_wb_stage
+    // No writeback stage: a response is expected exactly when the load or store
+    // is in ID.
+    assign outstanding_store_resp = outstanding_store_id;
+    assign outstanding_load_resp  = outstanding_load_id;
+  end
+  `endif
+  
+
+  ////////////////////////
+  // RF (Register File) //
+  ////////////////////////
+`ifdef RVFI
+  assign rvfi_rd_we_wb    = rf_we_lsu | rf_we_wb;                    // either write source is active
+  assign rvfi_rd_wdata_wb = rf_we_wb ? rf_wdata_wb : rf_wdata_lsu;   // writeback data wins over load data
+  assign rvfi_rd_addr_wb  = rf_waddr_wb;
+`endif
+
+
+  /////////////////////////////////////////
+  // CSRs (Control and Status Registers) //
+  /////////////////////////////////////////
+
+  assign csr_wdata  = alu_operand_a_ex;
+  assign csr_addr   = csr_num_e'(csr_access ? alu_operand_b_ex[11:0] : 12'b0);
+
+  brq_cs_registers #(
+      .DbgTriggerEn      ( DbgTriggerEn      ),
+      .DbgHwBreakNum     ( DbgHwBreakNum     ),
+      .DataIndTiming     ( DataIndTiming     ),
+      .DummyInstructions ( DummyInstructions ),
+      .ShadowCSR         ( ShadowCSR         ),
+      .ICache            ( ICache            ),
+      .MHPMCounterNum    ( MHPMCounterNum    ),
+      .MHPMCounterWidth  ( MHPMCounterWidth  ),
+      .PMPEnable         ( PMPEnable         ),
+      .PMPGranularity    ( PMPGranularity    ),
+      .PMPNumRegions     ( PMPNumRegions     ),
+      .RV32E             ( RV32E             ),
+      .RV32M             ( RV32M             )
+  ) cs_registers_i (
+      .clk_i                   ( clk                          ),
+      .rst_ni                  ( rst_ni                       ),
+
+      // Hart ID from outside
+      .hart_id_i               ( hart_id_i                    ),
+      .priv_mode_id_o          ( priv_mode_id                 ),
+      .priv_mode_if_o          ( priv_mode_if                 ),
+      .priv_mode_lsu_o         ( priv_mode_lsu                ),
+
+      // mtvec
+      .csr_mtvec_o             ( csr_mtvec                    ),
+      .csr_mtvec_init_i        ( csr_mtvec_init               ),
+      .boot_addr_i             ( boot_addr_i                  ),
+
+      // Interface to CSRs (SRAM like)
+      .csr_access_i            ( csr_access                   ),
+      .csr_addr_i              ( csr_addr                     ),
+      .csr_wdata_i             ( csr_wdata                    ),
+      .csr_op_i                ( csr_op                       ),
+      .csr_op_en_i             ( csr_op_en                    ),
+      .csr_rdata_o             ( csr_rdata                    ),
+
+      // Interrupt related control signals
+      .irq_software_i          ( irq_software_i               ),
+      .irq_timer_i             ( irq_timer_i                  ),
+      .irq_external_i          ( irq_external_i               ),
+      .irq_fast_i              ( irq_fast_i                   ),
+      .nmi_mode_i              ( nmi_mode                     ),
+      .irq_pending_o           ( irq_pending                  ),
+      .irqs_o                  ( irqs                         ),
+      .csr_mstatus_mie_o       ( csr_mstatus_mie              ),
+      .csr_mstatus_tw_o        ( csr_mstatus_tw               ),
+      .csr_mepc_o              ( csr_mepc                     ),
+
+      // PMP
+      .csr_pmp_cfg_o           ( csr_pmp_cfg                  ),
+      .csr_pmp_addr_o          ( csr_pmp_addr                 ),
+
+      // debug
+      .csr_depc_o              ( csr_depc                     ),
+      .debug_mode_i            ( debug_mode                   ),
+      .debug_cause_i           ( debug_cause                  ),
+      .debug_csr_save_i        ( debug_csr_save               ),
+      .debug_single_step_o     ( debug_single_step            ),
+      .debug_ebreakm_o         ( debug_ebreakm                ),
+      .debug_ebreaku_o         ( debug_ebreaku                ),
+      .trigger_match_o         ( trigger_match                ),
+
+      .pc_if_i                 ( pc_if                        ),
+      .pc_id_i                 ( pc_id                        ),
+      .pc_wb_i                 ( pc_wb                        ),
+
+      .data_ind_timing_o       ( data_ind_timing              ),
+      .dummy_instr_en_o        ( dummy_instr_en               ),
+      .dummy_instr_mask_o      ( dummy_instr_mask             ),
+      .dummy_instr_seed_en_o   ( dummy_instr_seed_en          ),
+      .dummy_instr_seed_o      ( dummy_instr_seed             ),
+      .icache_enable_o         ( icache_enable                ),
+      .csr_shadow_err_o        ( csr_shadow_err               ),
+
+      .csr_save_if_i           ( csr_save_if                  ),
+      .csr_save_id_i           ( csr_save_id                  ),
+      .csr_save_wb_i           ( csr_save_wb                  ),
+      .csr_restore_mret_i      ( csr_restore_mret_id          ),
+      .csr_restore_dret_i      ( csr_restore_dret_id          ),
+      .csr_save_cause_i        ( csr_save_cause               ),
+      .csr_mcause_i            ( exc_cause                    ),
+      .csr_mtval_i             ( csr_mtval                    ),
+      .illegal_csr_insn_o      ( illegal_csr_insn_id          ),
+
+      // performance counter related signals
+      .instr_ret_i             ( perf_instr_ret_wb            ),
+      .instr_ret_compressed_i  ( perf_instr_ret_compressed_wb ),
+      .iside_wait_i            ( perf_iside_wait              ),
+      .jump_i                  ( perf_jump                    ),
+      .branch_i                ( perf_branch                  ),
+      .branch_taken_i          ( perf_tbranch                 ),
+      .mem_load_i              ( perf_load                    ),
+      .mem_store_i             ( perf_store                   ),
+      .dside_wait_i            ( perf_dside_wait              ),
+      .mul_wait_i              ( perf_mul_wait                ),
+      .div_wait_i              ( perf_div_wait                ),
+
+      // floating point
+      .fp_rm_dynamic_i         ( fp_rm_dynamic                ),
+      .fp_frm_o                ( fp_frm_csr                   ),
+      .fp_status_i             ( fp_status                    ),
+      .is_fp_instr_i           ( is_fp_instr                  )
+  );
+
+  assign fp_frm_fpnew   = fp_rm_dynamic ? fp_frm_csr : fp_rounding_mode;
+  assign in_ready_c2fpu = id_in_ready; //multdiv_ready_id;
+  assign in_valid_c2fpu = (instr_valid_id & is_fp_instr);
+  // assign ready_id_fpu = id_in_ready; // (is_fp_instr) ? out_ready_fpu2c : id_in_ready;
+  assign valid_id_fpu = (is_fp_instr) ? out_valid_fpu2c : ex_valid;
+  
+// FPU instance
+  fpnew_top #(
+    .Features       ( fpnew_pkg::RV32F          ),
+    .Implementation ( fpnew_pkg::DEFAULT_NOREGS ),
+    .TagType        ( logic                     )
+  ) i_fpnew_top (
+    .clk_i          ( clk              ),
+    .rst_ni         ( rst_ni           ),
+    .operands_i     ( fp_operands      ),
+    .rnd_mode_i     ( fp_frm_fpnew     ),
+    .op_i           ( fp_alu_operator  ),
+    .op_mod_i       ( fp_alu_op_mod    ),
+    .src_fmt_i      ( fp_src_fmt       ),
+    .dst_fmt_i      ( fp_dst_fmt       ),
+    .int_fmt_i      ( fpnew_pkg::INT32 ),
+    .vectorial_op_i ( 1'b0             ),
+    .tag_i          ( '1               ),
+    .in_valid_i     ( in_valid_c2fpu   ),
+    .in_ready_o     ( out_ready_fpu2c  ),
+    .flush_i        ( fp_flush         ),
+    .result_o       ( fp_result        ),
+    .status_o       ( fp_status        ),
+    .tag_o          (                  ),
+    .out_valid_o    ( out_valid_fpu2c  ),
+    .out_ready_i    ( in_ready_c2fpu   ),
+    .busy_o         ( fp_busy          )
+  );
+
+  assign fpu_busy_idu = fp_busy & (~out_valid_fpu2c);
+  assign data_wb = is_fp_instr ? fp_result : result_ex;
+
+  // These assertions are in top-level as instr_valid_id required as the enable term
+
+  if (PMPEnable) begin : g_pmp
+    logic [33:0] pmp_req_addr [PMP_NUM_CHAN];
+    pmp_req_e    pmp_req_type [PMP_NUM_CHAN];
+    priv_lvl_e   pmp_priv_lvl [PMP_NUM_CHAN];
+
+    assign pmp_req_addr[PMP_I] = {2'b00,instr_addr_o[31:0]};
+    assign pmp_req_type[PMP_I] = PMP_ACC_EXEC;
+    assign pmp_priv_lvl[PMP_I] = priv_mode_if;
+    assign pmp_req_addr[PMP_D] = {2'b00,data_addr_o[31:0]};
+    assign pmp_req_type[PMP_D] = data_we_o ? PMP_ACC_WRITE : PMP_ACC_READ;
+    assign pmp_priv_lvl[PMP_D] = priv_mode_lsu;
+
+    brq_pmp #(
+        .PMPGranularity        ( PMPGranularity ),
+        .PMPNumChan            ( PMP_NUM_CHAN   ),
+        .PMPNumRegions         ( PMPNumRegions  )
+    ) pmp_i (
+        .clk_i                 ( clk           ),
+        .rst_ni                ( rst_ni         ),
+        // Interface to CSRs
+        .csr_pmp_cfg_i         ( csr_pmp_cfg    ),
+        .csr_pmp_addr_i        ( csr_pmp_addr   ),
+        .priv_mode_i           ( pmp_priv_lvl   ),
+        // Access checking channels
+        .pmp_req_addr_i        ( pmp_req_addr   ),
+        .pmp_req_type_i        ( pmp_req_type   ),
+        .pmp_req_err_o         ( pmp_req_err    )
+    );
+  end else begin : g_no_pmp
+    // Unused signal tieoff
+    priv_lvl_e unused_priv_lvl_if, unused_priv_lvl_ls;
+    logic [33:0] unused_csr_pmp_addr [PMPNumRegions];
+    pmp_cfg_t    unused_csr_pmp_cfg  [PMPNumRegions];
+    assign unused_priv_lvl_if = priv_mode_if;
+    assign unused_priv_lvl_ls = priv_mode_lsu;
+    assign unused_csr_pmp_addr = csr_pmp_addr;
+    assign unused_csr_pmp_cfg = csr_pmp_cfg;
+
+    // Output tieoff
+    assign pmp_req_err[PMP_I] = 1'b0;
+    assign pmp_req_err[PMP_D] = 1'b0;
+  end
+
+`ifdef RVFI
+  // When writeback stage is present RVFI information is emitted when instruction is finished in
+  // third stage but some information must be captured whilst the instruction is in the second
+  // stage. Without writeback stage RVFI information is all emitted when instruction retires in
+  // second stage. RVFI outputs are all straight from flops. So 2 stage pipeline requires a single
+  // set of flops (instr_info => RVFI_out), 3 stage pipeline requires two sets (instr_info => wb
+  // => RVFI_out)
+  localparam int RVFI_STAGES = WritebackStage ? 2 : 1;
+
+  logic        rvfi_stage_valid     [RVFI_STAGES];
+  logic [63:0] rvfi_stage_order     [RVFI_STAGES];
+  logic [31:0] rvfi_stage_insn      [RVFI_STAGES];
+  logic        rvfi_stage_trap      [RVFI_STAGES];
+  logic        rvfi_stage_halt      [RVFI_STAGES];
+  logic        rvfi_stage_intr      [RVFI_STAGES];
+  logic [ 1:0] rvfi_stage_mode      [RVFI_STAGES];
+  logic [ 1:0] rvfi_stage_ixl       [RVFI_STAGES];
+  logic [ 4:0] rvfi_stage_rs1_addr  [RVFI_STAGES];
+  logic [ 4:0] rvfi_stage_rs2_addr  [RVFI_STAGES];
+  logic [ 4:0] rvfi_stage_rs3_addr  [RVFI_STAGES];
+  logic [31:0] rvfi_stage_rs1_rdata [RVFI_STAGES];
+  logic [31:0] rvfi_stage_rs2_rdata [RVFI_STAGES];
+  logic [31:0] rvfi_stage_rs3_rdata [RVFI_STAGES];
+  logic [ 4:0] rvfi_stage_rd_addr   [RVFI_STAGES];
+  logic [31:0] rvfi_stage_rd_wdata  [RVFI_STAGES];
+  logic [31:0] rvfi_stage_pc_rdata  [RVFI_STAGES];
+  logic [31:0] rvfi_stage_pc_wdata  [RVFI_STAGES];
+  logic [31:0] rvfi_stage_mem_addr  [RVFI_STAGES];
+  logic [ 3:0] rvfi_stage_mem_rmask [RVFI_STAGES];
+  logic [ 3:0] rvfi_stage_mem_wmask [RVFI_STAGES];
+  logic [31:0] rvfi_stage_mem_rdata [RVFI_STAGES];
+  logic [31:0] rvfi_stage_mem_wdata [RVFI_STAGES];
+
+  logic        rvfi_stage_valid_d   [RVFI_STAGES];
+
+  assign rvfi_valid     = rvfi_stage_valid    [RVFI_STAGES-1];
+  assign rvfi_order     = rvfi_stage_order    [RVFI_STAGES-1];
+  assign rvfi_insn      = rvfi_stage_insn     [RVFI_STAGES-1];
+  assign rvfi_trap      = rvfi_stage_trap     [RVFI_STAGES-1];
+  assign rvfi_halt      = rvfi_stage_halt     [RVFI_STAGES-1];
+  assign rvfi_intr      = rvfi_stage_intr     [RVFI_STAGES-1];
+  assign rvfi_mode      = rvfi_stage_mode     [RVFI_STAGES-1];
+  assign rvfi_ixl       = rvfi_stage_ixl      [RVFI_STAGES-1];
+  assign rvfi_rs1_addr  = rvfi_stage_rs1_addr [RVFI_STAGES-1];
+  assign rvfi_rs2_addr  = rvfi_stage_rs2_addr [RVFI_STAGES-1];
+  assign rvfi_rs3_addr  = rvfi_stage_rs3_addr [RVFI_STAGES-1];
+  assign rvfi_rs1_rdata = rvfi_stage_rs1_rdata[RVFI_STAGES-1];
+  assign rvfi_rs2_rdata = rvfi_stage_rs2_rdata[RVFI_STAGES-1];
+  assign rvfi_rs3_rdata = rvfi_stage_rs3_rdata[RVFI_STAGES-1];
+  assign rvfi_rd_addr   = rvfi_stage_rd_addr  [RVFI_STAGES-1];
+  assign rvfi_rd_wdata  = rvfi_stage_rd_wdata [RVFI_STAGES-1];
+  assign rvfi_pc_rdata  = rvfi_stage_pc_rdata [RVFI_STAGES-1];
+  assign rvfi_pc_wdata  = rvfi_stage_pc_wdata [RVFI_STAGES-1];
+  assign rvfi_mem_addr  = rvfi_stage_mem_addr [RVFI_STAGES-1];
+  assign rvfi_mem_rmask = rvfi_stage_mem_rmask[RVFI_STAGES-1];
+  assign rvfi_mem_wmask = rvfi_stage_mem_wmask[RVFI_STAGES-1];
+  assign rvfi_mem_rdata = rvfi_stage_mem_rdata[RVFI_STAGES-1];
+  assign rvfi_mem_wdata = rvfi_stage_mem_wdata[RVFI_STAGES-1];
+
+  if (WritebackStage) begin : gen_rvfi_wb_stage
+    logic unused_instr_new_id;
+
+    assign unused_instr_new_id = instr_new_id;
+
+    // With writeback stage first RVFI stage buffers instruction information captured in ID/EX
+    // awaiting instruction retirement and RF Write data/Mem read data whilst instruction is in WB
+    // So first stage becomes valid when instruction leaves ID/EX stage and remains valid until
+    // instruction leaves WB
+    assign rvfi_stage_valid_d[0] = (instr_id_done & ~dummy_instr_id) |
+                                   (rvfi_stage_valid[0] & ~instr_done_wb);
+    // Second stage is output stage so simple valid cycle after instruction leaves WB (and so has
+    // retired)
+    assign rvfi_stage_valid_d[1] = instr_done_wb;
+
+    // Signal new instruction in WB cycle after instruction leaves ID/EX (to enter WB)
+    logic rvfi_instr_new_wb_q;
+
+    assign rvfi_instr_new_wb = rvfi_instr_new_wb_q;
+
+    always_ff @(posedge clk or negedge rst_ni) begin
+      if (~rst_ni) begin
+        rvfi_instr_new_wb_q <= 0;
+      end else begin
+        rvfi_instr_new_wb_q <= instr_id_done;
+      end
+    end
+  end else begin : gen_rvfi_no_wb_stage
+    // Without writeback stage first RVFI stage is output stage so simply valid the cycle after
+    // instruction leaves ID/EX (and so has retired)
+    assign rvfi_stage_valid_d[0] = instr_id_done & ~dummy_instr_id;
+    // Without writeback stage signal new instr_new_wb when instruction enters ID/EX to correctly
+    // setup register write signals
+    assign rvfi_instr_new_wb = instr_new_id;
+  end
+
+  for (genvar i = 0;i < RVFI_STAGES; i = i + 1) begin : g_rvfi_stages
+    always_ff @(posedge clk or negedge rst_ni) begin
+      if (!rst_ni) begin
+        rvfi_stage_halt[i]      <= '0;
+        rvfi_stage_trap[i]      <= '0;
+        rvfi_stage_intr[i]      <= '0;
+        rvfi_stage_order[i]     <= '0;
+        rvfi_stage_insn[i]      <= '0;
+        rvfi_stage_mode[i]      <= {PRIV_LVL_M};
+        rvfi_stage_ixl[i]       <= CSR_MISA_MXL;
+        rvfi_stage_rs1_addr[i]  <= '0;
+        rvfi_stage_rs2_addr[i]  <= '0;
+        rvfi_stage_rs3_addr[i]  <= '0;
+        rvfi_stage_pc_rdata[i]  <= '0;
+        rvfi_stage_pc_wdata[i]  <= '0;
+        rvfi_stage_mem_rmask[i] <= '0;
+        rvfi_stage_mem_wmask[i] <= '0;
+        rvfi_stage_valid[i]     <= '0;
+        rvfi_stage_rs1_rdata[i] <= '0;
+        rvfi_stage_rs2_rdata[i] <= '0;
+        rvfi_stage_rs3_rdata[i] <= '0;
+        rvfi_stage_rd_wdata[i]  <= '0;
+        rvfi_stage_rd_addr[i]   <= '0;
+        rvfi_stage_mem_rdata[i] <= '0;
+        rvfi_stage_mem_wdata[i] <= '0;
+        rvfi_stage_mem_addr[i]  <= '0;
+      end else begin
+        rvfi_stage_valid[i] <= rvfi_stage_valid_d[i];
+
+        if (i == 0) begin
+          if(instr_id_done) begin
+            rvfi_stage_halt[i]      <= '0;
+            rvfi_stage_trap[i]      <= illegal_insn_id;
+            rvfi_stage_intr[i]      <= rvfi_intr_d;
+            rvfi_stage_order[i]     <= rvfi_stage_order[i] + 64'(rvfi_stage_valid_d[i]);
+            rvfi_stage_insn[i]      <= rvfi_insn_id;
+            rvfi_stage_mode[i]      <= {priv_mode_id};
+            rvfi_stage_ixl[i]       <= CSR_MISA_MXL;
+            rvfi_stage_rs1_addr[i]  <= rvfi_rs1_addr_d;
+            rvfi_stage_rs2_addr[i]  <= rvfi_rs2_addr_d;
+            rvfi_stage_rs3_addr[i]  <= rvfi_rs3_addr_d;
+            rvfi_stage_pc_rdata[i]  <= pc_id;
+            rvfi_stage_pc_wdata[i]  <= pc_set ? branch_target_ex : pc_if;
+            rvfi_stage_mem_rmask[i] <= rvfi_mem_mask_int;
+            rvfi_stage_mem_wmask[i] <= data_we_o ? rvfi_mem_mask_int : 4'b0000;
+            rvfi_stage_rs1_rdata[i] <= rvfi_rs1_data_d;
+            rvfi_stage_rs2_rdata[i] <= rvfi_rs2_data_d;
+            rvfi_stage_rs3_rdata[i] <= rvfi_rs3_data_d;
+            rvfi_stage_rd_addr[i]   <= rvfi_rd_addr_d;
+            rvfi_stage_rd_wdata[i]  <= rvfi_rd_wdata_d;
+            rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d;
+            rvfi_stage_mem_wdata[i] <= rvfi_mem_wdata_d;
+            rvfi_stage_mem_addr[i]  <= rvfi_mem_addr_d;
+          end
+        end else begin
+          if(instr_done_wb) begin
+            rvfi_stage_halt[i]      <= rvfi_stage_halt[i-1];
+            rvfi_stage_trap[i]      <= rvfi_stage_trap[i-1];
+            rvfi_stage_intr[i]      <= rvfi_stage_intr[i-1];
+            rvfi_stage_order[i]     <= rvfi_stage_order[i-1];
+            rvfi_stage_insn[i]      <= rvfi_stage_insn[i-1];
+            rvfi_stage_mode[i]      <= rvfi_stage_mode[i-1];
+            rvfi_stage_ixl[i]       <= rvfi_stage_ixl[i-1];
+            rvfi_stage_rs1_addr[i]  <= rvfi_stage_rs1_addr[i-1];
+            rvfi_stage_rs2_addr[i]  <= rvfi_stage_rs2_addr[i-1];
+            rvfi_stage_rs3_addr[i]  <= rvfi_stage_rs3_addr[i-1];
+            rvfi_stage_pc_rdata[i]  <= rvfi_stage_pc_rdata[i-1];
+            rvfi_stage_pc_wdata[i]  <= rvfi_stage_pc_wdata[i-1];
+            rvfi_stage_mem_rmask[i] <= rvfi_stage_mem_rmask[i-1];
+            rvfi_stage_mem_wmask[i] <= rvfi_stage_mem_wmask[i-1];
+            rvfi_stage_rs1_rdata[i] <= rvfi_stage_rs1_rdata[i-1];
+            rvfi_stage_rs2_rdata[i] <= rvfi_stage_rs2_rdata[i-1];
+            rvfi_stage_rs3_rdata[i] <= rvfi_stage_rs3_rdata[i-1];
+            rvfi_stage_mem_wdata[i] <= rvfi_stage_mem_wdata[i-1];
+            rvfi_stage_mem_addr[i]  <= rvfi_stage_mem_addr[i-1];
+
+            // For 2 RVFI_STAGES/Writeback Stage ignore first stage flops for rd_addr, rd_wdata and
+            // mem_rdata. For RF write addr/data actual write happens in writeback so capture
+            // address/data there. For mem_rdata that is only available from the writeback stage.
+            // Previous stage flops still exist in RTL as they are used by the non writeback config
+            rvfi_stage_rd_addr[i]   <= rvfi_rd_addr_d;
+            rvfi_stage_rd_wdata[i]  <= rvfi_rd_wdata_d;
+            rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d;
+          end
+        end
+      end
+    end
+  end
+
+
+  // Memory address/write data available first cycle of ld/st instruction from register read
+  always_comb begin
+    // Hold the registered address/write data unless instr_first_cycle_id is set
+    rvfi_mem_addr_d  = rvfi_mem_addr_q;
+    rvfi_mem_wdata_d = rvfi_mem_wdata_q;
+    if (instr_first_cycle_id) begin
+      rvfi_mem_addr_d  = alu_adder_result_ex;
+      rvfi_mem_wdata_d = lsu_wdata;
+    end
+  end
+
+  // Capture read data from LSU when it becomes valid
+  always_comb begin
+    // Default to the held value; a valid LSU response replaces it with rf_wdata_lsu
+    rvfi_mem_rdata_d = rvfi_mem_rdata_q;
+    if (lsu_resp_valid) begin
+      rvfi_mem_rdata_d = rf_wdata_lsu;
+    end
+  end
+
+  always_ff @(posedge clk or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rvfi_mem_addr_q  <= '0;
+      rvfi_mem_rdata_q <= '0;
+      rvfi_mem_wdata_q <= '0;
+    end else begin
+      rvfi_mem_addr_q  <= rvfi_mem_addr_d;
+      rvfi_mem_rdata_q <= rvfi_mem_rdata_d;
+      rvfi_mem_wdata_q <= rvfi_mem_wdata_d;
+    end
+  end
+  // Byte enable based on data type
+  always_comb begin
+    unique case (lsu_type)
+      2'b00:   rvfi_mem_mask_int = 4'b1111;
+      2'b01:   rvfi_mem_mask_int = 4'b0011;
+      2'b10:   rvfi_mem_mask_int = 4'b0001;
+      default: rvfi_mem_mask_int = 4'b0000;
+    endcase
+  end
+
+  always_comb begin
+    // Report instr_rdata_id, or instr_rdata_c_id padded with 16 zero bits when compressed
+    rvfi_insn_id = instr_rdata_id;
+    if (instr_is_compressed_id) begin
+      rvfi_insn_id = {16'b0, instr_rdata_c_id};
+    end
+  end
+
+  // Source registers 1 and 2 are read in the first instruction cycle
+  // Source register 3 is read in the second instruction cycle.
+  always_comb begin
+    if (instr_first_cycle_id) begin
+      rvfi_rs1_data_d = rf_ren_a ? multdiv_operand_a_ex : '0;
+      rvfi_rs1_addr_d = rf_ren_a ? rf_raddr_a : '0;
+      rvfi_rs2_data_d = rf_ren_b ? multdiv_operand_b_ex : '0;
+      rvfi_rs2_addr_d = rf_ren_b ? rf_raddr_b : '0;
+      rvfi_rs3_data_d = '0;
+      rvfi_rs3_addr_d = '0;
+    end else begin
+      rvfi_rs1_data_d = rvfi_rs1_data_q;
+      rvfi_rs1_addr_d = rvfi_rs1_addr_q;
+      rvfi_rs2_data_d = rvfi_rs2_data_q;
+      rvfi_rs2_addr_d = rvfi_rs2_addr_q;
+      rvfi_rs3_data_d = multdiv_operand_a_ex;
+      rvfi_rs3_addr_d = rf_raddr_a;
+    end
+  end
+  always_ff @(posedge clk or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rvfi_rs1_data_q <= '0;
+      rvfi_rs1_addr_q <= '0;
+      rvfi_rs2_data_q <= '0;
+      rvfi_rs2_addr_q <= '0;
+
+    end else begin
+      rvfi_rs1_data_q <= rvfi_rs1_data_d;
+      rvfi_rs1_addr_q <= rvfi_rs1_addr_d;
+      rvfi_rs2_data_q <= rvfi_rs2_data_d;
+      rvfi_rs2_addr_q <= rvfi_rs2_addr_d;
+    end
+  end
+
+  always_comb begin
+    // Default: keep the previously captured write address/data, so the values only change on
+    // a register file write or when a new instruction is flagged
+    rvfi_rd_addr_d  = rvfi_rd_addr_q;
+    rvfi_rd_wdata_d = rvfi_rd_wdata_q;
+
+    if (rvfi_rd_we_wb) begin
+      // Register file write: record its address and data
+      rvfi_rd_addr_d  = rvfi_rd_addr_wb;
+      rvfi_rd_wdata_d = rvfi_rd_wdata_wb;
+      // Writes to x0 must report zero write data (RVFI specification)
+      if (rvfi_rd_addr_wb == 5'b0) begin
+        rvfi_rd_wdata_d = '0;
+      end
+    end else if (rvfi_instr_new_wb) begin
+      // No RF write but a new instruction in Writeback (when present) or ID/EX (no writeback
+      // stage): report zero write address/data as required by the RVFI specification
+      rvfi_rd_addr_d  = '0;
+      rvfi_rd_wdata_d = '0;
+    end
+  end
+
+  // RD write register is refreshed only once per cycle and
+  // then it is kept stable for the cycle.
+  always_ff @(posedge clk or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rvfi_rd_addr_q    <= '0;
+      rvfi_rd_wdata_q   <= '0;
+    end else begin
+      rvfi_rd_addr_q    <= rvfi_rd_addr_d;
+      rvfi_rd_wdata_q   <= rvfi_rd_wdata_d;
+    end
+  end
+
+  // rvfi_intr must be set for first instruction that is part of a trap handler.
+  // On the first cycle of a new instruction see if a trap PC was set by the previous instruction,
+  // otherwise maintain value.
+  assign rvfi_intr_d = instr_first_cycle_id ? rvfi_set_trap_pc_q : rvfi_intr_q;
+
+  always_comb begin
+    if (pc_set && pc_mux_id == PC_EXC &&
+        (exc_pc_mux_id == EXC_PC_EXC || exc_pc_mux_id == EXC_PC_IRQ)) begin
+      // PC is being set to enter a trap handler: raise the flag
+      rvfi_set_trap_pc_d = 1'b1;
+    end else if (rvfi_set_trap_pc_q && instr_id_done) begin
+      // The first instruction after the trap PC was set has completed: drop the flag
+      rvfi_set_trap_pc_d = 1'b0;
+    end else begin
+      rvfi_set_trap_pc_d = rvfi_set_trap_pc_q;
+    end
+  end
+
+  always_ff @(posedge clk or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rvfi_set_trap_pc_q <= 1'b0;
+      rvfi_intr_q        <= 1'b0;
+    end else begin
+      rvfi_set_trap_pc_q <= rvfi_set_trap_pc_d;
+      rvfi_intr_q        <= rvfi_intr_d;
+    end
+  end
+
+`else
+  logic unused_instr_new_id, unused_instr_done_wb;
+  assign unused_instr_new_id = instr_new_id;
+  assign unused_instr_done_wb = instr_done_wb;
+`endif
+
+
+
+endmodule
diff --git a/verilog/rtl/brq_core_top.sv b/verilog/rtl/brq_core_top.sv
new file mode 100644
index 0000000..65b428a
--- /dev/null
+++ b/verilog/rtl/brq_core_top.sv
@@ -0,0 +1,215 @@
+//`include "/home/usman/Documents/ibex/rtl/ibex_pkg.sv"
+//`include "/home/merl/Documents/ibex/rtl/prim_pkg.sv"
+
+module brq_core_top #(
+    parameter bit                 PMPEnable        = 1'b0,
+    parameter int unsigned        PMPGranularity   = 0,
+    parameter int unsigned        PMPNumRegions    = 0,
+    parameter int unsigned        MHPMCounterNum   = 0,
+    parameter int unsigned        MHPMCounterWidth = 40,
+    parameter bit                 RV32E            = 1'b0,
+    parameter brq_pkg::rv32m_e    RV32M            = brq_pkg::RV32MFast,
+    parameter brq_pkg::rv32b_e    RV32B            = brq_pkg::RV32BNone,
+    parameter brq_pkg::regfile_e  RegFile          = brq_pkg::RegFileFF,
+    parameter bit                 BranchTargetALU  = 1'b0,
+    parameter bit                 WritebackStage   = 1'b1,
+    parameter bit                 ICache           = 1'b0,
+    parameter bit                 ICacheECC        = 1'b0,
+    parameter bit                 BranchPredictor  = 1'b0,
+    parameter bit                 DbgTriggerEn     = 1'b0,
+    parameter int unsigned        DbgHwBreakNum    = 1,
+    parameter bit                 Securebrq        = 1'b0,
+    parameter int unsigned        DmHaltAddr       = 0,
+    parameter int unsigned        DmExceptionAddr  = 0
+) (
+    // Clock and reset
+    input                     clk_i,
+    input                     rst_ni,
+
+    // Instruction memory interface (TL-UL structs, driven through intr_interface)
+    input  tlul_pkg::tl_d2h_t tl_i_i,
+    output tlul_pkg::tl_h2d_t tl_i_o,
+
+    // Data memory interface (TL-UL structs, driven through data_interface)
+    input  tlul_pkg::tl_d2h_t tl_d_i,
+    output tlul_pkg::tl_h2d_t tl_d_o,
+
+    input  logic              test_en_i,      // enable all clk_i gates for testing
+
+    input  logic [31:0]       hart_id_i,
+    input  logic [31:0]       boot_addr_i,
+
+    // Interrupt inputs
+    input  logic              irq_software_i,
+    input  logic              irq_timer_i,
+    input  logic              irq_external_i,
+    input  logic [14:0]       irq_fast_i,
+    input  logic              irq_nm_i,       // non-maskable interrupt
+
+    // Debug interface
+    input  logic              debug_req_i,
+
+    // CPU control signals
+    input  logic              fetch_enable_i,
+    output logic              alert_minor_o,
+    output logic              alert_major_o,
+    output logic              core_sleep_o
+);
+  import brq_pkg::*;
+
+  // Internal buses linking u_core to the two tlul_host_adapter instances below.
+
+  // Instruction fetch bus
+  logic        instr_req;
+  logic        instr_gnt;
+  logic        instr_rvalid;
+  logic        instr_err;
+  logic [31:0] instr_addr;
+  logic [31:0] instr_rdata;
+
+  // Data access bus
+  logic        data_req;
+  logic        data_gnt;
+  logic        data_rvalid;
+  logic        data_err;
+  logic        data_we;
+  logic [3:0]  data_be;
+  logic [31:0] data_addr;
+  logic [31:0] data_wdata;
+  logic [31:0] data_rdata;
+
+  // Core: every wrapper parameter is forwarded unchanged. The instruction/data ports use the
+  // internal buses; the RVFI outputs (only present when RVFI is defined) are left unconnected.
+  brq_core #(
+    .PMPEnable        ( PMPEnable        ),
+    .PMPGranularity   ( PMPGranularity   ),
+    .PMPNumRegions    ( PMPNumRegions    ),
+    .MHPMCounterNum   ( MHPMCounterNum   ),
+    .MHPMCounterWidth ( MHPMCounterWidth ),
+    .RV32E            ( RV32E            ),
+    .RV32M            ( RV32M            ),
+    .RV32B            ( RV32B            ),
+    .RegFile          ( RegFile          ),
+    .BranchTargetALU  ( BranchTargetALU  ),
+    .WritebackStage   ( WritebackStage   ),
+    .ICache           ( ICache           ),
+    .ICacheECC        ( ICacheECC        ),
+    .BranchPredictor  ( BranchPredictor  ),
+    .DbgTriggerEn     ( DbgTriggerEn     ),
+    .DbgHwBreakNum    ( DbgHwBreakNum    ),
+    .Securebrq        ( Securebrq        ),
+    .DmHaltAddr       ( DmHaltAddr       ),
+    .DmExceptionAddr  ( DmExceptionAddr  )
+  ) u_core (
+    // Clock and reset
+    .clk_i          ( clk_i          ),
+    .rst_ni         ( rst_ni         ),
+
+    // Test control, hart ID and boot address
+    .test_en_i      ( test_en_i      ),  // enable all clk_i gates for testing
+    .hart_id_i      ( hart_id_i      ),
+    .boot_addr_i    ( boot_addr_i    ),
+
+    // Instruction memory interface
+    .instr_req_o    ( instr_req      ),
+    .instr_gnt_i    ( instr_gnt      ),
+    .instr_rvalid_i ( instr_rvalid   ),
+    .instr_addr_o   ( instr_addr     ),
+    .instr_rdata_i  ( instr_rdata    ),
+    .instr_err_i    ( instr_err      ),
+
+    // Data memory interface
+    .data_req_o     ( data_req       ),
+    .data_gnt_i     ( data_gnt       ),
+    .data_rvalid_i  ( data_rvalid    ),
+    .data_we_o      ( data_we        ),
+    .data_be_o      ( data_be        ),
+    .data_addr_o    ( data_addr      ),
+    .data_wdata_o   ( data_wdata     ),
+    .data_rdata_i   ( data_rdata     ),
+    .data_err_i     ( data_err       ),
+
+    // Interrupt inputs
+    .irq_software_i ( irq_software_i ),
+    .irq_timer_i    ( irq_timer_i    ),
+    .irq_external_i ( irq_external_i ),
+    .irq_fast_i     ( irq_fast_i     ),
+    .irq_nm_i       ( irq_nm_i       ),  // non-maskable interrupt
+
+    // Debug interface
+    .debug_req_i    ( debug_req_i    ),
+
+    // RISC-V Formal Interface
+    // The port names carry no _i/_o suffix because they follow the naming of the
+    // RISC-V Formal Interface Specification rather than the local coding standard.
+`ifdef RVFI
+    .rvfi_valid     (),
+    .rvfi_order     (),
+    .rvfi_insn      (),
+    .rvfi_trap      (),
+    .rvfi_halt      (),
+    .rvfi_intr      (),
+    .rvfi_mode      (),
+    .rvfi_ixl       (),
+    .rvfi_rs1_addr  (),
+    .rvfi_rs2_addr  (),
+    .rvfi_rs3_addr  (),
+    .rvfi_rs1_rdata (),
+    .rvfi_rs2_rdata (),
+    .rvfi_rs3_rdata (),
+    .rvfi_rd_addr   (),
+    .rvfi_rd_wdata  (),
+    .rvfi_pc_rdata  (),
+    .rvfi_pc_wdata  (),
+    .rvfi_mem_addr  (),
+    .rvfi_mem_rmask (),
+    .rvfi_mem_wmask (),
+    .rvfi_mem_rdata (),
+    .rvfi_mem_wdata (),
+`endif
+
+    // CPU control signals
+    .fetch_enable_i ( fetch_enable_i ),
+    .alert_minor_o  ( alert_minor_o  ),
+    .alert_major_o  ( alert_major_o  ),
+    .core_sleep_o   ( core_sleep_o   )
+  );
+
+  // Instruction-side adapter between the fetch bus and the tl_i_* ports. Its write inputs are
+  // constant: we_i is tied low, wdata_i is zero and all four byte enables are set.
+  tlul_host_adapter #( .MAX_REQS ( 2 ) ) intr_interface (
+    .clk_i    ( clk_i        ),
+    .rst_ni   ( rst_ni       ),
+    .req_i    ( instr_req    ),
+    .gnt_o    ( instr_gnt    ),
+    .addr_i   ( instr_addr   ),
+    .we_i     ( 1'b0         ),
+    .wdata_i  ( 32'b0        ),
+    .be_i     ( 4'hF         ),
+    .valid_o  ( instr_rvalid ),
+    .rdata_o  ( instr_rdata  ),
+    .err_o    ( instr_err    ),
+    // TL-UL side: outgoing host-to-device struct and incoming device-to-host struct
+    .tl_h_c_a ( tl_i_o       ),
+    .tl_h_c_d ( tl_i_i       )
+  );
+
+  // Data-side adapter between the data bus and the tl_d_* ports
+  tlul_host_adapter #( .MAX_REQS ( 2 ) ) data_interface (
+    .clk_i    ( clk_i        ),
+    .rst_ni   ( rst_ni       ),
+    .req_i    ( data_req     ),
+    .gnt_o    ( data_gnt     ),
+    .addr_i   ( data_addr    ),
+    .we_i     ( data_we      ),
+    .wdata_i  ( data_wdata   ),
+    .be_i     ( data_be      ),
+    .valid_o  ( data_rvalid  ),
+    .rdata_o  ( data_rdata   ),
+    .err_o    ( data_err     ),
+    // TL-UL side: outgoing host-to-device struct and incoming device-to-host struct
+    .tl_h_c_a ( tl_d_o       ),
+    .tl_h_c_d ( tl_d_i       )
+  );
+
+endmodule
diff --git a/verilog/rtl/brq_counter.sv b/verilog/rtl/brq_counter.sv
new file mode 100644
index 0000000..3fedb28
--- /dev/null
+++ b/verilog/rtl/brq_counter.sv
@@ -0,0 +1,82 @@
+module brq_counter #(
+  parameter int CounterWidth = 32
+) (
+  input  logic        clk_i,
+  input  logic        rst_ni,
+
+  input  logic        counter_inc_i,
+  input  logic        counterh_we_i,
+  input  logic        counter_we_i,
+  input  logic [31:0] counter_val_i,
+  output logic [63:0] counter_val_o
+);
+
+  // CounterWidth-bit counter presented as 64 bits on counter_val_o. A write (low word via
+  // counter_we_i, high word via counterh_we_i) takes priority over an increment request.
+  logic [63:0]             counter;
+  logic [63:0]             counter_load;
+  logic [CounterWidth-1:0] counter_upd;
+  logic [CounterWidth-1:0] counter_d;
+  logic                    we;
+
+  // Next-state computation
+  always_comb begin
+    we = counter_we_i | counterh_we_i;
+
+    // Merge the written word with the untouched half of the current count
+    if (counterh_we_i) begin
+      counter_load = {counter_val_i, counter[31:0]};
+    end else begin
+      counter_load = {counter[63:32], counter_val_i};
+    end
+
+    // Current count plus one, wrapping at CounterWidth bits
+    counter_upd = counter[CounterWidth-1:0] + {{CounterWidth-1{1'b0}},1'b1};
+
+    // Priority, lowest to highest: hold, increment, load
+    counter_d = counter[CounterWidth-1:0];
+    if (counter_inc_i) begin
+      counter_d = counter_upd[CounterWidth-1:0];
+    end
+    if (we) begin
+      counter_d = counter_load[CounterWidth-1:0];
+    end
+  end
+
+`ifdef FPGA_XILINX
+  // Xilinx targets: request DSP mapping for counters narrower than 49 bits
+  localparam int DspPragma = CounterWidth < 49  ? "yes" : "no";
+  (* use_dsp = DspPragma *) logic [CounterWidth-1:0] counter_q;
+
+  // DSP output register requires synchronous reset.
+  `define COUNTER_FLOP_RST posedge clk_i
+`else
+  logic [CounterWidth-1:0] counter_q;
+
+  `define COUNTER_FLOP_RST posedge clk_i or negedge rst_ni
+`endif
+
+  // State register; its event list is the COUNTER_FLOP_RST macro chosen above
+  always_ff @(`COUNTER_FLOP_RST) begin
+    if (!rst_ni) begin
+      counter_q <= '0;
+    end else begin
+      counter_q <= counter_d;
+    end
+  end
+
+  // Build the 64-bit view of the count; bits above CounterWidth read as zero
+  if (CounterWidth < 64) begin : g_counter_narrow
+    logic [63:CounterWidth] unused_counter_load;
+    assign counter             = {{(64-CounterWidth){1'b0}}, counter_q};
+    assign unused_counter_load = counter_load[63:CounterWidth];
+  end else begin : g_counter_full
+    assign counter = counter_q;
+  end
+
+  assign counter_val_o = counter;
+
+endmodule
+
+// Keep helper defines file-local.
+`undef COUNTER_FLOP_RST
diff --git a/verilog/rtl/brq_cs_registers.sv b/verilog/rtl/brq_cs_registers.sv
new file mode 100644
index 0000000..6b33279
--- /dev/null
+++ b/verilog/rtl/brq_cs_registers.sv
@@ -0,0 +1,1510 @@
+
+/**
+ * Control and Status Registers
+ *
+ * Control and Status Registers (CSRs) following the RISC-V Privileged
+ * Specification, draft version 1.11
+ */
+
+module brq_cs_registers #(
+    parameter bit                DbgTriggerEn      = 0,
+    parameter int unsigned       DbgHwBreakNum     = 1,
+    parameter bit                DataIndTiming     = 1'b0,
+    parameter bit                DummyInstructions = 1'b0,
+    parameter bit                ShadowCSR         = 1'b0,
+    parameter bit                ICache            = 1'b0,
+    parameter int unsigned       MHPMCounterNum    = 10,
+    parameter int unsigned       MHPMCounterWidth  = 40,
+    parameter bit                PMPEnable         = 0,
+    parameter int unsigned       PMPGranularity    = 0,
+    parameter int unsigned       PMPNumRegions     = 4,
+    parameter bit                RV32E             = 0,
+    parameter brq_pkg::rv32m_e   RV32M             = brq_pkg::RV32MFast,
+    parameter brq_pkg::rvfloat_e RVF               = brq_pkg::RV64FDouble // for floating point
+) (
+    // Clock and Reset
+    input  logic                 clk_i,
+    input  logic                 rst_ni,
+
+    // Hart ID
+    input  logic [31:0]          hart_id_i,
+
+    // Privilege mode
+    output brq_pkg::priv_lvl_e  priv_mode_id_o,
+    output brq_pkg::priv_lvl_e  priv_mode_if_o,
+    output brq_pkg::priv_lvl_e  priv_mode_lsu_o,
+    output logic                csr_mstatus_tw_o,
+
+    // mtvec
+    output logic [31:0]          csr_mtvec_o,
+    input  logic                 csr_mtvec_init_i,
+    input  logic [31:0]          boot_addr_i,
+
+    // Interface to registers (SRAM like)
+    input  logic                csr_access_i,
+    input  brq_pkg::csr_num_e   csr_addr_i,
+    input  logic [31:0]         csr_wdata_i,
+    input  brq_pkg::csr_op_e    csr_op_i,
+    input                       csr_op_en_i,
+    output logic [31:0]         csr_rdata_o,
+
+    // interrupts
+    input  logic                 irq_software_i,
+    input  logic                 irq_timer_i,
+    input  logic                 irq_external_i,
+    input  logic [14:0]          irq_fast_i,
+    input  logic                 nmi_mode_i,
+    output logic                 irq_pending_o,          // interrupt request pending
+    output brq_pkg::irqs_t       irqs_o,                 // interrupt requests qualified with mie
+    output logic                 csr_mstatus_mie_o,
+    output logic [31:0]          csr_mepc_o,
+
+    // PMP
+    output brq_pkg::pmp_cfg_t    csr_pmp_cfg_o  [PMPNumRegions],
+    output logic [33:0]          csr_pmp_addr_o [PMPNumRegions],
+
+    // debug
+    input  logic                debug_mode_i,
+    input  brq_pkg::dbg_cause_e debug_cause_i,
+    input  logic                debug_csr_save_i,
+    output logic [31:0]         csr_depc_o,
+    output logic                debug_single_step_o,
+    output logic                debug_ebreakm_o,
+    output logic                debug_ebreaku_o,
+    output logic                trigger_match_o,
+
+    input  logic [31:0]         pc_if_i,
+    input  logic [31:0]         pc_id_i,
+    input  logic [31:0]         pc_wb_i,
+
+    // CPU control bits
+    output logic                data_ind_timing_o,
+    output logic                dummy_instr_en_o,
+    output logic [2:0]          dummy_instr_mask_o,
+    output logic                dummy_instr_seed_en_o,
+    output logic [31:0]         dummy_instr_seed_o,
+    output logic                icache_enable_o,
+    output logic                csr_shadow_err_o,
+
+    // Exception save/restore
+    input  logic                csr_save_if_i,
+    input  logic                csr_save_id_i,
+    input  logic                csr_save_wb_i,
+    input  logic                csr_restore_mret_i,
+    input  logic                csr_restore_dret_i,
+    input  logic                csr_save_cause_i,
+    input  brq_pkg::exc_cause_e csr_mcause_i,
+    input  logic [31:0]         csr_mtval_i,
+    output logic                illegal_csr_insn_o,     // access to non-existent CSR,
+                                                         // with wrong privilege level, or
+                                                         // missing write permissions
+    // Performance Counters
+    input  logic                instr_ret_i,            // instr retired in ID/EX stage
+    input  logic                instr_ret_compressed_i, // compressed instr retired
+    input  logic                iside_wait_i,           // core waiting for the iside
+    input  logic                jump_i,                 // jump instr seen (j, jr, jal, jalr)
+    input  logic                branch_i,               // branch instr seen (bf, bnf)
+    input  logic                branch_taken_i,         // branch was taken
+    input  logic                mem_load_i,             // load from memory in this cycle
+    input  logic                mem_store_i,            // store to memory in this cycle
+    input  logic                dside_wait_i,           // core waiting for the dside
+    input  logic                mul_wait_i,             // core waiting for multiply
+    input  logic                div_wait_i,              // core waiting for divide
+
+    // floating point
+    input  logic                  fp_rm_dynamic_i,
+    output fpnew_pkg::roundmode_e fp_frm_o,
+    input  fpnew_pkg::status_t    fp_status_i,
+    input  logic                  is_fp_instr_i
+);
+  import brq_pkg::*;
+  import fpnew_pkg::roundmode_e;
+
+  // Derived configuration constants
+  // RV32MEnabled is 1 for every multiplier configuration other than RV32MNone.
+  localparam int unsigned RV32MEnabled = (RV32M == RV32MNone) ? 0 : 1;
+  localparam int unsigned PMPAddrWidth = (PMPGranularity > 0) ? 33 - PMPGranularity : 32;
+
+  // Floating-point configuration flags decoded from RVF
+  localparam int unsigned SinglePrecision = (RVF == RV32FSingle) ? 1 : 0;
+  localparam int unsigned DoublePrecision = (RVF == RV64FDouble) ? 1 : 0;
+
+  // misa
+  // Read-only value returned for CSR_MISA. The D extension depends on F, so
+  // the F bit is reported for the double-precision configuration as well;
+  // advertising D without F is not a legal extension combination.
+  localparam logic [31:0] MISA_VALUE =
+      (0                 <<  0)  // A - Atomic Instructions extension
+    | (1                 <<  2)  // C - Compressed extension
+    | (DoublePrecision   <<  3)  // D - Double precision floating-point extension
+    | (32'(RV32E)        <<  4)  // E - RV32E base ISA
+    | ((SinglePrecision | DoublePrecision) << 5)  // F - Single precision floating-point extension
+    | (32'(!RV32E)       <<  8)  // I - RV32I/64I/128I base ISA
+    | (RV32MEnabled      << 12)  // M - Integer Multiply/Divide extension
+    | (0                 << 13)  // N - User level interrupts supported
+    | (0                 << 18)  // S - Supervisor mode implemented
+    | (1                 << 20)  // U - User mode implemented
+    | (0                 << 23)  // X - Non-standard extensions present
+    | (32'(CSR_MISA_MXL) << 30); // M-XLEN
+
+  // Implemented mstatus fields. The read and write logic map each member to
+  // its architectural position through the CSR_MSTATUS_*_BIT constants.
+  typedef struct packed {
+    logic      mie;
+    logic      mpie;
+    priv_lvl_e mpp;
+    logic      mprv;
+    logic      tw;
+  } status_t;
+
+  // Subset of mstatus that is copied into the mstack CSR on trap entry and
+  // restored from it on MRET while nmi_mode_i is set.
+  typedef struct packed {
+    logic      mpie;
+    priv_lvl_e mpp;
+  } status_stk_t;
+
+  // dcsr layout, most significant field first. The struct is read out and
+  // written directly as a CSR word, so the member widths are presumably
+  // chosen to total 32 bits (enum widths are defined in brq_pkg - confirm).
+  typedef struct packed {
+      x_debug_ver_e xdebugver;
+      logic [11:0]  zero2;
+      logic         ebreakm;
+      logic         zero1;
+      logic         ebreaks;
+      logic         ebreaku;
+      logic         stepie;
+      logic         stopcount;
+      logic         stoptime;
+      dbg_cause_e   cause;
+      logic         zero0;
+      logic         mprven;
+      logic         nmip;
+      logic         step;
+      priv_lvl_e    prv;
+  } dcsr_t;
+
+  // CPU control register fields
+  // Packed layout of the custom CSR_CPUCTRL register; reads zero-extend this
+  // struct to 32 bits, so icache_enable sits in bit 0.
+  typedef struct packed {
+    logic [2:0]  dummy_instr_mask;
+    logic        dummy_instr_en;
+    logic        data_ind_timing;
+    logic        icache_enable;
+  } cpu_ctrl_t;
+
+  // Interrupt and exception control signals
+  logic [31:0] exception_pc;
+
+  // CSRs
+  fpnew_pkg::status_t fflags_q, fflags_d, fflag_wdata;
+
+  logic        fflags_en;
+  logic        frm_en;
+  roundmode_e  frm_q, frm_d;
+  priv_lvl_e   priv_lvl_q, priv_lvl_d;
+  status_t     mstatus_q, mstatus_d;
+  logic        mstatus_err;
+  logic        mstatus_en;
+  irqs_t       mie_q, mie_d;
+  logic        mie_en;
+  logic [31:0] mscratch_q;
+  logic        mscratch_en;
+  logic [31:0] mepc_q, mepc_d;
+  logic        mepc_en;
+  logic  [5:0] mcause_q, mcause_d;
+  logic        mcause_en;
+  logic [31:0] mtval_q, mtval_d;
+  logic        mtval_en;
+  logic [31:0] mtvec_q, mtvec_d;
+  logic        mtvec_err;
+  logic        mtvec_en;
+  irqs_t       mip;
+  dcsr_t       dcsr_q, dcsr_d;
+  logic        dcsr_en;
+  logic [31:0] depc_q, depc_d;
+  logic        depc_en;
+  logic [31:0] dscratch0_q;
+  logic [31:0] dscratch1_q;
+  logic        dscratch0_en, dscratch1_en;
+
+  // CSRs for recoverable NMIs
+  // NOTE: these CSRS are nonstandard, see https://github.com/riscv/riscv-isa-manual/issues/261
+  status_stk_t mstack_q, mstack_d;
+  logic        mstack_en;
+  logic [31:0] mstack_epc_q, mstack_epc_d;
+  logic  [5:0] mstack_cause_q, mstack_cause_d;
+
+  // PMP Signals
+  logic [31:0]                 pmp_addr_rdata  [PMP_MAX_REGIONS];
+  logic [PMP_CFG_W-1:0]        pmp_cfg_rdata   [PMP_MAX_REGIONS];
+  logic                        pmp_csr_err;
+
+  // Hardware performance monitor signals
+  logic [31:0]                 mcountinhibit;
+  // Only have mcountinhibit flops for counters that actually exist
+  logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q;
+  logic                        mcountinhibit_we;
+
+  // mhpmcounter flops are elaborated below providing only the precise number that is required based
+  // on MHPMCounterNum/MHPMCounterWidth. This signal connects to the Q output of these flops
+  // where they exist and is otherwise 0.
+  logic [63:0] mhpmcounter [32];
+  logic [31:0] mhpmcounter_we;
+  logic [31:0] mhpmcounterh_we;
+  logic [31:0] mhpmcounter_incr;
+  logic [31:0] mhpmevent [32];
+  logic  [4:0] mhpmcounter_idx;
+  logic        unused_mhpmcounter_we_1;
+  logic        unused_mhpmcounterh_we_1;
+  logic        unused_mhpmcounter_incr_1;
+
+  // Debug / trigger registers
+  logic [31:0] tselect_rdata;
+  logic [31:0] tmatch_control_rdata;
+  logic [31:0] tmatch_value_rdata;
+
+  // CPU control bits
+  cpu_ctrl_t   cpuctrl_q, cpuctrl_d, cpuctrl_wdata;
+  logic        cpuctrl_we;
+  logic        cpuctrl_err;
+
+  // CSR update logic
+  logic [31:0] csr_wdata_int;
+  logic [31:0] csr_rdata_int;
+  logic        csr_we_int;
+  logic        csr_wreq;
+
+  // Access violation signals
+  logic        illegal_csr;
+  logic        illegal_csr_priv;
+  logic        illegal_csr_write;
+
+  logic [7:0]  unused_boot_addr;
+  logic [2:0]  unused_csr_addr;
+
+  assign unused_boot_addr = boot_addr_i[7:0];
+
+  /////////////
+  // CSR reg //
+  /////////////
+
+  // Rounding-mode legality flags (driven by the floating point block below)
+  logic illegal_dyn_mod;
+  logic illegal_csr_dyn_mod;
+  // Plain bit-vector view of the enum-typed CSR address
+  logic [$bits(csr_num_e)-1:0] csr_addr;
+  assign csr_addr           = {csr_addr_i};
+  assign unused_csr_addr    = csr_addr[7:5];
+  // The low five address bits select the performance counter / event register
+  assign mhpmcounter_idx    = csr_addr[4:0];
+
+  // Flagged when the stored frm encoding is one of 3'b101..3'b111 and
+  // fp_rm_dynamic_i is set
+  assign illegal_csr_dyn_mod = illegal_dyn_mod & fp_rm_dynamic_i;
+
+  // See RISC-V Privileged Specification, version 1.11, Section 2.1
+  // Address bits [9:8] are compared against the current privilege level and
+  // bits [11:10] == 2'b11 rejects any write request.
+  assign illegal_csr_priv    = (csr_addr[9:8] > {priv_lvl_q});
+  assign illegal_csr_write   = (csr_addr[11:10] == 2'b11) && csr_wreq;
+  // Note: the rounding-mode term is not qualified by csr_access_i
+  assign illegal_csr_insn_o  = (csr_access_i & (illegal_csr | illegal_csr_write | illegal_csr_priv)) | illegal_csr_dyn_mod;
+
+  // mip CSR is purely combinational - must be able to re-enable the clock upon WFI
+  assign mip.irq_software = irq_software_i;
+  assign mip.irq_timer    = irq_timer_i;
+  assign mip.irq_external = irq_external_i;
+  assign mip.irq_fast     = irq_fast_i;
+  
+  // Floating-point rounding mode
+  // The stored frm value is always forwarded on fp_frm_o. Encodings
+  // 3'b101, 3'b110 and 3'b111 raise illegal_dyn_mod; every other encoding
+  // leaves it low.
+  always_comb begin
+    fp_frm_o        = frm_q;
+    illegal_dyn_mod = 1'b0;
+
+    unique case (frm_q)
+      3'b101,
+      3'b110,
+      3'b111:  illegal_dyn_mod = 1'b1;
+      default: ;
+    endcase
+  end
+  
+  // read logic
+  // Combinationally decodes csr_addr_i into the read value csr_rdata_int.
+  // illegal_csr is raised for addresses that match no case item, for the
+  // debug CSRs while debug_mode_i is low, and for the trigger CSRs when
+  // DbgTriggerEn is 0. Both outputs default to zero.
+  always_comb begin
+    csr_rdata_int = '0;
+    illegal_csr   = 1'b0;
+
+    unique case (csr_addr_i)
+      // fcsr: floating-point control and status register (frm+fflags)
+      CSR_FCSR: csr_rdata_int = {24'b0 , frm_q, fflags_q};
+      
+      // fflags: floating-point accrued exception
+      CSR_FFLAG: csr_rdata_int = {27'b0 , fflags_q};
+      
+      // frm: floating-point dynamic rounding mode
+      CSR_FRM: begin
+        csr_rdata_int = {29'b0 , frm_q};
+      end
+
+      // mhartid: unique hardware thread id
+      CSR_MHARTID: csr_rdata_int = hart_id_i;
+
+      // mstatus: only MIE, MPIE, MPP, MPRV and TW are implemented,
+      // every other bit reads as zero
+      CSR_MSTATUS: begin
+        csr_rdata_int                                                   = '0;
+        csr_rdata_int[CSR_MSTATUS_MIE_BIT]                              = mstatus_q.mie;
+        csr_rdata_int[CSR_MSTATUS_MPIE_BIT]                             = mstatus_q.mpie;
+        csr_rdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW] = mstatus_q.mpp;
+        csr_rdata_int[CSR_MSTATUS_MPRV_BIT]                             = mstatus_q.mprv;
+        csr_rdata_int[CSR_MSTATUS_TW_BIT]                               = mstatus_q.tw;
+      end
+
+      // misa
+      CSR_MISA: csr_rdata_int = MISA_VALUE;
+
+      // interrupt enable
+      CSR_MIE: begin
+        csr_rdata_int                                     = '0;
+        csr_rdata_int[CSR_MSIX_BIT]                       = mie_q.irq_software;
+        csr_rdata_int[CSR_MTIX_BIT]                       = mie_q.irq_timer;
+        csr_rdata_int[CSR_MEIX_BIT]                       = mie_q.irq_external;
+        csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mie_q.irq_fast;
+      end
+
+      CSR_MSCRATCH: csr_rdata_int = mscratch_q;
+
+      // mtvec: trap-vector base address
+      CSR_MTVEC: csr_rdata_int = mtvec_q;
+
+      // mepc: exception program counter
+      CSR_MEPC: csr_rdata_int = mepc_q;
+
+      // mcause: exception cause
+      // Stored as 6 bits: bit 5 is returned in bit 31, bits 4:0 in bits 4:0
+      CSR_MCAUSE: csr_rdata_int = {mcause_q[5], 26'b0, mcause_q[4:0]};
+
+      // mtval: trap value
+      CSR_MTVAL: csr_rdata_int = mtval_q;
+
+      // mip: interrupt pending
+      CSR_MIP: begin
+        csr_rdata_int                                     = '0;
+        csr_rdata_int[CSR_MSIX_BIT]                       = mip.irq_software;
+        csr_rdata_int[CSR_MTIX_BIT]                       = mip.irq_timer;
+        csr_rdata_int[CSR_MEIX_BIT]                       = mip.irq_external;
+        csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mip.irq_fast;
+      end
+
+      // PMP registers
+      // Each pmpcfg word packs four region configurations, lowest region in
+      // the least significant position
+      CSR_PMPCFG0:   csr_rdata_int = {pmp_cfg_rdata[3],  pmp_cfg_rdata[2],
+                                      pmp_cfg_rdata[1],  pmp_cfg_rdata[0]};
+      CSR_PMPCFG1:   csr_rdata_int = {pmp_cfg_rdata[7],  pmp_cfg_rdata[6],
+                                      pmp_cfg_rdata[5],  pmp_cfg_rdata[4]};
+      CSR_PMPCFG2:   csr_rdata_int = {pmp_cfg_rdata[11], pmp_cfg_rdata[10],
+                                      pmp_cfg_rdata[9],  pmp_cfg_rdata[8]};
+      CSR_PMPCFG3:   csr_rdata_int = {pmp_cfg_rdata[15], pmp_cfg_rdata[14],
+                                      pmp_cfg_rdata[13], pmp_cfg_rdata[12]};
+      CSR_PMPADDR0:  csr_rdata_int = pmp_addr_rdata[0];
+      CSR_PMPADDR1:  csr_rdata_int = pmp_addr_rdata[1];
+      CSR_PMPADDR2:  csr_rdata_int = pmp_addr_rdata[2];
+      CSR_PMPADDR3:  csr_rdata_int = pmp_addr_rdata[3];
+      CSR_PMPADDR4:  csr_rdata_int = pmp_addr_rdata[4];
+      CSR_PMPADDR5:  csr_rdata_int = pmp_addr_rdata[5];
+      CSR_PMPADDR6:  csr_rdata_int = pmp_addr_rdata[6];
+      CSR_PMPADDR7:  csr_rdata_int = pmp_addr_rdata[7];
+      CSR_PMPADDR8:  csr_rdata_int = pmp_addr_rdata[8];
+      CSR_PMPADDR9:  csr_rdata_int = pmp_addr_rdata[9];
+      CSR_PMPADDR10: csr_rdata_int = pmp_addr_rdata[10];
+      CSR_PMPADDR11: csr_rdata_int = pmp_addr_rdata[11];
+      CSR_PMPADDR12: csr_rdata_int = pmp_addr_rdata[12];
+      CSR_PMPADDR13: csr_rdata_int = pmp_addr_rdata[13];
+      CSR_PMPADDR14: csr_rdata_int = pmp_addr_rdata[14];
+      CSR_PMPADDR15: csr_rdata_int = pmp_addr_rdata[15];
+
+      // Debug CSRs: flagged illegal unless the core is in debug mode
+      CSR_DCSR: begin
+        csr_rdata_int = dcsr_q;
+        illegal_csr = ~debug_mode_i;
+      end
+      CSR_DPC: begin
+        csr_rdata_int = depc_q;
+        illegal_csr = ~debug_mode_i;
+      end
+      CSR_DSCRATCH0: begin
+        csr_rdata_int = dscratch0_q;
+        illegal_csr = ~debug_mode_i;
+      end
+      CSR_DSCRATCH1: begin
+        csr_rdata_int = dscratch1_q;
+        illegal_csr = ~debug_mode_i;
+      end
+
+      // machine counter/timers
+      // Event and counter registers are indexed by the low five address bits
+      // (mhpmcounter_idx)
+      CSR_MCOUNTINHIBIT: csr_rdata_int = mcountinhibit;
+      CSR_MHPMEVENT3,
+      CSR_MHPMEVENT4,  CSR_MHPMEVENT5,  CSR_MHPMEVENT6,  CSR_MHPMEVENT7,
+      CSR_MHPMEVENT8,  CSR_MHPMEVENT9,  CSR_MHPMEVENT10, CSR_MHPMEVENT11,
+      CSR_MHPMEVENT12, CSR_MHPMEVENT13, CSR_MHPMEVENT14, CSR_MHPMEVENT15,
+      CSR_MHPMEVENT16, CSR_MHPMEVENT17, CSR_MHPMEVENT18, CSR_MHPMEVENT19,
+      CSR_MHPMEVENT20, CSR_MHPMEVENT21, CSR_MHPMEVENT22, CSR_MHPMEVENT23,
+      CSR_MHPMEVENT24, CSR_MHPMEVENT25, CSR_MHPMEVENT26, CSR_MHPMEVENT27,
+      CSR_MHPMEVENT28, CSR_MHPMEVENT29, CSR_MHPMEVENT30, CSR_MHPMEVENT31: begin
+        csr_rdata_int = mhpmevent[mhpmcounter_idx];
+      end
+
+      // Low word of the selected 64-bit counter
+      CSR_MCYCLE,
+      CSR_MINSTRET,
+      CSR_MHPMCOUNTER3,
+      CSR_MHPMCOUNTER4,  CSR_MHPMCOUNTER5,  CSR_MHPMCOUNTER6,  CSR_MHPMCOUNTER7,
+      CSR_MHPMCOUNTER8,  CSR_MHPMCOUNTER9,  CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11,
+      CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15,
+      CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19,
+      CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23,
+      CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27,
+      CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin
+        csr_rdata_int = mhpmcounter[mhpmcounter_idx][31:0];
+      end
+
+      // High word of the selected 64-bit counter
+      CSR_MCYCLEH,
+      CSR_MINSTRETH,
+      CSR_MHPMCOUNTER3H,
+      CSR_MHPMCOUNTER4H,  CSR_MHPMCOUNTER5H,  CSR_MHPMCOUNTER6H,  CSR_MHPMCOUNTER7H,
+      CSR_MHPMCOUNTER8H,  CSR_MHPMCOUNTER9H,  CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H,
+      CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H,
+      CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H,
+      CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H,
+      CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H,
+      CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin
+        csr_rdata_int = mhpmcounter[mhpmcounter_idx][63:32];
+      end
+
+      // Debug triggers
+      // All trigger CSRs are flagged illegal when DbgTriggerEn is 0
+      CSR_TSELECT: begin
+        csr_rdata_int = tselect_rdata;
+        illegal_csr   = ~DbgTriggerEn;
+      end
+      CSR_TDATA1: begin
+        csr_rdata_int = tmatch_control_rdata;
+        illegal_csr   = ~DbgTriggerEn;
+      end
+      CSR_TDATA2: begin
+        csr_rdata_int = tmatch_value_rdata;
+        illegal_csr   = ~DbgTriggerEn;
+      end
+      CSR_TDATA3: begin
+        csr_rdata_int = '0;
+        illegal_csr   = ~DbgTriggerEn;
+      end
+      CSR_MCONTEXT: begin
+        csr_rdata_int = '0;
+        illegal_csr   = ~DbgTriggerEn;
+      end
+      CSR_SCONTEXT: begin
+        csr_rdata_int = '0;
+        illegal_csr   = ~DbgTriggerEn;
+      end
+
+      // Custom CSR for controlling CPU features
+      CSR_CPUCTRL: begin
+        csr_rdata_int = {{32-$bits(cpu_ctrl_t){1'b0}},cpuctrl_q};
+      end
+
+      // Custom CSR for LFSR re-seeding (cannot be read)
+      CSR_SECURESEED: begin
+        csr_rdata_int = '0;
+      end
+
+      // Any address not listed above is reported as a non-existent CSR
+      default: begin
+        illegal_csr = 1'b1;
+      end
+    endcase
+  end
+
+  // write logic
+  // Computes the next value (*_d) and write enable (*_en / *_we) of every CSR.
+  // The block has three stages, evaluated in order so that later stages
+  // override earlier ones:
+  //   1. defaults (hold state, all enables low except the mtvec init enable),
+  //   2. software CSR writes, taken only when csr_we_int is set,
+  //   3. trap entry / DRET / MRET updates, which win over software writes.
+  always_comb begin
+    exception_pc = pc_id_i;
+
+    // Floating point
+    fflags_d    = fflags_q;
+    fflags_en   = 1'b0;
+
+    frm_d  = frm_q;
+    frm_en = 1'b0;
+
+    priv_lvl_d   = priv_lvl_q;
+    mstatus_en   = 1'b0;
+    mstatus_d    = mstatus_q;
+    mie_en       = 1'b0;
+    mscratch_en  = 1'b0;
+    mepc_en      = 1'b0;
+    // Bit 0 of a software-written mepc is always cleared
+    mepc_d       = {csr_wdata_int[31:1], 1'b0};
+    mcause_en    = 1'b0;
+    // mcause keeps only bit 31 and the low five bits of the written word
+    mcause_d     = {csr_wdata_int[31], csr_wdata_int[4:0]};
+    mtval_en     = 1'b0;
+    mtval_d      = csr_wdata_int;
+    // mtvec is also loaded (from boot_addr_i) whenever csr_mtvec_init_i is set
+    mtvec_en     = csr_mtvec_init_i;
+    // Bits [1:0] (mtvec.MODE) are forced to 2'b00 and bits [31:2] are taken as
+    // supplied, i.e. the base is only forced to 4-byte alignment.
+    // NOTE(review): an earlier comment here described vectored mode with a
+    // 256-byte aligned base, and the mtvec CSR instance resets to 32'd1
+    // (MODE = 01) - confirm which behaviour is intended.
+    mtvec_d      = csr_mtvec_init_i ? {boot_addr_i[31:2], 2'b00} :
+                                      {csr_wdata_int[31:2], 2'b00};
+    dcsr_en      = 1'b0;
+    dcsr_d       = dcsr_q;
+    depc_d       = {csr_wdata_int[31:1], 1'b0};
+    depc_en      = 1'b0;
+    dscratch0_en = 1'b0;
+    dscratch1_en = 1'b0;
+
+    // mstack shadows the current mstatus.MPIE/MPP, mepc and mcause
+    mstack_en      = 1'b0;
+    mstack_d.mpie  = mstatus_q.mpie;
+    mstack_d.mpp   = mstatus_q.mpp;
+    mstack_epc_d   = mepc_q;
+    mstack_cause_d = mcause_q;
+
+    mcountinhibit_we = 1'b0;
+    mhpmcounter_we   = '0;
+    mhpmcounterh_we  = '0;
+
+    cpuctrl_we       = 1'b0;
+
+    if (csr_we_int) begin
+      unique case (csr_addr_i)
+        // fcsr: writes fflags (bits 4:0) and frm (bits 7:5) together
+
+        CSR_FCSR: begin 
+          fflags_en = 1'b1;
+          frm_en    = 1'b1;
+          fflags_d  = csr_wdata_int[4:0];
+          frm_d     = roundmode_e'(csr_wdata_int[7:5]);  
+        end
+        
+
+        // fflags: accrued exception flags only
+        CSR_FFLAG : begin
+          fflags_en = 1'b1;
+          fflags_d  = fpnew_pkg::status_t'(csr_wdata_int[4:0]);
+        end
+
+        // frm: rounding mode only
+        CSR_FRM: begin
+          frm_en  = 1'b1;
+          frm_d   = roundmode_e'(csr_wdata_int[2:0]); 
+        end
+
+        // mstatus: IE bit and the other implemented fields
+        CSR_MSTATUS: begin
+          mstatus_en = 1'b1;
+          mstatus_d    = '{
+              mie:  csr_wdata_int[CSR_MSTATUS_MIE_BIT],
+              mpie: csr_wdata_int[CSR_MSTATUS_MPIE_BIT],
+              mpp:  priv_lvl_e'(csr_wdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW]),
+              mprv: csr_wdata_int[CSR_MSTATUS_MPRV_BIT],
+              tw:   csr_wdata_int[CSR_MSTATUS_TW_BIT]
+          };
+          // Convert illegal values to M-mode
+          if ((mstatus_d.mpp != PRIV_LVL_M) && (mstatus_d.mpp != PRIV_LVL_U)) begin
+            mstatus_d.mpp = PRIV_LVL_M;
+          end
+        end
+
+        // interrupt enable
+        CSR_MIE: mie_en = 1'b1;
+
+        CSR_MSCRATCH: mscratch_en = 1'b1;
+
+        // mepc: exception program counter
+        CSR_MEPC: mepc_en = 1'b1;
+
+        // mcause
+        CSR_MCAUSE: mcause_en = 1'b1;
+
+        // mtval: trap value
+        CSR_MTVAL: mtval_en = 1'b1;
+
+        // mtvec
+        CSR_MTVEC: mtvec_en = 1'b1;
+
+        CSR_DCSR: begin
+          dcsr_d = csr_wdata_int;
+          dcsr_d.xdebugver = XDEBUGVER_STD;
+          // Change to PRIV_LVL_M if software writes an unsupported value
+          if ((dcsr_d.prv != PRIV_LVL_M) && (dcsr_d.prv != PRIV_LVL_U)) begin
+            dcsr_d.prv = PRIV_LVL_M;
+          end
+
+          // Read-only for SW
+          dcsr_d.cause = dcsr_q.cause;
+
+          // currently not supported:
+          dcsr_d.nmip = 1'b0;
+          dcsr_d.mprven = 1'b0;
+          dcsr_d.stopcount = 1'b0;
+          dcsr_d.stoptime = 1'b0;
+
+          // forced to be zero
+          dcsr_d.zero0 = 1'b0;
+          dcsr_d.zero1 = 1'b0;
+          dcsr_d.zero2 = 12'h0;
+          dcsr_en      = 1'b1;
+        end
+
+        // dpc: debug program counter
+        CSR_DPC: depc_en = 1'b1;
+
+        CSR_DSCRATCH0: dscratch0_en = 1'b1;
+        CSR_DSCRATCH1: dscratch1_en = 1'b1;
+
+        // machine counter/timers
+        CSR_MCOUNTINHIBIT: mcountinhibit_we = 1'b1;
+
+        // Low-word write strobe for the counter selected by mhpmcounter_idx
+        CSR_MCYCLE,
+        CSR_MINSTRET,
+        CSR_MHPMCOUNTER3,
+        CSR_MHPMCOUNTER4,  CSR_MHPMCOUNTER5,  CSR_MHPMCOUNTER6,  CSR_MHPMCOUNTER7,
+        CSR_MHPMCOUNTER8,  CSR_MHPMCOUNTER9,  CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11,
+        CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15,
+        CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19,
+        CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23,
+        CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27,
+        CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin
+          mhpmcounter_we[mhpmcounter_idx] = 1'b1;
+        end
+
+        // High-word write strobe for the counter selected by mhpmcounter_idx
+        CSR_MCYCLEH,
+        CSR_MINSTRETH,
+        CSR_MHPMCOUNTER3H,
+        CSR_MHPMCOUNTER4H,  CSR_MHPMCOUNTER5H,  CSR_MHPMCOUNTER6H,  CSR_MHPMCOUNTER7H,
+        CSR_MHPMCOUNTER8H,  CSR_MHPMCOUNTER9H,  CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H,
+        CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H,
+        CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H,
+        CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H,
+        CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H,
+        CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin
+          mhpmcounterh_we[mhpmcounter_idx] = 1'b1;
+        end
+
+        CSR_CPUCTRL: cpuctrl_we = 1'b1;
+
+        default:;
+      endcase
+    end
+
+    // exception controller gets priority over other writes
+    unique case (1'b1)
+
+      csr_save_cause_i: begin
+        // Pick the PC of the pipeline stage that is being saved
+        unique case (1'b1)
+          csr_save_if_i: begin
+            exception_pc = pc_if_i;
+          end
+          csr_save_id_i: begin
+            exception_pc = pc_id_i;
+          end
+          csr_save_wb_i: begin
+            exception_pc = pc_wb_i;
+          end
+          default:;
+        endcase
+
+        // Any exception, including debug mode, causes a switch to M-mode
+        priv_lvl_d = PRIV_LVL_M;
+
+        if (debug_csr_save_i) begin
+          // all interrupts are masked
+          // do not update cause, epc, tval and status
+          dcsr_d.prv   = priv_lvl_q;
+          dcsr_d.cause = debug_cause_i;
+          dcsr_en      = 1'b1;
+          depc_d       = exception_pc;
+          depc_en      = 1'b1;
+        end else if (!debug_mode_i) begin
+          // In debug mode, "exceptions do not update any registers. That
+          // includes cause, epc, tval, dpc and mstatus." [Debug Spec v0.13.2, p.39]
+          mtval_en       = 1'b1;
+          mtval_d        = csr_mtval_i;
+          mstatus_en     = 1'b1;
+          mstatus_d.mie  = 1'b0; // disable interrupts
+          // save current status
+          mstatus_d.mpie = mstatus_q.mie;
+          mstatus_d.mpp  = priv_lvl_q;
+          mepc_en        = 1'b1;
+          mepc_d         = exception_pc;
+          mcause_en      = 1'b1;
+          mcause_d       = {csr_mcause_i};
+          // save previous status for recoverable NMI
+          mstack_en      = 1'b1;
+        end
+      end // csr_save_cause_i
+
+      csr_restore_dret_i: begin // DRET
+        priv_lvl_d = dcsr_q.prv;
+      end // csr_restore_dret_i
+
+      csr_restore_mret_i: begin // MRET
+        priv_lvl_d     = mstatus_q.mpp;
+        mstatus_en     = 1'b1;
+        mstatus_d.mie  = mstatus_q.mpie; // re-enable interrupts
+
+        if (nmi_mode_i) begin
+          // when returning from an NMI restore state from mstack CSR
+          mstatus_d.mpie = mstack_q.mpie;
+          mstatus_d.mpp  = mstack_q.mpp;
+          mepc_en        = 1'b1;
+          mepc_d         = mstack_epc_q;
+          mcause_en      = 1'b1;
+          mcause_d       = mstack_cause_q;
+        end else begin
+          // otherwise just set mstatus.MPIE/MPP
+          // See RISC-V Privileged Specification, version 1.11, Section 3.1.6.1
+          mstatus_d.mpie = 1'b1;
+          mstatus_d.mpp  = PRIV_LVL_U;
+        end
+      end // csr_restore_mret_i
+
+      default:;
+    endcase
+  end
+
+  // Update current priv level
+  // The core comes out of reset in M-mode; afterwards the level follows
+  // priv_lvl_d, which the write logic changes on trap entry, DRET and MRET.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      priv_lvl_q     <= PRIV_LVL_M;
+    end else begin
+      priv_lvl_q     <= priv_lvl_d;
+    end
+  end
+
+  // Send current priv level to the decoder
+  assign priv_mode_id_o = priv_lvl_q;
+  // New instruction fetches need to account for updates to priv_lvl_q this cycle
+  assign priv_mode_if_o = priv_lvl_d;
+  // Load/store instructions must factor in MPRV for PMP checking
+  // (with mstatus.MPRV set, mstatus.MPP is used instead of the current level)
+  assign priv_mode_lsu_o = mstatus_q.mprv ? mstatus_q.mpp : priv_lvl_q;
+
+  // CSR operation logic
+  // Derives the value presented to the CSR write ports from the operation:
+  //   WRITE - the operand as is
+  //   SET   - the current read value with the operand bits set
+  //   CLEAR - the current read value with the operand bits cleared
+  //   READ  - the operand (no write is requested for this operation)
+  // The default arm keeps csr_wdata_int assigned for any csr_op_i value that
+  // matches none of the labels (for example X during simulation), so the
+  // block never holds a stale value.
+  always_comb begin
+    unique case (csr_op_i)
+      CSR_OP_WRITE: csr_wdata_int =  csr_wdata_i;
+      CSR_OP_SET:   csr_wdata_int =  csr_wdata_i | csr_rdata_o;
+      CSR_OP_CLEAR: csr_wdata_int = ~csr_wdata_i & csr_rdata_o;
+      CSR_OP_READ:  csr_wdata_int =  csr_wdata_i;
+      default:      csr_wdata_int =  csr_wdata_i;
+    endcase
+  end
+
+  // A write is requested when the operation is enabled and is one of
+  // WRITE, SET or CLEAR (a plain READ never writes)
+  assign csr_wreq = csr_op_en_i &
+    (csr_op_i inside {CSR_OP_WRITE,
+                      CSR_OP_SET,
+                      CSR_OP_CLEAR});
+
+  // only write CSRs during one clock cycle
+  // Writes are suppressed whenever the access is flagged illegal
+  assign csr_we_int  = csr_wreq & ~illegal_csr_insn_o;
+
+  assign csr_rdata_o = csr_rdata_int;
+
+  // directly output some registers
+  assign csr_mepc_o  = mepc_q;
+  assign csr_depc_o  = depc_q;
+  assign csr_mtvec_o = mtvec_q;
+
+  assign csr_mstatus_mie_o   = mstatus_q.mie;
+  assign csr_mstatus_tw_o    = mstatus_q.tw;
+  assign debug_single_step_o = dcsr_q.step;
+  assign debug_ebreakm_o     = dcsr_q.ebreakm;
+  assign debug_ebreaku_o     = dcsr_q.ebreaku;
+
+  // Qualify incoming interrupt requests in mip CSR with mie CSR for controller and to re-enable
+  // clock upon WFI (must be purely combinational).
+  assign irqs_o        = mip & mie_q;
+  // Pending as soon as any enabled interrupt source is asserted
+  assign irq_pending_o = |irqs_o;
+
+  ////////////////////////
+  // CSR instantiations //
+  ////////////////////////
+
+  // MSTATUS
+  // Reset state: interrupts disabled, MPIE set, MPP = U-mode, MPRV and TW clear.
+  localparam status_t MSTATUS_RST_VAL = '{mie:  1'b0,
+                                          mpie: 1'b1,
+                                          mpp:  PRIV_LVL_U,
+                                          mprv: 1'b0,
+                                          tw:   1'b0};
+  // Storage for the packed status_t fields. ShadowCopy follows the ShadowCSR
+  // parameter and the instance's error output is collected in mstatus_err.
+  brq_csr #(
+    .Width      ($bits(status_t)),
+    .ShadowCopy (ShadowCSR),
+    .ResetValue ({MSTATUS_RST_VAL})
+  ) u_mstatus_csr (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .wr_data_i  ({mstatus_d}),
+    .wr_en_i    (mstatus_en),
+    .rd_data_o  (mstatus_q),
+    .rd_error_o (mstatus_err)
+  );
+
+  // FFLAGS
+  // Accrued floating-point exception flags. The flags are sticky: a hardware
+  // update ORs the status of the floating-point instruction into the stored
+  // value instead of replacing it, so earlier exceptions stay visible until
+  // software clears them with a CSR write (fflags_d / fflags_en).
+  // A hardware update takes precedence over a CSR write in the same cycle.
+  // NOTE(review): assumes fp_status_i is valid whenever is_fp_instr_i is
+  // asserted - confirm against the FPU handshake.
+  assign fflag_wdata = is_fp_instr_i ? fpnew_pkg::status_t'(fflags_q | fp_status_i)
+                                     : fflags_d;
+  brq_csr #(
+    .Width      (5),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) fflags_csr (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .wr_data_i  (fflag_wdata),
+    .wr_en_i    (fflags_en | is_fp_instr_i),
+    .rd_data_o  (fflags_q),
+    .rd_error_o ()
+  );
+
+  // FRM
+  // 3-bit rounding-mode register, reset to 0. Written through frm_d / frm_en,
+  // which the write logic drives for CSR_FRM and CSR_FCSR accesses.
+  brq_csr #(
+    .Width      (3),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) frm_csr (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .wr_data_i  (frm_d),
+    .wr_en_i    (frm_en),
+    .rd_data_o  (frm_q),
+    .rd_error_o ()
+  );
+  
+  // MEPC
+  // Exception PC. mepc_d carries the software-written value with bit 0
+  // cleared, the saved exception PC on trap entry, or mstack_epc_q on an MRET
+  // while nmi_mode_i is set.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_mepc_csr (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .wr_data_i  (mepc_d),
+    .wr_en_i    (mepc_en),
+    .rd_data_o  (mepc_q),
+    .rd_error_o ()
+  );
+
+  // MIE
+  // Only the implemented enable bits are extracted from the write data; the
+  // register stores them packed as an irqs_t and resets with all of them clear.
+  assign mie_d.irq_software = csr_wdata_int[CSR_MSIX_BIT];
+  assign mie_d.irq_timer    = csr_wdata_int[CSR_MTIX_BIT];
+  assign mie_d.irq_external = csr_wdata_int[CSR_MEIX_BIT];
+  assign mie_d.irq_fast     = csr_wdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW];
+  brq_csr #(
+    .Width      ($bits(irqs_t)),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_mie_csr (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .wr_data_i  ({mie_d}),
+    .wr_en_i    (mie_en),
+    .rd_data_o  (mie_q),
+    .rd_error_o ()
+  );
+
+  // MSCRATCH
+  // Plain 32-bit scratch register: stores the CSR write data unmodified.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_mscratch_csr (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .wr_data_i  (csr_wdata_int),
+    .wr_en_i    (mscratch_en),
+    .rd_data_o  (mscratch_q),
+    .rd_error_o ()
+  );
+
+  // MCAUSE: only 6 bits are stored; no shadow copy, reset value 0.
+  // It loads mcause_d on a rising clock edge whenever mcause_en is asserted.
+  brq_csr #(
+    .Width      (6),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_mcause_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (mcause_en),
+    .wr_data_i  (mcause_d),
+    .rd_error_o (),
+    .rd_data_o  (mcause_q)
+  );
+
+  // MTVAL: 32-bit register without a shadow copy; reset value is 0.
+  // It loads mtval_d on a rising clock edge whenever mtval_en is asserted.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_mtval_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (mtval_en),
+    .wr_data_i  (mtval_d),
+    .rd_error_o (),
+    .rd_data_o  (mtval_q)
+  );
+
+  // MTVEC: 32-bit register that resets to 32'd1 (only bit 0 set). A shadow copy is kept when
+  // ShadowCSR is set; a mismatch between register and shadow is reported on mtvec_err.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (ShadowCSR),
+    .ResetValue (32'd1)
+  ) u_mtvec_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (mtvec_en),
+    .wr_data_i  (mtvec_d),
+    .rd_error_o (mtvec_err),
+    .rd_data_o  (mtvec_q)
+  );
+
+  // DCSR: register of $bits(dcsr_t) bits, no shadow copy. Out of reset the xdebugver, cause
+  // and prv fields hold the values listed below and every other field is zero.
+  localparam dcsr_t DCSR_RESET_VAL = '{
+      xdebugver: XDEBUGVER_STD,
+      cause:     DBG_CAUSE_NONE, // 3'h0
+      prv:       PRIV_LVL_M,
+      default:   '0
+  };
+  brq_csr #(
+    .Width      ($bits(dcsr_t)),
+    .ShadowCopy (1'b0),
+    .ResetValue ({DCSR_RESET_VAL})
+  ) u_dcsr_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (dcsr_en),
+    .wr_data_i  ({dcsr_d}),
+    .rd_error_o (),
+    .rd_data_o  (dcsr_q)
+  );
+
+  // DEPC: 32-bit register without a shadow copy; reset value is 0.
+  // It loads depc_d on a rising clock edge whenever depc_en is asserted.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_depc_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (depc_en),
+    .wr_data_i  (depc_d),
+    .rd_error_o (),
+    .rd_data_o  (depc_q)
+  );
+
+  // DSCRATCH0: 32-bit register without a shadow copy; reset value is 0.
+  // It stores csr_wdata_int unmodified on a rising clock edge whenever dscratch0_en is asserted.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_dscratch0_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (dscratch0_en),
+    .wr_data_i  (csr_wdata_int),
+    .rd_error_o (),
+    .rd_data_o  (dscratch0_q)
+  );
+
+  // DSCRATCH1: 32-bit register without a shadow copy; reset value is 0.
+  // It stores csr_wdata_int unmodified on a rising clock edge whenever dscratch1_en is asserted.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_dscratch1_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (dscratch1_en),
+    .wr_data_i  (csr_wdata_int),
+    .rd_error_o (),
+    .rd_data_o  (dscratch1_q)
+  );
+
+  // MSTACK: register of $bits(status_stk_t) bits, no shadow copy. It resets with mpie = 1 and
+  // mpp = PRIV_LVL_U, and loads mstack_d on a rising clock edge whenever mstack_en is asserted.
+  localparam status_stk_t MSTACK_RESET_VAL = '{
+      mpie: 1'b1,
+      mpp:  PRIV_LVL_U
+  };
+  brq_csr #(
+    .Width      ($bits(status_stk_t)),
+    .ShadowCopy (1'b0),
+    .ResetValue ({MSTACK_RESET_VAL})
+  ) u_mstack_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (mstack_en),
+    .wr_data_i  ({mstack_d}),
+    .rd_error_o (),
+    .rd_data_o  (mstack_q)
+  );
+
+  // MSTACK_EPC: 32-bit register without a shadow copy; reset value is 0.
+  // It loads mstack_epc_d under the same mstack_en strobe that writes the MSTACK register.
+  brq_csr #(
+    .Width      (32),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_mstack_epc_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (mstack_en),
+    .wr_data_i  (mstack_epc_d),
+    .rd_error_o (),
+    .rd_data_o  (mstack_epc_q)
+  );
+
+  // MSTACK_CAUSE: only 6 bits are stored; no shadow copy, reset value 0.
+  // It loads mstack_cause_d under the same mstack_en strobe that writes the MSTACK register.
+  brq_csr #(
+    .Width      (6),
+    .ShadowCopy (1'b0),
+    .ResetValue ('0)
+  ) u_mstack_cause_csr (
+    .clk_i (clk_i), .rst_ni (rst_ni),
+    .wr_en_i    (mstack_en),
+    .wr_data_i  (mstack_cause_d),
+    .rd_error_o (),
+    .rd_data_o  (mstack_cause_q)
+  );
+
+  // -----------------
+  // PMP registers
+  // -----------------
+  // PMPEnable selects between real per-region cfg/addr registers and constant-zero tie-offs.
+  if (PMPEnable) begin : g_pmp_registers
+    pmp_cfg_t                    pmp_cfg         [PMPNumRegions];
+    pmp_cfg_t                    pmp_cfg_wdata   [PMPNumRegions];
+    logic [PMPAddrWidth-1:0]     pmp_addr        [PMPNumRegions];
+    logic [PMPNumRegions-1:0]    pmp_cfg_we;
+    logic [PMPNumRegions-1:0]    pmp_cfg_err;
+    logic [PMPNumRegions-1:0]    pmp_addr_we;
+    logic [PMPNumRegions-1:0]    pmp_addr_err;
+
+    // Read data for all PMP_MAX_REGIONS slots; slots at or above PMPNumRegions read as zero
+    for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_exp_rd_data
+      if (i < PMPNumRegions) begin : g_implemented_regions
+        // Add in zero padding for reserved fields
+        assign pmp_cfg_rdata[i] = {pmp_cfg[i].lock, 2'b00, pmp_cfg[i].mode,
+                                   pmp_cfg[i].exec, pmp_cfg[i].write, pmp_cfg[i].read};
+
+        // Address field read data depends on the current programmed mode and the granularity
+        // See RISC-V Privileged Specification, version 1.11, Section 3.6.1
+        if (PMPGranularity == 0) begin : g_pmp_g0
+          // If G == 0, read data is unmodified
+          assign pmp_addr_rdata[i] = pmp_addr[i];
+
+        end else if (PMPGranularity == 1) begin : g_pmp_g1
+          // If G == 1, bit [G-1] reads as zero in TOR or OFF mode
+          always_comb begin
+            pmp_addr_rdata[i] = pmp_addr[i];
+            if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin
+              pmp_addr_rdata[i][PMPGranularity-1:0] = '0;
+            end
+          end
+
+        end else begin : g_pmp_g2
+          // For G >= 2, bits are masked to one or zero depending on the mode
+          always_comb begin
+            // In NAPOT mode, bits [G-2:0] must read as one
+            pmp_addr_rdata[i] = {pmp_addr[i], {PMPGranularity-1{1'b1}}};
+
+            if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin
+              // In TOR or OFF mode, bits [G-1:0] must read as zero
+              pmp_addr_rdata[i][PMPGranularity-1:0] = '0;
+            end
+          end
+        end
+
+      end else begin : g_other_regions
+        // Non-implemented regions read as zero
+        assign pmp_cfg_rdata[i]  = '0;
+        assign pmp_addr_rdata[i] = '0;
+      end
+    end
+
+    // Per-region write enables, write data and the cfg / addr registers themselves
+    for (genvar i = 0; i < PMPNumRegions; i++) begin : g_pmp_csrs
+      // -------------------------
+      // Instantiate cfg registers (writes are ignored once the region's lock bit is set)
+      // -------------------------
+      assign pmp_cfg_we[i] = csr_we_int & ~pmp_cfg[i].lock &
+                             (csr_addr == (CSR_OFF_PMP_CFG + (i[11:0] >> 2)));
+
+      // Select the correct WDATA (each CSR contains 4 CFG fields, each with 2 RES bits)
+      assign pmp_cfg_wdata[i].lock  = csr_wdata_int[(i%4)*PMP_CFG_W+7];
+      // NA4 mode is not selectable when G > 0, mode is treated as OFF
+      always_comb begin
+        unique case (csr_wdata_int[(i%4)*PMP_CFG_W+3+:2])
+          2'b00   : pmp_cfg_wdata[i].mode = PMP_MODE_OFF;
+          2'b01   : pmp_cfg_wdata[i].mode = PMP_MODE_TOR;
+          2'b10   : pmp_cfg_wdata[i].mode = (PMPGranularity == 0) ? PMP_MODE_NA4:
+                                                                    PMP_MODE_OFF;
+          2'b11   : pmp_cfg_wdata[i].mode = PMP_MODE_NAPOT;
+          default : pmp_cfg_wdata[i].mode = PMP_MODE_OFF;
+        endcase
+      end
+      assign pmp_cfg_wdata[i].exec  = csr_wdata_int[(i%4)*PMP_CFG_W+2];
+      // W = 1, R = 0 is a reserved combination. For now, we force W to 0 if R == 0
+      assign pmp_cfg_wdata[i].write = &csr_wdata_int[(i%4)*PMP_CFG_W+:2];
+      assign pmp_cfg_wdata[i].read  = csr_wdata_int[(i%4)*PMP_CFG_W];
+
+      brq_csr #(
+        .Width      ($bits(pmp_cfg_t)),
+        .ShadowCopy (ShadowCSR),
+        .ResetValue ('0)
+      ) u_pmp_cfg_csr (
+        .clk_i      (clk_i),
+        .rst_ni     (rst_ni),
+        .wr_data_i  ({pmp_cfg_wdata[i]}),
+        .wr_en_i    (pmp_cfg_we[i]),
+        .rd_data_o  (pmp_cfg[i]),
+        .rd_error_o (pmp_cfg_err[i])
+      );
+
+      // --------------------------
+      // Instantiate addr registers (also write-protected if region i+1 is locked in TOR mode)
+      // --------------------------
+      if (i < PMPNumRegions - 1) begin : g_lower
+        assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg[i].lock &
+                                (~pmp_cfg[i+1].lock | (pmp_cfg[i+1].mode != PMP_MODE_TOR)) &
+                                (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0]));
+      end else begin : g_upper
+        assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg[i].lock &
+                                (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0]));
+      end
+
+      brq_csr #(
+        .Width      (PMPAddrWidth),
+        .ShadowCopy (ShadowCSR),
+        .ResetValue ('0)
+      ) u_pmp_addr_csr (
+        .clk_i      (clk_i),
+        .rst_ni     (rst_ni),
+        .wr_data_i  (csr_wdata_int[31-:PMPAddrWidth]),
+        .wr_en_i    (pmp_addr_we[i]),
+        .rd_data_o  (pmp_addr[i]),
+        .rd_error_o (pmp_addr_err[i])
+      );
+      // Outputs: the stored cfg, and the masked address read data with two zero LSBs appended
+      assign csr_pmp_cfg_o[i]  = pmp_cfg[i];
+      assign csr_pmp_addr_o[i] = {pmp_addr_rdata[i], 2'b00};
+    end
+
+    assign pmp_csr_err = (|pmp_cfg_err) | (|pmp_addr_err);
+
+  end else begin : g_no_pmp_tieoffs
+    // Generate tieoffs when PMP is not configured
+    for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_rdata
+      assign pmp_addr_rdata[i] = '0;
+      assign pmp_cfg_rdata[i]  = '0;
+    end
+    for (genvar i = 0; i < PMPNumRegions; i++) begin : g_outputs
+      assign csr_pmp_cfg_o[i]  = pmp_cfg_t'(1'b0);
+      assign csr_pmp_addr_o[i] = '0;
+    end
+    assign pmp_csr_err = 1'b0;
+  end
+
+  //////////////////////////
+  //  Performance monitor //
+  //////////////////////////
+
+  // Next value of mcountinhibit: hold the registered value unless a CSR write is in progress.
+  always_comb begin : mcountinhibit_update
+    // default: keep what is stored
+    mcountinhibit_d = mcountinhibit_q;
+    if (mcountinhibit_we == 1'b1) begin
+      // bits [MHPMCounterNum+2:2] and bit 0 come from the write data, bit 1 must always be 0
+      mcountinhibit_d = {csr_wdata_int[MHPMCounterNum+2:2], 1'b0, csr_wdata_int[0]};
+    end
+  end
+
+  // Increment request for each of the 32 counter slots; every event source is hardwired.
+  always_comb begin : gen_mhpmcounter_incr
+    // Start from "no increment" in every slot, so that slots without an event below stay
+    // at zero and no latch is inferred.
+    for (int unsigned slot = 0; slot < 32; slot++) begin : gen_mhpmcounter_incr_inactive
+      mhpmcounter_incr[slot] = 1'b0;
+    end
+
+    // When adding or altering performance counter meanings and default
+    // mappings please update dv/verilator/pcount/cpp/brq_pcounts.cc
+    // appropriately.
+    //
+    // Slots with an event attached (slot 1 is reserved and is kept at zero)
+    mhpmcounter_incr[0]  = 1'b1;                   // mcycle: counts every cycle
+    mhpmcounter_incr[1]  = 1'b0;                   // reserved
+    mhpmcounter_incr[2]  = instr_ret_i;            // minstret
+    mhpmcounter_incr[3]  = dside_wait_i;           // cycles waiting for data memory
+    mhpmcounter_incr[4]  = iside_wait_i;           // cycles waiting for instr fetches
+    mhpmcounter_incr[5]  = mem_load_i;             // num of loads
+    mhpmcounter_incr[6]  = mem_store_i;            // num of stores
+    mhpmcounter_incr[7]  = jump_i;                 // num of jumps (unconditional)
+    mhpmcounter_incr[8]  = branch_i;               // num of branches (conditional)
+    mhpmcounter_incr[9]  = branch_taken_i;         // num of taken branches (conditional)
+    mhpmcounter_incr[10] = instr_ret_compressed_i; // num of compressed instr
+    mhpmcounter_incr[11] = mul_wait_i;             // cycles waiting for multiply
+    mhpmcounter_incr[12] = div_wait_i;             // cycles waiting for divide
+  end
+
+  // Value of each event selector: hardwired, and a value of 0 means "no event".
+  always_comb begin : gen_mhpmevent
+    // First give every slot n a one-hot value in which only bit n is set ...
+
+    for (int n = 0; n < 32; n++) begin : gen_mhpmevent_active
+      mhpmevent[n]    = '0;
+      mhpmevent[n][n] = 1'b1;
+    end
+
+    // ... then clear the reserved slot 1 and every slot from 3+MHPMCounterNum upwards.
+    mhpmevent[1] = '0;
+    for (int unsigned n = 3 + MHPMCounterNum; n < 32; n++) begin : gen_mhpmevent_inactive
+      mhpmevent[n] = '0;
+    end
+  end
+
+  // mcycle: 64-bit brq_counter in slot 0. An increment is requested by mhpmcounter_incr[0]
+  // unless mcountinhibit[0] is set; CSR writes supply csr_wdata_int under the two write enables.
+  brq_counter #(
+    .CounterWidth(64)
+  ) mcycle_counter_i (
+    .clk_i(clk_i), .rst_ni(rst_ni),
+    .counter_inc_i(~mcountinhibit[0] & mhpmcounter_incr[0]),
+    .counter_we_i(mhpmcounter_we[0]),
+    .counterh_we_i(mhpmcounterh_we[0]),
+    .counter_val_i(csr_wdata_int),
+    .counter_val_o(mhpmcounter[0])
+  );
+
+  // minstret: 64-bit brq_counter in slot 2. An increment is requested by mhpmcounter_incr[2]
+  // unless mcountinhibit[2] is set; CSR writes supply csr_wdata_int under the two write enables.
+  brq_counter #(
+    .CounterWidth(64)
+  ) minstret_counter_i (
+    .clk_i(clk_i), .rst_ni(rst_ni),
+    .counter_inc_i(~mcountinhibit[2] & mhpmcounter_incr[2]),
+    .counter_we_i(mhpmcounter_we[2]),
+    .counterh_we_i(mhpmcounterh_we[2]),
+    .counter_val_i(csr_wdata_int),
+    .counter_val_o(mhpmcounter[2])
+  );
+
+  // Slot 1 is reserved: it reads as zero. Its control bits are only copied to unused_* nets.
+  assign unused_mhpmcounter_incr_1 = mhpmcounter_incr[1];
+  assign unused_mhpmcounterh_we_1  = mhpmcounterh_we[1];
+  assign unused_mhpmcounter_we_1   = mhpmcounter_we[1];
+  assign mhpmcounter[1]            = '0;
+
+  // Slots 3..31: the first MHPMCounterNum of them get a real counter, the rest read as zero.
+  for (genvar k = 0; k < 29; k++) begin : gen_cntrs
+    if (k >= MHPMCounterNum) begin : gen_unimp
+      assign mhpmcounter[k+3] = '0;
+    end else begin : gen_imp
+      brq_counter #(
+        .CounterWidth(MHPMCounterWidth)
+      ) mcounters_variable_i (
+        .clk_i(clk_i), .rst_ni(rst_ni),
+        .counter_inc_i(~mcountinhibit[k+3] & mhpmcounter_incr[k+3]),
+        .counter_we_i(mhpmcounter_we[k+3]),
+        .counterh_we_i(mhpmcounterh_we[k+3]),
+        .counter_val_i(csr_wdata_int),
+        .counter_val_o(mhpmcounter[k+3])
+      );
+    end
+  end
+
+  if (MHPMCounterNum >= 29) begin : g_mcountinhibit_full
+    assign mcountinhibit = mcountinhibit_q;
+  end else begin : g_mcountinhibit_reduced
+    logic [29-MHPMCounterNum-1:0] unused_mhphcounter_incr;
+    logic [29-MHPMCounterNum-1:0] unused_mhphcounterh_we;
+    logic [29-MHPMCounterNum-1:0] unused_mhphcounter_we;
+
+    // The stored bits are extended with ones for the 29-MHPMCounterNum slots without a counter
+    assign mcountinhibit = {{29-MHPMCounterNum{1'b1}}, mcountinhibit_q};
+    // Lint tieoffs for the control bits of those slots
+    assign unused_mhphcounter_incr = mhpmcounter_incr[31:MHPMCounterNum+3];
+    assign unused_mhphcounterh_we  = mhpmcounterh_we[31:MHPMCounterNum+3];
+    assign unused_mhphcounter_we   = mhpmcounter_we[31:MHPMCounterNum+3];
+  end
+
+  // mcountinhibit storage.
+  // Cleared by the asynchronous, active-low reset rst_ni.
+  // Otherwise it takes mcountinhibit_d on every rising edge of clk_i.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) mcountinhibit_q <= '0;
+    else         mcountinhibit_q <= mcountinhibit_d;
+  end
+
+  /////////////////////////////
+  // Debug trigger registers //
+  /////////////////////////////
+  // With DbgTriggerEn set, DbgHwBreakNum address-match triggers are built; else all tie to 0.
+  if (DbgTriggerEn) begin : gen_trigger_regs
+    localparam int unsigned DbgHwNumLen = DbgHwBreakNum > 1 ? $clog2(DbgHwBreakNum) : 1;
+    // Register values
+    logic [DbgHwNumLen-1:0]   tselect_d, tselect_q;
+    logic                     tmatch_control_d;
+    logic [DbgHwBreakNum-1:0] tmatch_control_q;
+    logic [31:0]              tmatch_value_d;
+    logic [31:0]              tmatch_value_q[DbgHwBreakNum];
+    // Write enables
+    logic                     tselect_we;
+    logic [DbgHwBreakNum-1:0] tmatch_control_we;
+    logic [DbgHwBreakNum-1:0] tmatch_value_we;
+    // Trigger comparison result
+    logic [DbgHwBreakNum-1:0] trigger_match;
+
+    // Write enables: every trigger CSR write needs debug_mode_i; tselect_q picks the trigger
+    assign tselect_we = csr_we_int & debug_mode_i & (csr_addr_i == CSR_TSELECT);
+    for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_we
+      assign tmatch_control_we[i] = (i[DbgHwNumLen-1:0] == tselect_q) & csr_we_int & debug_mode_i &
+                                    (csr_addr_i == CSR_TDATA1);
+      assign tmatch_value_we[i]   = (i[DbgHwNumLen-1:0] == tselect_q) & csr_we_int & debug_mode_i &
+                                    (csr_addr_i == CSR_TDATA2);
+    end
+
+    // Debug interface tests the available number of triggers by writing and reading the trigger
+    // select register. Write values of DbgHwBreakNum or above are clamped to DbgHwBreakNum-1.
+    assign tselect_d = (csr_wdata_int < DbgHwBreakNum) ? csr_wdata_int[DbgHwNumLen-1:0] :
+                                                         DbgHwBreakNum-1;
+    // tmatch_control is enabled when the execute bit is set
+    assign tmatch_control_d = csr_wdata_int[2];
+    assign tmatch_value_d   = csr_wdata_int[31:0];
+
+    // Registers
+    brq_csr #(
+      .Width      (DbgHwNumLen),
+      .ShadowCopy (1'b0),
+      .ResetValue ('0)
+    ) u_tselect_csr (
+      .clk_i      (clk_i),
+      .rst_ni     (rst_ni),
+      .wr_data_i  (tselect_d),
+      .wr_en_i    (tselect_we),
+      .rd_data_o  (tselect_q),
+      .rd_error_o ()
+    );
+
+    for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_reg
+      brq_csr #(
+        .Width      (1),
+        .ShadowCopy (1'b0),
+        .ResetValue ('0)
+      ) u_tmatch_control_csr (
+        .clk_i      (clk_i),
+        .rst_ni     (rst_ni),
+        .wr_data_i  (tmatch_control_d),
+        .wr_en_i    (tmatch_control_we[i]),
+        .rd_data_o  (tmatch_control_q[i]),
+        .rd_error_o ()
+    );
+
+      brq_csr #(
+        .Width      (32),
+        .ShadowCopy (1'b0),
+        .ResetValue ('0)
+      ) u_tmatch_value_csr (
+        .clk_i      (clk_i),
+        .rst_ni     (rst_ni),
+        .wr_data_i  (tmatch_value_d),
+        .wr_en_i    (tmatch_value_we[i]),
+        .rd_data_o  (tmatch_value_q[i]),
+        .rd_error_o ()
+      );
+    end
+
+    // Assign read data
+    // TSELECT - number of supported triggers defined by parameter DbgHwBreakNum
+    localparam int unsigned TSelectRdataPadlen = DbgHwNumLen >= 32 ? 0 : (32 - DbgHwNumLen);
+    assign tselect_rdata = {{TSelectRdataPadlen{1'b0}}, tselect_q};
+
+    // tmatch_control (written through CSR_TDATA1) - only support simple address matching
+    assign tmatch_control_rdata = {4'h2,                         // type    : address/data match
+                                   1'b1,                         // dmode   : access from D mode only
+                                   6'h00,                        // maskmax : exact match only
+                                   1'b0,                         // hit     : not supported
+                                   1'b0,                         // select  : address match only
+                                   1'b0,                         // timing  : match before execution
+                                   2'b00,                        // sizelo  : match any access
+                                   4'h1,                         // action  : enter debug mode
+                                   1'b0,                         // chain   : not supported
+                                   4'h0,                         // match   : simple match
+                                   1'b1,                         // m       : match in m-mode
+                                   1'b0,                         // 0       : zero
+                                   1'b0,                         // s       : not supported
+                                   1'b1,                         // u       : match in u-mode
+                                   tmatch_control_q[tselect_q],  // execute : match instruction address
+                                   1'b0,                         // store   : not supported
+                                   1'b0};                        // load    : not supported
+    // tmatch_value (written through CSR_TDATA2) - address match value only
+    assign tmatch_value_rdata = tmatch_value_q[tselect_q];
+
+    // Breakpoint matching
+    // We match against the next address, as the breakpoint must be taken before execution
+    for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_trigger_match
+      assign trigger_match[i] = tmatch_control_q[i] & (pc_if_i[31:0] == tmatch_value_q[i]);
+    end
+    assign trigger_match_o = |trigger_match;
+
+  end else begin : gen_no_trigger_regs
+    assign tselect_rdata        = 'b0;
+    assign tmatch_control_rdata = 'b0;
+    assign tmatch_value_rdata   = 'b0;
+    assign trigger_match_o      = 'b0;
+  end
+
+  //////////////////////////
+  // CPU control register //
+  //////////////////////////
+
+  // Reinterpret the low $bits(cpu_ctrl_t) bits of the CSR write data as a cpu_ctrl_t
+  assign cpuctrl_wdata = cpu_ctrl_t'(csr_wdata_int[$bits(cpu_ctrl_t)-1:0]);
+
+  // Generate fixed time execution bit (writable only when DataIndTiming is set)
+  if (DataIndTiming) begin : gen_dit
+    assign cpuctrl_d.data_ind_timing = cpuctrl_wdata.data_ind_timing;
+
+  end else begin : gen_no_dit
+    // tieoff for the unused bit
+    logic unused_dit;
+    assign unused_dit = cpuctrl_wdata.data_ind_timing;
+
+    // field will always read as zero if not configured
+    assign cpuctrl_d.data_ind_timing = 1'b0;
+  end
+
+  assign data_ind_timing_o = cpuctrl_q.data_ind_timing;
+
+  // Generate dummy instruction signals (fields and seed write exist only with DummyInstructions)
+  if (DummyInstructions) begin : gen_dummy
+    assign cpuctrl_d.dummy_instr_en   = cpuctrl_wdata.dummy_instr_en;
+    assign cpuctrl_d.dummy_instr_mask = cpuctrl_wdata.dummy_instr_mask;
+
+    // Signal a write to the seed register
+    assign dummy_instr_seed_en_o = csr_we_int && (csr_addr == CSR_SECURESEED);
+    assign dummy_instr_seed_o    = csr_wdata_int;
+
+  end else begin : gen_no_dummy
+    // tieoff for the unused bit
+    logic       unused_dummy_en;
+    logic [2:0] unused_dummy_mask;
+    assign unused_dummy_en   = cpuctrl_wdata.dummy_instr_en;
+    assign unused_dummy_mask = cpuctrl_wdata.dummy_instr_mask;
+
+    // field will always read as zero if not configured
+    assign cpuctrl_d.dummy_instr_en   = 1'b0;
+    assign cpuctrl_d.dummy_instr_mask = 3'b000;
+    assign dummy_instr_seed_en_o      = 1'b0;
+    assign dummy_instr_seed_o         = '0;
+  end
+
+  assign dummy_instr_en_o   = cpuctrl_q.dummy_instr_en;
+  assign dummy_instr_mask_o = cpuctrl_q.dummy_instr_mask;
+
+  // Generate icache enable bit (writable only when ICache is set)
+  if (ICache) begin : gen_icache_enable
+    assign cpuctrl_d.icache_enable = cpuctrl_wdata.icache_enable;
+  end else begin : gen_no_icache
+    // tieoff for the unused icen bit
+    logic unused_icen;
+    assign unused_icen = cpuctrl_wdata.icache_enable;
+
+    // icen field will always read as zero if ICache not configured
+    assign cpuctrl_d.icache_enable = 1'b0;
+  end
+
+  assign icache_enable_o = cpuctrl_q.icache_enable;
+
+  brq_csr #(
+    .Width      ($bits(cpu_ctrl_t)),
+    .ShadowCopy (ShadowCSR),
+    .ResetValue ('0)
+  ) u_cpuctrl_csr (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .wr_data_i  ({cpuctrl_d}),
+    .wr_en_i    (cpuctrl_we),
+    .rd_data_o  (cpuctrl_q),
+    .rd_error_o (cpuctrl_err)
+  );
+  // OR of the rd_error_o flags of the mstatus, mtvec, PMP and cpuctrl registers
+  assign csr_shadow_err_o = mstatus_err | mtvec_err | pmp_csr_err | cpuctrl_err;
+
+
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/brq_csr.sv b/verilog/rtl/brq_csr.sv
new file mode 100644
index 0000000..b1c4477
--- /dev/null
+++ b/verilog/rtl/brq_csr.sv
@@ -0,0 +1,53 @@
+
+
+/**
+ * Control / status register primitive
+ */
+
+
+module brq_csr #(
+    parameter int unsigned    Width      = 32,   // number of stored bits
+    parameter bit             ShadowCopy = 1'b0, // 1: keep an inverted duplicate and compare
+    parameter bit [Width-1:0] ResetValue = '0    // value held after reset
+ ) (
+    input  logic             clk_i,
+    input  logic             rst_ni,      // asynchronous, active low
+
+    input  logic [Width-1:0] wr_data_i,
+    input  logic             wr_en_i,
+    output logic [Width-1:0] rd_data_o,
+
+    output logic             rd_error_o   // 1 when register and shadow copy disagree
+);
+
+  // Main register: takes ResetValue on reset and wr_data_i on a rising clock edge with
+  // wr_en_i set; it holds its value in every other cycle.
+  logic [Width-1:0] rdata_q;
+
+  assign rd_data_o = rdata_q;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni)      rdata_q <= ResetValue;
+    else if (wr_en_i) rdata_q <= wr_data_i;
+  end
+
+  if (!ShadowCopy) begin : gen_no_shadow
+    // No duplicate is kept, so a mismatch is never reported.
+    assign rd_error_o = 1'b0;
+
+  end else begin : gen_shadow
+    // Duplicate register holding the bitwise inverse of the main register. It is reset and
+    // written by the same strobes, so in normal operation the two stay complementary.
+    logic [Width-1:0] shadow_q;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni)      shadow_q <= ~ResetValue;
+      else if (wr_en_i) shadow_q <= ~wr_data_i;
+    end
+
+    // Error unless every bit of the main register is the complement of its shadow bit.
+    assign rd_error_o = ~&(rdata_q ^ shadow_q);
+  end
+
+
+endmodule
diff --git a/verilog/rtl/brq_exu.sv b/verilog/rtl/brq_exu.sv
new file mode 100644
index 0000000..883cf7a
--- /dev/null
+++ b/verilog/rtl/brq_exu.sv
@@ -0,0 +1,194 @@
+/**
+ * Execution stage
+ *
+ * Execution block: Hosts ALU and MUL/DIV unit
+ */
+module brq_exu #(
+    parameter brq_pkg::rv32m_e RV32M           = brq_pkg::RV32MFast,
+    parameter brq_pkg::rv32b_e RV32B           = brq_pkg::RV32BNone,
+    parameter bit              BranchTargetALU = 0
+) (
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    // ALU
+    input  brq_pkg::alu_op_e      alu_operator_i,
+    input  logic [31:0]           alu_operand_a_i,
+    input  logic [31:0]           alu_operand_b_i,
+    input  logic                  alu_instr_first_cycle_i,
+
+    // Branch Target ALU
+    // All of these signals are unused when BranchTargetALU == 0
+    input  logic [31:0]           bt_a_operand_i,
+    input  logic [31:0]           bt_b_operand_i,
+
+    // Multiplier/Divider
+    input  brq_pkg::md_op_e       multdiv_operator_i,
+    input  logic                  mult_en_i,             // dynamic enable signal, for FSM control
+    input  logic                  div_en_i,              // dynamic enable signal, for FSM control
+    input  logic                  mult_sel_i,            // static decoder output, for data muxes
+    input  logic                  div_sel_i,             // static decoder output, for data muxes
+    input  logic  [1:0]           multdiv_signed_mode_i,
+    input  logic [31:0]           multdiv_operand_a_i,
+    input  logic [31:0]           multdiv_operand_b_i,
+    input  logic                  multdiv_ready_id_i,
+    input  logic                  data_ind_timing_i,
+
+    // intermediate val reg
+    output logic [1:0]            imd_val_we_o,
+    output logic [33:0]           imd_val_d_o[2],
+    input  logic [33:0]           imd_val_q_i[2],
+
+    // Outputs
+    output logic [31:0]           alu_adder_result_ex_o, // to LSU
+    output logic [31:0]           result_ex_o,
+    output logic [31:0]           branch_target_o,       // to IF
+    output logic                  branch_decision_o,     // to ID
+
+    output logic                  ex_valid_o             // EX has valid output
+);
+
+  import brq_pkg::*;
+
+  logic [31:0] alu_result, multdiv_result;
+  logic [32:0] multdiv_alu_operand_b, multdiv_alu_operand_a;
+  logic [33:0] alu_adder_result_ext;
+  logic        alu_cmp_result, alu_is_equal_result;
+  logic        multdiv_valid;
+  logic        multdiv_sel;
+  logic [31:0] alu_imd_val_q[2];
+  logic [31:0] alu_imd_val_d[2];
+  logic [ 1:0] alu_imd_val_we;
+  logic [33:0] multdiv_imd_val_d[2];
+  logic [ 1:0] multdiv_imd_val_we;
+
+  // multdiv_sel picks between the ALU and the multiplier/divider for the result, the valid
+  // flag and the intermediate-value register. With RV32M == RV32MNone it is tied to 0, so the
+  // multiplier/divider side is never selected.
+  if (RV32M != RV32MNone) begin : gen_multdiv_m
+    assign multdiv_sel = mult_sel_i | div_sel_i;
+  end else begin : gen_multdiv_no_m
+    assign multdiv_sel = 1'b0;
+  end
+
+  // Intermediate Value Register Mux
+  assign imd_val_d_o[0] = multdiv_sel ? multdiv_imd_val_d[0] : {2'b0, alu_imd_val_d[0]};
+  assign imd_val_d_o[1] = multdiv_sel ? multdiv_imd_val_d[1] : {2'b0, alu_imd_val_d[1]};
+  assign imd_val_we_o   = multdiv_sel ? multdiv_imd_val_we : alu_imd_val_we;
+  assign alu_imd_val_q = '{imd_val_q_i[0][31:0], imd_val_q_i[1][31:0]}; // ALU gets low 32 bits
+  assign result_ex_o  = multdiv_sel ? multdiv_result : alu_result;
+
+  // branch handling
+  assign branch_decision_o  = alu_cmp_result;
+
+  if (BranchTargetALU) begin : g_branch_target_alu
+    logic [32:0] bt_alu_result;
+    logic        unused_bt_carry;
+
+    assign bt_alu_result   = bt_a_operand_i + bt_b_operand_i;
+
+    assign unused_bt_carry = bt_alu_result[32];
+    assign branch_target_o = bt_alu_result[31:0];
+  end else begin : g_no_branch_target_alu
+    // Unused bt_operand signals cause lint errors, this avoids them
+    logic [31:0] unused_bt_a_operand, unused_bt_b_operand;
+
+    assign unused_bt_a_operand = bt_a_operand_i;
+    assign unused_bt_b_operand = bt_b_operand_i;
+
+    assign branch_target_o = alu_adder_result_ex_o;
+  end
+
+  /////////
+  // ALU //
+  /////////
+  brq_exu_alu #(
+    .RV32B(RV32B)
+  ) alu_i                  (
+      .operator_i          ( alu_operator_i          ),
+      .operand_a_i         ( alu_operand_a_i         ),
+      .operand_b_i         ( alu_operand_b_i         ),
+      .instr_first_cycle_i ( alu_instr_first_cycle_i ),
+      .imd_val_q_i         ( alu_imd_val_q           ),
+      .imd_val_we_o        ( alu_imd_val_we          ),
+      .imd_val_d_o         ( alu_imd_val_d           ),
+      .multdiv_operand_a_i ( multdiv_alu_operand_a   ),
+      .multdiv_operand_b_i ( multdiv_alu_operand_b   ),
+      .multdiv_sel_i       ( multdiv_sel             ),
+      .adder_result_o      ( alu_adder_result_ex_o   ),
+      .adder_result_ext_o  ( alu_adder_result_ext    ),
+      .result_o            ( alu_result              ),
+      .comparison_result_o ( alu_cmp_result          ),
+      .is_equal_result_o   ( alu_is_equal_result     )
+  );
+
+  ////////////////
+  // Multiplier //
+  ////////////////
+  if (RV32M == RV32MSlow) begin : gen_multdiv_slow
+    brq_exu_multdiv_slow multdiv_i (
+        .clk_i              ( clk_i                 ),
+        .rst_ni             ( rst_ni                ),
+        .mult_en_i          ( mult_en_i             ),
+        .div_en_i           ( div_en_i              ),
+        .mult_sel_i         ( mult_sel_i            ),
+        .div_sel_i          ( div_sel_i             ),
+        .operator_i         ( multdiv_operator_i    ),
+        .signed_mode_i      ( multdiv_signed_mode_i ),
+        .op_a_i             ( multdiv_operand_a_i   ),
+        .op_b_i             ( multdiv_operand_b_i   ),
+        .alu_adder_ext_i    ( alu_adder_result_ext  ),
+        .alu_adder_i        ( alu_adder_result_ex_o ),
+        .equal_to_zero_i    ( alu_is_equal_result   ),
+        .data_ind_timing_i  ( data_ind_timing_i     ),
+        .valid_o            ( multdiv_valid         ),
+        .alu_operand_a_o    ( multdiv_alu_operand_a ),
+        .alu_operand_b_o    ( multdiv_alu_operand_b ),
+        .imd_val_q_i        ( imd_val_q_i           ),
+        .imd_val_d_o        ( multdiv_imd_val_d     ),
+        .imd_val_we_o       ( multdiv_imd_val_we    ),
+        .multdiv_ready_id_i ( multdiv_ready_id_i    ),
+        .multdiv_result_o   ( multdiv_result        )
+    );
+  end else if (RV32M == RV32MFast || RV32M == RV32MSingleCycle) begin : gen_multdiv_fast
+    brq_exu_multdiv_fast #     (
+        .RV32M ( RV32M )
+    ) multdiv_i             (
+        .clk_i              ( clk_i                 ),
+        .rst_ni             ( rst_ni                ),
+        .mult_en_i          ( mult_en_i             ),
+        .div_en_i           ( div_en_i              ),
+        .mult_sel_i         ( mult_sel_i            ),
+        .div_sel_i          ( div_sel_i             ),
+        .operator_i         ( multdiv_operator_i    ),
+        .signed_mode_i      ( multdiv_signed_mode_i ),
+        .op_a_i             ( multdiv_operand_a_i   ),
+        .op_b_i             ( multdiv_operand_b_i   ),
+        .alu_operand_a_o    ( multdiv_alu_operand_a ),
+        .alu_operand_b_o    ( multdiv_alu_operand_b ),
+        .alu_adder_ext_i    ( alu_adder_result_ext  ),
+        .alu_adder_i        ( alu_adder_result_ex_o ),
+        .equal_to_zero_i    ( alu_is_equal_result   ),
+        .data_ind_timing_i  ( data_ind_timing_i     ),
+        .imd_val_q_i        ( imd_val_q_i           ),
+        .imd_val_d_o        ( multdiv_imd_val_d     ),
+        .imd_val_we_o       ( multdiv_imd_val_we    ),
+        .multdiv_ready_id_i ( multdiv_ready_id_i    ),
+        .valid_o            ( multdiv_valid         ),
+        .multdiv_result_o   ( multdiv_result        )
+    );
+  end else begin : gen_multdiv_none
+    // RV32MNone: nothing is instantiated, so tie off the (never selected) multdiv outputs
+    assign multdiv_valid         = 1'b0;
+    assign multdiv_result        = '0;
+    assign multdiv_alu_operand_a = '0;
+    assign multdiv_alu_operand_b = '0;
+    assign multdiv_imd_val_d     = '{default: '0};
+    assign multdiv_imd_val_we    = '0;
+  end
+
+  // Multiplier/divider may require multiple cycles. The ALU output is valid in the same cycle
+  // unless the intermediate result register is being written (which indicates this isn't the
+  // final cycle of ALU operation).
+  assign ex_valid_o = multdiv_sel ? multdiv_valid : ~(|alu_imd_val_we);
+endmodule
diff --git a/verilog/rtl/brq_exu_alu.sv b/verilog/rtl/brq_exu_alu.sv
new file mode 100644
index 0000000..99fe60a
--- /dev/null
+++ b/verilog/rtl/brq_exu_alu.sv
@@ -0,0 +1,1267 @@
+
+/**
+ * Arithmetic logic unit
+ */
+module brq_exu_alu #(
+  parameter brq_pkg::rv32b_e RV32B = brq_pkg::RV32BNone
+) (
+    input  brq_pkg::alu_op_e operator_i,
+    input  logic [31:0]       operand_a_i,
+    input  logic [31:0]       operand_b_i,
+
+    input  logic              instr_first_cycle_i,
+
+    input  logic [32:0]       multdiv_operand_a_i,
+    input  logic [32:0]       multdiv_operand_b_i,
+
+    input  logic              multdiv_sel_i,
+
+    input  logic [31:0]       imd_val_q_i[2],
+    output logic [31:0]       imd_val_d_o[2],
+    output logic [1:0]        imd_val_we_o,
+
+    output logic [31:0]       adder_result_o,
+    output logic [33:0]       adder_result_ext_o,
+
+    output logic [31:0]       result_o,
+    output logic              comparison_result_o,
+    output logic              is_equal_result_o
+);
+  import brq_pkg::*;
+
+  logic [31:0] operand_a_rev;
+  logic [32:0] operand_b_neg;
+
+  // operand_a_rev mirrors operand_a_i (bit 31 <-> bit 0); used by left shifts and clz
+  for (genvar src = 0; src < 32; src++) begin : gen_rev_operand_a
+    assign operand_a_rev[31-src] = operand_a_i[src];
+  end
+
+  ///////////
+  // Adder //
+  ///////////
+
+  logic        adder_op_b_negate;
+  logic [32:0] adder_in_a, adder_in_b;
+  logic [31:0] adder_result;
+
+  // Operand B is negated (turning the adder into a subtractor) for SUB itself and for every
+  // operation whose result is derived from a - b: comparisons, set-less-than and min/max.
+  always_comb begin
+    unique case (operator_i)
+      // subtraction
+      ALU_SUB,
+      // equality and ordering comparisons
+      ALU_EQ,  ALU_NE,
+      ALU_GE,  ALU_GEU,
+      ALU_LT,  ALU_LTU,
+      // set-less-than
+      ALU_SLT, ALU_SLTU,
+      // min / max (RV32B)
+      ALU_MIN, ALU_MAX,
+      ALU_MINU, ALU_MAXU: adder_op_b_negate = 1'b1;
+      // every other operation adds operand B unmodified
+      default:            adder_op_b_negate = 1'b0;
+    endcase
+  end
+
+  // Operand A: multdiv operand when multdiv_sel_i is set, else operand_a_i with a 1 LSB appended
+  assign adder_in_a    = multdiv_sel_i ? multdiv_operand_a_i : {operand_a_i,1'b1};
+
+  // Operand B: operand_b_neg is {~operand_b_i, 1'b1}; LSBs 1 + 1 carry into bit 1 => a + ~b + 1
+  assign operand_b_neg = {operand_b_i,1'b0} ^ {33{1'b1}};
+  always_comb begin
+    unique case(1'b1)
+      multdiv_sel_i:     adder_in_b = multdiv_operand_b_i; // externally supplied mult/div operand
+      adder_op_b_negate: adder_in_b = operand_b_neg;       // subtraction: a - b
+      default :          adder_in_b = {operand_b_i, 1'b0}; // addition: LSB 0, no carry into bit 1
+    endcase
+  end
+
+  // actual adder: two 33-bit operands, 34-bit sum
+  assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
+
+  assign adder_result       = adder_result_ext_o[32:1]; // drop the carry-helper LSB
+
+  assign adder_result_o     = adder_result;
+
+  ////////////////
+  // Comparison //
+  ////////////////
+
+  logic is_equal;
+  logic is_greater_equal;  // handles both signed and unsigned forms
+  logic cmp_signed;
+
+  // cmp_signed flags the operations that need the signed form of the greater-equal
+  // comparison; every other operation uses the unsigned form.
+  always_comb begin
+    cmp_signed = 1'b0;
+    unique case (operator_i)
+      // base ISA
+      ALU_GE, ALU_LT, ALU_SLT: cmp_signed = 1'b1;
+      // RV32B only
+      ALU_MIN, ALU_MAX:        cmp_signed = 1'b1;
+      default: ;
+    endcase
+  end
+
+  assign is_equal = (adder_result == 32'b0);
+  assign is_equal_result_o = is_equal;
+
+  // a >= b: with equal sign bits the sign of a - b decides, otherwise the sign bits decide alone
+  always_comb begin
+    if (operand_a_i[31] ~^ operand_b_i[31]) begin
+      is_greater_equal = ~adder_result[31];
+    end else begin
+      is_greater_equal = cmp_signed ^ operand_a_i[31];
+    end
+  end
+
+  // GTE unsigned:
+  // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0
+  // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0
+  // (a[31] == 1 && b[31] == 0) => 1
+  // (a[31] == 0 && b[31] == 1) => 0
+
+  // GTE signed:
+  // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0
+  // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0
+  // (a[31] == 1 && b[31] == 0) => 0
+  // (a[31] == 0 && b[31] == 1) => 1
+
+  // Select the comparison flag for the current operation (it also steers the min/max result mux)
+  logic cmp_result;
+
+  always_comb begin
+    cmp_result = is_equal; // ALU_EQ and every operation not listed below
+    unique case (operator_i)
+      ALU_NE:                cmp_result = ~is_equal;
+      // a >= b (max / maxu are RV32B only)
+      ALU_GE,  ALU_GEU,
+      ALU_MAX, ALU_MAXU:     cmp_result =  is_greater_equal;
+      // a < b (min / minu are RV32B only)
+      ALU_LT,  ALU_LTU, ALU_SLT, ALU_SLTU,
+      ALU_MIN, ALU_MINU:     cmp_result = ~is_greater_equal;
+      default: ;
+    endcase
+  end
+
+  assign comparison_result_o = cmp_result;
+
+  ///////////
+  // Shift //
+  ///////////
+
+  // The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for
+  // arithmetic shifts and one-shift support.
+  // Rotations and funnel shifts are implemented as multi-cycle instructions.
+  // The shifter is also used for single-bit instructions and bit-field place as detailed below.
+  //
+  // Standard Shifts
+  // ===============
+  // For standard shift instructions, the direction of the shift is to the right by default. For
+  // left shifts, the shift_left signal is set. If so, the operand is initially reversed,
+  // shifted to the right by the specified amount and reversed back again. For arithmetic- and
+  // one-shifts the 33rd bit of the shifter operand is set accordingly.
+  //
+  // Multicycle Shifts
+  // =================
+  //
+  // Rotation
+  // --------
+  // For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and
+  // rs2 respectively.
+  //
+  // Rotation pseudocode:
+  //   shift_amt = rs2 & 31;
+  //   multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt));
+  //                       ^-- cycle 0 -----^ ^-- cycle 1 --------------^
+  //
+  // Funnel Shifts
+  // -------------
+  // For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
+  // second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or
+  // its complement is determined by bit [5] of shift_amt.
+  //
+  // Funnel shift Pseudocode: (fsl)
+  //  shift_amt = rs2 & 63;
+  //  shift_amt_compl = 32 - shift_amt[4:0]
+  //  if (shift_amt >=33):
+  //     multicycle_result = (rs1 >> shift_amt_compl[4:0]) | (rs3 << shift_amt[4:0]);
+  //                         ^-- cycle 0 ----------------^ ^-- cycle 1 ------------^
+  //  else if (shift_amt <= 31 && shift_amt > 0):
+  //     multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]);
+  //                         ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^
+  //  For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0.
+  //  these cases need to be handled separately outside the shifting structure:
+  //  else if (shift_amt == 32):
+  //     multicycle_result = rs3
+  //  else if (shift_amt == 0):
+  //     multicycle_result = rs1.
+  //
+  // Single-Bit Instructions
+  // =======================
+  // Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i.
+
+  // The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the
+  // shifter structure. This is done by left-shifting the operand 32'h1 by the required amount.
+  // The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left.
+  // Further processing is taken care of by a separate structure.
+  //
+  // For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply
+  // shifting operand_a_i to the right by the required amount and returning bit [0] of the result.
+  //
+  // Bit-Field Place
+  // ===============
+  // The shifter structure is shared to compute bfp_mask << bfp_off.
+
+  logic       shift_left;
+  logic       shift_ones;
+  logic       shift_arith;
+  logic       shift_funnel;
+  logic       shift_sbmode;
+  logic [5:0] shift_amt;
+  logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt)
+
+  logic [31:0] shift_operand;
+  logic [32:0] shift_result_ext;
+  logic        unused_shift_result_ext;
+  logic [31:0] shift_result;
+  logic [31:0] shift_result_rev;
+
+  // zbf
+  logic bfp_op;
+  logic [4:0]  bfp_len;
+  logic [4:0]  bfp_off;
+  logic [31:0] bfp_mask;
+  logic [31:0] bfp_mask_rev;
+  logic [31:0] bfp_result;
+
+  // bfp: shares the shifter structure to compute bfp_mask << bfp_off
+  assign bfp_op = (RV32B != RV32BNone) ? (operator_i == ALU_BFP) : 1'b0;
+  assign bfp_len = {~(|operand_b_i[27:24]), operand_b_i[27:24]}; // len = 0 encodes for len = 16
+  assign bfp_off = operand_b_i[20:16]; // bit offset at which the field is placed
+  assign bfp_mask = (RV32B != RV32BNone) ? ~(32'hffff_ffff << bfp_len) : '0; // bfp_len LSBs set
+  for (genvar i=0; i<32; i++) begin : gen_rev_bfp_mask
+    assign bfp_mask_rev[i] = bfp_mask[31-i]; // bit-reversed mask, the shifter input for bfp
+  end
+  // Replace the field selected by shift_result (bfp_mask << bfp_off) with operand_b_i's low bits
+  assign bfp_result =(RV32B != RV32BNone) ?
+      (~shift_result & operand_a_i) | ((operand_b_i & bfp_mask) << bfp_off) : '0;
+
+  // bit shift_amt[5]: word swap bit: only considered for FSL/FSR.
+  // if set, reverse operations in first and second cycle.
+  assign shift_amt[5] = operand_b_i[5] & shift_funnel;
+  assign shift_amt_compl = 32 - operand_b_i[4:0];
+
+  always_comb begin
+    if (bfp_op) begin
+      shift_amt[4:0] = bfp_off ; // offset field of bfp control word (operand_b_i[20:16])
+    end else begin
+      shift_amt[4:0] = instr_first_cycle_i ?
+          (operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) : // 1st cycle
+          (operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]);  // 2nd cycle
+    end
+  end
+
+  // single-bit mode (sbset / sbclr / sbinv): the shifter builds the bit mask; B extension only
+  assign shift_sbmode = (RV32B != RV32BNone) &
+      ((operator_i == ALU_SBSET) | (operator_i == ALU_SBCLR) | (operator_i == ALU_SBINV));
+
+  // shift_left selects the left-shift data path (bit-reversed shifter input and output) for:
+  // * a standard left shift (slo, sll)
+  // * a rol in the first cycle, a ror in the second cycle
+  // * fsl: without word-swap bit: first cycle, else: second cycle
+  // * fsr: without word-swap bit: second cycle, else: first cycle
+  // * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext)
+  // * bfp: bfp_mask << bfp_off
+  // Inactive branches use sized 1'b0 literals so that every expression stays one bit wide.
+  always_comb begin
+    unique case (operator_i)
+      ALU_SLL: shift_left = 1'b1;
+      ALU_SLO,
+      ALU_BFP: shift_left = (RV32B != RV32BNone) ? 1'b1 : 1'b0;
+      ALU_ROL: shift_left = (RV32B != RV32BNone) ? instr_first_cycle_i : 1'b0;
+      ALU_ROR: shift_left = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 1'b0;
+      ALU_FSL: shift_left = (RV32B != RV32BNone) ?
+          (shift_amt[5] ? ~instr_first_cycle_i : instr_first_cycle_i) : 1'b0;
+      ALU_FSR: shift_left = (RV32B != RV32BNone) ?
+          (shift_amt[5] ? instr_first_cycle_i : ~instr_first_cycle_i) : 1'b0;
+      default: shift_left = 1'b0;
+    endcase
+    if (shift_sbmode) begin // single-bit mask generation overrides the case result
+      shift_left = 1'b1;
+    end
+  end
+
+  assign shift_arith  = operator_i == ALU_SRA; // fill with the sign bit
+  assign shift_ones   = // fill with ones (slo / sro), B extension only
+      (RV32B != RV32BNone) & ((operator_i == ALU_SLO) | (operator_i == ALU_SRO));
+  assign shift_funnel = // funnel shift (fsl / fsr), B extension only
+      (RV32B != RV32BNone) & ((operator_i == ALU_FSL) | (operator_i == ALU_FSR));
+
+  // shifter structure: one 33-bit right shifter; left shifts reverse the input and the output.
+  always_comb begin
+    // select shifter input
+    // for bfp, sbmode and shift_left the corresponding bit-reversed input is chosen.
+    if (RV32B == RV32BNone) begin
+      shift_operand = shift_left ? operand_a_rev : operand_a_i;
+    end else begin
+      unique case (1'b1)
+        bfp_op:       shift_operand = bfp_mask_rev;
+        shift_sbmode: shift_operand = 32'h8000_0000; // bit-reversed 32'h1
+        default:      shift_operand = shift_left ? operand_a_rev : operand_a_i;
+      endcase
+    end
+
+    shift_result_ext = // bit 32 is the fill value: 1 for one-shifts, sign for sra, else 0
+        $unsigned($signed({shift_ones | (shift_arith & shift_operand[31]), shift_operand}) >>>
+                  shift_amt[4:0]);
+
+    shift_result            = shift_result_ext[31:0];
+    unused_shift_result_ext = shift_result_ext[32];
+
+    for (int unsigned i=0; i<32; i++) begin
+      shift_result_rev[i] = shift_result[31-i];
+    end
+
+    shift_result = shift_left ? shift_result_rev : shift_result; // undo the input reversal
+
+  end
+
+  ///////////////////
+  // Bitwise Logic //
+  ///////////////////
+
+  logic bwlogic_or;
+  logic bwlogic_and;
+  logic [31:0] bwlogic_operand_b;
+  logic [31:0] bwlogic_or_result;
+  logic [31:0] bwlogic_and_result;
+  logic [31:0] bwlogic_xor_result;
+  logic [31:0] bwlogic_result;
+
+  logic bwlogic_op_b_negate;
+
+  always_comb begin
+    bwlogic_op_b_negate = 1'b0; // all unlisted operations use operand B unmodified
+    unique case (operator_i)
+      // RV32B logic-with-negate operations
+      ALU_XNOR, ALU_ORN,
+      ALU_ANDN: bwlogic_op_b_negate = (RV32B != RV32BNone) ? 1'b1 : 1'b0;
+      ALU_CMIX: bwlogic_op_b_negate = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 1'b0;
+      default: ;
+    endcase
+  end
+
+  assign bwlogic_operand_b = bwlogic_op_b_negate ? operand_b_neg[32:1] : operand_b_i;
+  // (operand_b_neg[32:1] is the bitwise inverse of operand_b_i)
+  assign bwlogic_or_result  = operand_a_i | bwlogic_operand_b;
+  assign bwlogic_and_result = operand_a_i & bwlogic_operand_b;
+  assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b;
+
+  assign bwlogic_or  = (operator_i == ALU_OR)  | (operator_i == ALU_ORN);
+  assign bwlogic_and = (operator_i == ALU_AND) | (operator_i == ALU_ANDN);
+
+  always_comb begin
+    unique case (1'b1)
+      bwlogic_or:  bwlogic_result = bwlogic_or_result;
+      bwlogic_and: bwlogic_result = bwlogic_and_result;
+      default:     bwlogic_result = bwlogic_xor_result; // every operation that is not or / and
+    endcase
+  end
+
+  logic [5:0]  bitcnt_result;
+  logic [31:0] minmax_result;
+  logic [31:0] pack_result;
+  logic [31:0] sext_result;
+  logic [31:0] singlebit_result;
+  logic [31:0] rev_result;
+  logic [31:0] shuffle_result;
+  logic [31:0] butterfly_result;
+  logic [31:0] invbutterfly_result;
+  logic [31:0] clmul_result;
+  logic [31:0] multicycle_result;
+
+  if (RV32B != RV32BNone) begin : g_alu_rvb
+
+    /////////////////
+    // Bitcounting //
+    /////////////////
+
+    // The bit-counter structure computes the number of set bits in its operand. Partial results
+    // (from left to right) are needed to compute the control masks for computation of bext/bdep
+    // by the butterfly network, if implemented.
+    // For pcnt, clz and ctz, only the end result is used.
+
+    logic        zbe_op;
+    logic        bitcnt_ctz;
+    logic        bitcnt_clz;
+    logic        bitcnt_cz;
+    logic [31:0] bitcnt_bits;
+    logic [31:0] bitcnt_mask_op;
+    logic [31:0] bitcnt_bit_mask;
+    logic [ 5:0] bitcnt_partial [32];
+    logic [31:0] bitcnt_partial_lsb_d;
+    logic [31:0] bitcnt_partial_msb_d;
+
+
+    assign bitcnt_ctz    = operator_i == ALU_CTZ;      // count trailing zeros
+    assign bitcnt_clz    = operator_i == ALU_CLZ;      // count leading zeros
+    assign bitcnt_cz     = bitcnt_ctz | bitcnt_clz;    // either zero-count operation
+    assign bitcnt_result = bitcnt_partial[31];         // count over all 32 bits of bitcnt_bits
+
+    // Bit-mask generation for clz and ctz:
+    // The bit mask is generated by spreading the lowest-order set bit in the operand to all
+    // higher order bits. The resulting mask is inverted to cover the lowest order zeros. In order
+    // to create the bit mask for leading zeros, the input operand needs to be reversed.
+    assign bitcnt_mask_op = bitcnt_clz ? operand_a_rev : operand_a_i;
+
+    always_comb begin
+      bitcnt_bit_mask = bitcnt_mask_op;
+      // spread every set bit to all higher positions, doubling the distance each step (1 .. 16)
+      for (int unsigned step = 0; step < 5; step++) begin
+        bitcnt_bit_mask = bitcnt_bit_mask | (bitcnt_bit_mask << (1 << step));
+      end
+      // after inversion, ones remain only below the lowest set bit of the operand
+      bitcnt_bit_mask = ~bitcnt_bit_mask;
+    end
+
+    assign zbe_op = (operator_i == ALU_BEXT) | (operator_i == ALU_BDEP);
+    // zbe_op and bitcnt_cz decode disjoint operator_i values, so the selection below is unique
+    always_comb begin
+      unique case (1'b1)
+        zbe_op:      bitcnt_bits = operand_b_i; // bext / bdep: count the bits of operand B
+        bitcnt_cz:   bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz
+        default:     bitcnt_bits = operand_a_i; // pcnt
+      endcase
+    end
+
+    // The parallel prefix counter is of the structure of a Brent-Kung Adder. In the first
+    // log2(width) stages, the sum of the n preceding bit lines is computed for the bit lines at
+    // positions 2**n-1 (power-of-two positions) where n denotes the current stage.
+    // In stage n=log2(width), the count for position width-1 (the MSB) is finished.
+    // For the intermediate values, an inverse adder tree then computes the bit counts for the bit
+    // lines at positions
+    // m = 2**(n-1) + i*2**(n-2), where i = [1 ... width / 2**(n-1)-1] and n = [log2(width) ... 2].
+    // Thus, at every subsequent stage the result of two previously unconnected sub-trees is
+    // summed, starting at the node summing bits [width/2-1 : 0] and [3*width/4-1: width/2]
+    // and moving to iteratively sum up all the sub-trees.
+    // The inverse adder tree thus features log2(width) - 1 stages. The first of these stages is a
+    // single addition at position 3*width/4 - 1. It does not interfere with the last
+    // stage of the primary adder tree. These stages can thus be folded together, resulting in a
+    // total of 2*log2(width)-2 stages.
+    // For more details refer to R. Brent, H. T. Kung, "A Regular Layout for Parallel Adders",
+    // (1982).
+    // For a bitline at position p, only bits
+    // bitcnt_partial[max(i, such that p % log2(i) == 0)-1 : 0] are needed for generation of the
+    // butterfly network control signals. The adders in the intermediate value adder tree thus need
+    // not be full 5-bit adders. We leave the optimization to the synthesis tools.
+    //
+    // Consider the following 8-bit example for illustration.
+    //
+    // let bitcnt_bits = 8'babcdefgh.
+    //
+    //                   a  b  c  d  e  f  g  h
+    //                   | /:  | /:  | /:  | /:
+    //                   |/ :  |/ :  |/ :  |/ :
+    // stage 1:          +  :  +  :  +  :  +  :
+    //                   |  : /:  :  |  : /:  :
+    //                   |,--+ :  :  |,--+ :  :
+    // stage 2:          +  :  :  :  +  :  :  :
+    //                   |  :  |  : /:  :  :  :
+    //                   |,-----,--+ :  :  :  : ^-primary adder tree
+    // stage 3:          +  :  +  :  :  :  :  : -------------------------
+    //                   :  | /| /| /| /| /|  : ,-intermediate adder tree
+    //                   :  |/ |/ |/ |/ |/ :  :
+    // stage 4           :  +  +  +  +  +  :  :
+    //                   :  :  :  :  :  :  :  :
+    // bitcnt_partial[i] 7  6  5  4  3  2  1  0
+
+    always_comb begin
+      bitcnt_partial = '{default: '0};
+      // stage 1: every odd position holds the count of its bit pair
+      for (int unsigned i=1; i<32; i+=2) begin
+        bitcnt_partial[i] = {5'h0, bitcnt_bits[i]} + {5'h0, bitcnt_bits[i-1]};
+      end
+      // stage 2: positions 3, 7, ..., 31 hold the count of their group of 4 bits
+      for (int unsigned i=3; i<32; i+=4) begin
+        bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
+      end
+      // stage 3: positions 7, 15, 23, 31 hold the count of their group of 8 bits
+      for (int unsigned i=7; i<32; i+=8) begin
+        bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
+      end
+      // stage 4: positions 15 and 31 hold the count of their group of 16 bits
+      for (int unsigned i=15; i <32; i+=16) begin
+        bitcnt_partial[i] = bitcnt_partial[i-8] + bitcnt_partial[i];
+      end
+      // stage 5: position 31 holds the count of all 32 bits
+      bitcnt_partial[31] = bitcnt_partial[15] + bitcnt_partial[31];
+      // ^- primary adder tree
+      // -------------------------------
+      // ,-intermediate value adder tree
+      bitcnt_partial[23] = bitcnt_partial[15] + bitcnt_partial[23]; // count of bits [23:0]
+
+      // stage 6: positions 11, 19, 27 become the count of bits [i:0]
+      for (int unsigned i=11; i<32; i+=8) begin
+        bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
+      end
+
+      // stage 7: positions 5, 9, ..., 29 become the count of bits [i:0]
+      for (int unsigned i=5; i<32; i+=4) begin
+        bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
+      end
+      // stage 8: even positions add their own bit to the count of the position below
+      bitcnt_partial[0] = {5'h0, bitcnt_bits[0]};
+      for (int unsigned i=2; i<32; i+=2) begin
+        bitcnt_partial[i] = bitcnt_partial[i-1] + {5'h0, bitcnt_bits[i]};
+      end
+    end
+
+    ///////////////
+    // Min / Max //
+    ///////////////
+
+    assign minmax_result = cmp_result ? operand_a_i : operand_b_i; // cmp_result set: operand A
+
+    //////////
+    // Pack //
+    //////////
+
+    logic packu, packh;
+    assign packu = (operator_i == ALU_PACKU);
+    assign packh = (operator_i == ALU_PACKH);
+
+    always_comb begin
+      pack_result = {operand_b_i[15:0], operand_a_i[15:0]}; // neither flag: lower halfwords
+      unique case (1'b1)
+        packu:   pack_result = {operand_b_i[31:16], operand_a_i[31:16]};    // upper halfwords
+        packh:   pack_result = {16'h0, operand_b_i[7:0], operand_a_i[7:0]}; // low bytes, zero-padded
+        default: ;
+      endcase
+    end
+
+    //////////
+    // Sext //
+    //////////
+
+    assign sext_result = (operator_i != ALU_SEXTB) ? // halfword extension unless ALU_SEXTB
+        { {16{operand_a_i[15]}}, operand_a_i[15:0]} : { {24{operand_a_i[7]}}, operand_a_i[7:0]};
+
+    /////////////////////////////
+    // Single-bit Instructions //
+    /////////////////////////////
+
+    always_comb begin
+      unique case (operator_i)
+        ALU_SBSET: singlebit_result = operand_a_i | shift_result;  // set the bits of the mask
+        ALU_SBCLR: singlebit_result = operand_a_i & ~shift_result; // clear the bits of the mask
+        ALU_SBINV: singlebit_result = operand_a_i ^ shift_result;  // toggle the bits of the mask
+        default:   singlebit_result = {31'h0, shift_result[0]}; // ALU_SBEXT
+      endcase
+    end
+
+    ////////////////////////////////////
+    // General Reverse and Or-combine //
+    ////////////////////////////////////
+
+    // Only a subset of the General reverse and or-combine instructions are implemented in the
+    // balanced version of the B extension. Currently rev, rev8 and orc.b are supported in the
+    // base extension.
+
+    logic [4:0] zbp_shift_amt;
+    logic gorc_op;
+    // Outside RV32BFull a control group only takes effect when all of its shift_amt bits are set
+    assign gorc_op = (operator_i == ALU_GORC);
+    assign zbp_shift_amt[2:0] = (RV32B == RV32BFull) ? shift_amt[2:0] : {3{&shift_amt[2:0]}};
+    assign zbp_shift_amt[4:3] = (RV32B == RV32BFull) ? shift_amt[4:3] : {2{&shift_amt[4:3]}};
+
+    always_comb begin
+      rev_result = operand_a_i;
+
+      if (zbp_shift_amt[0]) begin // swap adjacent bits; gorc also keeps the unswapped value
+        rev_result = (gorc_op ? rev_result : 32'h0)       |
+                     ((rev_result & 32'h5555_5555) <<  1) |
+                     ((rev_result & 32'haaaa_aaaa) >>  1);
+      end
+
+      if (zbp_shift_amt[1]) begin // swap bit pairs
+        rev_result = (gorc_op ? rev_result : 32'h0)       |
+                     ((rev_result & 32'h3333_3333) <<  2) |
+                     ((rev_result & 32'hcccc_cccc) >>  2);
+      end
+
+      if (zbp_shift_amt[2]) begin // swap nibbles
+        rev_result = (gorc_op ? rev_result : 32'h0)       |
+                     ((rev_result & 32'h0f0f_0f0f) <<  4) |
+                     ((rev_result & 32'hf0f0_f0f0) >>  4);
+      end
+
+      if (zbp_shift_amt[3]) begin // swap bytes; or-combine here only with RV32BFull
+        rev_result = (gorc_op & (RV32B == RV32BFull) ? rev_result : 32'h0) |
+                     ((rev_result & 32'h00ff_00ff) <<  8) |
+                     ((rev_result & 32'hff00_ff00) >>  8);
+      end
+
+      if (zbp_shift_amt[4]) begin // swap halfwords; or-combine here only with RV32BFull
+        rev_result = (gorc_op & (RV32B == RV32BFull) ? rev_result : 32'h0) |
+                     ((rev_result & 32'h0000_ffff) << 16) |
+                     ((rev_result & 32'hffff_0000) >> 16);
+      end
+    end
+
+    logic crc_hmode;
+    logic crc_bmode;
+    logic [31:0] clmul_result_rev;
+
+    if (RV32B == RV32BFull) begin : gen_alu_rvb_full
+
+      /////////////////////////
+      // Shuffle / Unshuffle //
+      /////////////////////////
+
+      localparam logic [31:0] SHUFFLE_MASK_L [4] =
+          '{32'h00ff_0000, 32'h0f00_0f00, 32'h3030_3030, 32'h4444_4444};
+      localparam logic [31:0] SHUFFLE_MASK_R [4] =
+          '{32'h0000_ff00, 32'h00f0_00f0, 32'h0c0c_0c0c, 32'h2222_2222};
+
+      localparam logic [31:0] FLIP_MASK_L [4] =
+          '{32'h2200_1100, 32'h0044_0000, 32'h4411_0000, 32'h1100_0000};
+      localparam logic [31:0] FLIP_MASK_R [4] =
+          '{32'h0088_0044, 32'h0000_2200, 32'h0000_8822, 32'h0000_0088};
+
+      logic [31:0] SHUFFLE_MASK_NOT [4]; // bits that keep their position in shuffle stage i
+      for(genvar i = 0; i < 4; i++) begin : gen_shuffle_mask_not
+        assign SHUFFLE_MASK_NOT[i] = ~(SHUFFLE_MASK_L[i] | SHUFFLE_MASK_R[i]);
+      end
+
+      logic shuffle_flip;
+      assign shuffle_flip = operator_i == ALU_UNSHFL;
+
+      logic [3:0] shuffle_mode;
+
+      always_comb begin
+        shuffle_result = operand_a_i;
+
+        if (shuffle_flip) begin // unshfl: the stage enables are applied in reversed bit order
+          shuffle_mode[3] = shift_amt[0];
+          shuffle_mode[2] = shift_amt[1];
+          shuffle_mode[1] = shift_amt[2];
+          shuffle_mode[0] = shift_amt[3];
+        end else begin
+          shuffle_mode = shift_amt[3:0];
+        end
+
+        if (shuffle_flip) begin // unshfl: fixed flip permutation ahead of the shuffle stages
+          shuffle_result = (shuffle_result & 32'h8822_4411) |
+              ((shuffle_result << 6)  & FLIP_MASK_L[0]) |
+              ((shuffle_result >> 6)  & FLIP_MASK_R[0]) |
+              ((shuffle_result << 9)  & FLIP_MASK_L[1]) |
+              ((shuffle_result >> 9)  & FLIP_MASK_R[1]) |
+              ((shuffle_result << 15) & FLIP_MASK_L[2]) |
+              ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
+              ((shuffle_result << 21) & FLIP_MASK_L[3]) |
+              ((shuffle_result >> 21) & FLIP_MASK_R[3]);
+        end
+
+        if (shuffle_mode[3]) begin // exchange distance 8
+          shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[0]) |
+              (((shuffle_result << 8) & SHUFFLE_MASK_L[0]) |
+              ((shuffle_result >> 8) & SHUFFLE_MASK_R[0]));
+        end
+        if (shuffle_mode[2]) begin // exchange distance 4
+          shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[1]) |
+              (((shuffle_result << 4) & SHUFFLE_MASK_L[1]) |
+              ((shuffle_result >> 4) & SHUFFLE_MASK_R[1]));
+        end
+        if (shuffle_mode[1]) begin // exchange distance 2
+          shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[2]) |
+              (((shuffle_result << 2) & SHUFFLE_MASK_L[2]) |
+              ((shuffle_result >> 2) & SHUFFLE_MASK_R[2]));
+        end
+        if (shuffle_mode[0]) begin // exchange distance 1
+          shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[3]) |
+              (((shuffle_result << 1) & SHUFFLE_MASK_L[3]) |
+              ((shuffle_result >> 1) & SHUFFLE_MASK_R[3]));
+        end
+
+        if (shuffle_flip) begin // unshfl: the same flip permutation again after the stages
+          shuffle_result = (shuffle_result & 32'h8822_4411) |
+              ((shuffle_result << 6)  & FLIP_MASK_L[0]) |
+              ((shuffle_result >> 6)  & FLIP_MASK_R[0]) |
+              ((shuffle_result << 9)  & FLIP_MASK_L[1]) |
+              ((shuffle_result >> 9)  & FLIP_MASK_R[1]) |
+              ((shuffle_result << 15) & FLIP_MASK_L[2]) |
+              ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
+              ((shuffle_result << 21) & FLIP_MASK_L[3]) |
+              ((shuffle_result >> 21) & FLIP_MASK_R[3]);
+        end
+      end
+
+      ///////////////
+      // Butterfly //
+      ///////////////
+
+      // The butterfly / inverse butterfly network executing bext/bdep (zbe) instructions.
+      // For bdep, the control bits mask of a local left region is generated by
+      // the inverse of a n-bit left rotate and complement upon wrap (LROTC) operation by the number
+      // of ones in the deposit bitmask to the right of the segment. n hereby denotes the width
+      // of the according segment. The bitmask for a pertaining local right region is equal to the
+      // corresponding local left region. Bext uses an analogous inverse process.
+      // Consider the following 8-bit example.  For details, see Hilewitz et al. "Fast Bit Gather,
+      // Bit Scatter and Bit Permutation Instructions for Commodity Microprocessors", (2008).
+      //
+      // The bext/bdep instructions are completed in 2 cycles. In the first cycle, the control
+      // bitmask is prepared by executing the parallel prefix bit count. In the second cycle,
+      // the bit swapping is executed according to the control masks.
+
+      // 8-bit example:  (Hilewitz et al.)
+      // Consider the instruction bdep operand_a_i deposit_mask
+      // Let operand_a_i = 8'babcd_efgh
+      //    deposit_mask = 8'b1010_1101
+      //
+      // control bitmask for stage 1:
+      //  - number of ones in the right half of the deposit bitmask: 3
+      //  - width of the segment: 4
+      //  - control bitmask = ~LROTC(4'b0, 3)[3:0] = 4'b1000
+      //
+      // control bitmask:   c3 c2  c1 c0  c3 c2  c1 c0
+      //                    1  0   0  0   1  0   0  0
+      //                    <- L ----->   <- R ----->
+      // operand_a_i        a  b   c  d   e  f   g  h
+      //                    :\ |   |  |  /:  |   |  |
+      //                    : +|---|--|-+ :  |   |  |
+      //                    :/ |   |  |  \:  |   |  |
+      // stage 1            e  b   c  d   a  f   g  h
+      //                    <L->   <R->   <L->   <R->
+      // control bitmask:   c3 c2  c3 c2  c1 c0  c1 c0
+      //                    1  1   1  1   1  0   1  0
+      //                    :\ :\ /: /:   :\ |  /:  |
+      //                    : +:-+-:+ :   : +|-+ :  |
+      //                    :/ :/ \: \:   :/ |  \:  |
+      // stage 2            c  d   e  b   g  f   a  h
+      //                    L  R   L  R   L  R   L  R
+      // control bitmask:   c3 c3  c2 c2  c1 c1  c0 c0
+      //                    1  1   0  0   1  1   0  0
+      //                    :\/:   |  |   :\/:   |  |
+      //                    :  :   |  |   :  :   |  |
+      //                    :/\:   |  |   :/\:   |  |
+      // stage 3            d  c   e  b   f  g   a  h
+      // & deposit bitmask: 1  0   1  0   1  1   0  1
+      // result:            d  0   e  0   f  g   0  h
+
+      logic [ 5:0] bitcnt_partial_q [32];
+
+      // First cycle: pack the partial bit counts into two 32-bit words for storage.
+      // lsb_d takes bit 0 of every count; msb_d takes bit n of the counts at positions k*2**n - 1.
+      for (genvar i=0; i<32; i++) begin : gen_bitcnt_reg_in_lsb
+        assign bitcnt_partial_lsb_d[i] = bitcnt_partial[i][0];
+      end
+
+      for (genvar i=0; i<16; i++) begin : gen_bitcnt_reg_in_b1
+        assign bitcnt_partial_msb_d[i] = bitcnt_partial[2*i+1][1]; // bit 1 of odd positions
+      end
+
+      for (genvar i=0; i<8; i++) begin : gen_bitcnt_reg_in_b2
+        assign bitcnt_partial_msb_d[16+i] = bitcnt_partial[4*i+3][2]; // bit 2 of 3, 7, ..., 31
+      end
+
+      for (genvar i=0; i<4; i++) begin : gen_bitcnt_reg_in_b3
+        assign bitcnt_partial_msb_d[24+i] = bitcnt_partial[8*i+7][3]; // bit 3 of 7, 15, 23, 31
+      end
+
+      for (genvar i=0; i<2; i++) begin : gen_bitcnt_reg_in_b4
+        assign bitcnt_partial_msb_d[28+i] = bitcnt_partial[16*i+15][4]; // bit 4 of 15 and 31
+      end
+
+      assign bitcnt_partial_msb_d[30] = bitcnt_partial[31][5];
+      assign bitcnt_partial_msb_d[31] = 1'b0; // unused
+
+      // Second cycle: rebuild the partial counts from the intermediate value registers.
+      // imd_val_q_i[0] supplies bit 0 of every count, imd_val_q_i[1] the packed higher bits.
+      always_comb begin
+        bitcnt_partial_q = '{default: '0}; // bits that were not stored read as zero
+
+        for (int unsigned i=0; i<32; i++) begin : gen_bitcnt_reg_out_lsb
+          bitcnt_partial_q[i][0] = imd_val_q_i[0][i];
+        end
+
+        for (int unsigned i=0; i<16; i++) begin : gen_bitcnt_reg_out_b1
+          bitcnt_partial_q[2*i+1][1] = imd_val_q_i[1][i];
+        end
+
+        for (int unsigned i=0; i<8; i++) begin : gen_bitcnt_reg_out_b2
+          bitcnt_partial_q[4*i+3][2] = imd_val_q_i[1][16+i];
+        end
+
+        for (int unsigned i=0; i<4; i++) begin : gen_bitcnt_reg_out_b3
+          bitcnt_partial_q[8*i+7][3] = imd_val_q_i[1][24+i];
+        end
+
+        for (int unsigned i=0; i<2; i++) begin : gen_bitcnt_reg_out_b4
+          bitcnt_partial_q[16*i+15][4] = imd_val_q_i[1][28+i];
+        end
+
+        bitcnt_partial_q[31][5] = imd_val_q_i[1][30];
+      end
+
+      logic [31:0] butterfly_mask_l[5];
+      logic [31:0] butterfly_mask_r[5];
+      logic [31:0] butterfly_mask_not[5];
+      logic [31:0] lrotc_stage [5]; // left rotate and complement upon wrap
+
+      // number of bits in local r = 32 / 2**(stage + 1) = 16/2**stage
+      `define _N(stg) (16 >> stg)
+
+      // bext / bdep control bit generation
+      for (genvar stg=0; stg<5; stg++) begin : gen_butterfly_ctrl_stage
+        // number of segs: 2** stg
+        for (genvar seg=0; seg<2**stg; seg++) begin : gen_butterfly_ctrl
+          // Segment of 2*_N bits: {_N zeros, _N ones} shifted left by a stored partial bit count.
+          assign lrotc_stage[stg][2*`_N(stg)*(seg+1)-1 : 2*`_N(stg)*seg] =
+              {{`_N(stg){1'b0}},{`_N(stg){1'b1}}} <<
+                bitcnt_partial_q[`_N(stg)*(2*seg+1)-1][$clog2(`_N(stg)):0];
+          // Upper half of the segment: left mask is the inverted upper half of lrotc_stage.
+          assign butterfly_mask_l[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]
+                   = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
+          // Lower half of the segment: right mask reuses that same inverted upper half.
+          assign butterfly_mask_r[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)]
+                   = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
+          // The opposite halves of both masks are tied to zero.
+          assign butterfly_mask_l[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)]   = '0;
+          assign butterfly_mask_r[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] = '0;
+        end
+      end
+      `undef _N
+
+      for (genvar stg=0; stg<5; stg++) begin : gen_butterfly_not
+        // Bits selected by neither the left nor the right mask (De Morgan form of ~(l | r)).
+        assign butterfly_mask_not[stg] = ~butterfly_mask_l[stg] & ~butterfly_mask_r[stg];
+      end
+
+      // Butterfly network: permutes operand_a_i under control of the butterfly masks, then
+      // ANDs the permuted word with operand_b_i.
+      //
+      // Stage stg (0..4) uses a swap distance of (16 >> stg) bits, i.e. 16, 8, 4, 2, 1:
+      //   - bits selected by butterfly_mask_l[stg] move right by that distance,
+      //   - bits selected by butterfly_mask_r[stg] move left by that distance,
+      //   - bits selected by butterfly_mask_not[stg] stay where they are.
+      //
+      // The loop has constant bounds, so it unrolls into the same five
+      // mask-and-shift stages as a hand-written chain.
+      always_comb begin
+        // Start from the unmodified operand.
+        butterfly_result = operand_a_i;
+
+        // Widest swap first (distance 16), narrowest last (distance 1).
+        for (int stg = 0; stg < 5; stg++) begin
+          butterfly_result =
+              (butterfly_result & butterfly_mask_not[stg]) |
+              ((butterfly_result & butterfly_mask_l[stg]) >> (16 >> stg)) |
+              ((butterfly_result & butterfly_mask_r[stg]) << (16 >> stg));
+        end
+
+        // Final gating with operand_b_i.
+        butterfly_result = butterfly_result & operand_b_i;
+      end
+
+      // Inverse butterfly network: operand_a_i is first ANDed with operand_b_i, then the same
+      // mask-and-shift stages as in the forward network are applied in the opposite order.
+      //
+      // Stage stg uses a swap distance of (16 >> stg) bits; stg counts down from 4 to 0, so
+      // the distances run 1, 2, 4, 8, 16:
+      //   - bits selected by butterfly_mask_l[stg] move right by that distance,
+      //   - bits selected by butterfly_mask_r[stg] move left by that distance,
+      //   - bits selected by butterfly_mask_not[stg] stay where they are.
+      //
+      // The constant-bound loop unrolls into five combinational stages.
+      always_comb begin
+        // Gate the operand before permuting.
+        invbutterfly_result = operand_a_i & operand_b_i;
+
+        // Narrowest swap first (distance 1), widest last (distance 16).
+        // The index is a signed int so that the "stg >= 0" exit test can become false.
+        for (int stg = 4; stg >= 0; stg--) begin
+          invbutterfly_result =
+              (invbutterfly_result & butterfly_mask_not[stg]) |
+              ((invbutterfly_result & butterfly_mask_l[stg]) >> (16 >> stg)) |
+              ((invbutterfly_result & butterfly_mask_r[stg]) << (16 >> stg));
+        end
+      end
+
+      ///////////////////////////////////////////////////
+      // Carry-less Multiply + Cyclic Redundancy Check //
+      ///////////////////////////////////////////////////
+
+      // Carry-less multiplication can be understood as multiplication based on
+      // the addition interpreted as the bit-wise xor operation.
+      //
+      // Example: 1101 X 1011 = 1111111:
+      //
+      //       1011 X 1101
+      //       -----------
+      //              1101
+      //         xor 1101
+      //         ---------
+      //             10111
+      //        xor 0000
+      //        ----------
+      //            010111
+      //       xor 1101
+      //       -----------
+      //           1111111
+      //
+      // Architectural details:
+      //         A 32 x 32-bit array
+      //         [ operand_b[i] ? (operand_a << i) : '0 for i in 0 ... 31 ]
+      //         is generated. The entries of the array are pairwise 'xor-ed'
+      //         together in a 5-stage binary tree.
+      //
+      //
+      // Cyclic Redundancy Check:
+      //
+      // CRC-32 (CRC-32/ISO-HDLC) and CRC-32C (CRC-32/ISCSI) are directly implemented. For
+      // documentation of the crc configuration (crc-polynomials, initialization, reflection, etc.)
+      // see http://reveng.sourceforge.net/crc-catalogue/all.htm
+      // A useful guide to crc arithmetic and algorithms is given here:
+      // http://www.piclist.com/techref/method/math/crcguide.html.
+      //
+      // The CRC operation solves the following equation using binary polynomial arithmetic:
+      //
+      // rev(rd)(x) = rev(rs1)(x) * x**n mod {1, P}(x)
+      //
+      // where P denotes lower 32 bits of the corresponding CRC polynomial, rev(a) the bit reversal
+      // of a, n = 8,16, or 32 for .b, .h, .w -variants. {a, b} denotes bit concatenation.
+      //
+      // Using Barrett reduction, one can show that
+      //
+      // M(x) mod P(x) = R(x) =
+      //   ((M(x) * x**n) & {deg(P(x)){1'b1}}) ^ ((M(x) * x**-(deg(P(x)) - n)) cx mu(x) cx P(x)),
+      //
+      // Where mu(x) = polydiv(x**64, {1,P}) & 0xffffffff. Here, 'cx' refers to carry-less
+      // multiplication. Substituting rev(rd)(x) for R(x) and rev(rs1)(x) for M(x) and solving for
+      // rd(x) with P(x) a crc32 polynomial (deg(P(x)) = 32), we get
+      //
+      // rd = rev( (rev(rs1) << n)  ^ ((rev(rs1) >> (32-n)) cx mu cx P))
+      //    = (rs1 >> n) ^ rev(rev( (rs1 << (32-n)) cx rev(mu)) cx P)
+      //                       ^-- cycle 0--------------------^
+      //      ^- cycle 1 -------------------------------------------^
+      //
+      // In the last step we used the fact that carry-less multiplication is bit-order agnostic:
+      // rev(a cx b) = rev(a) cx rev(b).
+
+      logic clmul_rmode;
+      logic clmul_hmode;
+      logic [31:0] clmul_op_a;
+      logic [31:0] clmul_op_b;
+      logic [31:0] operand_b_rev;
+      logic [31:0] clmul_and_stage[32];
+      logic [31:0] clmul_xor_stage1[16];
+      logic [31:0] clmul_xor_stage2[8];
+      logic [31:0] clmul_xor_stage3[4];
+      logic [31:0] clmul_xor_stage4[2];
+
+      logic [31:0] clmul_result_raw;
+
+      for (genvar b=0; b<32; b++) begin: gen_rev_operand_b
+        assign operand_b_rev[31-b] = operand_b_i[b];  // mirror bit b onto position 31-b
+      end
+
+      assign clmul_rmode = operator_i == ALU_CLMULR;
+      assign clmul_hmode = operator_i == ALU_CLMULH;
+
+      // CRC constants: polynomial and bit-reversed mu for each of the two supported CRCs
+      localparam logic [31:0] CRC32_POLYNOMIAL = 32'h04c1_1db7;
+      localparam logic [31:0] CRC32_MU_REV = 32'hf701_1641;
+
+      localparam logic [31:0] CRC32C_POLYNOMIAL = 32'h1edc_6f41;
+      localparam logic [31:0] CRC32C_MU_REV = 32'hdea7_13f1;
+      // CRC operation decode
+      logic crc_op;
+      // Set for the ALU_CRC32C_* operators; selects the CRC32C constants below
+      logic crc_cpoly;
+
+      logic [31:0] crc_operand;
+      logic [31:0] crc_poly;
+      logic [31:0] crc_mu_rev;
+
+      assign crc_op = (operator_i == ALU_CRC32C_W) | (operator_i == ALU_CRC32_W) |
+                      (operator_i == ALU_CRC32C_H) | (operator_i == ALU_CRC32_H) |
+                      (operator_i == ALU_CRC32C_B) | (operator_i == ALU_CRC32_B);
+
+      assign crc_cpoly = (operator_i == ALU_CRC32C_W) |
+                         (operator_i == ALU_CRC32C_H) |
+                         (operator_i == ALU_CRC32C_B);
+      // Operand width flags: half-word (_H) and byte (_B) operators
+      assign crc_hmode = (operator_i == ALU_CRC32_H) | (operator_i == ALU_CRC32C_H);
+      assign crc_bmode = (operator_i == ALU_CRC32_B) | (operator_i == ALU_CRC32C_B);
+      // Constant selection for the chosen polynomial
+      assign crc_poly   = crc_cpoly ? CRC32C_POLYNOMIAL : CRC32_POLYNOMIAL;
+      assign crc_mu_rev = crc_cpoly ? CRC32C_MU_REV : CRC32_MU_REV;
+
+      always_comb begin  // left-align the byte / half-word payload in the 32-bit CRC operand
+        unique case(1'b1)
+          crc_bmode: crc_operand = {operand_a_i[7:0], 24'h0};   // low byte -> bits [31:24]
+          crc_hmode: crc_operand = {operand_a_i[15:0], 16'h0};  // low half -> bits [31:16]
+          default:   crc_operand = operand_a_i;                 // neither flag set: unchanged
+        endcase
+      end
+
+      // Select clmul inputs (CRC operations reuse the carry-less multiplier with their own operands)
+      always_comb begin
+        if (crc_op) begin
+          clmul_op_a = instr_first_cycle_i ? crc_operand : imd_val_q_i[0];  // later: stored value
+          clmul_op_b = instr_first_cycle_i ? crc_mu_rev : crc_poly;  // first: rev(mu), later: P
+        end else begin
+          clmul_op_a = clmul_rmode | clmul_hmode ? operand_a_rev : operand_a_i;  // r/h: reversed
+          clmul_op_b = clmul_rmode | clmul_hmode ? operand_b_rev : operand_b_i;  // r/h: reversed
+        end
+      end
+
+      for (genvar i=0; i<32; i++) begin : gen_clmul_and_op
+        assign clmul_and_stage[i] = clmul_op_b[i] ? clmul_op_a << i : '0;  // partial product i
+      end
+      // XOR reduction tree, level 1: 32 partial products -> 16
+      for (genvar i=0; i<16; i++) begin : gen_clmul_xor_op_l1
+        assign clmul_xor_stage1[i] = clmul_and_stage[2*i] ^ clmul_and_stage[2*i+1];
+      end
+      // level 2: 16 -> 8
+      for (genvar i=0; i<8; i++) begin : gen_clmul_xor_op_l2
+        assign clmul_xor_stage2[i] = clmul_xor_stage1[2*i] ^ clmul_xor_stage1[2*i+1];
+      end
+      // level 3: 8 -> 4
+      for (genvar i=0; i<4; i++) begin : gen_clmul_xor_op_l3
+        assign clmul_xor_stage3[i] = clmul_xor_stage2[2*i] ^ clmul_xor_stage2[2*i+1];
+      end
+      // level 4: 4 -> 2
+      for (genvar i=0; i<2; i++) begin : gen_clmul_xor_op_l4
+        assign clmul_xor_stage4[i] = clmul_xor_stage3[2*i] ^ clmul_xor_stage3[2*i+1];
+      end
+      // final level: 2 -> 1
+      assign clmul_result_raw = clmul_xor_stage4[0] ^ clmul_xor_stage4[1];
+
+      for (genvar b=0; b<32; b++) begin : gen_rev_clmul_result
+        assign clmul_result_rev[31-b] = clmul_result_raw[b];  // mirror bit b onto position 31-b
+      end
+
+      // clmulr_result = rev(clmul(rev(a), rev(b)))
+      // clmulh_result = clmulr_result >> 1
+      always_comb begin
+        case(1'b1)
+          clmul_rmode: clmul_result = clmul_result_rev;                 // bit-reversed raw result
+          clmul_hmode: clmul_result = {1'b0, clmul_result_rev[31:1]};   // same, shifted right by 1
+          default:     clmul_result = clmul_result_raw;                 // raw XOR-tree output
+        endcase
+      end
+    end else begin : gen_alu_rvb_notfull
+      logic [31:0] unused_imd_val_q_1;
+      assign unused_imd_val_q_1   = imd_val_q_i[1];
+      assign shuffle_result       = '0;
+      assign butterfly_result     = '0;
+      assign invbutterfly_result  = '0;
+      assign clmul_result         = '0;
+      // support signals
+      assign bitcnt_partial_lsb_d = '0;
+      assign bitcnt_partial_msb_d = '0;
+      assign clmul_result_rev     = '0;
+      assign crc_bmode            = '0;
+      assign crc_hmode            = '0;
+    end
+
+    //////////////////////////////////////
+    // Multicycle Bitmanip Instructions //
+    //////////////////////////////////////
+    // Ternary instructions + Shift Rotations + Bit extract/deposit + CRC
+    // For ternary instructions (zbt), operand_a_i is tied to rs1 in the first cycle and rs3 in the
+    // second cycle. operand_b_i is always tied to rs2.
+
+    always_comb begin  // drives multicycle_result, imd_val_d_o and imd_val_we_o per operator
+      unique case (operator_i)
+        ALU_CMOV: begin  // operand_b_i == 0 ? current operand_a_i : operand_a_i stored earlier
+          multicycle_result = (operand_b_i == 32'h0) ? operand_a_i : imd_val_q_i[0];
+          imd_val_d_o = '{operand_a_i, 32'h0};
+          if (instr_first_cycle_i) begin
+            imd_val_we_o = 2'b01;  // first cycle: only write-enable bit 0 is set
+          end else begin
+            imd_val_we_o = 2'b00;
+          end
+        end
+        // cmix: OR of the stored bwlogic_and_result and the current one
+        ALU_CMIX: begin
+          multicycle_result = imd_val_q_i[0] | bwlogic_and_result;
+          imd_val_d_o = '{bwlogic_and_result, 32'h0};
+          if (instr_first_cycle_i) begin
+            imd_val_we_o = 2'b01;
+          end else begin
+            imd_val_we_o = 2'b00;
+          end
+        end
+        // Funnel shifts / rotates: combine the stored and the current shift_result
+        ALU_FSR, ALU_FSL,
+        ALU_ROL, ALU_ROR: begin
+          if (shift_amt[4:0] == 5'h0) begin  // no OR-combination when the low 5 bits are zero
+            multicycle_result = shift_amt[5] ? operand_a_i : imd_val_q_i[0];
+          end else begin
+            multicycle_result = imd_val_q_i[0] | shift_result;
+          end
+          imd_val_d_o = '{shift_result, 32'h0};
+          if (instr_first_cycle_i) begin
+            imd_val_we_o = 2'b01;
+          end else begin
+            imd_val_we_o = 2'b00;
+          end
+        end
+        // CRC: clmul_result_rev, XORed with operand_a_i >> 8 / >> 16 in byte / half-word mode
+        ALU_CRC32_W, ALU_CRC32C_W,
+        ALU_CRC32_H, ALU_CRC32C_H,
+        ALU_CRC32_B, ALU_CRC32C_B: begin
+          if (RV32B == RV32BFull) begin
+            unique case(1'b1)
+              crc_bmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 8);
+              crc_hmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 16);
+              default:   multicycle_result = clmul_result_rev;
+            endcase
+            imd_val_d_o = '{clmul_result_rev, 32'h0};
+            if (instr_first_cycle_i) begin
+              imd_val_we_o = 2'b01;
+            end else begin
+              imd_val_we_o = 2'b00;
+            end
+          end else begin  // not RV32BFull: no register write, zero result
+            imd_val_d_o = '{operand_a_i, 32'h0};
+            imd_val_we_o = 2'b00;
+            multicycle_result = '0;
+          end
+        end
+        // bext / bdep: butterfly networks; the first cycle stores the partial bit counts
+        ALU_BEXT, ALU_BDEP: begin
+          if (RV32B == RV32BFull) begin
+            multicycle_result = (operator_i == ALU_BDEP) ? butterfly_result : invbutterfly_result;
+            imd_val_d_o = '{bitcnt_partial_lsb_d, bitcnt_partial_msb_d};
+            if (instr_first_cycle_i) begin
+              imd_val_we_o = 2'b11;  // both write-enable bits set
+            end else begin
+              imd_val_we_o = 2'b00;
+            end
+          end else begin  // not RV32BFull: no register write, zero result
+            imd_val_d_o = '{operand_a_i, 32'h0};
+            imd_val_we_o = 2'b00;
+            multicycle_result = '0;
+          end
+        end
+        // Any other operator: no register write, zero result
+        default: begin
+          imd_val_d_o = '{operand_a_i, 32'h0};
+          imd_val_we_o = 2'b00;
+          multicycle_result = '0;
+        end
+      endcase
+    end
+
+
+  end else begin : g_no_alu_rvb
+    logic [31:0] unused_imd_val_q[2];
+    assign unused_imd_val_q           = imd_val_q_i;
+    logic [31:0] unused_butterfly_result;
+    assign unused_butterfly_result    = butterfly_result;
+    logic [31:0] unused_invbutterfly_result;
+    assign unused_invbutterfly_result = invbutterfly_result;
+    // RV32B result signals
+    assign bitcnt_result       = '0;
+    assign minmax_result       = '0;
+    assign pack_result         = '0;
+    assign sext_result         = '0;
+    assign singlebit_result    = '0;
+    assign rev_result          = '0;
+    assign shuffle_result      = '0;
+    assign butterfly_result    = '0;
+    assign invbutterfly_result = '0;
+    assign clmul_result        = '0;
+    assign multicycle_result   = '0;
+    // RV32B support signals
+    assign imd_val_d_o         = '{default: '0};
+    assign imd_val_we_o        = '{default: '0};
+  end
+
+  ////////////////
+  // Result mux //
+  ////////////////
+
+  always_comb begin
+    result_o   = '0;  // value kept when operator_i matches none of the cases below
+    // Forward the result signal that belongs to the selected operator to result_o.
+    unique case (operator_i)
+      // Bitwise Logic Operations (negate: RV32B)
+      ALU_XOR,  ALU_XNOR,
+      ALU_OR,   ALU_ORN,
+      ALU_AND,  ALU_ANDN: result_o = bwlogic_result;
+
+      // Adder Operations
+      ALU_ADD,  ALU_SUB: result_o = adder_result;
+
+      // Shift Operations
+      ALU_SLL,  ALU_SRL,
+      ALU_SRA,
+      // RV32B
+      ALU_SLO,  ALU_SRO: result_o = shift_result;
+
+      // Shuffle Operations (RV32B)
+      ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;
+
+      // Comparison Operations
+      ALU_EQ,   ALU_NE,
+      ALU_GE,   ALU_GEU,
+      ALU_LT,   ALU_LTU,
+      ALU_SLT,  ALU_SLTU: result_o = {31'h0,cmp_result};  // 1-bit flag, zero-extended
+
+      // MinMax Operations (RV32B)
+      ALU_MIN,  ALU_MAX,
+      ALU_MINU, ALU_MAXU: result_o = minmax_result;
+
+      // Bitcount Operations (RV32B)
+      ALU_CLZ, ALU_CTZ,
+      ALU_PCNT: result_o = {26'h0, bitcnt_result};  // count is zero-extended to 32 bits
+
+      // Pack Operations (RV32B)
+      ALU_PACK, ALU_PACKH,
+      ALU_PACKU: result_o = pack_result;
+
+      // Sign-Extend (RV32B)
+      ALU_SEXTB, ALU_SEXTH: result_o = sext_result;
+
+      // Ternary Bitmanip Operations (RV32B)
+      ALU_CMIX, ALU_CMOV,
+      ALU_FSL,  ALU_FSR,
+      // Rotate Shift (RV32B)
+      ALU_ROL, ALU_ROR,
+      // Cyclic Redundancy Checks (RV32B)
+      ALU_CRC32_W, ALU_CRC32C_W,
+      ALU_CRC32_H, ALU_CRC32C_H,
+      ALU_CRC32_B, ALU_CRC32C_B,
+      // Bit Extract / Deposit (RV32B)
+      ALU_BEXT, ALU_BDEP: result_o = multicycle_result;
+
+      // Single-Bit Bitmanip Operations (RV32B)
+      ALU_SBSET, ALU_SBCLR,
+      ALU_SBINV, ALU_SBEXT: result_o = singlebit_result;
+
+      // General Reverse / Or-combine (RV32B)
+      ALU_GREV, ALU_GORC: result_o = rev_result;
+
+      // Bit Field Place (RV32B)
+      ALU_BFP: result_o = bfp_result;
+
+      // Carry-less Multiply Operations (RV32B)
+      ALU_CLMUL, ALU_CLMULR,
+      ALU_CLMULH: result_o = clmul_result;
+
+      default: ;  // result_o keeps the '0 assigned above
+    endcase
+  end
+
+  logic unused_shift_amt_compl;
+  assign unused_shift_amt_compl = shift_amt_compl[5];
+
+endmodule
diff --git a/verilog/rtl/brq_exu_multdiv_fast.sv b/verilog/rtl/brq_exu_multdiv_fast.sv
new file mode 100644
index 0000000..3bb8217
--- /dev/null
+++ b/verilog/rtl/brq_exu_multdiv_fast.sv
@@ -0,0 +1,514 @@
+
+`define OP_L 15:0
+`define OP_H 31:16
+
+/**
+ * Fast Multiplier and Division
+ *
+ * 16x16 kernel multiplier and Long Division.
+ * The multiplier is selected by RV32M: RV32MSingleCycle instantiates three multipliers,
+ * any other value a single time-multiplexed one. The divider borrows the ALU adder via
+ * alu_operand_a_o / alu_operand_b_o and keeps remainder/denominator in imd_val_q_i.
+ */
+module brq_exu_multdiv_fast #(
+    parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast
+  ) (
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             mult_en_i,  // dynamic enable signal, for FSM control
+    input  logic             div_en_i,   // dynamic enable signal, for FSM control
+    input  logic             mult_sel_i, // static decoder output, for data muxes
+    input  logic             div_sel_i,  // static decoder output, for data muxes
+    input  brq_pkg::md_op_e operator_i,
+    input  logic  [1:0]      signed_mode_i,
+    input  logic [31:0]      op_a_i,
+    input  logic [31:0]      op_b_i,
+    input  logic [33:0]      alu_adder_ext_i,
+    input  logic [31:0]      alu_adder_i,
+    input  logic             equal_to_zero_i,
+    input  logic             data_ind_timing_i,
+
+    output logic [32:0]      alu_operand_a_o,
+    output logic [32:0]      alu_operand_b_o,
+
+    input  logic [33:0]      imd_val_q_i[2],  // intermediate value registers: current contents
+    output logic [33:0]      imd_val_d_o[2],  // intermediate value registers: next values
+    output logic [1:0]       imd_val_we_o,    // intermediate value registers: write enables
+
+    input  logic             multdiv_ready_id_i,
+
+    output logic [31:0]      multdiv_result_o,
+    output logic             valid_o
+);
+
+  import brq_pkg::*;
+
+  // Both multiplier variants
+  logic signed [34:0] mac_res_signed;
+  logic        [34:0] mac_res_ext;
+  logic        [33:0] accum;
+  logic        sign_a, sign_b;
+  logic        mult_valid;
+  logic        signed_mult;
+
+  // Results that become intermediate value depending on whether mul or div is being calculated
+  logic [33:0] mac_res_d, op_remainder_d;
+  // Raw output of MAC calculation
+  logic [33:0] mac_res;
+
+  // Divider signals
+  logic        div_sign_a, div_sign_b;
+  logic        is_greater_equal;
+  logic        div_change_sign, rem_change_sign;
+  logic [31:0] one_shift;
+  logic [31:0] op_denominator_q;
+  logic [31:0] op_numerator_q;
+  logic [31:0] op_quotient_q;
+  logic [31:0] op_denominator_d;
+  logic [31:0] op_numerator_d;
+  logic [31:0] op_quotient_d;
+  logic [31:0] next_remainder;
+  logic [32:0] next_quotient;
+  logic [31:0] res_adder_h;
+  logic        div_valid;
+  logic [ 4:0] div_counter_q, div_counter_d;
+  logic        multdiv_en;
+  logic        mult_hold;
+  logic        div_hold;
+  logic        div_by_zero_d, div_by_zero_q;
+
+  logic        mult_en_internal;
+  logic        div_en_internal;
+
+  typedef enum logic [2:0] {
+    MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
+  } md_fsm_e;
+  md_fsm_e md_state_q, md_state_d;
+
+  logic unused_mult_sel_i;
+  assign unused_mult_sel_i = mult_sel_i;
+
+  assign mult_en_internal = mult_en_i & ~mult_hold;
+  assign div_en_internal  = div_en_i & ~div_hold;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      div_counter_q    <= '0;
+      md_state_q       <= MD_IDLE;
+      op_numerator_q   <= '0;
+      op_quotient_q    <= '0;
+      div_by_zero_q    <= '0;
+    end else if (div_en_internal) begin
+      div_counter_q    <= div_counter_d;
+      op_numerator_q   <= op_numerator_d;
+      op_quotient_q    <= op_quotient_d;
+      md_state_q       <= md_state_d;
+      div_by_zero_q    <= div_by_zero_d;
+    end
+  end
+
+  // Entry 0 is written whenever either unit is active; entry 1 only while dividing.
+  assign multdiv_en = mult_en_internal | div_en_internal;
+
+  // Intermediate value register shared with ALU
+  assign imd_val_d_o[0] = div_sel_i ? op_remainder_d : mac_res_d;
+  assign imd_val_we_o[0] = multdiv_en;
+
+  assign imd_val_d_o[1] = {2'b0, op_denominator_d};
+  assign imd_val_we_o[1] = div_en_internal;
+  assign op_denominator_q = imd_val_q_i[1][31:0];
+  logic [1:0] unused_imd_val;
+  assign unused_imd_val = imd_val_q_i[1][33:32];
+  logic unused_mac_res_ext;
+  assign unused_mac_res_ext = mac_res_ext[34];
+
+  assign signed_mult      = (signed_mode_i != 2'b00);
+  assign multdiv_result_o = div_sel_i ? imd_val_q_i[0][31:0] : mac_res_d[31:0];
+
+  // The single cycle multiplier uses three 17 bit multipliers to compute MUL instructions in a
+  // single cycle and MULH instructions in two cycles.
+  if (RV32M == RV32MSingleCycle) begin : gen_mult_single_cycle
+
+    typedef enum logic {
+      MULL, MULH
+    } mult_fsm_e;
+    mult_fsm_e mult_state_q, mult_state_d;
+
+    logic signed [33:0] mult1_res, mult2_res, mult3_res;
+    logic [33:0]        mult1_res_uns;
+    logic [33:32]       unused_mult1_res_uns;
+    logic [15:0]        mult1_op_a, mult1_op_b;
+    logic [15:0]        mult2_op_a, mult2_op_b;
+    logic [15:0]        mult3_op_a, mult3_op_b;
+    logic               mult1_sign_a, mult1_sign_b;
+    logic               mult2_sign_a, mult2_sign_b;
+    logic               mult3_sign_a, mult3_sign_b;
+    logic [33:0]        summand1, summand2, summand3;
+
+    assign mult1_res = $signed({mult1_sign_a, mult1_op_a}) * $signed({mult1_sign_b, mult1_op_b});
+    assign mult2_res = $signed({mult2_sign_a, mult2_op_a}) * $signed({mult2_sign_b, mult2_op_b});
+    assign mult3_res = $signed({mult3_sign_a, mult3_op_a}) * $signed({mult3_sign_b, mult3_op_b});
+
+    assign mac_res_signed = $signed(summand1) + $signed(summand2) + $signed(summand3);
+
+    assign mult1_res_uns  = $unsigned(mult1_res);
+    assign mac_res_ext    = $unsigned(mac_res_signed);
+    assign mac_res        = mac_res_ext[33:0];
+
+    assign sign_a = signed_mode_i[0] & op_a_i[31];
+    assign sign_b = signed_mode_i[1] & op_b_i[31];
+
+    // The first two multipliers are only used in state 1 (MULL). We can assign them statically.
+    // al*bl
+    assign mult1_sign_a = 1'b0;
+    assign mult1_sign_b = 1'b0;
+    assign mult1_op_a = op_a_i[`OP_L];
+    assign mult1_op_b = op_b_i[`OP_L];
+
+    // al*bh
+    assign mult2_sign_a = 1'b0;
+    assign mult2_sign_b = sign_b;
+    assign mult2_op_a = op_a_i[`OP_L];
+    assign mult2_op_b = op_b_i[`OP_H];
+
+    // used in MULH
+    assign accum[17:0] = imd_val_q_i[0][33:16];
+    assign accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}};
+
+    always_comb begin
+      // Default values == MULL
+
+      // ah*bl
+      mult3_sign_a = sign_a;
+      mult3_sign_b = 1'b0;
+      mult3_op_a = op_a_i[`OP_H];
+      mult3_op_b = op_b_i[`OP_L];
+
+      summand1 = {18'h0, mult1_res_uns[`OP_H]};
+      summand2 = $unsigned(mult2_res);
+      summand3 = $unsigned(mult3_res);
+
+      // mac_res = A*B[47:16], mult1_res = A*B[15:0]
+      mac_res_d = {2'b0, mac_res[`OP_L], mult1_res_uns[`OP_L]};
+      mult_valid = mult_en_i;
+      mult_state_d = MULL;
+
+      mult_hold = 1'b0;
+
+      unique case (mult_state_q)
+
+        MULL: begin
+          if (operator_i != MD_OP_MULL) begin
+            mac_res_d = mac_res;
+            mult_valid = 1'b0;
+            mult_state_d = MULH;
+          end else begin
+            mult_hold = ~multdiv_ready_id_i;
+          end
+        end
+
+        MULH: begin
+          // ah*bh
+          mult3_sign_a = sign_a;
+          mult3_sign_b = sign_b;
+          mult3_op_a = op_a_i[`OP_H];
+          mult3_op_b = op_b_i[`OP_H];
+          mac_res_d = mac_res;
+
+          summand1 = '0;
+          summand2 = accum;
+          summand3 = mult3_res;
+
+          mult_state_d = MULL;
+          mult_valid = 1'b1;
+
+          mult_hold = ~multdiv_ready_id_i;
+        end
+
+        default: begin
+          mult_state_d = MULL;
+        end
+
+      endcase // mult_state_q
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        mult_state_q <= MULL;
+      end else begin
+        if (mult_en_internal) begin
+          mult_state_q <= mult_state_d;
+        end
+      end
+    end
+
+    assign unused_mult1_res_uns = mult1_res_uns[33:32];
+
+    // States must be known/valid.
+
+
+  // The fast multiplier uses one 17 bit multiplier to compute MUL instructions in 3 cycles
+  // and MULH instructions in 4 cycles.
+  end else begin : gen_mult_fast
+    logic [15:0] mult_op_a;
+    logic [15:0] mult_op_b;
+
+    typedef enum logic [1:0] {
+      ALBL, ALBH, AHBL, AHBH
+    } mult_fsm_e;
+    mult_fsm_e mult_state_q, mult_state_d;
+
+    // The 2 MSBs of mac_res_ext (mac_res_ext[34:33]) are always equal since:
+    // 1. The 2 MSBs of the multiplicands are always equal, and
+    // 2. The 16 MSBs of the addend (accum[33:18]) are always equal.
+    // Thus, it is safe to ignore mac_res_ext[34].
+    assign mac_res_signed =
+        $signed({sign_a, mult_op_a}) * $signed({sign_b, mult_op_b}) + $signed(accum);
+    assign mac_res_ext    = $unsigned(mac_res_signed);
+    assign mac_res        = mac_res_ext[33:0];
+
+    always_comb begin
+      mult_op_a    = op_a_i[`OP_L];
+      mult_op_b    = op_b_i[`OP_L];
+      sign_a       = 1'b0;
+      sign_b       = 1'b0;
+      accum        = imd_val_q_i[0];
+      mac_res_d    = mac_res;
+      mult_state_d = mult_state_q;
+      mult_valid   = 1'b0;
+      mult_hold    = 1'b0;
+
+      unique case (mult_state_q)
+
+        ALBL: begin
+          // al*bl
+          mult_op_a = op_a_i[`OP_L];
+          mult_op_b = op_b_i[`OP_L];
+          sign_a    = 1'b0;
+          sign_b    = 1'b0;
+          accum     = '0;
+          mac_res_d = mac_res;
+          mult_state_d = ALBH;
+        end
+
+        ALBH: begin
+          // al*bh<<16
+          mult_op_a = op_a_i[`OP_L];
+          mult_op_b = op_b_i[`OP_H];
+          sign_a    = 1'b0;
+          sign_b    = signed_mode_i[1] & op_b_i[31];
+          // result of AL*BL (in imd_val_q_i[0]) always unsigned with no carry
+          accum     = {18'b0, imd_val_q_i[0][31:16]};
+          if (operator_i == MD_OP_MULL) begin
+            mac_res_d = {2'b0, mac_res[`OP_L], imd_val_q_i[0][`OP_L]};
+          end else begin
+            // MD_OP_MULH
+            mac_res_d = mac_res;
+          end
+          mult_state_d = AHBL;
+        end
+
+        AHBL: begin
+          // ah*bl<<16
+          mult_op_a = op_a_i[`OP_H];
+          mult_op_b = op_b_i[`OP_L];
+          sign_a    = signed_mode_i[0] & op_a_i[31];
+          sign_b    = 1'b0;
+          if (operator_i == MD_OP_MULL) begin
+            accum        = {18'b0, imd_val_q_i[0][31:16]};
+            mac_res_d    = {2'b0, mac_res[15:0], imd_val_q_i[0][15:0]};
+            mult_valid   = 1'b1;
+
+            // Note no state transition will occur if mult_hold is set
+            mult_state_d = ALBL;
+            mult_hold    = ~multdiv_ready_id_i;
+          end else begin
+            accum        = imd_val_q_i[0];
+            mac_res_d    = mac_res;
+            mult_state_d = AHBH;
+          end
+        end
+
+        AHBH: begin
+          // only MD_OP_MULH here
+          // ah*bh
+          mult_op_a = op_a_i[`OP_H];
+          mult_op_b = op_b_i[`OP_H];
+          sign_a    = signed_mode_i[0] & op_a_i[31];
+          sign_b    = signed_mode_i[1] & op_b_i[31];
+          accum[17: 0]  = imd_val_q_i[0][33:16];
+          accum[33:18]  = {16{signed_mult & imd_val_q_i[0][33]}};
+          // result of AH*BL is not signed only if signed_mode_i == 2'b00
+          mac_res_d    = mac_res;
+          mult_valid   = 1'b1;
+
+          // Note no state transition will occur if mult_hold is set
+          mult_state_d = ALBL;
+          mult_hold    = ~multdiv_ready_id_i;
+        end
+        default: begin
+          mult_state_d = ALBL;
+        end
+      endcase // mult_state_q
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        mult_state_q <= ALBL;
+      end else begin
+        if (mult_en_internal) begin
+          mult_state_q <= mult_state_d;
+        end
+      end
+    end
+
+    // States must be known/valid.
+   
+  end // gen_mult_fast
+
+  // Divider
+  assign res_adder_h    = alu_adder_ext_i[32:1];
+  logic [1:0] unused_alu_adder_ext;
+  assign unused_alu_adder_ext = {alu_adder_ext_i[33],alu_adder_ext_i[0]};
+
+  assign next_remainder = is_greater_equal ? res_adder_h[31:0] : imd_val_q_i[0][31:0];
+  assign next_quotient  = is_greater_equal ? {1'b0, op_quotient_q} | {1'b0, one_shift} :
+                                             {1'b0, op_quotient_q};
+
+  assign one_shift      = {31'b0, 1'b1} << div_counter_q;
+
+  // The adder in the ALU computes alu_operand_a_o + alu_operand_b_o, i.e. Remainder - Divisor.
+  // If Remainder - Divisor >= 0, is_greater_equal is 1: the next Remainder is taken from
+  // res_adder_h and the quotient bit selected by div_counter_q is set (via one_shift).
+  always_comb begin
+    if ((imd_val_q_i[0][31] ^ op_denominator_q[31]) == 1'b0) begin
+      is_greater_equal = (res_adder_h[31] == 1'b0);
+    end else begin
+      is_greater_equal = imd_val_q_i[0][31];
+    end
+  end
+
+  assign div_sign_a      = op_a_i[31] & signed_mode_i[0];
+  assign div_sign_b      = op_b_i[31] & signed_mode_i[1];
+  assign div_change_sign = (div_sign_a ^ div_sign_b) & ~div_by_zero_q;
+  assign rem_change_sign = div_sign_a;
+  // Divider FSM: IDLE -> ABS_A -> ABS_B -> COMP (until div_counter_q == 1) -> LAST -> CHANGE_SIGN
+  // -> FINISH -> IDLE. With !data_ind_timing_i, a zero divisor goes from IDLE directly to FINISH.
+  always_comb begin
+    div_counter_d    = div_counter_q - 5'h1;
+    op_remainder_d   = imd_val_q_i[0];
+    op_quotient_d    = op_quotient_q;
+    md_state_d       = md_state_q;
+    op_numerator_d   = op_numerator_q;
+    op_denominator_d = op_denominator_q;
+    alu_operand_a_o  = {32'h0  , 1'b1};
+    alu_operand_b_o  = {~op_b_i, 1'b1};
+    div_valid        = 1'b0;
+    div_hold         = 1'b0;
+    div_by_zero_d    = div_by_zero_q;
+
+    unique case(md_state_q)
+      MD_IDLE: begin
+        if (operator_i == MD_OP_DIV) begin
+          // Check if the Denominator is 0
+          // quotient for division by 0 is specified to be -1
+          // Note with data-independent time option, the full divide operation will proceed as
+          // normal and will naturally return -1
+          op_remainder_d = '1;
+          md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+          // Record that this is a div by zero to stop the sign change at the end of the
+          // division (in data_ind_timing mode).
+          div_by_zero_d  = equal_to_zero_i;
+        end else begin
+          // Check if the Denominator is 0
+          // remainder for division by 0 is specified to be the numerator (operand a)
+          // Note with data-independent time option, the full divide operation will proceed as
+          // normal and will naturally return operand a
+          op_remainder_d = {2'b0, op_a_i};
+          md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+        end
+        // 0 - B = 0 iff B == 0
+        alu_operand_a_o  = {32'h0  , 1'b1};
+        alu_operand_b_o  = {~op_b_i, 1'b1};
+        div_counter_d    = 5'd31;
+      end
+
+      MD_ABS_A: begin
+        // quotient
+        op_quotient_d   = '0;
+        // A abs value
+        op_numerator_d  = div_sign_a ? alu_adder_i : op_a_i;
+        md_state_d      = MD_ABS_B;
+        div_counter_d   = 5'd31;
+        // ABS(A) = 0 - A
+        alu_operand_a_o = {32'h0  , 1'b1};
+        alu_operand_b_o = {~op_a_i, 1'b1};
+      end
+
+      MD_ABS_B: begin
+        // remainder
+        op_remainder_d   = { 33'h0, op_numerator_q[31]};
+        // B abs value
+        op_denominator_d = div_sign_b ? alu_adder_i : op_b_i;
+        md_state_d       = MD_COMP;
+        div_counter_d    = 5'd31;
+        // ABS(B) = 0 - B
+        alu_operand_a_o  = {32'h0  , 1'b1};
+        alu_operand_b_o  = {~op_b_i, 1'b1};
+      end
+
+      MD_COMP: begin
+        op_remainder_d  = {1'b0, next_remainder[31:0], op_numerator_q[div_counter_d]};
+        op_quotient_d   = next_quotient[31:0];
+        md_state_d      = (div_counter_q == 5'd1) ? MD_LAST : MD_COMP;
+        // Division
+        alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder
+        alu_operand_b_o = {~op_denominator_q[31:0], 1'b1};  // -denominator two's complement
+      end
+
+      MD_LAST: begin
+        if (operator_i == MD_OP_DIV) begin
+          // this time we save the quotient in op_remainder_d (i.e. imd_val_q_i[0]) since
+          // we do not need anymore the remainder
+          op_remainder_d = {1'b0, next_quotient};
+        end else begin
+          // this time we do not save the quotient anymore since we need only the remainder
+          op_remainder_d = {2'b0, next_remainder[31:0]};
+        end
+        // Division
+        alu_operand_a_o  = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder
+        alu_operand_b_o  = {~op_denominator_q[31:0], 1'b1};  // -denominator two's complement
+
+        md_state_d = MD_CHANGE_SIGN;
+      end
+
+      MD_CHANGE_SIGN: begin
+        md_state_d  = MD_FINISH;
+        if (operator_i == MD_OP_DIV) begin
+          op_remainder_d = (div_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0];
+        end else begin
+          op_remainder_d = (rem_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0];
+        end
+        // ABS(Quotient) = 0 - Quotient (or Remainder)
+        alu_operand_a_o  = {32'h0  , 1'b1};
+        alu_operand_b_o  = {~imd_val_q_i[0][31:0], 1'b1};
+      end
+
+      MD_FINISH: begin
+        // Hold result until ID stage is ready to accept it
+        // Note no state transition will occur if div_hold is set
+        md_state_d = MD_IDLE;
+        div_hold   = ~multdiv_ready_id_i;
+        div_valid   = 1'b1;
+      end
+
+      default: begin
+        md_state_d = MD_IDLE;
+      end
+    endcase // md_state_q
+  end
+
+  assign valid_o = mult_valid | div_valid;
+
+
+endmodule // brq_exu_multdiv_fast
diff --git a/verilog/rtl/brq_exu_multdiv_slow.sv b/verilog/rtl/brq_exu_multdiv_slow.sv
new file mode 100644
index 0000000..5592cb6
--- /dev/null
+++ b/verilog/rtl/brq_exu_multdiv_slow.sv
@@ -0,0 +1,356 @@
+
+
+/**
+ * Slow (iterative) multiplier and divider
+ *
+ * Bit-serial Baugh-Wooley multiplication and long division, one step per cycle.
+ * The adder lives in the ALU: operands go out on alu_operand_{a,b}_o and the sum
+ * returns on alu_adder_ext_i / alu_adder_i. Accumulator state is kept in imd_val.
+ */
+module brq_exu_multdiv_slow
+(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             mult_en_i,  // dynamic enable signal, for FSM control
+    input  logic             div_en_i,   // dynamic enable signal, for FSM control
+    input  logic             mult_sel_i, // static decoder output, for data muxes
+    input  logic             div_sel_i,  // static decoder output, for data muxes
+    input  brq_pkg::md_op_e operator_i,
+    input  logic  [1:0]      signed_mode_i,     // [0]: operand A signed, [1]: operand B signed
+    input  logic [31:0]      op_a_i,
+    input  logic [31:0]      op_b_i,
+    input  logic [33:0]      alu_adder_ext_i,
+    input  logic [31:0]      alu_adder_i,
+    input  logic             equal_to_zero_i,   // sampled in MD_IDLE for the divide-by-zero check
+    input  logic             data_ind_timing_i, // disables the early-exit shortcuts when set
+
+    output logic [32:0]      alu_operand_a_o,
+    output logic [32:0]      alu_operand_b_o,
+
+    input  logic [33:0]      imd_val_q_i[2],
+    output logic [33:0]      imd_val_d_o[2],
+    output logic  [1:0]      imd_val_we_o,
+
+    input  logic             multdiv_ready_id_i, // result is held while this is low
+
+    output logic [31:0]      multdiv_result_o,
+
+    output logic             valid_o
+);
+
+  import brq_pkg::*;
+
+  typedef enum logic [2:0] {
+    MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
+  } md_fsm_e;
+  md_fsm_e md_state_q, md_state_d;
+
+  logic [32:0] accum_window_q, accum_window_d;
+  logic        unused_imd_val0;
+  logic [ 1:0] unused_imd_val1;
+
+  logic [32:0] res_adder_l;
+  logic [32:0] res_adder_h;
+
+  logic [ 4:0] multdiv_count_q, multdiv_count_d;
+  logic [32:0] op_b_shift_q, op_b_shift_d;
+  logic [32:0] op_a_shift_q, op_a_shift_d;
+  logic [32:0] op_a_ext, op_b_ext;
+  logic [32:0] one_shift;
+  logic [32:0] op_a_bw_pp, op_a_bw_last_pp;
+  logic [31:0] b_0;
+  logic        sign_a, sign_b;
+  logic [32:0] next_quotient;
+  logic [31:0] next_remainder;
+  logic [31:0] op_numerator_q, op_numerator_d;
+  logic        is_greater_equal;
+  logic        div_change_sign, rem_change_sign;
+  logic        div_by_zero_d, div_by_zero_q;
+  logic        multdiv_hold;
+  logic        multdiv_en;
+
+   // (accum_window_q + op_a_shift_q)
+  assign res_adder_l = alu_adder_ext_i[32:0];
+   // (accum_window_q + op_a_shift_q)>>1
+  assign res_adder_h = alu_adder_ext_i[33:1];
+
+  /////////////////////
+  // ALU Operand MUX //
+  /////////////////////
+
+  // Intermediate value register shared with ALU
+  assign imd_val_d_o[0]  = {1'b0,accum_window_d};
+  assign imd_val_we_o[0] = ~multdiv_hold;
+  assign accum_window_q  = imd_val_q_i[0][32:0];
+  assign unused_imd_val0 = imd_val_q_i[0][33];
+
+  assign imd_val_d_o[1]  = {2'b00, op_numerator_d};
+  assign imd_val_we_o[1] = multdiv_en;
+  assign op_numerator_q  = imd_val_q_i[1][31:0];
+  assign unused_imd_val1 = imd_val_q_i[1][33:32];
+
+  always_comb begin
+    alu_operand_a_o = accum_window_q;
+    // Operand B is assigned in every branch below, including the default
+    unique case(operator_i)
+
+      MD_OP_MULL: begin
+        alu_operand_b_o = op_a_bw_pp;
+      end
+
+      MD_OP_MULH: begin
+        alu_operand_b_o = (md_state_q == MD_LAST) ? op_a_bw_last_pp : op_a_bw_pp;
+      end
+
+      MD_OP_DIV,
+      MD_OP_REM: begin
+        unique case(md_state_q)
+          MD_IDLE: begin
+            // 0 - B = 0 iff B == 0
+            alu_operand_a_o = {32'h0  , 1'b1};
+            alu_operand_b_o = {~op_b_i, 1'b1};
+          end
+          MD_ABS_A: begin
+            // ABS(A) = 0 - A
+            alu_operand_a_o = {32'h0  , 1'b1};
+            alu_operand_b_o = {~op_a_i, 1'b1};
+          end
+          MD_ABS_B: begin
+            // ABS(B) = 0 - B
+            alu_operand_a_o = {32'h0  , 1'b1};
+            alu_operand_b_o = {~op_b_i, 1'b1};
+          end
+          MD_CHANGE_SIGN: begin
+            // ABS(Quotient) = 0 - Quotient (or Remainder)
+            alu_operand_a_o = {32'h0  , 1'b1};
+            alu_operand_b_o = {~accum_window_q[31:0], 1'b1};
+          end
+          default: begin
+            // Division
+            alu_operand_a_o = {accum_window_q[31:0], 1'b1}; // it contains the remainder
+            alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1};  // -denominator two's complement
+          end
+        endcase
+      end
+      default: begin
+        alu_operand_a_o = accum_window_q;
+        alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1};
+      end
+    endcase
+  end
+
+  // Multiplier partial product calculation
+  assign b_0             = {32{op_b_shift_q[0]}};
+  assign op_a_bw_pp      = { ~(op_a_shift_q[32] & op_b_shift_q[0]),  (op_a_shift_q[31:0] & b_0) };
+  assign op_a_bw_last_pp = {  (op_a_shift_q[32] & op_b_shift_q[0]), ~(op_a_shift_q[31:0] & b_0) };
+
+  // Sign extend the input operands
+  assign sign_a   = op_a_i[31] & signed_mode_i[0];
+  assign sign_b   = op_b_i[31] & signed_mode_i[1];
+
+  assign op_a_ext = {sign_a, op_a_i};
+  assign op_b_ext = {sign_b, op_b_i};
+
+  // Divider calculations
+
+  // The adder in the ALU computes Remainder - Divisor. If Remainder - Divisor >= 0,
+  // is_greater_equal is true, the next Remainder is the subtraction result and the Quotient
+  // multdiv_count_q-th bit is set to 1.
+  assign is_greater_equal = (accum_window_q[31] == op_b_shift_q[31]) ?
+      ~res_adder_h[31] : accum_window_q[31];
+
+  assign one_shift      = {32'b0, 1'b1} << multdiv_count_q;
+
+  assign next_remainder = is_greater_equal ? res_adder_h[31:0]        : accum_window_q[31:0];
+  assign next_quotient  = is_greater_equal ? op_a_shift_q | one_shift : op_a_shift_q;
+
+  assign div_change_sign  = (sign_a ^ sign_b) & ~div_by_zero_q;
+  assign rem_change_sign  = sign_a;
+
+  always_comb begin
+    multdiv_count_d  = multdiv_count_q;
+    accum_window_d   = accum_window_q;
+    op_b_shift_d     = op_b_shift_q;
+    op_a_shift_d     = op_a_shift_q;
+    op_numerator_d   = op_numerator_q;
+    md_state_d       = md_state_q;
+    multdiv_hold     = 1'b0;
+    div_by_zero_d    = div_by_zero_q;
+    if (mult_sel_i || div_sel_i) begin
+      unique case(md_state_q)
+        MD_IDLE: begin
+          unique case(operator_i)
+            MD_OP_MULL: begin
+              op_a_shift_d   = op_a_ext << 1;
+              accum_window_d = {       ~(op_a_ext[32]   &     op_b_i[0]),
+                                         op_a_ext[31:0] & {32{op_b_i[0]}}  };
+              op_b_shift_d   = op_b_ext >> 1;
+              // Proceed with multiplication by 0/1 in data-independent time mode
+              md_state_d     = (!data_ind_timing_i && ((op_b_ext >> 1) == 0)) ? MD_LAST : MD_COMP;
+            end
+            MD_OP_MULH: begin
+              op_a_shift_d   = op_a_ext;
+              accum_window_d = { 1'b1, ~(op_a_ext[32]   &     op_b_i[0]),
+                                         op_a_ext[31:1] & {31{op_b_i[0]}}  };
+              op_b_shift_d   = op_b_ext >> 1;
+              md_state_d     = MD_COMP;
+            end
+            MD_OP_DIV: begin
+              // Check if the denominator is 0
+              // quotient for division by 0 is specified to be -1
+              // Note with data-independent time option, the full divide operation will proceed as
+              // normal and will naturally return -1
+              accum_window_d = {33{1'b1}};
+              md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+              // Record that this is a div by zero to stop the sign change at the end of the
+              // division (in data_ind_timing mode).
+              div_by_zero_d  = equal_to_zero_i;
+            end
+            MD_OP_REM: begin
+              // Check if the denominator is 0
+              // remainder for division by 0 is specified to be the numerator (operand a)
+              // Note with data-independent time option, the full divide operation will proceed as
+              // normal and will naturally return operand a
+              accum_window_d = op_a_ext;
+              md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+            end
+            default: ;
+          endcase
+          multdiv_count_d   = 5'd31;
+        end
+
+        MD_ABS_A: begin
+          // quotient
+          op_a_shift_d   = '0;
+          // A abs value
+          op_numerator_d = sign_a ? alu_adder_i : op_a_i;
+          md_state_d     = MD_ABS_B;
+        end
+
+        MD_ABS_B: begin
+          // remainder starts with the most significant bit of the numerator
+          accum_window_d = {32'h0,op_numerator_q[31]};
+          // B abs value
+          op_b_shift_d   = sign_b ? {1'b0,alu_adder_i} : {1'b0,op_b_i};
+          md_state_d     = MD_COMP;
+        end
+
+        MD_COMP: begin
+          multdiv_count_d = multdiv_count_q - 5'h1;
+          unique case(operator_i)
+            MD_OP_MULL: begin
+              accum_window_d = res_adder_l;
+              op_a_shift_d   = op_a_shift_q << 1;
+              op_b_shift_d   = op_b_shift_q >> 1;
+              // Multiplication is complete once op_b is zero, unless in data_ind_timing mode where
+              // the maximum possible shift-add operations will be completed regardless of op_b
+              md_state_d     = ((!data_ind_timing_i && (op_b_shift_d == 0)) ||
+                                (multdiv_count_q == 5'd1)) ? MD_LAST : MD_COMP;
+            end
+            MD_OP_MULH: begin
+              accum_window_d = res_adder_h;
+              op_a_shift_d   = op_a_shift_q;
+              op_b_shift_d   = op_b_shift_q >> 1;
+              md_state_d     = (multdiv_count_q == 5'd1) ? MD_LAST : MD_COMP;
+            end
+            MD_OP_DIV,
+            MD_OP_REM: begin
+              accum_window_d = {next_remainder[31:0], op_numerator_q[multdiv_count_d]};
+              op_a_shift_d   = next_quotient;
+              md_state_d     = (multdiv_count_q == 5'd1) ? MD_LAST : MD_COMP;
+            end
+            default: ;
+          endcase
+        end
+
+        MD_LAST: begin
+          unique case(operator_i)
+            MD_OP_MULL: begin
+              accum_window_d = res_adder_l;
+
+              // Note no state transition will occur if multdiv_hold is set
+              md_state_d   = MD_IDLE;
+              multdiv_hold = ~multdiv_ready_id_i;
+            end
+            MD_OP_MULH: begin
+              accum_window_d = res_adder_l;
+              // In this state the ALU operand mux selects op_a_bw_last_pp for MULH
+
+              // Note no state transition will occur if multdiv_hold is set
+              md_state_d   = MD_IDLE;
+              multdiv_hold = ~multdiv_ready_id_i;
+            end
+            MD_OP_DIV: begin
+              // this time we save the quotient in accum_window_q since we do not need anymore the
+              // remainder
+              accum_window_d = next_quotient;
+              md_state_d     = MD_CHANGE_SIGN;
+            end
+            MD_OP_REM: begin
+              // this time we do not save the quotient anymore since we need only the remainder
+              accum_window_d = {1'b0, next_remainder[31:0]};
+              md_state_d     = MD_CHANGE_SIGN;
+            end
+            default: ;
+          endcase
+        end
+
+        MD_CHANGE_SIGN: begin
+          md_state_d = MD_FINISH;
+          unique case(operator_i)
+            MD_OP_DIV:
+              accum_window_d = div_change_sign ? {1'b0,alu_adder_i} : accum_window_q;
+            MD_OP_REM:
+              accum_window_d = rem_change_sign ? {1'b0,alu_adder_i} : accum_window_q;
+            default: ;
+          endcase
+        end
+
+        MD_FINISH: begin
+          // Note no state transition will occur if multdiv_hold is set
+          md_state_d   = MD_IDLE;
+          multdiv_hold = ~multdiv_ready_id_i;
+        end
+
+        default: begin
+          md_state_d = MD_IDLE;
+        end
+      endcase // md_state_q
+    end // (mult_sel_i || div_sel_i)
+  end
+
+  //////////////////////////////////////////
+  // Multiplier / Divider state registers //
+  //////////////////////////////////////////
+
+  assign multdiv_en = (mult_en_i | div_en_i) & ~multdiv_hold;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      multdiv_count_q  <= 5'h0;
+      op_b_shift_q     <= 33'h0;
+      op_a_shift_q     <= 33'h0;
+      md_state_q       <= MD_IDLE;
+      div_by_zero_q    <= 1'b0;
+    end else if (multdiv_en) begin
+      multdiv_count_q  <= multdiv_count_d;
+      op_b_shift_q     <= op_b_shift_d;
+      op_a_shift_q     <= op_a_shift_d;
+      md_state_q       <= md_state_d;
+      div_by_zero_q    <= div_by_zero_d;
+    end
+  end
+
+  /////////////
+  // Outputs //
+  /////////////
+
+  assign valid_o = (md_state_q == MD_FINISH) |
+                   (md_state_q == MD_LAST &
+                   (operator_i == MD_OP_MULL |
+                    operator_i == MD_OP_MULH));
+
+  assign multdiv_result_o = div_en_i ? accum_window_q[31:0] : res_adder_l[31:0];
+
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/brq_fp_register_file_ff.sv b/verilog/rtl/brq_fp_register_file_ff.sv
new file mode 100644
index 0000000..3b1b597
--- /dev/null
+++ b/verilog/rtl/brq_fp_register_file_ff.sv
@@ -0,0 +1,77 @@
+//`timescale 1ns/1ps
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * RISC-V floating-point register file
+ *
+ * Flip-flop based register file with 32 registers of DataWidth bits each. Every
+ * register, including index 0, is writable and resets to zero. It has three
+ * combinational read ports (A, B, C) and one synchronous write port.
+ */
+
+module brq_fp_register_file_ff #(
+    parameter brq_pkg::rvfloat_e RVF       = brq_pkg::RV32FSingle, // not used for sizing
+    parameter int unsigned       DataWidth = 32
+    ) (
+    // Clock and Reset
+    input  logic                 clk_i,
+    input  logic                 rst_ni,
+
+    // Read port R1
+    input  logic [4:0]           raddr_a_i,
+    output logic [DataWidth-1:0] rdata_a_o,
+
+    // Read port R2
+    input  logic [4:0]           raddr_b_i,
+    output logic [DataWidth-1:0] rdata_b_o,
+
+    // Read port R3
+    input  logic [4:0]           raddr_c_i,
+    output logic [DataWidth-1:0] rdata_c_o,
+
+
+    // Write port W1
+    input  logic [4:0]           waddr_a_i,
+    input  logic [DataWidth-1:0] wdata_a_i,
+    input  logic                 we_a_i
+
+);
+import brq_pkg::rvfloat_e;
+
+  // All address ports are 5 bits wide, so 32 registers are addressable regardless
+  // of RVF. Sizing the array from RVF would only add registers that mirror the
+  // writes of the first 32 and can never be read.
+  localparam int unsigned ADDR_WIDTH = 5;
+  localparam int unsigned NUM_WORDS  = 2**ADDR_WIDTH;
+
+  // Register storage and per-register write enables
+  logic [NUM_WORDS-1:0][DataWidth-1:0] rf_reg_q;
+  logic [NUM_WORDS-1:0]                we_a_dec;
+
+  // Decode the write address into per-register write enables
+  always_comb begin : we_a_decoder
+    for (int unsigned i = 0; i < NUM_WORDS; i++) begin
+      we_a_dec[i] = (waddr_a_i == ADDR_WIDTH'(i)) ? we_a_i : 1'b0;
+    end
+  end
+
+  // One flop bank per register, cleared by the asynchronous active-low reset
+  for (genvar i = 0; i < NUM_WORDS; i++) begin : g_rf_flops
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        rf_reg_q[i] <= '0;
+      end else if (we_a_dec[i]) begin
+        rf_reg_q[i] <= wdata_a_i;
+      end
+    end
+  end
+
+  // Combinational reads; there is no write-to-read bypass
+  assign rdata_a_o = rf_reg_q[raddr_a_i];
+  assign rdata_b_o = rf_reg_q[raddr_b_i];
+  assign rdata_c_o = rf_reg_q[raddr_c_i];
+
+endmodule
diff --git a/verilog/rtl/brq_idu.sv b/verilog/rtl/brq_idu.sv
new file mode 100644
index 0000000..df82b18
--- /dev/null
+++ b/verilog/rtl/brq_idu.sv
@@ -0,0 +1,1077 @@
+
+
+`ifdef RISCV_FORMAL
+  `define RVFI
+`endif
+
+/**
+ * Instruction Decode Stage
+ *
+ * Decode stage of the core. It decodes the instructions and hosts the register
+ * file.
+ */
+
+module brq_idu #(
+    parameter bit                RV32E           = 0,
+    parameter brq_pkg::rv32m_e   RV32M           = brq_pkg::RV32MFast,
+    parameter brq_pkg::rv32b_e   RV32B           = brq_pkg::RV32BNone,
+    parameter brq_pkg::rvfloat_e RVF             = brq_pkg::RV64FDouble,
+    parameter bit                DataIndTiming   = 1'b0,
+    parameter bit                BranchTargetALU = 0,
+    parameter bit                SpecBranch      = 0,
+    parameter bit                WritebackStage  = 0,
+    parameter bit                BranchPredictor = 0
+) (
+    input  logic                      clk_i,
+    input  logic                      rst_ni,
+
+    output logic                      ctrl_busy_o,
+    output logic                      illegal_insn_o,
+
+    // Interface to IF stage
+    input  logic                      instr_valid_i,
+    input  logic [31:0]               instr_rdata_i,         // from IF-ID pipeline registers
+    input  logic [31:0]               instr_rdata_alu_i,     // from IF-ID pipeline registers
+    input  logic [15:0]               instr_rdata_c_i,       // from IF-ID pipeline registers
+    input  logic                      instr_is_compressed_i,
+    input  logic                      instr_bp_taken_i,
+    output logic                      instr_req_o,
+    output logic                      instr_first_cycle_id_o,
+    output logic                      instr_valid_clear_o,   // kill instr in IF-ID reg
+    output logic                      id_in_ready_o,         // ID stage is ready for next instr
+    output logic                      icache_inval_o,
+
+    // Jumps and branches
+    input  logic                      branch_decision_i,
+
+    // IF and ID stage signals
+    output logic                      pc_set_o,
+    output logic                      pc_set_spec_o,
+    output brq_pkg::pc_sel_e          pc_mux_o,
+    output logic                      nt_branch_mispredict_o,
+    output brq_pkg::exc_pc_sel_e      exc_pc_mux_o,
+    output brq_pkg::exc_cause_e       exc_cause_o,
+
+    input  logic                      illegal_c_insn_i,
+    input  logic                      instr_fetch_err_i,
+    input  logic                      instr_fetch_err_plus2_i,
+
+    input  logic [31:0]               pc_id_i,
+
+    // Stalls
+    input  logic                      ex_valid_i,       // EX stage has valid output
+    input  logic                      lsu_resp_valid_i, // LSU has valid output, or is done
+    // ALU
+    output brq_pkg::alu_op_e          alu_operator_ex_o,
+    output logic [31:0]               alu_operand_a_ex_o,
+    output logic [31:0]               alu_operand_b_ex_o,
+
+    // Multicycle Operation Stage Register
+    input  logic [1:0]                imd_val_we_ex_i,
+    input  logic [33:0]               imd_val_d_ex_i[2],
+    output logic [33:0]               imd_val_q_ex_o[2],
+
+    // Branch target ALU
+    output logic [31:0]               bt_a_operand_o,
+    output logic [31:0]               bt_b_operand_o,
+
+    // MUL, DIV
+    output logic                      mult_en_ex_o,
+    output logic                      div_en_ex_o,
+    output logic                      mult_sel_ex_o,
+    output logic                      div_sel_ex_o,
+    output brq_pkg::md_op_e           multdiv_operator_ex_o,
+    output logic  [1:0]               multdiv_signed_mode_ex_o,
+    output logic [31:0]               multdiv_operand_a_ex_o,
+    output logic [31:0]               multdiv_operand_b_ex_o,
+    output logic                      multdiv_ready_id_o,
+
+    // CSR
+    output logic                      csr_access_o,
+    output brq_pkg::csr_op_e          csr_op_o,
+    output logic                      csr_op_en_o,
+    output logic                      csr_save_if_o,
+    output logic                      csr_save_id_o,
+    output logic                      csr_save_wb_o,
+    output logic                      csr_restore_mret_id_o,
+    output logic                      csr_restore_dret_id_o,
+    output logic                      csr_save_cause_o,
+    output logic [31:0]               csr_mtval_o,
+    input  brq_pkg::priv_lvl_e        priv_mode_i,
+    input  logic                      csr_mstatus_tw_i,
+    input  logic                      illegal_csr_insn_i,
+    input  logic                      data_ind_timing_i,
+
+    // Interface to load store unit
+    output logic                      lsu_req_o,
+    output logic                      lsu_we_o,
+    output logic [1:0]                lsu_type_o,
+    output logic                      lsu_sign_ext_o,
+    output logic [31:0]               lsu_wdata_o,
+
+    input  logic                      lsu_req_done_i, // Data req to LSU is complete and
+                                                      // instruction can move to writeback
+                                                      // (only relevant where writeback stage is
+                                                      // present)
+
+    input  logic                      lsu_addr_incr_req_i,
+    input  logic [31:0]               lsu_addr_last_i,
+
+    // Interrupt signals
+    input  logic                      csr_mstatus_mie_i,
+    input  logic                      irq_pending_i,
+    input  brq_pkg::irqs_t            irqs_i,
+    input  logic                      irq_nm_i,
+    output logic                      nmi_mode_o,
+
+    input  logic                      lsu_load_err_i,
+    input  logic                      lsu_store_err_i,
+
+    // Debug Signal
+    output logic                      debug_mode_o,
+    output brq_pkg::dbg_cause_e       debug_cause_o,
+    output logic                      debug_csr_save_o,
+    input  logic                      debug_req_i,
+    input  logic                      debug_single_step_i,
+    input  logic                      debug_ebreakm_i,
+    input  logic                      debug_ebreaku_i,
+    input  logic                      trigger_match_i,
+
+    // Write back signal
+    input  logic [31:0]               result_ex_i,
+    input  logic [31:0]               csr_rdata_i,
+
+    // Register file read
+    output logic [4:0]                rf_raddr_a_o,
+    input  logic [31:0]               rf_rdata_a_i,
+    output logic [4:0]                rf_raddr_b_o,
+    input  logic [31:0]               rf_rdata_b_i,
+    output logic                      rf_ren_a_o,
+    output logic                      rf_ren_b_o,
+
+    // Register file write (via writeback)
+    output logic [4:0]                rf_waddr_id_o,
+    output logic [31:0]               rf_wdata_id_o,
+    output logic                      rf_we_id_o,
+    output logic                      rf_rd_a_wb_match_o,
+    output logic                      rf_rd_b_wb_match_o,
+
+    // Register write information from writeback (for resolving data hazards)
+    input  logic [4:0]                rf_waddr_wb_i,
+    input  logic [31:0]               rf_wdata_fwd_wb_i,
+    input  logic                      rf_write_wb_i,
+
+    output  logic                     en_wb_o,
+    output  brq_pkg::wb_instr_type_e  instr_type_wb_o,
+    output  logic                     instr_perf_count_id_o,
+    input logic                       ready_wb_i,
+    input logic                       outstanding_load_wb_i,
+    input logic                       outstanding_store_wb_i,
+
+    // Performance Counters
+    output logic                      perf_jump_o,    // executing a jump instr
+    output logic                      perf_branch_o,  // executing a branch instr
+    output logic                      perf_tbranch_o, // executing a taken branch instr
+    output logic                      perf_dside_wait_o, // instruction in ID/EX is awaiting memory
+                                                         // access to finish before proceeding
+    output logic                      perf_mul_wait_o,
+    output logic                      perf_div_wait_o,
+    output logic                      instr_id_done_o,
+
+    // Floating point extensions IO
+    output fpnew_pkg::roundmode_e     fp_rounding_mode_o,    // defines the rounding mode 
+   // output brq_pkg::op_b_sel_e        fp_alu_op_b_mux_sel_o, // operand b selection: reg value or
+                                                             // immediate 
+    input  logic [31:0]               fp_rf_rdata_a_i,
+    input  logic [31:0]               fp_rf_rdata_b_i,
+    input  logic [31:0]               fp_rf_rdata_c_i,
+    output logic [4:0]                fp_rf_raddr_a_o,
+    output logic [4:0]                fp_rf_raddr_b_o,
+    output logic [4:0]                fp_rf_raddr_c_o,
+    //output logic                      fp_rf_ren_a_o,     
+    //output logic                      fp_rf_ren_b_o,     
+    //output logic                      fp_rf_ren_c_o,
+    output logic [4:0]                fp_rf_waddr_o,
+    output logic                      fp_rf_we_o,
+
+    output fpnew_pkg::operation_e     fp_alu_operator_o,
+    output logic                      fp_alu_op_mod_o,
+    output fpnew_pkg::fp_format_e     fp_src_fmt_o,
+    output fpnew_pkg::fp_format_e     fp_dst_fmt_o,
+    output logic                      fp_rm_dynamic_o,
+    output logic                      fp_flush_o,
+    output logic                      is_fp_instr_o,
+    output logic                      use_fp_rs1_o,
+    output logic                      use_fp_rs2_o,
+    output logic                      use_fp_rs3_o,
+    output logic                      use_fp_rd_o,
+    input  logic                      fpu_busy_i,
+    input  logic                      fp_rf_write_wb_i,
+    input  logic [31:0]               fp_rf_wdata_fwd_wb_i,
+    output logic [2:0][31:0]          fp_operands_o,
+    output logic                      fp_load_o
+);
+
+  import brq_pkg::*;
+
+  // Decoder/Controller, ID stage internal signals
+  logic        illegal_insn_dec;
+  logic        ebrk_insn;
+  logic        mret_insn_dec;
+  logic        dret_insn_dec;
+  logic        ecall_insn_dec;
+  logic        wfi_insn_dec;
+
+  logic        wb_exception;
+
+  logic        branch_in_dec;
+  logic        branch_spec, branch_set_spec;
+  logic        branch_set, branch_set_d;
+  logic        branch_not_set;
+  logic        branch_taken;
+  logic        jump_in_dec;
+  logic        jump_set_dec;
+  logic        jump_set;
+
+  logic        instr_first_cycle;
+  logic        instr_executing;
+  logic        instr_done;
+  logic        controller_run;
+  logic        stall_ld_hz;
+  logic        stall_mem;
+  logic        stall_multdiv;
+  logic        stall_branch;
+  logic        stall_jump;
+  logic        stall_id;
+  logic        stall_wb;
+  logic        flush_id;
+  logic        multicycle_done;
+
+  // Immediate decoding and sign extension
+  logic [31:0] imm_i_type;
+  logic [31:0] imm_s_type;
+  logic [31:0] imm_b_type;
+  logic [31:0] imm_u_type;
+  logic [31:0] imm_j_type;
+  logic [31:0] zimm_rs1_type;
+
+  logic [31:0] imm_a;       // contains the immediate for operand b
+  logic [31:0] imm_b;       // contains the immediate for operand b
+
+  // Register file interface
+
+  rf_wd_sel_e  rf_wdata_sel;
+  logic        rf_we_dec, rf_we_raw;
+  logic        rf_ren_a, rf_ren_b;
+
+  assign rf_ren_a_o = rf_ren_a;
+  assign rf_ren_b_o = rf_ren_b;
+
+  logic [31:0] rf_rdata_a_fwd;
+  logic [31:0] rf_rdata_b_fwd;
+
+  // ALU Control
+  alu_op_e     alu_operator;
+  op_a_sel_e   alu_op_a_mux_sel, alu_op_a_mux_sel_dec;
+  op_b_sel_e   alu_op_b_mux_sel, alu_op_b_mux_sel_dec;
+  logic        alu_multicycle_dec;
+  logic        stall_alu;
+
+  logic [33:0] imd_val_q[2];
+
+  op_a_sel_e   bt_a_mux_sel;
+  imm_b_sel_e  bt_b_mux_sel;
+
+  imm_a_sel_e  imm_a_mux_sel;
+  imm_b_sel_e  imm_b_mux_sel, imm_b_mux_sel_dec;
+
+  // Multiplier Control
+  logic        mult_en_id, mult_en_dec; // use integer multiplier
+  logic        div_en_id, div_en_dec;   // use integer division or reminder
+  logic        multdiv_en_dec;
+  md_op_e      multdiv_operator;
+  logic [1:0]  multdiv_signed_mode;
+
+  // Data Memory Control
+  logic        lsu_we;
+  logic [1:0]  lsu_type;
+  logic        lsu_sign_ext;
+  logic        lsu_req, lsu_req_dec;
+  logic        data_req_allowed;
+
+  // CSR control
+  logic        csr_pipe_flush;
+
+  logic [31:0] alu_operand_a;
+  logic [31:0] alu_operand_b;
+
+  // Floating point 
+  logic        fp_swap_oprnds;
+  logic [31:0] fp_rf_rdata_a_fwd;
+  logic [31:0] fp_rf_rdata_b_fwd;
+  logic [31:0] fp_rf_rdata_c_fwd;
+  logic [31:0] temp;
+  logic [31:0] fpu_op_a;
+  logic [31:0] fpu_op_b;
+  logic [31:0] fpu_op_c;
+  logic        mv_instr;
+  logic [31:0] result_wb;
+
+  /////////////
+  // LSU Mux //
+  /////////////
+
+  // Misaligned loads/stores result in two aligned loads/stores, compute second address
+  assign alu_op_a_mux_sel = lsu_addr_incr_req_i ? OP_A_FWD        : alu_op_a_mux_sel_dec;
+  assign alu_op_b_mux_sel = lsu_addr_incr_req_i ? OP_B_IMM        : alu_op_b_mux_sel_dec;
+  assign imm_b_mux_sel    = lsu_addr_incr_req_i ? IMM_B_INCR_ADDR : imm_b_mux_sel_dec;
+
+  ///////////////////
+  // Operand MUXES //
+  ///////////////////
+
+  // Main ALU immediate MUX for Operand A
+  assign imm_a = (imm_a_mux_sel == IMM_A_Z) ? zimm_rs1_type : '0;
+
+  // Main ALU MUX for Operand A
+  always_comb begin : alu_operand_a_mux
+    unique case (alu_op_a_mux_sel)
+      OP_A_REG_A:  alu_operand_a = rf_rdata_a_fwd;
+      OP_A_FWD:    alu_operand_a = lsu_addr_last_i;
+      OP_A_CURRPC: alu_operand_a = pc_id_i;
+      OP_A_IMM:    alu_operand_a = imm_a;
+      //default:     alu_operand_a = pc_id_i;
+    endcase
+  end
+
+  if (BranchTargetALU) begin : g_btalu_muxes
+    // Branch target ALU operand A mux
+    always_comb begin : bt_operand_a_mux
+      unique case (bt_a_mux_sel)
+        OP_A_REG_A:  bt_a_operand_o = rf_rdata_a_fwd;
+        OP_A_CURRPC: bt_a_operand_o = pc_id_i;
+        default:     bt_a_operand_o = pc_id_i;
+      endcase
+    end
+
+    // Branch target ALU operand B mux
+    always_comb begin : bt_immediate_b_mux
+      unique case (bt_b_mux_sel)
+        IMM_B_I:         bt_b_operand_o = imm_i_type;
+        IMM_B_B:         bt_b_operand_o = imm_b_type;
+        IMM_B_J:         bt_b_operand_o = imm_j_type;
+        IMM_B_INCR_PC:   bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4;
+        default:         bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4;
+      endcase
+    end
+
+    // Reduced main ALU immediate MUX for Operand B
+    always_comb begin : immediate_b_mux
+      unique case (imm_b_mux_sel)
+        IMM_B_I:         imm_b = imm_i_type;
+        IMM_B_S:         imm_b = imm_s_type;
+        IMM_B_U:         imm_b = imm_u_type;
+        IMM_B_INCR_PC:   imm_b = instr_is_compressed_i ? 32'h2 : 32'h4;
+        IMM_B_INCR_ADDR: imm_b = 32'h4;
+        default:         imm_b = 32'h4;
+      endcase
+    end
+   
+  end else begin : g_nobtalu
+    op_a_sel_e  unused_a_mux_sel;
+    imm_b_sel_e unused_b_mux_sel;
+
+    assign unused_a_mux_sel = bt_a_mux_sel;
+    assign unused_b_mux_sel = bt_b_mux_sel;
+    assign bt_a_operand_o   = '0;
+    assign bt_b_operand_o   = '0;
+
+    // Full main ALU immediate MUX for Operand B
+    always_comb begin : immediate_b_mux
+      unique case (imm_b_mux_sel)
+        IMM_B_I:         imm_b = imm_i_type;
+        IMM_B_S:         imm_b = imm_s_type;
+        IMM_B_B:         imm_b = imm_b_type;
+        IMM_B_U:         imm_b = imm_u_type;
+        IMM_B_J:         imm_b = imm_j_type;
+        IMM_B_INCR_PC:   imm_b = instr_is_compressed_i ? 32'h2 : 32'h4;
+        IMM_B_INCR_ADDR: imm_b = 32'h4;
+        default:         imm_b = 32'h4;
+      endcase
+    end
+
+  end
+
+  // ALU MUX for Operand B
+  assign alu_operand_b = (alu_op_b_mux_sel == OP_B_IMM) ? imm_b : rf_rdata_b_fwd;
+
+  /////////////////////////////////////////
+  // Multicycle Operation Stage Register //
+  /////////////////////////////////////////
+
+  for (genvar r = 0; r < 2; r++) begin : gen_intermediate_val_reg
+    // One register per intermediate value: cleared by the asynchronous active-low reset and
+    // loaded only while its write enable from EX is set.
+    always_ff @(posedge clk_i or negedge rst_ni) begin : intermediate_val_reg
+      if (!rst_ni) begin
+        imd_val_q[r] <= '0;
+      end else begin
+        if (imd_val_we_ex_i[r]) begin
+          imd_val_q[r] <= imd_val_d_ex_i[r];
+        end
+      end
+    end
+  end
+
+  // The registered values are handed back to EX unchanged.
+  assign imd_val_q_ex_o = imd_val_q;
+
+  /////////////
+  // Decoder //
+  /////////////
+
+  // Instruction decoder instance.
+  // The RV32E / RV32M / RV32B / BranchTargetALU parameters of this module are passed straight
+  // through. The decoder is fed the instruction words from the IF-ID register
+  // (instr_rdata_i, instr_rdata_alu_i), the illegal-compressed flag, instr_first_cycle and
+  // branch_taken. Its outputs provide:
+  //  - exception/special-instruction flags for the controller (illegal, ebreak, mret, dret,
+  //    ecall, wfi) and jump_set_dec
+  //  - immediate values and the immediate / branch-target mux selects
+  //  - register file read/write addresses, read enables and the raw write enable (rf_we_dec)
+  //  - ALU, multiplier/divider, CSR and LSU control
+  //  - floating point control (rounding mode, FP register addresses, formats, operand-source
+  //    flags), plus fp_swap_oprnds and mv_instr which are consumed later in this module
+  // Connections whose net name ends in _o appear to go directly to ports of this module
+  // (port list is outside this section - confirm).
+  brq_idu_decoder #(
+      .RV32E           ( RV32E           ),
+      .RV32M           ( RV32M           ),
+      .RV32B           ( RV32B           ),
+      .BranchTargetALU ( BranchTargetALU )
+  ) decoder_i (
+      .clk_i                           ( clk_i                ),
+      .rst_ni                          ( rst_ni               ),
+
+      // controller
+      .illegal_insn_o                  ( illegal_insn_dec     ),
+      .ebrk_insn_o                     ( ebrk_insn            ),
+      .mret_insn_o                     ( mret_insn_dec        ),
+      .dret_insn_o                     ( dret_insn_dec        ),
+      .ecall_insn_o                    ( ecall_insn_dec       ),
+      .wfi_insn_o                      ( wfi_insn_dec         ),
+      .jump_set_o                      ( jump_set_dec         ),
+      .branch_taken_i                  ( branch_taken         ),
+      .icache_inval_o                  ( icache_inval_o       ),
+
+      // from IF-ID pipeline register
+      .instr_first_cycle_i             ( instr_first_cycle    ),
+      .instr_rdata_i                   ( instr_rdata_i        ),
+      .instr_rdata_alu_i               ( instr_rdata_alu_i    ),
+      .illegal_c_insn_i                ( illegal_c_insn_i     ),
+
+      // immediates
+      .imm_a_mux_sel_o                 ( imm_a_mux_sel        ),
+      .imm_b_mux_sel_o                 ( imm_b_mux_sel_dec    ),
+      .bt_a_mux_sel_o                  ( bt_a_mux_sel         ),
+      .bt_b_mux_sel_o                  ( bt_b_mux_sel         ),
+
+      .imm_i_type_o                    ( imm_i_type           ),
+      .imm_s_type_o                    ( imm_s_type           ),
+      .imm_b_type_o                    ( imm_b_type           ),
+      .imm_u_type_o                    ( imm_u_type           ),
+      .imm_j_type_o                    ( imm_j_type           ),
+      .zimm_rs1_type_o                 ( zimm_rs1_type        ),
+
+      // register file
+      .rf_wdata_sel_o                  ( rf_wdata_sel         ),
+      .rf_we_o                         ( rf_we_dec            ),
+
+      .rf_raddr_a_o                    ( rf_raddr_a_o         ),
+      .rf_raddr_b_o                    ( rf_raddr_b_o         ),
+      .rf_waddr_o                      ( rf_waddr_id_o        ),
+      .rf_ren_a_o                      ( rf_ren_a             ),
+      .rf_ren_b_o                      ( rf_ren_b             ),
+
+      // ALU
+      .alu_operator_o                  ( alu_operator         ),
+      .alu_op_a_mux_sel_o              ( alu_op_a_mux_sel_dec ),
+      .alu_op_b_mux_sel_o              ( alu_op_b_mux_sel_dec ),
+      .alu_multicycle_o                ( alu_multicycle_dec   ),
+
+      // MULT & DIV
+      .mult_en_o                       ( mult_en_dec          ),
+      .div_en_o                        ( div_en_dec           ),
+      .mult_sel_o                      ( mult_sel_ex_o        ),
+      .div_sel_o                       ( div_sel_ex_o         ),
+      .multdiv_operator_o              ( multdiv_operator     ),
+      .multdiv_signed_mode_o           ( multdiv_signed_mode  ),
+
+      // CSRs
+      .csr_access_o                    ( csr_access_o         ),
+      .csr_op_o                        ( csr_op_o             ),
+
+      // LSU
+      .data_req_o                      ( lsu_req_dec          ),
+      .data_we_o                       ( lsu_we               ),
+      .data_type_o                     ( lsu_type             ),
+      .data_sign_extension_o           ( lsu_sign_ext         ),
+
+      // jump/branches
+      .jump_in_dec_o                   ( jump_in_dec          ),
+      .branch_in_dec_o                 ( branch_in_dec        ),
+
+      // Floating point extensions IO
+      .fp_rounding_mode_o              ( fp_rounding_mode_o    ),   // defines the rounding mode 
+      .fp_rf_raddr_a_o                 ( fp_rf_raddr_a_o       ),
+      .fp_rf_raddr_b_o                 ( fp_rf_raddr_b_o       ),
+      .fp_rf_raddr_c_o                 ( fp_rf_raddr_c_o       ),
+      .fp_rf_waddr_o                   ( fp_rf_waddr_o         ),
+      .fp_rf_we_o                      ( fp_rf_we_o            ),
+      .fp_alu_operator_o               ( fp_alu_operator_o     ),
+      .fp_alu_op_mod_o                 ( fp_alu_op_mod_o       ),
+      .fp_src_fmt_o                    ( fp_src_fmt_o          ),
+      .fp_dst_fmt_o                    ( fp_dst_fmt_o          ),
+      .fp_rm_dynamic_o                 ( fp_rm_dynamic_o       ),
+      .is_fp_instr_o                   ( is_fp_instr_o         ), 
+      .use_fp_rs1_o                    ( use_fp_rs1_o          ),
+      .use_fp_rs2_o                    ( use_fp_rs2_o          ),
+      .use_fp_rs3_o                    ( use_fp_rs3_o          ),
+      .use_fp_rd_o                     ( use_fp_rd_o           ),
+      .fp_swap_oprnds_o                ( fp_swap_oprnds        ),
+      .fp_load_o                       ( fp_load_o             ),
+      .mv_instr_o                      ( mv_instr              )
+  );
+
+//  assign fpu_op_a = use_fp_rs1_o ? fp_rf_rdata_a_fwd : rf_rdata_a_fwd;
+//  assign fpu_op_b = use_fp_rs2_o ? fp_rf_rdata_b_fwd : rf_rdata_b_fwd;
+//  assign fpu_op_c = fp_rf_rdata_c_fwd;
+
+  ///////////////////////
+  // Register File MUX //
+  ///////////////////////
+
+  // Register file write enable towards the next stage: the raw enable is only passed on while
+  // the instruction is executing and its CSR access has not been flagged as illegal.
+  assign rf_we_id_o = instr_executing & ~illegal_csr_insn_i & rf_we_raw;
+  
+  // Register file write data mux.
+  // Selects the value offered to the register file write port:
+  //  - RF_WD_EX:  result_wb (execute result, or the moved operand for mv_instr)
+  //  - RF_WD_CSR: csr_rdata_i (CSR read data)
+  always_comb begin : rf_wdata_id_mux
+    unique case (rf_wdata_sel)
+      RF_WD_EX:  rf_wdata_id_o = result_wb;
+      RF_WD_CSR: rf_wdata_id_o = csr_rdata_i;
+      // Explicit default so rf_wdata_id_o is driven for every value of rf_wdata_sel: no latch
+      // can be inferred from this always_comb, and an unknown (X) select in simulation yields
+      // result_wb instead of silently holding the previous value.
+      default:   rf_wdata_id_o = result_wb;
+    endcase
+  end
+
+  //////////////////////////////////
+  // CSR-related pipeline flushes //
+  //////////////////////////////////
+  // Raises csr_pipe_flush (consumed by the controller) when the CSR instruction currently being
+  // performed (csr_op_en_o set) targets one of the CSRs listed below. The CSR address is taken
+  // from instruction bits [31:20].
+  always_comb begin : csr_pipeline_flushes
+    csr_pipe_flush = 1'b0;
+
+    // A pipeline flush is needed to let the controller react after modifying certain CSRs:
+    // - When enabling interrupts, pending IRQs become visible to the controller only during
+    //   the next cycle. If during that cycle the core disables interrupts again, it does not
+    //   see any pending IRQs and consequently does not start to handle interrupts.
+    // - When modifying debug CSRs - TODO: Check if this is really needed
+    //
+    // NOTE(review): because the first condition below captures every WRITE and SET operation,
+    // the debug-CSR branch is only reached for the remaining non-READ operations. A WRITE/SET
+    // to DCSR/DPC/DSCRATCH0/DSCRATCH1 therefore does not raise csr_pipe_flush - confirm this is
+    // intended.
+    if (csr_op_en_o == 1'b1 && (csr_op_o == CSR_OP_WRITE || csr_op_o == CSR_OP_SET)) begin
+      if (csr_num_e'(instr_rdata_i[31:20]) == CSR_MSTATUS   ||
+          csr_num_e'(instr_rdata_i[31:20]) == CSR_MIE) begin
+        csr_pipe_flush = 1'b1;
+      end
+    end else if (csr_op_en_o == 1'b1 && csr_op_o != CSR_OP_READ) begin
+      if (csr_num_e'(instr_rdata_i[31:20]) == CSR_DCSR      ||
+          csr_num_e'(instr_rdata_i[31:20]) == CSR_DPC       ||
+          csr_num_e'(instr_rdata_i[31:20]) == CSR_DSCRATCH0 ||
+          csr_num_e'(instr_rdata_i[31:20]) == CSR_DSCRATCH1) begin
+        csr_pipe_flush = 1'b1;
+      end
+    end
+  end
+
+  ////////////////
+  // Controller //
+  ////////////////
+
+  // An illegal instruction is reported only for a valid instruction that either the decoder
+  // or the CSR access check has rejected.
+  assign illegal_insn_o = (illegal_csr_insn_i | illegal_insn_dec) & instr_valid_i;
+
+  // Main controller instance.
+  // WritebackStage and BranchPredictor are passed straight through from this module's
+  // parameters. Points worth noting about the wiring done here:
+  //  - illegal_insn_i receives illegal_insn_o, i.e. the decoder/CSR illegal flags already
+  //    qualified with instr_valid_i, while the other instruction flags (ecall, mret, dret,
+  //    wfi, ebreak) come directly from the decoder.
+  //  - csr_pipe_flush_i is driven by the csr_pipeline_flushes logic of this module.
+  //  - branch_set / branch_set_spec / branch_not_set / jump_set come from the branch-set
+  //    control and the ID/EX FSM of this module.
+  //  - stall_id and stall_wb go in; flush_id, controller_run and wb_exception come back and
+  //    are used by the stall / instr_executing logic of this module.
+  brq_idu_controller #(
+    .WritebackStage  ( WritebackStage  ),
+    .BranchPredictor ( BranchPredictor )
+  ) controller_i (
+      .clk_i                          ( clk_i                   ),
+      .rst_ni                         ( rst_ni                  ),
+
+      .ctrl_busy_o                    ( ctrl_busy_o             ),
+
+      // decoder related signals
+      .illegal_insn_i                 ( illegal_insn_o          ),
+      .ecall_insn_i                   ( ecall_insn_dec          ),
+      .mret_insn_i                    ( mret_insn_dec           ),
+      .dret_insn_i                    ( dret_insn_dec           ),
+      .wfi_insn_i                     ( wfi_insn_dec            ),
+      .ebrk_insn_i                    ( ebrk_insn               ),
+      .csr_pipe_flush_i               ( csr_pipe_flush          ),
+
+      // from IF-ID pipeline
+      .instr_valid_i                  ( instr_valid_i           ),
+      .instr_i                        ( instr_rdata_i           ),
+      .instr_compressed_i             ( instr_rdata_c_i         ),
+      .instr_is_compressed_i          ( instr_is_compressed_i   ),
+      .instr_bp_taken_i               ( instr_bp_taken_i        ),
+      .instr_fetch_err_i              ( instr_fetch_err_i       ),
+      .instr_fetch_err_plus2_i        ( instr_fetch_err_plus2_i ),
+      .pc_id_i                        ( pc_id_i                 ),
+
+      // to IF-ID pipeline
+      .instr_valid_clear_o            ( instr_valid_clear_o     ),
+      .id_in_ready_o                  ( id_in_ready_o           ),
+      .controller_run_o               ( controller_run          ),
+
+      // to prefetcher
+      .instr_req_o                    ( instr_req_o             ),
+      .pc_set_o                       ( pc_set_o                ),
+      .pc_set_spec_o                  ( pc_set_spec_o           ),
+      .pc_mux_o                       ( pc_mux_o                ),
+      .nt_branch_mispredict_o         ( nt_branch_mispredict_o  ),
+      .exc_pc_mux_o                   ( exc_pc_mux_o            ),
+      .exc_cause_o                    ( exc_cause_o             ),
+
+      // LSU
+      .lsu_addr_last_i                ( lsu_addr_last_i         ),
+      .load_err_i                     ( lsu_load_err_i          ),
+      .store_err_i                    ( lsu_store_err_i         ),
+      .wb_exception_o                 ( wb_exception            ),
+
+      // jump/branch control
+      .branch_set_i                   ( branch_set              ),
+      .branch_set_spec_i              ( branch_set_spec         ),
+      .branch_not_set_i               ( branch_not_set          ),
+      .jump_set_i                     ( jump_set                ),
+
+      // interrupt signals
+      .csr_mstatus_mie_i              ( csr_mstatus_mie_i       ),
+      .irq_pending_i                  ( irq_pending_i           ),
+      .irqs_i                         ( irqs_i                  ),
+      .irq_nm_i                       ( irq_nm_i                ),
+      .nmi_mode_o                     ( nmi_mode_o              ),
+
+      // CSR Controller Signals
+      .csr_save_if_o                  ( csr_save_if_o           ),
+      .csr_save_id_o                  ( csr_save_id_o           ),
+      .csr_save_wb_o                  ( csr_save_wb_o           ),
+      .csr_restore_mret_id_o          ( csr_restore_mret_id_o   ),
+      .csr_restore_dret_id_o          ( csr_restore_dret_id_o   ),
+      .csr_save_cause_o               ( csr_save_cause_o        ),
+      .csr_mtval_o                    ( csr_mtval_o             ),
+      .priv_mode_i                    ( priv_mode_i             ),
+      .csr_mstatus_tw_i               ( csr_mstatus_tw_i        ),
+
+      // Debug Signal
+      .debug_mode_o                   ( debug_mode_o            ),
+      .debug_cause_o                  ( debug_cause_o           ),
+      .debug_csr_save_o               ( debug_csr_save_o        ),
+      .debug_req_i                    ( debug_req_i             ),
+      .debug_single_step_i            ( debug_single_step_i     ),
+      .debug_ebreakm_i                ( debug_ebreakm_i         ),
+      .debug_ebreaku_i                ( debug_ebreaku_i         ),
+      .trigger_match_i                ( trigger_match_i         ),
+
+      .stall_id_i                     ( stall_id                ),
+      .stall_wb_i                     ( stall_wb                ),
+      .flush_id_o                     ( flush_id                ),
+      .ready_wb_i                     ( ready_wb_i              ),
+
+      // Performance Counters
+      .perf_jump_o                    ( perf_jump_o             ),
+      .perf_tbranch_o                 ( perf_tbranch_o          ),
+      .fpu_busy_i                     ( fpu_busy_i              )
+  );
+
+  // The controller's ID flush is also exported for the floating point side.
+  assign fp_flush_o     = flush_id;
+
+  // Decoder-level "multiplier or divider instruction" flag.
+  assign multdiv_en_dec = div_en_dec | mult_en_dec;
+
+  // Requests and enables are only raised while the instruction is actually executing.
+  // A memory request additionally needs data_req_allowed.
+  assign lsu_req    = instr_executing & data_req_allowed & lsu_req_dec;
+  assign mult_en_id = instr_executing & mult_en_dec;
+  assign div_en_id  = instr_executing & div_en_dec;
+
+  // LSU interface. Store data is taken from fpu_op_b, i.e. the FP or the integer rs2 value
+  // depending on use_fp_rs2_o (previously rf_rdata_b_fwd).
+  assign lsu_wdata_o    = fpu_op_b;
+  assign lsu_sign_ext_o = lsu_sign_ext;
+  assign lsu_type_o     = lsu_type;
+  assign lsu_we_o       = lsu_we;
+  assign lsu_req_o      = lsu_req;
+
+  // csr_op_en_o is set when the CSR access should actually happen.
+  // csr_access_o is set whenever a CSR access instruction is present and is what the illegal
+  // CSR access check uses. Using csr_op_en_o alone for that check would create a combinational
+  // loop: asserting it for an illegal CSR access would cause a flush, which in turn would have
+  // to deassert it.
+  assign csr_op_en_o = instr_id_done_o & instr_executing & csr_access_o;
+
+  // Main ALU operator and operands
+  assign alu_operand_b_ex_o = alu_operand_b;
+  assign alu_operand_a_ex_o = alu_operand_a;
+  assign alu_operator_ex_o  = alu_operator;
+
+  // Multiplier / divider enables, mode and operands (always the integer register values)
+  assign div_en_ex_o              = div_en_id;
+  assign mult_en_ex_o             = mult_en_id;
+  assign multdiv_signed_mode_ex_o = multdiv_signed_mode;
+  assign multdiv_operator_ex_o    = multdiv_operator;
+  assign multdiv_operand_b_ex_o   = rf_rdata_b_fwd;
+  assign multdiv_operand_a_ex_o   = rf_rdata_a_fwd;
+
+  ////////////////////////
+  // Branch set control //
+  ////////////////////////
+
+  if (BranchTargetALU && !DataIndTiming) begin : g_branch_set_direct
+    // Branch target ALU present and no fixed time execution support: the branch request is
+    // handed to the controller in the same cycle in which it is generated.
+    assign branch_set_spec = branch_spec;
+    assign branch_set      = branch_set_d;
+  end else begin : g_branch_set_flop
+    // No branch target ALU, or fixed time execution is supported: keep a registered copy of
+    // the branch request so it can be used in the following cycle, when the branch target is
+    // calculated.
+    logic branch_set_q;
+    // Set when the unregistered request may be used after all (branch target ALU present and
+    // fixed time execution currently switched off).
+    logic branch_set_bypass;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        branch_set_q <= 1'b0;
+      end else begin
+        branch_set_q <= branch_set_d;
+      end
+    end
+
+    assign branch_set_bypass = BranchTargetALU && !data_ind_timing_i;
+
+    // In fixed time execution mode branches always take two cycles, with or without the
+    // branch target ALU; this avoids a path from the branch decision into the branch target
+    // ALU operand muxing.
+    assign branch_set      = branch_set_bypass ? branch_set_d : branch_set_q;
+    // The speculative request is only meaningful together with the branch target ALU.
+    assign branch_set_spec = branch_set_bypass ? branch_spec  : branch_set_q;
+  end
+
+  // Branch condition is calculated in the first cycle and flopped for use in the second cycle
+  // (only used in fixed time execution mode to determine branch destination).
+  if (DataIndTiming) begin : g_sec_branch_taken
+    // Registered copy of the branch decision, captured every cycle.
+    logic branch_taken_q;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        branch_taken_q <= 1'b0;
+      end else begin
+        branch_taken_q <= branch_decision_i;
+      end
+    end
+
+    // The registered decision only matters while fixed time execution is switched on;
+    // otherwise the branch is reported as taken.
+    assign branch_taken = data_ind_timing_i ? branch_taken_q : 1'b1;
+
+  end else begin : g_nosec_branch_taken
+
+    // No fixed time execution support: branch_set is only ever triggered by taken branches,
+    // so the signal is tied high.
+    assign branch_taken = 1'b1;
+
+  end
+
+  // Holding branch_set/jump_set high for more than one cycle should not cause a functional issue.
+  // However, it could generate needless prefetch buffer flushes and instruction fetches. The
+  // ID/EX design ensures that this never happens for non-predicted branches.
+
+
+  ///////////////
+  // ID-EX FSM //
+  ///////////////
+
+  // Two-state FSM encoding: id_fsm_q is the registered state, id_fsm_d the next state computed
+  // by the combinational block below.
+  typedef enum logic { FIRST_CYCLE, MULTI_CYCLE } id_fsm_e;
+  id_fsm_e id_fsm_q, id_fsm_d;
+
+  // State register: asynchronously cleared to FIRST_CYCLE by the active-low reset, otherwise
+  // loaded with the next state on every rising clock edge.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : id_pipeline_reg
+    if (!rst_ni) begin
+      id_fsm_q            <= FIRST_CYCLE;
+    end else begin
+      id_fsm_q            <= id_fsm_d;
+    end
+  end
+
+  // ID/EX stage can be in two states, FIRST_CYCLE and MULTI_CYCLE. An instruction enters
+  // MULTI_CYCLE if it requires multiple cycles to complete regardless of stalls and other
+  // considerations. An instruction may be held in FIRST_CYCLE if it's unable to begin executing
+  // (this is controlled by instr_executing).
+  //
+  // Besides the next state this block produces the per-cause stall flags (stall_multdiv,
+  // stall_jump, stall_branch, stall_alu), the branch/jump requests (branch_set_d, branch_spec,
+  // branch_not_set, jump_set), the raw register write enable rf_we_raw and perf_branch_o.
+  // All of them are given their idle value first, so nothing is requested while
+  // instr_executing is low.
+
+  always_comb begin
+    // Defaults: hold state, pass the decoder write enable through, no stalls, no requests
+    id_fsm_d                = id_fsm_q;
+    rf_we_raw               = rf_we_dec;
+    stall_multdiv           = 1'b0;
+    stall_jump              = 1'b0;
+    stall_branch            = 1'b0;
+    stall_alu               = 1'b0;
+    branch_set_d            = 1'b0;
+    branch_spec             = 1'b0;
+    branch_not_set          = 1'b0;
+    jump_set                = 1'b0;
+    perf_branch_o           = 1'b0;
+
+    if (instr_executing) begin
+      unique case (id_fsm_q)
+        FIRST_CYCLE: begin
+          // One-hot style dispatch on the decoded instruction class; the default arm covers
+          // instructions matching none of the classes below.
+          unique case (1'b1)
+            lsu_req_dec: begin
+              if (!WritebackStage) begin
+                // LSU operation
+                id_fsm_d    = MULTI_CYCLE;
+              end else begin
+                // With a writeback stage only go multi-cycle while the request is not done
+                if(~lsu_req_done_i) begin
+                  id_fsm_d  = MULTI_CYCLE;
+                end
+              end
+            end
+            multdiv_en_dec: begin
+              // MUL or DIV operation
+              if (~ex_valid_i) begin
+                // When single-cycle multiply is configured mul can finish in the first cycle so
+                // only enter MULTI_CYCLE state if a result isn't immediately available
+                id_fsm_d      = MULTI_CYCLE;
+                rf_we_raw     = 1'b0;
+                stall_multdiv = 1'b1;
+              end
+            end
+            branch_in_dec: begin
+              // cond branch operation
+              // All branches take two cycles in fixed time execution mode, regardless of branch
+              // condition.
+              id_fsm_d      = (data_ind_timing_i || (!BranchTargetALU && branch_decision_i)) ?
+                                  MULTI_CYCLE : FIRST_CYCLE;
+              stall_branch  = (~BranchTargetALU & branch_decision_i) | data_ind_timing_i;
+              branch_set_d  = branch_decision_i | data_ind_timing_i;
+
+              // Not-taken indication is only produced when a branch predictor is configured
+              if (BranchPredictor) begin
+                branch_not_set = ~branch_decision_i;
+              end
+
+              // Speculative branch (excludes branch_decision_i)
+              branch_spec   = SpecBranch ? 1'b1 : branch_decision_i;
+              perf_branch_o = 1'b1;
+            end
+            jump_in_dec: begin
+              // uncond branch operation
+              // BTALU means jumps only need one cycle
+              id_fsm_d      = BranchTargetALU ? FIRST_CYCLE : MULTI_CYCLE;
+              stall_jump    = ~BranchTargetALU;
+              jump_set      = jump_set_dec;
+            end
+            alu_multicycle_dec: begin
+              // Multi-cycle ALU operation: stall and hold back the register write
+              stall_alu     = 1'b1;
+              id_fsm_d      = MULTI_CYCLE;
+              rf_we_raw     = 1'b0;
+            end
+            default: begin
+              id_fsm_d      = FIRST_CYCLE;
+            end
+          endcase
+        end
+
+        MULTI_CYCLE: begin
+          // For MUL/DIV the register write is held back until the EX result is valid
+          if(multdiv_en_dec) begin
+            rf_we_raw       = rf_we_dec & ex_valid_i;
+          end
+
+          // Leave MULTI_CYCLE once the operation is done and writeback is ready; until then
+          // keep the stall matching the instruction class asserted.
+          if (multicycle_done & ready_wb_i) begin
+            id_fsm_d        = FIRST_CYCLE;
+          end else begin
+            stall_multdiv   = multdiv_en_dec;
+            stall_branch    = branch_in_dec;
+            stall_jump      = jump_in_dec;
+          end
+        end
+
+       // No default arm: id_fsm_e has exactly the two states handled above, and id_fsm_d is
+       // pre-assigned at the top of the block.
+       // default: begin
+       //   id_fsm_d          = FIRST_CYCLE;
+       // end
+      endcase
+    end
+  end
+
+  // The multiplier/divider ready indication simply follows the writeback ready input.
+  // Note for the two-stage configuration ready_wb_i is always set.
+  assign multdiv_ready_id_o = ready_wb_i;
+
+
+  // ID/EX is stalled when any cause tied to the instruction currently in ID/EX is active:
+  // a multi-cycle ALU / branch / jump / mul-div operation, a memory stall or a load hazard.
+  assign stall_id = stall_alu | stall_branch | stall_jump | stall_multdiv | stall_mem |
+                      stall_ld_hz;
+
+  // An executing instruction is done once it is neither stalled nor flushed.
+  assign instr_done = instr_executing & ~(stall_id | flush_id);
+
+  // Flags that the instruction in ID is in its first cycle. A stalled instruction can remain
+  // in its first cycle.
+  assign instr_first_cycle      = (id_fsm_q == FIRST_CYCLE) & instr_valid_i;
+  // Exported copy: used by RVFI to know when to capture register read data, and by the ALU to
+  // access RS3 for ternary instructions.
+  assign instr_first_cycle_id_o = instr_first_cycle;
+
+  if (WritebackStage) begin : gen_stall_mem
+    // Register read address matches write address in WB
+    // (the rf_* matches exclude register 0, the fp_* matches do not - see assigns below)
+    logic rf_rd_a_wb_match;
+    logic rf_rd_b_wb_match;
+    logic fp_rf_rd_a_wb_match;
+    logic fp_rf_rd_b_wb_match;
+    logic fp_rf_rd_c_wb_match;
+    // Hazard between registers being read and written
+    logic rf_rd_a_hz;
+    logic rf_rd_b_hz;
+    logic rf_rd_c_hz;
+
+    logic outstanding_memory_access;
+
+    logic instr_kill;
+
+    // A load/store is done as soon as it no longer stalls for memory; every other multi-cycle
+    // operation is done when EX reports a valid result.
+    assign multicycle_done = lsu_req_dec ? ~stall_mem : ex_valid_i;
+
+    // Is a memory access ongoing that isn't finishing this cycle
+    assign outstanding_memory_access = (outstanding_load_wb_i | outstanding_store_wb_i) &
+                                       ~lsu_resp_valid_i;
+
+    // Can start a new memory access if any previous one has finished or is finishing
+    assign data_req_allowed = ~outstanding_memory_access;
+
+    // Instruction won't execute because:
+    // - There is a pending exception in writeback
+    //   The instruction in ID/EX will be flushed and the core will jump to an exception handler
+    // - The controller isn't running instructions
+    //   This either happens in preparation for a flush and jump to an exception handler e.g. in
+    //   response to an IRQ or debug request or whilst the core is sleeping or resetting/fetching
+    //   first instruction in which case any valid instruction in ID/EX should be ignored.
+    // - There was an error on instruction fetch
+    assign instr_kill = instr_fetch_err_i |
+                        wb_exception      |
+                        ~controller_run;
+
+    // With writeback stage instructions must be prevented from executing if there is:
+    // - A load hazard
+    // - A pending memory access
+    //   If it receives an error response this results in a precise exception from WB so ID/EX
+    //   instruction must not execute until error response is known).
+    // - A load/store error
+    //   This will cause a precise exception for the instruction in WB so ID/EX instruction must not
+    //   execute
+    assign instr_executing = instr_valid_i              &
+                             ~instr_kill                &
+                             ~stall_ld_hz               &
+                             ~outstanding_memory_access;
+
+
+    // Stall for reasons related to memory:
+    // * There is an outstanding memory access that won't resolve this cycle (need to wait to allow
+    //   precise exceptions)
+    // * There is a load/store request not being granted or which is unaligned and waiting to issue
+    //   a second request (needs to stay in ID for the address calculation)
+    assign stall_mem = instr_valid_i &
+                       (outstanding_memory_access | (lsu_req_dec & ~lsu_req_done_i));
+
+    // If we stall a load in ID for any reason, it must not make an LSU request
+    // (otherwise we might issue two requests for the same instruction)
+    // NOTE(review): no statement follows this remark here; lsu_req is gated with
+    // instr_executing and data_req_allowed elsewhere in this module.
+ 
+
+    // Integer read ports: address equals the WB write address and is not register 0
+    assign rf_rd_a_wb_match = (rf_waddr_wb_i == rf_raddr_a_o) & |rf_raddr_a_o;
+    assign rf_rd_b_wb_match = (rf_waddr_wb_i == rf_raddr_b_o) & |rf_raddr_b_o;
+
+    // FP read ports: plain address comparison, register 0 included. Ports a and b compare the
+    // same rf_raddr_a_o / rf_raddr_b_o addresses as the integer ports; port c uses
+    // fp_rf_raddr_c_o.
+    assign fp_rf_rd_a_wb_match = (rf_waddr_wb_i == rf_raddr_a_o);
+    assign fp_rf_rd_b_wb_match = (rf_waddr_wb_i == rf_raddr_b_o);
+    
+    assign fp_rf_rd_c_wb_match = (rf_waddr_wb_i == fp_rf_raddr_c_o); 
+
+    assign rf_rd_a_wb_match_o = rf_rd_a_wb_match;
+    assign rf_rd_b_wb_match_o = rf_rd_b_wb_match;
+
+    // Load-use hazard detection: if the instruction reads a register that the load in WB is
+    // going to write, it must stall in ID until the load is complete.
+    // - Integer sources (rf_ren_a / rf_ren_b) use the matches that exclude register 0, so
+    //   reading the zero register never stalls.
+    // - FP sources use the unqualified FP matches, because register 0 of the FP register file
+    //   is an ordinary register (the FP forwarding paths below use the same matches).
+    assign rf_rd_a_hz = (rf_rd_a_wb_match & rf_ren_a) | (fp_rf_rd_a_wb_match & use_fp_rs1_o);
+    assign rf_rd_b_hz = (rf_rd_b_wb_match & rf_ren_b) | (fp_rf_rd_b_wb_match & use_fp_rs2_o);
+    // The third FP source is addressed by fp_rf_raddr_c_o, so its hazard has to be derived
+    // from the rs3 match, not from the rs2 match.
+    assign rf_rd_c_hz = fp_rf_rd_c_wb_match & use_fp_rs3_o;
+
+    // If instruction is read register that writeback is writing forward writeback data to read
+    // data. Note this doesn't factor in load data as it arrives too late, such hazards are
+    // resolved via a stall (see above).
+
+    // Integer forwarding is gated by rf_write_wb_i
+    assign rf_rdata_a_fwd = rf_rd_a_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_a_i;
+    assign rf_rdata_b_fwd = rf_rd_b_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_b_i;
+    
+    // forwarding for floating point unit
+    // (gated by fp_rf_write_wb_i and sourced from fp_rf_wdata_fwd_wb_i)
+    assign fp_rf_rdata_a_fwd = fp_rf_rd_a_wb_match & fp_rf_write_wb_i ? fp_rf_wdata_fwd_wb_i : fp_rf_rdata_a_i;
+    assign fp_rf_rdata_b_fwd = fp_rf_rd_b_wb_match & fp_rf_write_wb_i ? fp_rf_wdata_fwd_wb_i : fp_rf_rdata_b_i;
+    assign fp_rf_rdata_c_fwd = fp_rf_rd_c_wb_match & fp_rf_write_wb_i ? fp_rf_wdata_fwd_wb_i : fp_rf_rdata_c_i; 
+
+    // Load hazard stall: only while a load is outstanding in WB and one of the read hazards
+    // is flagged
+    assign stall_ld_hz = outstanding_load_wb_i & (rf_rd_a_hz | rf_rd_b_hz | rf_rd_c_hz);
+
+    // Instruction class handed to writeback: store, load, or anything else
+    assign instr_type_wb_o = ~lsu_req_dec ? WB_INSTR_OTHER :
+                              lsu_we      ? WB_INSTR_STORE :
+                                            WB_INSTR_LOAD;
+
+    // The instruction leaves ID when it is offered to writeback and writeback accepts it
+    assign instr_id_done_o = en_wb_o & ready_wb_i;
+
+    // Stall ID/EX as instruction in ID/EX cannot proceed to writeback yet
+    assign stall_wb = en_wb_o & ~ready_wb_i;
+
+    // Performance counter: a valid, not-killed instruction is waiting on the data side
+    assign perf_dside_wait_o = instr_valid_i & ~instr_kill &
+                               (outstanding_memory_access | stall_ld_hz);
+  end else begin : gen_no_stall_mem
+
+    // A load/store is done when the LSU response arrives; every other multi-cycle operation is
+    // done when EX reports a valid result.
+    assign multicycle_done = lsu_req_dec ? lsu_resp_valid_i : ex_valid_i;
+
+    // A memory request may only be made in the first cycle of the instruction
+    assign data_req_allowed = instr_first_cycle;
+
+    // Without Writeback Stage always stall the first cycle of a load/store.
+    // Then stall until it is complete
+    assign stall_mem = instr_valid_i & (lsu_req_dec & (~lsu_resp_valid_i | instr_first_cycle));
+
+    // No load hazards without Writeback Stage
+    assign stall_ld_hz   = 1'b0;
+
+    // Without writeback stage any valid instruction that hasn't seen an error will execute
+    // (provided the controller is running, see controller_run)
+    assign instr_executing = instr_valid_i & ~instr_fetch_err_i & controller_run;
+
+
+    // No data forwarding without writeback stage so always take source register data direct from
+    // register file
+    assign rf_rdata_a_fwd = rf_rdata_a_i;
+    assign rf_rdata_b_fwd = rf_rdata_b_i;
+
+    // Same for the floating point read data
+    assign fp_rf_rdata_a_fwd = fp_rf_rdata_a_i;
+    assign fp_rf_rdata_b_fwd = fp_rf_rdata_b_i;
+    assign fp_rf_rdata_c_fwd = fp_rf_rdata_c_i;
+
+    assign rf_rd_a_wb_match_o = 1'b0;
+    assign rf_rd_b_wb_match_o = 1'b0;
+
+    // Unused Writeback stage only IO & wiring
+    // Assign inputs and internal wiring to unused signals to satisfy lint checks
+    // Tie-off outputs to constant values
+    // NOTE(review): the FP writeback inputs (fp_rf_write_wb_i, fp_rf_wdata_fwd_wb_i) are not
+    // consumed in this branch and have no unused_* sink here - confirm lint is clean.
+    logic unused_data_req_done_ex;
+    logic [4:0] unused_rf_waddr_wb;
+    logic unused_rf_write_wb;
+    logic unused_outstanding_load_wb;
+    logic unused_outstanding_store_wb;
+    logic unused_wb_exception;
+    logic [31:0] unused_rf_wdata_fwd_wb;
+
+    assign unused_data_req_done_ex     = lsu_req_done_i;
+    assign unused_rf_waddr_wb          = rf_waddr_wb_i;
+    assign unused_rf_write_wb          = rf_write_wb_i;
+    assign unused_outstanding_load_wb  = outstanding_load_wb_i;
+    assign unused_outstanding_store_wb = outstanding_store_wb_i;
+    assign unused_wb_exception         = wb_exception;
+    assign unused_rf_wdata_fwd_wb      = rf_wdata_fwd_wb_i;
+
+    // No writeback stage: the instruction class is constant and writeback never stalls ID/EX
+    assign instr_type_wb_o = WB_INSTR_OTHER;
+    assign stall_wb        = 1'b0;
+
+    // Performance counter: an executing load/store is waiting for its response
+    assign perf_dside_wait_o = instr_executing & lsu_req_dec & ~lsu_resp_valid_i;
+
+    // The instruction leaves ID as soon as it is done
+    assign instr_id_done_o = instr_done;
+  end
+  
+    /* Swap operands */
+  always_comb begin : swapping
+    // Operands A and B are read from the FP register file when the decoder marks the
+    // corresponding source as FP, and from the integer register file otherwise.
+    fpu_op_a = use_fp_rs1_o ? fp_rf_rdata_a_fwd : rf_rdata_a_fwd;
+    fpu_op_b = use_fp_rs2_o ? fp_rf_rdata_b_fwd : rf_rdata_b_fwd;
+
+    // Operand C is normally the third FP source; a swap request replaces it with operand A.
+    fpu_op_c = fp_rf_rdata_c_fwd;
+    if (fp_swap_oprnds) begin
+      fpu_op_c = fpu_op_a;
+    end
+
+    // Packed operand bundle: C in the upper part, then B, with A in the lower part.
+    fp_operands_o = {fpu_op_c, fpu_op_b, fpu_op_a};
+  end
+   
+  // Value offered for register writeback: the EX result, except for move instructions, which
+  // pass operand A through.
+  assign result_wb = ~mv_instr ? result_ex_i : fpu_op_a;
+
+  // Tells the performance counters whether this instruction counts as retired in minstret.
+  // Traps, as well as ebreak and ecall instructions, are not counted.
+  assign instr_perf_count_id_o = ~(ebrk_insn | ecall_insn_dec | illegal_insn_dec |
+      illegal_csr_insn_i | instr_fetch_err_i);
+
+  // A finished instruction is ready to move to the writeback stage (or to retire when there
+  // is no writeback stage).
+  assign en_wb_o = instr_done;
+
+  // Multiplier / divider wait indications: the mul-div stall split by operation kind.
+  assign perf_div_wait_o = div_en_dec  & stall_multdiv;
+  assign perf_mul_wait_o = mult_en_dec & stall_multdiv;
+
+
+endmodule
diff --git a/verilog/rtl/brq_idu_controller.sv b/verilog/rtl/brq_idu_controller.sv
new file mode 100644
index 0000000..c35f470
--- /dev/null
+++ b/verilog/rtl/brq_idu_controller.sv
@@ -0,0 +1,815 @@
+/**
+ * Main controller of the processor
+ */
+
+module brq_idu_controller #(
+    parameter bit WritebackStage  = 0,
+    parameter bit BranchPredictor = 0
+ ) (
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    output logic                  ctrl_busy_o,           // core is busy processing instrs
+
+    // decoder related signals
+    input  logic                  illegal_insn_i,          // decoder has an invalid instr
+    input  logic                  ecall_insn_i,            // decoder has ECALL instr
+    input  logic                  mret_insn_i,             // decoder has MRET instr
+    input  logic                  dret_insn_i,             // decoder has DRET instr
+    input  logic                  wfi_insn_i,              // decoder has WFI instr
+    input  logic                  ebrk_insn_i,             // decoder has EBREAK instr
+    input  logic                  csr_pipe_flush_i,        // do CSR-related pipeline flush
+
+    // instr from IF-ID pipeline stage
+    input  logic                  instr_valid_i,           // instr is valid
+    input  logic [31:0]           instr_i,                 // uncompressed instr data for mtval
+    input  logic [15:0]           instr_compressed_i,      // instr compressed data for mtval
+    input  logic                  instr_is_compressed_i,   // instr is compressed
+    input  logic                  instr_bp_taken_i,        // instr was predicted taken branch
+    input  logic                  instr_fetch_err_i,       // instr has error
+    input  logic                  instr_fetch_err_plus2_i, // fetch error address is PC + 2
+    input  logic [31:0]           pc_id_i,                 // instr address
+
+    // to IF-ID pipeline stage
+    output logic                  instr_valid_clear_o,     // kill instr in IF-ID reg
+    output logic                  id_in_ready_o,           // ID stage is ready for new instr
+    output logic                  controller_run_o,        // Controller is in standard instruction
+                                                           // run mode
+
+    // to prefetcher
+    output logic                  instr_req_o,             // start fetching instructions
+    output logic                  pc_set_o,                // jump to address set by pc_mux
+    output logic                  pc_set_spec_o,           // speculative branch
+    output brq_pkg::pc_sel_e      pc_mux_o,                // IF stage fetch address selector
+                                                           // (boot, normal, exception...)
+    output logic                  nt_branch_mispredict_o,  // Not-taken branch in ID/EX was
+                                                           // mispredicted (predicted taken)
+    output brq_pkg::exc_pc_sel_e exc_pc_mux_o,            // IF stage selector for exception PC
+    output brq_pkg::exc_cause_e  exc_cause_o,             // for IF stage, CSRs
+
+    // LSU
+    input  logic [31:0]           lsu_addr_last_i,         // for mtval
+    input  logic                  load_err_i,
+    input  logic                  store_err_i,
+    output logic                  wb_exception_o,          // Instruction in WB taking an exception
+
+    // jump/branch signals
+    input  logic                  branch_set_i,            // branch set signal (branch definitely
+                                                           // taken)
+    input  logic                  branch_set_spec_i,       // speculative branch signal (branch
+                                                           // may be taken)
+    input  logic                  branch_not_set_i,        // branch is definitely not taken
+    input  logic                  jump_set_i,              // jump taken set signal
+
+    // interrupt signals
+    input  logic                  csr_mstatus_mie_i,       // M-mode interrupt enable bit
+    input  logic                  irq_pending_i,           // interrupt request pending
+    input  brq_pkg::irqs_t        irqs_i,                  // interrupt requests qualified with
+                                                           // mie CSR
+    input  logic                  irq_nm_i,                // non-maskable interrupt
+    output logic                  nmi_mode_o,              // core executing NMI handler
+
+    // debug signals
+    input  logic                  debug_req_i,
+    output brq_pkg::dbg_cause_e   debug_cause_o,
+    output logic                  debug_csr_save_o,
+    output logic                  debug_mode_o,
+    input  logic                  debug_single_step_i,
+    input  logic                  debug_ebreakm_i,
+    input  logic                  debug_ebreaku_i,
+    input  logic                  trigger_match_i,
+
+    output logic                  csr_save_if_o,
+    output logic                  csr_save_id_o,
+    output logic                  csr_save_wb_o,
+    output logic                  csr_restore_mret_id_o,
+    output logic                  csr_restore_dret_id_o,
+    output logic                  csr_save_cause_o,
+    output logic [31:0]           csr_mtval_o,
+    input  brq_pkg::priv_lvl_e    priv_mode_i,
+    input  logic                  csr_mstatus_tw_i,
+
+    // stall & flush signals
+    input  logic                  stall_id_i,
+    input  logic                  stall_wb_i,
+    output logic                  flush_id_o,
+    input  logic                  ready_wb_i,
+
+    // performance monitors
+    output logic                  perf_jump_o,             // we are executing a jump
+                                                           // instruction (j, jr, jal, jalr)
+    output logic                  perf_tbranch_o,          // we are executing a taken branch
+                                                           // instruction
+    input  logic                  fpu_busy_i
+);
+  import brq_pkg::*;
+
+  // FSM state encoding
+  typedef enum logic [3:0] {
+    RESET, BOOT_SET, WAIT_SLEEP, SLEEP, FIRST_FETCH, DECODE, FLUSH,
+    IRQ_TAKEN, DBG_TAKEN_IF, DBG_TAKEN_ID
+  } ctrl_fsm_e;
+
+  ctrl_fsm_e ctrl_fsm_cs, ctrl_fsm_ns;
+
+  logic nmi_mode_q, nmi_mode_d;
+  logic debug_mode_q, debug_mode_d;
+  logic load_err_q, load_err_d;
+  logic store_err_q, store_err_d;
+  logic exc_req_q, exc_req_d;
+  logic illegal_insn_q, illegal_insn_d;
+
+  // Of the various exception/fault signals, which one takes priority in FLUSH and hence controls
+  // what happens next (setting exc_cause, csr_mtval etc)
+  logic instr_fetch_err_prio;
+  logic illegal_insn_prio;
+  logic ecall_insn_prio;
+  logic ebrk_insn_prio;
+  logic store_err_prio;
+  logic load_err_prio;
+
+  logic stall;
+  logic halt_if;
+  logic retain_id;
+  logic flush_id;
+  logic illegal_dret;
+  logic illegal_umode;
+  logic exc_req_lsu;
+  logic special_req_all;
+  logic special_req_branch;
+  logic enter_debug_mode;
+  logic ebreak_into_debug;
+  logic handle_irq;
+
+  logic [3:0] mfip_id;
+  logic       unused_irq_timer;
+
+  logic ecall_insn;
+  logic mret_insn;
+  logic dret_insn;
+  logic wfi_insn;
+  logic ebrk_insn;
+  logic csr_pipe_flush;
+  logic instr_fetch_err;
+
+`ifndef SYNTHESIS
+  // synopsys translate_off
+  // Simulation-only report of illegal instructions seen in DECODE. The check runs on the falling
+  // clock edge, i.e. late in the cycle, so that glitches do not generate messages.
+  // Only signals of this module are printed (%m gives the instance path), so the controller
+  // elaborates regardless of the names of the modules above it. The instruction value shown is
+  // the same one the FLUSH state writes to csr_mtval_o for an illegal instruction.
+  // A plain always is used because this process infers no flops.
+  always @(negedge clk_i) begin
+    // print warning in case of decoding errors
+    if ((ctrl_fsm_cs == DECODE) && instr_valid_i && !instr_fetch_err_i && illegal_insn_d) begin
+      $display("%t: Illegal instruction (%m) at PC 0x%h: 0x%h", $time, pc_id_i,
+               instr_is_compressed_i ? {16'b0, instr_compressed_i} : instr_i);
+    end
+  end
+  // synopsys translate_on
+`endif
+
+  ////////////////
+  // Exceptions //
+  ////////////////
+
+  assign store_err_d = store_err_i;
+  assign load_err_d  = load_err_i;
+
+  // The decoder does not take instr_valid_i into account, so it is factored in here (the fetch
+  // error and CSR pipeline flush requests are gated the same way).
+  assign instr_fetch_err = instr_valid_i & instr_fetch_err_i;
+  assign csr_pipe_flush  = instr_valid_i & csr_pipe_flush_i;
+  assign ebrk_insn       = instr_valid_i & ebrk_insn_i;
+  assign wfi_insn        = instr_valid_i & wfi_insn_i;
+  assign dret_insn       = instr_valid_i & dret_insn_i;
+  assign mret_insn       = instr_valid_i & mret_insn_i;
+  assign ecall_insn      = instr_valid_i & ecall_insn_i;
+
+  // "Executing DRET outside of Debug Mode causes an illegal instruction exception."
+  // [Debug Spec v0.13.2, p.41]
+  assign illegal_dret = ~debug_mode_q & dret_insn;
+
+  // Instructions restricted to M-Mode: MRET always, and WFI while csr_mstatus_tw_i is set
+  // (TW means trap WFI to M-Mode).
+  assign illegal_umode = (mret_insn | (wfi_insn & csr_mstatus_tw_i)) &
+                         (priv_mode_i != PRIV_LVL_M);
+
+  // illegal_insn_d feeds the illegal_insn_q flop, which exists to help timing: it breaks the
+  // path from brq_cs_registers/illegal_csr_insn_o to pc_set_o. The term is forced low while the
+  // controller is in FLUSH so the flop does not remain set once the illegal instruction has
+  // been handled.
+  // illegal_dret and illegal_umode are qualified by instr_valid_i through the gated signals
+  // above; illegal_insn_i is presumably qualified by its driver (TODO confirm).
+  assign illegal_insn_d = (ctrl_fsm_cs != FLUSH) &
+                          (illegal_umode | illegal_dret | illegal_insn_i);
+
+  // Exception requests of the instruction in ID.
+  // The request is flopped in exc_req_q. Like illegal_insn_d it is forced low in the FLUSH
+  // state, so that in the following cycle exc_req_q is not still set for an exception request
+  // that has just been handled.
+  // All instruction terms in this expression are qualified by instr_valid_i.
+  assign exc_req_d = (ctrl_fsm_cs != FLUSH) &
+                     (instr_fetch_err | illegal_insn_d | ebrk_insn | ecall_insn);
+
+  // LSU exception requests (taken straight from the error inputs, not flopped)
+  assign exc_req_lsu = load_err_i | store_err_i;
+
+
+  // Special requests: special instructions, pipeline flushes, exceptions...
+
+  // To avoid creating a path from data_err_i -> instr_req_o and to help timing, special_req_all
+  // below has a second version that only applies to branches. For a branch the controller needs
+  // to set pc_set_o, but only if there is no special request. If the generic special_req_all
+  // signal were used, a variety of signals that can never cause a special request during a
+  // branch instruction would end up factored into pc_set_o. special_req_branch only considers
+  // the special request reasons that are relevant to a branch.
+
+  // Generic special request signal, applies to all instructions.
+  // Every term is qualified by instr_valid_i except exc_req_lsu, which can come from the
+  // Writeback stage with no instr_valid_i from the ID stage.
+  assign special_req_all = exc_req_lsu | exc_req_d | csr_pipe_flush |
+      wfi_insn | dret_insn | mret_insn;
+
+  // Special request that can specifically occur during branch instructions.
+  // The fetch error term is qualified by instr_valid_i.
+  assign special_req_branch = (ctrl_fsm_cs != FLUSH) & instr_fetch_err;
+
+
+  // Exception/fault prioritisation is taken from Table 3.7 of Privileged Spec v1.11.
+  // Both variants clear all six *_prio flags and then raise at most one of them: the first
+  // active request in the if / else-if chain wins.
+  if (WritebackStage) begin : g_wb_exceptions
+    always_comb begin
+      {instr_fetch_err_prio, illegal_insn_prio, ecall_insn_prio,
+       ebrk_insn_prio, store_err_prio, load_err_prio} = '0;
+
+      // Note that with the writeback stage store/load errors occur on the instruction in
+      // writeback, all other exceptions/faults occur on the instruction in ID/EX. The faults
+      // from writeback must take priority as that instruction is architecturally ordered
+      // before the one in ID/EX.
+      if      (store_err_q)     store_err_prio       = 1'b1;
+      else if (load_err_q)      load_err_prio        = 1'b1;
+      else if (instr_fetch_err) instr_fetch_err_prio = 1'b1;
+      else if (illegal_insn_q)  illegal_insn_prio    = 1'b1;
+      else if (ecall_insn)      ecall_insn_prio      = 1'b1;
+      else if (ebrk_insn)       ebrk_insn_prio       = 1'b1;
+    end
+
+    // Instruction in writeback is generating an exception so instruction in ID must not execute.
+    // Both the incoming and the registered LSU error flags are included.
+    assign wb_exception_o = store_err_i | load_err_i | store_err_q | load_err_q;
+  end else begin : g_no_wb_exceptions
+    always_comb begin
+      {instr_fetch_err_prio, illegal_insn_prio, ecall_insn_prio,
+       ebrk_insn_prio, store_err_prio, load_err_prio} = '0;
+
+      // Without a writeback stage the ID/EX requests are checked first, followed by the
+      // registered store error and finally the registered load error.
+      if      (instr_fetch_err) instr_fetch_err_prio = 1'b1;
+      else if (illegal_insn_q)  illegal_insn_prio    = 1'b1;
+      else if (ecall_insn)      ecall_insn_prio      = 1'b1;
+      else if (ebrk_insn)       ebrk_insn_prio       = 1'b1;
+      else if (store_err_q)     store_err_prio       = 1'b1;
+      else if (load_err_q)      load_err_prio        = 1'b1;
+    end
+
+    // No writeback stage, so there is never a writeback exception to report.
+    assign wb_exception_o = 1'b0;
+  end
+
+
+  ////////////////
+  // Interrupts //
+  ////////////////
+
+  // Debug mode is entered (only when not already in it) on an external debug_req_i, on a
+  // trigger match (hardware breakpoint), or when single stepping (dcsr.step == 1).
+  // Single step must be qualified with instruction valid, otherwise the core would enter debug
+  // mode immediately due to a recently flushed IF (or a delay in an instruction returning from
+  // memory) before it has had anything to single step.
+  assign enter_debug_mode = ~debug_mode_q &
+      (trigger_match_i | debug_req_i | (instr_valid_i & debug_single_step_i));
+
+  // Set when an ebreak should enter debug mode rather than jump to the exception handler.
+  // The enable is picked per privilege level; levels other than M and U never enter debug mode
+  // this way.
+  assign ebreak_into_debug = (priv_mode_i == PRIV_LVL_M) ? debug_ebreakm_i :
+                             (priv_mode_i == PRIV_LVL_U) ? debug_ebreaku_i : 1'b0;
+
+  // Interrupts including NMI are ignored,
+  // - while in debug mode [Debug Spec v0.13.2, p.39],
+  // - while in NMI mode (nested NMIs are not supported, NMI has highest priority and
+  //   cannot be interrupted by regular interrupts).
+  // A regular pending interrupt additionally needs the M-mode interrupt enable bit.
+  assign handle_irq = ~(debug_mode_q | nmi_mode_q) &
+      ((csr_mstatus_mie_i & irq_pending_i) | irq_nm_i);
+
+  // Generate ID of fast interrupts, highest priority to highest ID.
+  // The loop walks the IDs upwards and the last match wins, so the highest set bit determines
+  // mfip_id. ID 0 is also the result when no bit in [14:1] is set.
+  always_comb begin : gen_mfip_id
+    mfip_id = 4'd0;
+    for (int unsigned idx = 1; idx <= 14; idx++) begin
+      if (irqs_i.irq_fast[idx]) mfip_id = idx[3:0];
+    end
+  end
+
+  // The timer request is not read individually (it is the fall-through case when the interrupt
+  // cause is selected), so tie it to an unused signal.
+  assign unused_irq_timer = irqs_i.irq_timer;
+
+  /////////////////////
+  // Core controller //
+  /////////////////////
+
+  // Combinational part of the controller FSM: derives the next state (ctrl_fsm_ns) and the
+  // control outputs from the current state (ctrl_fsm_cs) and the request signals.
+  // All outputs get a default first; each state then overrides only what it needs. Inside a
+  // state, later assignments deliberately override earlier ones.
+  always_comb begin
+    // Default values
+    instr_req_o           = 1'b1;
+
+    csr_save_if_o         = 1'b0;
+    csr_save_id_o         = 1'b0;
+    csr_save_wb_o         = 1'b0;
+    csr_restore_mret_id_o = 1'b0;
+    csr_restore_dret_id_o = 1'b0;
+    csr_save_cause_o      = 1'b0;
+    csr_mtval_o           = '0;
+
+    // The values of pc_mux and exc_pc_mux are only relevant if pc_set is set. Some of the states
+    // below always set pc_mux and exc_pc_mux but only set pc_set if certain conditions are met.
+    // This avoids having to factor those conditions into the pc_mux and exc_pc_mux select
+    // signals, helping timing.
+    pc_mux_o               = PC_BOOT;
+    pc_set_o               = 1'b0;
+    pc_set_spec_o          = 1'b0;
+    nt_branch_mispredict_o = 1'b0;
+
+    exc_pc_mux_o           = EXC_PC_IRQ;
+    exc_cause_o            = EXC_CAUSE_INSN_ADDR_MISA; // = 6'h00
+
+    // Stay in the current state unless a state below decides otherwise
+    ctrl_fsm_ns            = ctrl_fsm_cs;
+
+    ctrl_busy_o            = 1'b1;
+
+    halt_if                = 1'b0;
+    retain_id              = 1'b0;
+    flush_id               = 1'b0;
+
+    debug_csr_save_o       = 1'b0;
+    debug_cause_o          = DBG_CAUSE_EBREAK;
+    debug_mode_d           = debug_mode_q;
+    nmi_mode_d             = nmi_mode_q;
+
+    perf_tbranch_o         = 1'b0;
+    perf_jump_o            = 1'b0;
+
+    controller_run_o       = 1'b0;
+
+    unique case (ctrl_fsm_cs)
+      RESET: begin
+        // No fetch request yet; select the boot address and move on to BOOT_SET
+        instr_req_o   = 1'b0;
+        pc_mux_o      = PC_BOOT;
+        pc_set_o      = 1'b1;
+        pc_set_spec_o = 1'b1;
+        ctrl_fsm_ns   = BOOT_SET;
+      end
+
+      BOOT_SET: begin
+        // copy boot address to instr fetch address
+        instr_req_o   = 1'b1;
+        pc_mux_o      = PC_BOOT;
+        pc_set_o      = 1'b1;
+        pc_set_spec_o = 1'b1;
+
+        ctrl_fsm_ns = FIRST_FETCH;
+      end
+
+      WAIT_SLEEP: begin
+        // Entered from FLUSH on a WFI: stop fetching and flush ID, then always move to SLEEP
+        ctrl_busy_o   = 1'b0;
+        instr_req_o   = 1'b0;
+        halt_if       = 1'b1;
+        flush_id      = 1'b1;
+        ctrl_fsm_ns   = SLEEP;
+      end
+
+      SLEEP: begin
+        // instruction in IF stage is already valid
+        // we begin execution when an interrupt has arrived
+        instr_req_o   = 1'b0;
+        halt_if       = 1'b1;
+        flush_id      = 1'b1;
+
+        // normal execution flow
+        // in debug mode or single step mode we leave immediately (wfi=nop)
+        // Note that irq_pending_i is used here without csr_mstatus_mie_i.
+        if (irq_nm_i || irq_pending_i || debug_req_i || debug_mode_q || debug_single_step_i) begin
+          ctrl_fsm_ns = FIRST_FETCH;
+        end else begin
+          // Make sure clock remains disabled.
+          ctrl_busy_o = 1'b0;
+        end
+      end
+
+      FIRST_FETCH: begin
+        // The three checks below are in increasing priority: a later one overrides the
+        // ctrl_fsm_ns chosen by an earlier one (debug entry > interrupt > DECODE).
+
+        // Stall because of IF miss
+        if (id_in_ready_o) begin
+          ctrl_fsm_ns = DECODE;
+        end
+
+        // handle interrupts
+        if (handle_irq) begin
+          // We are handling an interrupt. Set halt_if to tell IF not to give
+          // us any more instructions before it redirects to the handler, but
+          // don't set flush_id: we must allow this instruction to complete
+          // (since it might have outstanding loads or stores).
+          ctrl_fsm_ns = IRQ_TAKEN;
+          halt_if     = 1'b1;
+        end
+
+        // enter debug mode
+        if (enter_debug_mode) begin
+          ctrl_fsm_ns = DBG_TAKEN_IF;
+          // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the
+          // ID state is needed for correct debug mode entry
+          halt_if     = 1'b1;
+        end
+      end
+
+      DECODE: begin
+        // normal operating mode of the ID stage, in case of debug and interrupt requests,
+        // priorities are as follows (lower number == higher priority)
+        // 1. currently running (multicycle) instructions and exceptions caused by these
+        // 2. debug requests
+        // 3. interrupt requests
+
+        controller_run_o = 1'b1;
+
+        // Set PC mux for branch and jump here to ease timing. Value is only relevant if pc_set_o is
+        // also set. Setting the mux value here avoids factoring in special_req and instr_valid_i
+        // which helps timing.
+        pc_mux_o = PC_JUMP;
+
+
+        // Get ready for special instructions, exceptions, pipeline flushes
+        if (special_req_all) begin
+          // Halt IF but don't flush ID. This leaves a valid instruction in
+          // ID so controller can determine appropriate action in the
+          // FLUSH state.
+          retain_id = 1'b1;
+
+          // Wait for the writeback stage to either be ready for a new instruction or raise its own
+          // exception before going to FLUSH. If the instruction in writeback raises an exception it
+          // must take priority over any exception from an instruction in ID/EX. Only once the
+          // writeback stage is ready can we be certain that won't happen. Without a writeback
+          // stage ready_wb_i == 1 so the FSM will always go directly to FLUSH.
+
+          if (ready_wb_i | wb_exception_o) begin
+            ctrl_fsm_ns = FLUSH;
+          end
+        end
+
+        if (!special_req_branch) begin
+          if (branch_set_i || jump_set_i) begin
+            // Only set the PC if the branch predictor hasn't already done the branch for us
+            pc_set_o       = BranchPredictor ? ~instr_bp_taken_i : 1'b1;
+
+            perf_tbranch_o = branch_set_i;
+            perf_jump_o    = jump_set_i;
+          end
+
+          if (BranchPredictor) begin
+            if (instr_bp_taken_i & branch_not_set_i) begin
+              // If the instruction is a branch that was predicted to be taken but was not taken
+              // signal a mispredict.
+              nt_branch_mispredict_o = 1'b1;
+            end
+          end
+        end
+
+        // pc_set signal excluding branch taken condition
+        if ((branch_set_spec_i || jump_set_i) && !special_req_branch) begin
+          // Only speculatively set the PC if the branch predictor hasn't already done the branch
+          // for us
+          pc_set_spec_o = BranchPredictor ? ~instr_bp_taken_i : 1'b1;
+        end
+
+        // If entering debug mode or handling an IRQ the core needs to wait
+        // until the current instruction has finished executing. Stall IF
+        // during that time.
+        if ((enter_debug_mode || handle_irq) && stall) begin
+          halt_if = 1'b1;
+        end
+
+        // Debug entry and interrupts are only taken when nothing is stalling and no special
+        // request is pending; debug entry has priority over the interrupt.
+        if (!stall && !special_req_all) begin
+          if (enter_debug_mode) begin
+            // enter debug mode
+            ctrl_fsm_ns = DBG_TAKEN_IF;
+            // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the
+            // ID state is needed for correct debug mode entry
+            halt_if     = 1'b1;
+          end else if (handle_irq) begin
+            // handle interrupt (not in debug mode)
+            ctrl_fsm_ns = IRQ_TAKEN;
+            // We are handling an interrupt (not in debug mode). Set halt_if to
+            // tell IF not to give us any more instructions before it redirects
+            // to the handler, but don't set flush_id: we must allow this
+            // instruction to complete (since it might have outstanding loads
+            // or stores).
+            halt_if     = 1'b1;
+          end
+        end
+
+      end // DECODE
+
+      IRQ_TAKEN: begin
+        pc_mux_o     = PC_EXC;
+        exc_pc_mux_o = EXC_PC_IRQ;
+
+        // handle_irq is re-evaluated here: if it is no longer set, the FSM returns to DECODE
+        // without redirecting the PC or saving any CSR state.
+        if (handle_irq) begin
+          pc_set_o         = 1'b1;
+          pc_set_spec_o    = 1'b1;
+
+          csr_save_if_o    = 1'b1;
+          csr_save_cause_o = 1'b1;
+
+          // interrupt priorities according to Privileged Spec v1.11 p.31
+          if (irq_nm_i && !nmi_mode_q) begin
+            exc_cause_o = EXC_CAUSE_IRQ_NM;
+            nmi_mode_d  = 1'b1; // enter NMI mode
+          end else if (irqs_i.irq_fast != 15'b0) begin
+            // generate exception cause ID from fast interrupt ID:
+            // - first bit distinguishes interrupts from exceptions,
+            // - second bit adds 16 to fast interrupt ID
+            // for example EXC_CAUSE_IRQ_FAST_0 = {1'b1, 5'd16}
+            exc_cause_o = exc_cause_e'({2'b11, mfip_id});
+          end else if (irqs_i.irq_external) begin
+            exc_cause_o = EXC_CAUSE_IRQ_EXTERNAL_M;
+          end else if (irqs_i.irq_software) begin
+            exc_cause_o = EXC_CAUSE_IRQ_SOFTWARE_M;
+          end else begin // irqs_i.irq_timer
+            exc_cause_o = EXC_CAUSE_IRQ_TIMER_M;
+          end
+        end
+
+        ctrl_fsm_ns = DECODE;
+      end
+
+      DBG_TAKEN_IF: begin
+        pc_mux_o     = PC_EXC;
+        exc_pc_mux_o = EXC_PC_DBD;
+
+        // enter debug mode and save PC in IF to dpc
+        // jump to debug exception handler in debug memory
+        // The request is re-checked: without one the FSM simply returns to DECODE.
+        if (debug_single_step_i || debug_req_i || trigger_match_i) begin
+          flush_id         = 1'b1;
+          pc_set_o         = 1'b1;
+          pc_set_spec_o    = 1'b1;
+
+          csr_save_if_o    = 1'b1;
+          debug_csr_save_o = 1'b1;
+
+          csr_save_cause_o = 1'b1;
+          // Reported cause, in priority order: trigger match, single step, halt request
+          if (trigger_match_i) begin
+            debug_cause_o = DBG_CAUSE_TRIGGER;
+          end else if (debug_single_step_i) begin
+            debug_cause_o = DBG_CAUSE_STEP;
+          end else begin
+            debug_cause_o = DBG_CAUSE_HALTREQ;
+          end
+
+          // enter debug mode
+          debug_mode_d = 1'b1;
+        end
+
+        ctrl_fsm_ns  = DECODE;
+      end
+
+      DBG_TAKEN_ID: begin
+        // enter debug mode and save PC in ID to dpc, used when encountering
+        // 1. EBREAK during debug mode
+        // 2. EBREAK with forced entry into debug mode (ebreakm or ebreaku set).
+        // regular ebreak's go through FLUSH.
+        //
+        // for 1. do not update dcsr and dpc, for 2. do so [Debug Spec v0.13.2, p.39]
+        // jump to debug exception handler in debug memory
+        flush_id      = 1'b1;
+        pc_mux_o      = PC_EXC;
+        pc_set_o      = 1'b1;
+        pc_set_spec_o = 1'b1;
+        exc_pc_mux_o  = EXC_PC_DBD;
+
+        // update dcsr and dpc
+        if (ebreak_into_debug && !debug_mode_q) begin // ebreak with forced entry
+
+          // dpc (set to the address of the EBREAK, i.e. set to PC in ID stage)
+          csr_save_cause_o = 1'b1;
+          csr_save_id_o    = 1'b1;
+
+          // dcsr
+          debug_csr_save_o = 1'b1;
+          debug_cause_o    = DBG_CAUSE_EBREAK;
+        end
+
+        // enter debug mode
+        debug_mode_d = 1'b1;
+
+        ctrl_fsm_ns  = DECODE;
+      end
+
+      FLUSH: begin
+        // flush the pipeline
+        halt_if     = 1'b1;
+        flush_id    = 1'b1;
+        ctrl_fsm_ns = DECODE;
+
+        // As pc_mux and exc_pc_mux can take various values in this state they aren't set early
+        // here.
+
+        // exceptions: set exception PC, save PC and exception cause
+        // exc_req_lsu is high for one clock cycle only (in DECODE)
+        if (exc_req_q || store_err_q || load_err_q) begin
+          pc_set_o         = 1'b1;
+          pc_set_spec_o    = 1'b1;
+          pc_mux_o         = PC_EXC;
+          exc_pc_mux_o     = debug_mode_q ? EXC_PC_DBG_EXC : EXC_PC_EXC;
+
+          if (WritebackStage) begin : g_writeback_mepc_save
+            // With the writeback stage present whether an instruction accessing memory will cause
+            // an exception is only known when it is in writeback. So when taking such an exception
+            // epc must come from writeback.
+            csr_save_id_o  = ~(store_err_q | load_err_q);
+            csr_save_wb_o  = store_err_q | load_err_q;
+          end else begin : g_no_writeback_mepc_save
+            // NOTE(review): none of the csr_save_if/id/wb outputs is set on this path, so the
+            // saved PC presumably relies on a default in the CSR block - confirm that it is the
+            // ID-stage PC.
+            csr_save_id_o  = 1'b0;
+          end
+
+          csr_save_cause_o = 1'b1;
+
+          // Exception/fault prioritisation logic will have set exactly 1 X_prio signal
+          unique case (1'b1)
+            instr_fetch_err_prio: begin
+                exc_cause_o = EXC_CAUSE_INSTR_ACCESS_FAULT;
+                // mtval is the address of the faulting half: PC + 2 when the plus2 flag is set
+                csr_mtval_o = instr_fetch_err_plus2_i ? (pc_id_i + 32'd2) : pc_id_i;
+            end
+            illegal_insn_prio: begin
+              exc_cause_o = EXC_CAUSE_ILLEGAL_INSN;
+              // mtval is the instruction itself (zero-extended when compressed)
+              csr_mtval_o = instr_is_compressed_i ? {16'b0, instr_compressed_i} : instr_i;
+            end
+            ecall_insn_prio: begin
+              exc_cause_o = (priv_mode_i == PRIV_LVL_M) ? EXC_CAUSE_ECALL_MMODE :
+                                                          EXC_CAUSE_ECALL_UMODE;
+            end
+            ebrk_insn_prio: begin
+              if (debug_mode_q | ebreak_into_debug) begin
+                /*
+                 * EBREAK in debug mode re-enters debug mode
+                 *
+                 * "The only exception is EBREAK. When that is executed in Debug
+                 * Mode, it halts the hart again but without updating dpc or
+                 * dcsr." [Debug Spec v0.13.2, p.39]
+                 */
+
+                /*
+                 * dcsr.ebreakm == 1:
+                 * "EBREAK instructions in M-mode enter Debug Mode."
+                 * [Debug Spec v0.13.2, p.42]
+                 */
+                // Undo the exception set-up above and leave ID unflushed: DBG_TAKEN_ID handles
+                // this EBREAK in the next cycle.
+                pc_set_o         = 1'b0;
+                pc_set_spec_o    = 1'b0;
+                csr_save_id_o    = 1'b0;
+                csr_save_cause_o = 1'b0;
+                ctrl_fsm_ns      = DBG_TAKEN_ID;
+                flush_id         = 1'b0;
+              end else begin
+                /*
+                 * "The EBREAK instruction is used by debuggers to cause control
+                 * to be transferred back to a debugging environment. It
+                 * generates a breakpoint exception and performs no other
+                 * operation. [...] ECALL and EBREAK cause the receiving
+                 * privilege mode's epc register to be set to the address of the
+                 * ECALL or EBREAK instruction itself, not the address of the
+                 * following instruction." [Privileged Spec v1.11, p.40]
+                 */
+                exc_cause_o      = EXC_CAUSE_BREAKPOINT;
+              end
+            end
+            store_err_prio: begin
+              exc_cause_o = EXC_CAUSE_STORE_ACCESS_FAULT;
+              csr_mtval_o = lsu_addr_last_i;
+            end
+            load_err_prio: begin
+              exc_cause_o = EXC_CAUSE_LOAD_ACCESS_FAULT;
+              csr_mtval_o = lsu_addr_last_i;
+            end
+            default: ;
+          endcase
+        end else begin
+          // special instructions and pipeline flushes
+          if (mret_insn) begin
+            pc_mux_o              = PC_ERET;
+            pc_set_o              = 1'b1;
+            pc_set_spec_o         = 1'b1;
+            csr_restore_mret_id_o = 1'b1;
+            if (nmi_mode_q) begin
+              nmi_mode_d          = 1'b0; // exit NMI mode
+            end
+          end else if (dret_insn) begin
+            pc_mux_o              = PC_DRET;
+            pc_set_o              = 1'b1;
+            pc_set_spec_o         = 1'b1;
+            debug_mode_d          = 1'b0;
+            csr_restore_dret_id_o = 1'b1;
+          end else if (wfi_insn) begin
+            ctrl_fsm_ns           = WAIT_SLEEP;
+          end else if (csr_pipe_flush && handle_irq) begin
+            // start handling IRQs when doing CSR-related pipeline flushes
+            ctrl_fsm_ns           = IRQ_TAKEN;
+          end
+        end // exc_req_q
+
+        // Entering debug mode due to either single step or debug_req. Ensure
+        // registers are set for exception but then enter debug handler rather
+        // than exception handler [Debug Spec v0.13.2, p.44]
+        // Leave all other signals as is to ensure CSRs and PC get set as if
+        // core was entering exception handler, entry to debug mode will then
+        // see the appropriate state and setup dpc correctly.
+        // If an EBREAK instruction is causing us to enter debug mode on the
+        // same cycle as a debug_req or single step, honor the EBREAK and
+        // proceed to DBG_TAKEN_ID.
+        // This check comes last so that it overrides any next state chosen above.
+        if (enter_debug_mode && !(ebrk_insn_prio && ebreak_into_debug)) begin
+          ctrl_fsm_ns = DBG_TAKEN_IF;
+        end
+      end // FLUSH
+
+      default: begin
+        // Unused encodings of the 4-bit state register: stop fetching and restart from RESET
+        instr_req_o = 1'b0;
+        ctrl_fsm_ns = RESET;
+      end
+    endcase
+  end
+
+  // Export the flush decision made by the FSM
+  assign flush_id_o   = flush_id;
+
+  // Tell the CSRs when the core is in debug mode
+  assign debug_mode_o = debug_mode_q;
+
+  // Tell the CSRs when the core is in an NMI handler (for nested exception handling)
+  assign nmi_mode_o   = nmi_mode_q;
+
+  ///////////////////
+  // Stall control //
+  ///////////////////
+
+  // When high, the current instruction cannot complete this cycle: it needs more cycles to
+  // finish (stall_id_i), the writeback stage cannot accept it yet (stall_wb_i), or fpu_busy_i is
+  // asserted. If there is no writeback stage stall_wb_i is a constant 0.
+  assign stall = fpu_busy_i | stall_wb_i | stall_id_i;
+
+  // The ID stage is ready for the next instruction only when nothing stalls it, IF is not
+  // halted and the current instruction is not being retained.
+  assign id_in_ready_o = ~(stall | halt_if | retain_id);
+
+  // Kill the instruction in the IF-ID pipeline register once it is done, or when it is flushed
+  // (for example if a multicycle instruction causes an exception).
+  // retain_id is another kind of stall, where the instr_valid bit must remain set (unless
+  // flush_id is set as well). It cannot be factored directly into stall as this causes a
+  // combinational loop.
+  assign instr_valid_clear_o = flush_id | ~(retain_id | stall);
+
+  // State registers: cleared by the asynchronous active-low reset (the FSM starts in RESET),
+  // otherwise loaded with their next-state values on every rising clock edge.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : update_regs
+    if (!rst_ni) begin
+      ctrl_fsm_cs <= RESET;
+      {nmi_mode_q, debug_mode_q, load_err_q, store_err_q, exc_req_q, illegal_insn_q} <= '0;
+    end else begin
+      ctrl_fsm_cs                 <= ctrl_fsm_ns;
+      {nmi_mode_q, debug_mode_q}  <= {nmi_mode_d, debug_mode_d};
+      {load_err_q, store_err_q}   <= {load_err_d, store_err_d};
+      {exc_req_q, illegal_insn_q} <= {exc_req_d, illegal_insn_d};
+    end
+  end
+
+ 
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/brq_idu_decoder.sv b/verilog/rtl/brq_idu_decoder.sv
new file mode 100644
index 0000000..3b4c452
--- /dev/null
+++ b/verilog/rtl/brq_idu_decoder.sv
@@ -0,0 +1,1720 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Instruction decoder
+ *
+ * This module is combinatorial except for the use_rs3_q register, which is clocked by
+ * clk_i and asynchronously reset by rst_ni; clock and reset are also used for assertions.
+ */
+
+module brq_idu_decoder #(
+    parameter bit RV32E                = 0,
+    parameter brq_pkg::rv32m_e   RV32M = brq_pkg::RV32MFast,
+    parameter brq_pkg::rv32b_e   RV32B = brq_pkg::RV32BNone,
+    parameter brq_pkg::rvfloat_e RVF   = brq_pkg::RV64FDouble,
+    parameter bit BranchTargetALU      = 0
+) (
+    input  logic                 clk_i,
+    input  logic                 rst_ni,
+
+    // to/from controller
+    output logic                 illegal_insn_o,        // illegal instr encountered
+    output logic                 ebrk_insn_o,           // trap instr encountered
+    output logic                 mret_insn_o,           // return from exception instr
+                                                        // encountered
+    output logic                 dret_insn_o,           // return from debug instr encountered
+    output logic                 ecall_insn_o,          // syscall instr encountered
+    output logic                 wfi_insn_o,            // wait for interrupt instr encountered
+    output logic                 jump_set_o,            // jump taken set signal
+    input  logic                 branch_taken_i,        // registered branch decision
+    output logic                 icache_inval_o,
+
+    // from IF-ID pipeline register
+    input  logic                 instr_first_cycle_i,   // instruction read is in its first cycle
+    input  logic [31:0]          instr_rdata_i,         // instruction read from memory/cache
+    input  logic [31:0]          instr_rdata_alu_i,     // instruction read from memory/cache
+                                                        // replicated to ease fan-out)
+
+    input  logic                 illegal_c_insn_i,      // compressed instruction decode failed
+
+    // immediates
+    output brq_pkg::imm_a_sel_e  imm_a_mux_sel_o,       // immediate selection for operand a
+    output brq_pkg::imm_b_sel_e  imm_b_mux_sel_o,       // immediate selection for operand b
+    output brq_pkg::op_a_sel_e   bt_a_mux_sel_o,        // branch target selection operand a
+    output brq_pkg::imm_b_sel_e  bt_b_mux_sel_o,        // branch target selection operand b
+    output logic [31:0]          imm_i_type_o,
+    output logic [31:0]          imm_s_type_o,
+    output logic [31:0]          imm_b_type_o,
+    output logic [31:0]          imm_u_type_o,
+    output logic [31:0]          imm_j_type_o,
+    output logic [31:0]          zimm_rs1_type_o,
+
+    // register file
+    output brq_pkg::rf_wd_sel_e  rf_wdata_sel_o,   // RF write data selection
+    output logic                 rf_we_o,          // write enable for regfile
+    output logic [4:0]           rf_raddr_a_o,
+    output logic [4:0]           rf_raddr_b_o,
+    output logic [4:0]           rf_waddr_o,
+    output logic                 rf_ren_a_o,          // Instruction reads from RF addr A
+    output logic                 rf_ren_b_o,          // Instruction reads from RF addr B
+
+    // ALU
+    output brq_pkg::alu_op_e     alu_operator_o,       // ALU operation selection
+    output brq_pkg::op_a_sel_e   alu_op_a_mux_sel_o,   // operand a selection: reg value, PC,
+                                                       // immediate or zero
+    output brq_pkg::op_b_sel_e   alu_op_b_mux_sel_o,   // operand b selection: reg value or
+                                                       // immediate
+    output logic                 alu_multicycle_o,     // ternary bitmanip instruction
+
+    // MULT & DIV
+    output logic                 mult_en_o,             // perform integer multiplication
+    output logic                 div_en_o,              // perform integer division or remainder
+    output logic                 mult_sel_o,            // as above but static, for data muxes
+    output logic                 div_sel_o,             // as above but static, for data muxes
+
+    output brq_pkg::md_op_e      multdiv_operator_o,
+    output logic [1:0]           multdiv_signed_mode_o,
+
+    // CSRs
+    output logic                 csr_access_o,          // access to CSR
+    output brq_pkg::csr_op_e     csr_op_o,              // operation to perform on CSR
+
+    // LSU
+    output logic                 data_req_o,            // start transaction to data memory
+    output logic                 data_we_o,             // write enable
+    output logic [1:0]           data_type_o,           // size of transaction: byte, half
+                                                        // word or word
+    output logic                 data_sign_extension_o, // sign extension for data read from
+                                                        // memory
+
+    // jump/branches
+    output logic                 jump_in_dec_o,         // jump is being calculated in ALU
+    output logic                 branch_in_dec_o,
+
+    // Floating point extensions IO
+    output fpnew_pkg::roundmode_e fp_rounding_mode_o,      // defines the rounding mode 
+    output brq_pkg::op_b_sel_e    fp_alu_op_b_mux_sel_o,   // operand b selection: reg value or
+                                                           // immediate 
+    output logic [4:0]            fp_rf_raddr_a_o,
+    output logic [4:0]            fp_rf_raddr_b_o,
+    output logic [4:0]            fp_rf_raddr_c_o,
+
+    output logic [4:0]            fp_rf_waddr_o,
+    output logic                  fp_rf_we_o,
+
+    output fpnew_pkg::operation_e fp_alu_operator_o,
+    output logic                  fp_alu_op_mod_o,
+    output logic                  fp_rm_dynamic_o,
+    output fpnew_pkg::fp_format_e fp_src_fmt_o,
+    output fpnew_pkg::fp_format_e fp_dst_fmt_o,
+    output logic                  is_fp_instr_o,
+    output logic                  use_fp_rs1_o,
+    output logic                  use_fp_rs2_o,
+    output logic                  use_fp_rs3_o,
+    output logic                  use_fp_rd_o,
+    output logic                  fp_swap_oprnds_o,
+    output logic                  fp_load_o,
+    output logic                  mv_instr_o
+);
+
+  import brq_pkg::*;
+  import fpnew_pkg::*;
+
+  logic        fp_invalid_rm;
+ 
+  logic        illegal_insn;
+  logic        illegal_reg_rv32e;
+  logic        csr_illegal;
+  logic        rf_we;
+
+  logic [31:0] instr;
+  logic [31:0] instr_alu;
+  // Source/Destination register instruction index
+  logic [4:0] instr_rs1;
+  logic [4:0] instr_rs2;
+  logic [4:0] instr_rs3;
+  logic [4:0] instr_rd;
+
+  logic        use_rs3_d;
+  logic        use_rs3_q;
+
+  csr_op_e     csr_op;
+
+  opcode_e     opcode;
+  opcode_e     opcode_alu;
+
+  // To help timing the flops containing the current instruction are replicated to reduce fan-out.
+  // instr_alu is used to determine the ALU control logic and associated operand/imm select signals
+  // as the ALU is often on the more critical timing paths. instr is used for everything else.
+  assign instr     = instr_rdata_i;
+  assign instr_alu = instr_rdata_alu_i;
+
+  //////////////////////////////////////
+  // Register and immediate selection //
+  //////////////////////////////////////
+
+  // immediate extraction; I/S/B/J are sign-extended from instr[31], B/J have bit 0 tied to 0
+  assign imm_i_type_o = { {20{instr[31]}}, instr[31:20] };
+  assign imm_s_type_o = { {20{instr[31]}}, instr[31:25], instr[11:7] };
+  assign imm_b_type_o = { {19{instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0 };
+  assign imm_u_type_o = { instr[31:12], 12'b0 };  // upper 20 bits, low 12 bits zero
+  assign imm_j_type_o = { {12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0 };
+
+  // immediate for CSR manipulation (zero extended)
+  assign zimm_rs1_type_o = { 27'b0, instr_rs1 }; // rs1 field (instr[19:15]) padded to 32 bits
+
+  // the use of rs3 is known one cycle ahead: use_rs3_d is registered into use_rs3_q here.
+  always_ff  @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      use_rs3_q <= 1'b0;       // asynchronous active-low reset: no rs3 access pending
+    end else begin
+      use_rs3_q <= use_rs3_d;  // consumed by the rf_raddr_a_o mux (rs3 vs rs1 selection)
+    end
+  end
+
+  // source register fields; read port A carries rs3 instead of rs1 when use_rs3_q is set
+  assign instr_rs1 = instr[19:15];
+  assign instr_rs2 = instr[24:20];
+  assign instr_rs3 = instr[31:27];
+  assign rf_raddr_a_o = (use_rs3_q & ~instr_first_cycle_i) ? instr_rs3 : instr_rs1; // rs3 / rs1
+  assign rf_raddr_b_o = instr_rs2; // rs2
+
+  // destination register field, forwarded unchanged as the write address
+  assign instr_rd   = instr[11:7];
+  assign rf_waddr_o = instr_rd; // rd
+
+  // fp register file read addresses: the rs1/rs2/rs3 fields feed read ports a/b/c
+  assign fp_rf_raddr_a_o = instr_rs1;
+  assign fp_rf_raddr_b_o = instr_rs2;
+  assign fp_rf_raddr_c_o = instr_rs3;
+
+  // fp destination register
+  assign fp_rf_waddr_o   = instr_rd;
+
+  // rounding mode is taken from instr[14:12]: 3'b101/3'b110 are flagged invalid, 3'b111 dynamic
+  assign fp_rounding_mode_o = roundmode_e'(instr[14:12]);
+  assign fp_invalid_rm      = (instr[14:12] == 3'b101) | (instr[14:12] == 3'b110);
+  assign fp_rm_dynamic_o    = (instr[14:12] == 3'b111);
+
+  // fp_dst_fmt_o has no continuous assign: the decoder always_comb is its only driver (FP32 default)
+
+  ////////////////////
+  // Register check //
+  ////////////////////
+  if (RV32E) begin : gen_rv32e_reg_check_active
+    assign illegal_reg_rv32e = (rf_we & rf_waddr_o[4]) |               // write targets x16..x31
+        ((alu_op_a_mux_sel_o == OP_A_REG_A) & rf_raddr_a_o[4]) |       // operand A uses x16..x31
+        ((alu_op_b_mux_sel_o == OP_B_REG_B) & rf_raddr_b_o[4]);        // operand B uses x16..x31
+  end else begin : gen_rv32e_reg_check_inactive
+    assign illegal_reg_rv32e = 1'b0;                                   // check disabled: RV32E is 0
+  end
+
+  ///////////////////////
+  // CSR operand check //
+  ///////////////////////
+  always_comb begin : csr_operand_check
+    // A set/clear operation whose source field instr[19:15] is zero (rs1 == x0 for CSRRS/CSRRC,
+    // uimm[4:0] == 0 for CSRRSI/CSRRCI) must not write the CSR, so it is downgraded to a plain
+    // read. Every other operation is forwarded unchanged.
+    if ((instr_rs1 == '0) && ((csr_op == CSR_OP_CLEAR) || (csr_op == CSR_OP_SET))) begin
+      csr_op_o = CSR_OP_READ;
+    end else begin
+      csr_op_o = csr_op;
+    end
+  end
+
+  /////////////
+  // Decoder //
+  /////////////
+
+  always_comb begin
+    jump_in_dec_o         = 1'b0;
+    jump_set_o            = 1'b0;
+    branch_in_dec_o       = 1'b0;
+    icache_inval_o        = 1'b0;
+
+    multdiv_operator_o    = MD_OP_MULL;
+    multdiv_signed_mode_o = 2'b00;
+
+    rf_wdata_sel_o        = RF_WD_EX;
+    rf_we                 = 1'b0;
+    rf_ren_a_o            = 1'b0;
+    rf_ren_b_o            = 1'b0;
+
+    csr_access_o          = 1'b0;
+    csr_illegal           = 1'b0;
+    csr_op                = CSR_OP_READ;
+
+    data_we_o             = 1'b0;
+    data_type_o           = 2'b00;
+    data_sign_extension_o = 1'b0;
+    data_req_o            = 1'b0;
+
+    illegal_insn          = 1'b0;
+    ebrk_insn_o           = 1'b0;
+    mret_insn_o           = 1'b0;
+    dret_insn_o           = 1'b0;
+    ecall_insn_o          = 1'b0;
+    wfi_insn_o            = 1'b0;
+
+    // Floating Point
+    fp_rf_we_o            = 1'b0;
+    is_fp_instr_o         = 1'b0;
+    use_fp_rs1_o          = 1'b0;
+    use_fp_rs2_o          = 1'b0;
+    use_fp_rs3_o          = 1'b0;
+    use_fp_rd_o           = 1'b0;
+    fp_load_o             = 1'b0;
+    fp_src_fmt_o          = FP32; 
+    fp_dst_fmt_o          = FP32;
+    fp_swap_oprnds_o      = 1'b0;
+    mv_instr_o            = 1'b0;
+
+    opcode                = opcode_e'(instr[6:0]);
+
+    unique case (opcode)
+
+      ///////////
+      // Jumps //
+      ///////////
+
+      OPCODE_JAL: begin   // Jump and Link
+        jump_in_dec_o      = 1'b1;
+
+        if (instr_first_cycle_i) begin
+          // Calculate jump target (and store PC + 4 if BranchTargetALU is configured)
+          rf_we            = BranchTargetALU;
+          jump_set_o       = 1'b1;
+        end else begin
+          // Calculate and store PC+4
+          rf_we            = 1'b1;
+        end
+      end
+
+      OPCODE_JALR: begin  // Jump and Link Register
+        jump_in_dec_o      = 1'b1;
+
+        if (instr_first_cycle_i) begin
+          // Calculate jump target (and store PC + 4 if BranchTargetALU is configured)
+          rf_we            = BranchTargetALU;
+          jump_set_o       = 1'b1;
+        end else begin
+          // Calculate and store PC+4
+          rf_we            = 1'b1;
+        end
+        if (instr[14:12] != 3'b0) begin
+          illegal_insn = 1'b1;
+        end
+
+        rf_ren_a_o = 1'b1;
+      end
+
+      OPCODE_BRANCH: begin // Branch
+        branch_in_dec_o       = 1'b1;
+        // Check branch condition selection
+        unique case (instr[14:12])
+          3'b000,
+          3'b001,
+          3'b100,
+          3'b101,
+          3'b110,
+          3'b111:  illegal_insn = 1'b0;
+          default: illegal_insn = 1'b1;
+        endcase
+        rf_ren_a_o = 1'b1;
+        rf_ren_b_o = 1'b1;
+      end
+
+      ////////////////
+      // Load/store //
+      ////////////////
+
+      OPCODE_STORE: begin
+        rf_ren_a_o         = 1'b1;
+        rf_ren_b_o         = 1'b1;
+        data_req_o         = 1'b1;
+        data_we_o          = 1'b1;
+
+        if (instr[14]) begin
+          illegal_insn = 1'b1;
+        end
+
+        // store size
+        unique case (instr[13:12])
+          2'b00:   data_type_o  = 2'b10; // sb
+          2'b01:   data_type_o  = 2'b01; // sh
+          2'b10:   data_type_o  = 2'b00; // sw
+          default: illegal_insn = 1'b1;
+        endcase
+      end
+
+      OPCODE_LOAD: begin
+        rf_ren_a_o          = 1'b1;
+        data_req_o          = 1'b1;
+        data_type_o         = 2'b00;
+
+        // sign/zero extension
+        data_sign_extension_o = ~instr[14];
+
+        // load size
+        unique case (instr[13:12])
+          2'b00: data_type_o = 2'b10; // lb(u)
+          2'b01: data_type_o = 2'b01; // lh(u)
+          2'b10: begin
+            data_type_o = 2'b00;      // lw
+            if (instr[14]) begin
+              illegal_insn = 1'b1;    // lwu does not exist
+            end
+          end
+          default: begin
+            illegal_insn = 1'b1;
+          end
+        endcase
+      end
+
+      /////////
+      // ALU //
+      /////////
+
+      OPCODE_LUI: begin  // Load Upper Immediate
+        rf_we            = 1'b1;
+      end
+
+      OPCODE_AUIPC: begin  // Add Upper Immediate to PC
+        rf_we            = 1'b1;
+      end
+
+      OPCODE_OP_IMM: begin // Register-Immediate ALU Operations
+        rf_ren_a_o       = 1'b1;
+        rf_we            = 1'b1;
+
+        unique case (instr[14:12])
+          3'b000,
+          3'b010,
+          3'b011,
+          3'b100,
+          3'b110,
+          3'b111: illegal_insn = 1'b0;
+
+          3'b001: begin
+            unique case (instr[31:27])
+              5'b0_0000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1;        // slli
+              5'b0_0100,                                                              // sloi
+              5'b0_1001,                                                              // sbclri
+              5'b0_0101,                                                              // sbseti
+              5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;           // sbinvi
+              5'b0_0001: if (instr[26] == 1'b0) begin
+                illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;                    // shfl
+              end else begin
+                illegal_insn = 1'b1;
+              end
+              5'b0_1100: begin
+                unique case(instr[26:20])
+                  7'b000_0000,                                                         // clz
+                  7'b000_0001,                                                         // ctz
+                  7'b000_0010,                                                         // pcnt
+                  7'b000_0100,                                                         // sext.b
+                  7'b000_0101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;      // sext.h
+                  7'b001_0000,                                                         // crc32.b
+                  7'b001_0001,                                                         // crc32.h
+                  7'b001_0010,                                                         // crc32.w
+                  7'b001_1000,                                                         // crc32c.b
+                  7'b001_1001,                                                         // crc32c.h
+                  7'b001_1010: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;      // crc32c.w
+
+                  default: illegal_insn = 1'b1;
+                endcase
+              end
+              default : illegal_insn = 1'b1;
+            endcase
+          end
+
+          3'b101: begin
+            if (instr[26]) begin
+              illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;                       // fsri
+            end else begin
+              unique case (instr[31:27])
+                5'b0_0000,                                                             // srli
+                5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1;       // srai
+
+                5'b0_0100,                                                             // sroi
+                5'b0_1100,                                                             // rori
+                5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;          // sbexti
+
+                5'b0_1101: begin
+                  if ((RV32B == RV32BFull)) begin
+                    illegal_insn = 1'b0;                                               // grevi
+                  end else begin
+                    unique case (instr[24:20])
+                      5'b11111,                                                        // rev
+                      5'b11000: illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // rev8
+
+                      default: illegal_insn = 1'b1;
+                    endcase
+                  end
+                end
+                5'b0_0101: begin
+                  if ((RV32B == RV32BFull)) begin
+                    illegal_insn = 1'b0;                                              // gorci
+                  end else if (instr[24:20] == 5'b00111) begin
+                    illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1;            // orc.b
+                  end
+                end
+                5'b0_0001: begin
+                  if (instr[26] == 1'b0) begin
+                    illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;                // unshfl
+                  end else begin
+                    illegal_insn = 1'b1;
+                  end
+                end
+
+                default: illegal_insn = 1'b1;
+              endcase
+            end
+          end
+
+         // default: illegal_insn = 1'b1;
+        endcase
+      end
+
+      OPCODE_OP: begin  // Register-Register ALU operation
+        rf_ren_a_o      = 1'b1;
+        rf_ren_b_o      = 1'b1;
+        rf_we           = 1'b1;
+        if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin
+          illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // cmix / cmov / fsl / fsr
+        end else begin
+          unique case ({instr[31:25], instr[14:12]})
+            // RV32I ALU operations
+            {7'b000_0000, 3'b000},
+            {7'b010_0000, 3'b000},
+            {7'b000_0000, 3'b010},
+            {7'b000_0000, 3'b011},
+            {7'b000_0000, 3'b100},
+            {7'b000_0000, 3'b110},
+            {7'b000_0000, 3'b111},
+            {7'b000_0000, 3'b001},
+            {7'b000_0000, 3'b101},
+            {7'b010_0000, 3'b101}: illegal_insn = 1'b0;
+
+            // RV32B zbb
+            {7'b010_0000, 3'b111}, // andn
+            {7'b010_0000, 3'b110}, // orn
+            {7'b010_0000, 3'b100}, // xnor
+            {7'b001_0000, 3'b001}, // slo
+            {7'b001_0000, 3'b101}, // sro
+            {7'b011_0000, 3'b001}, // rol
+            {7'b011_0000, 3'b101}, // ror
+            {7'b000_0101, 3'b100}, // min
+            {7'b000_0101, 3'b101}, // max
+            {7'b000_0101, 3'b110}, // minu
+            {7'b000_0101, 3'b111}, // maxu
+            {7'b000_0100, 3'b100}, // pack
+            {7'b010_0100, 3'b100}, // packu
+            {7'b000_0100, 3'b111}, // packh
+            // RV32B zbs
+            {7'b010_0100, 3'b001}, // sbclr
+            {7'b001_0100, 3'b001}, // sbset
+            {7'b011_0100, 3'b001}, // sbinv
+            {7'b010_0100, 3'b101}, // sbext
+            // RV32B zbf
+            {7'b010_0100, 3'b111}: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bfp
+            // RV32B zbe
+            {7'b010_0100, 3'b110}, // bdep
+            {7'b000_0100, 3'b110}, // bext
+            // RV32B zbp
+            {7'b011_0100, 3'b101}, // grev
+            {7'b001_0100, 3'b101}, // gorc
+            {7'b000_0100, 3'b001}, // shfl
+            {7'b000_0100, 3'b101}, // unshfl
+            // RV32B zbc
+            {7'b000_0101, 3'b001}, // clmul
+            {7'b000_0101, 3'b010}, // clmulr
+            {7'b000_0101, 3'b011}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // clmulh
+
+            // RV32M instructions
+            {7'b000_0001, 3'b000}: begin // mul
+              multdiv_operator_o    = MD_OP_MULL;
+              multdiv_signed_mode_o = 2'b00;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            {7'b000_0001, 3'b001}: begin // mulh
+              multdiv_operator_o    = MD_OP_MULH;
+              multdiv_signed_mode_o = 2'b11;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            {7'b000_0001, 3'b010}: begin // mulhsu
+              multdiv_operator_o    = MD_OP_MULH;
+              multdiv_signed_mode_o = 2'b01;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            {7'b000_0001, 3'b011}: begin // mulhu
+              multdiv_operator_o    = MD_OP_MULH;
+              multdiv_signed_mode_o = 2'b00;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            {7'b000_0001, 3'b100}: begin // div
+              multdiv_operator_o    = MD_OP_DIV;
+              multdiv_signed_mode_o = 2'b11;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            {7'b000_0001, 3'b101}: begin // divu
+              multdiv_operator_o    = MD_OP_DIV;
+              multdiv_signed_mode_o = 2'b00;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            {7'b000_0001, 3'b110}: begin // rem
+              multdiv_operator_o    = MD_OP_REM;
+              multdiv_signed_mode_o = 2'b11;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            {7'b000_0001, 3'b111}: begin // remu
+              multdiv_operator_o    = MD_OP_REM;
+              multdiv_signed_mode_o = 2'b00;
+              illegal_insn          = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+            end
+            default: begin
+              illegal_insn = 1'b1;
+            end
+          endcase
+        end
+      end
+
+      /////////////
+      // Special //
+      /////////////
+
+      OPCODE_MISC_MEM: begin
+        unique case (instr[14:12])
+          3'b000: begin
+            // FENCE is treated as a NOP since all memory operations are already strictly ordered.
+            rf_we           = 1'b0;
+          end
+          3'b001: begin
+            // FENCE.I is implemented as a jump to the next PC, this gives the required flushing
+            // behaviour (iside prefetch buffer flushed and response to any outstanding iside
+            // requests will be ignored).
+            // If present, the ICache will also be flushed.
+            jump_in_dec_o   = 1'b1;
+
+            rf_we           = 1'b0;
+
+            if (instr_first_cycle_i) begin
+              jump_set_o       = 1'b1;
+              icache_inval_o   = 1'b1;
+            end
+          end
+          default: begin
+            illegal_insn       = 1'b1;
+          end
+        endcase
+      end
+
+      OPCODE_SYSTEM: begin
+        if (instr[14:12] == 3'b000) begin
+          // non CSR related SYSTEM instructions
+          unique case (instr[31:20])
+            12'h000:  // ECALL
+              // environment (system) call
+              ecall_insn_o = 1'b1;
+
+            12'h001:  // ebreak
+              // debugger trap
+              ebrk_insn_o = 1'b1;
+
+            12'h302:  // mret
+              mret_insn_o = 1'b1;
+
+            12'h7b2:  // dret
+              dret_insn_o = 1'b1;
+
+            12'h105:  // wfi
+              wfi_insn_o = 1'b1;
+
+            default:
+              illegal_insn = 1'b1;
+          endcase
+
+          // rs1 and rd must be 0
+          if (instr_rs1 != 5'b0 || instr_rd != 5'b0) begin
+            illegal_insn = 1'b1;
+          end
+        end else begin
+          // instruction to read/modify CSR
+          csr_access_o     = 1'b1;
+          rf_wdata_sel_o   = RF_WD_CSR;
+          rf_we            = 1'b1;
+
+          if (~instr[14]) begin
+            rf_ren_a_o         = 1'b1;
+          end
+
+          unique case (instr[13:12])
+            2'b01:   csr_op = CSR_OP_WRITE;
+            2'b10:   csr_op = CSR_OP_SET;
+            2'b11:   csr_op = CSR_OP_CLEAR;
+            default: csr_illegal = 1'b1;
+          endcase
+
+          illegal_insn = csr_illegal;
+        end
+
+      end
+
+      //////////////////////////////////////////
+      //  Floating Point Extension (F and D)  //
+      //////////////////////////////////////////
+
+      OPCODE_STORE_FP: begin
+        data_req_o         = 1'b1;
+        data_we_o          = 1'b1;
+        data_type_o        = 2'b00;
+
+        use_fp_rs2_o       = 1'b1;
+
+        unique case(instr[14:12])
+          3'b011: begin // FSD
+            illegal_insn = (RVF == RV64FDouble) ? 1'b0 : 1'b1;
+            fp_src_fmt_o = FP64;
+          end
+          3'b010: begin // FSW
+            illegal_insn = (RVF == RV32FNone) ? 1'b1 : 1'b0;
+            fp_src_fmt_o = FP32; 
+          end
+          default: illegal_insn = 1'b1;
+        endcase
+        end
+
+      OPCODE_LOAD_FP: begin
+        data_req_o         = 1'b1;
+        data_type_o        = 2'b00;
+        fp_load_o          = 1'b1;
+
+        use_fp_rd_o        = 1'b1; 
+
+        unique case(instr[14:12])
+          3'b011: begin // FLD
+            illegal_insn = (RVF == RV64FDouble) ? 1'b0 : 1'b1;
+            fp_src_fmt_o = FP64;
+          end
+          3'b010: begin // FLW
+            illegal_insn = (RVF == RV32FNone) ? 1'b1 : 1'b0;
+            fp_src_fmt_o = FP32; 
+          end
+          default: illegal_insn = 1'b1;
+        endcase
+      end
+
+      OPCODE_MADD_FP,  // FMADD.S, FMADD.D
+      OPCODE_MSUB_FP,  // FMSUB.S, FMSUB.D
+      OPCODE_NMSUB_FP, // FNMSUB.S, FNMSUB.D
+      OPCODE_NMADD_FP: begin //FNMADD.S, FNMADD.S
+        fp_rf_we_o         = 1'b1;
+        fp_src_fmt_o       = FP32;
+        is_fp_instr_o      = 1'b1;
+
+        use_fp_rs1_o       = 1'b1;
+        use_fp_rs2_o       = 1'b1;
+        use_fp_rs3_o       = 1'b1;
+        use_fp_rd_o        = 1'b1; 
+        
+        unique case (instr[26:25])
+          01: begin
+            illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+            fp_src_fmt_o = FP64;
+          end
+          00: begin
+            illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+            fp_src_fmt_o = FP32;
+          end
+          default: illegal_insn = 1'b1;
+        endcase
+      end
+
+      OPCODE_OP_FP: begin
+        fp_src_fmt_o       = FP32;
+        is_fp_instr_o      = 1'b1;
+
+        unique case (instr[31:25]) 
+          7'b0000001,       // FADD.D
+          7'b0000101: begin // FSUB.D
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            fp_swap_oprnds_o   = 1'b1;
+            illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+            fp_src_fmt_o = FP64;
+          end
+          7'b0001001,      // FMUL.D
+          7'b0001101:begin // FDIV.D
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+            fp_src_fmt_o = FP64;
+          end
+          7'b0000000,       // FADD.S
+          7'b0000100: begin // FSUB.S
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            fp_swap_oprnds_o   = 1'b1;
+            illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+            fp_src_fmt_o = FP32;
+          end
+          7'b0001000, // FMUL.S
+          7'b0001100: begin // FDIV.S
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+            fp_src_fmt_o = FP32;
+          end
+          7'b0101101: begin
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~|instr[24:20]) begin //FSQRT.D
+              illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o = FP64;
+            end
+          end
+          7'b0101100: begin // FSQRT.S
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~|instr[24:20]) begin
+              illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+              fp_src_fmt_o = FP32;
+            end
+          end
+          7'b0010001: begin // FSGNJ.D, FSGNJN.D, FSGNJX.D
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~(instr[14] | (&instr[13:12]))) begin
+              illegal_insn  = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o  = FP64;
+            end
+          end
+          7'b0010000: begin // FSGNJ.S, FSGNJN.S, FSGNJX.S
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~(instr[14] | (&instr[13:12]))) begin
+              illegal_insn  = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+              fp_src_fmt_o  = FP32;
+            end
+          end
+          7'b0010101: begin // FMIN.D, FMAX.D
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~|instr[14:13]) begin
+              illegal_insn  = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o  = FP64;
+            end
+          end
+          7'b0010100: begin // FMIN.S, FMAX.S
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rs2_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~|instr[14:13]) begin
+              illegal_insn  = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+              fp_src_fmt_o  = FP32;
+            end
+          end
+          7'b0100000: begin // FCVT.S.D
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~(|instr[24:21] | (~instr[20]))) begin
+              illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o = FP64;
+            end
+          end
+          7'b1100000: begin // FCVT.W.S, FCVT.WU.S
+            rf_we            = 1'b1;  // write back in int_regfile
+            use_fp_rs1_o     = 1'b1;
+            if (~|instr[24:21]) begin
+              illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+              fp_src_fmt_o = FP32;
+            end
+          end
+          7'b0100001: begin // FCVT.D.S
+            fp_rf_we_o         = 1'b1;
+            use_fp_rs1_o       = 1'b1;
+            use_fp_rd_o        = 1'b1;
+            if (~|instr[24:20]) begin 
+              illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o = FP64;
+            end
+          end
+          7'b1110000: begin // FMV.X.W , FCLASS.S
+            rf_we            = 1'b1;  // write back in int_regfile
+            unique case ({instr[24:20],instr[14:12]})
+              {5'b00000,3'b000}: begin
+                use_fp_rs1_o   = 1'b1;
+                illegal_insn   = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+                fp_src_fmt_o   = FP32;
+                mv_instr_o     = 1'b1;
+              end
+              {5'b00000,3'b001}: begin
+                use_fp_rs1_o = 1'b1;
+                illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+                fp_src_fmt_o = FP32;
+              end
+              default: begin
+                illegal_insn =1'b1;
+              end
+            endcase
+          end
+          7'b1010001: begin // FEQ.D, FLT.D, FLE.D
+            rf_we            = 1'b1;  // write back in int_regfile
+            use_fp_rs1_o     = 1'b1;
+            use_fp_rs2_o     = 1'b1;
+            if (~(instr[14]) | (&instr[13:12])) begin
+              illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o = FP64;
+            end
+          end
+          7'b1010000: begin // FEQ.S, FLT.S, FLE.S
+            rf_we            = 1'b1;  // write back in int_regfile
+            use_fp_rs1_o     = 1'b1;
+            use_fp_rs2_o     = 1'b1;
+            if (~(instr[14]) | (&instr[13:12])) begin
+              illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+              fp_src_fmt_o = FP32;
+            end
+          end
+          7'b1110001: begin // FCLASS.D
+            rf_we            = 1'b1;  // write back in int_regfile
+            use_fp_rs1_o     = 1'b1;
+            unique case ({instr[24:20],instr[14:12]}) 
+              {5'b00000,3'b001}: begin  
+                illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+                fp_src_fmt_o = FP64;
+              end
+              default: begin
+                illegal_insn =1'b1;
+              end
+            endcase
+          end
+          7'b1100001: begin // // FCVT.W.D, FCVT.WU.D
+            rf_we            = 1'b1;  // write back in int_regfile
+            use_fp_rs1_o     = 1'b1;
+            if (~|instr[24:21]) begin
+              illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o = FP64;
+            end
+          end
+          7'b1101000: begin // FCVT.S.W, FCVT.S.WU
+            fp_rf_we_o       = 1'b1;
+            use_fp_rd_o      = 1'b1;
+            if (~|instr[24:21]) begin
+              illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+              fp_src_fmt_o = FP32;
+            end
+          end
+          7'b1111001: begin // FCVT.D.W, FCVT.D.WU
+            rf_we            = 1'b1;  // write back in int_regfile
+            use_fp_rd_o      = 1'b1;
+            if (~|instr[24:21]) begin
+              illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+              fp_src_fmt_o = FP64;
+            end
+          end
+          7'b1111000: begin // FMV.W.X
+            fp_rf_we_o        = 1'b1;
+            use_fp_rd_o       = 1'b1;
+            mv_instr_o        = 1'b1;
+            if (~(|instr[24:20]) | (|instr[14:12])) begin
+              illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+              fp_src_fmt_o = FP32;
+            end
+          end
+          default: illegal_insn = 1'b1;
+        endcase
+      end
+    default: begin
+      illegal_insn = 1'b1;
+    end
+    endcase
+
+    // make sure illegal compressed instructions cause illegal instruction exceptions
+    if (illegal_c_insn_i) begin
+      illegal_insn = 1'b1;
+    end
+
+    // make sure illegal instructions detected in the decoder do not propagate from decoder
+    // into register file, LSU, EX, WB, CSRs, PC
+    // NOTE: instructions can also be detected to be illegal inside the CSRs (upon accesses with
+    // insufficient privileges), or when accessing non-available registers in RV32E,
+    // these cases are not handled here
+    if (illegal_insn) begin
+      rf_we           = 1'b0;
+      data_req_o      = 1'b0;
+      data_we_o       = 1'b0;
+      jump_in_dec_o   = 1'b0;
+      jump_set_o      = 1'b0;
+      branch_in_dec_o = 1'b0;
+      csr_access_o    = 1'b0;
+      
+      // floating point
+      fp_rf_we_o      = 1'b0;
+    end
+  end
+
+  /////////////////////////////
+  // Decoder for ALU control //
+  /////////////////////////////
+
+  // ALU-control decoder (purely combinational).
+  //
+  // Decodes the opcode held in instr_alu[6:0] and drives:
+  //   - alu_operator_o, alu_op_a_mux_sel_o, alu_op_b_mux_sel_o : main ALU operation and operand muxes
+  //   - imm_a_mux_sel_o, imm_b_mux_sel_o                       : immediate selection
+  //   - bt_a_mux_sel_o, bt_b_mux_sel_o                         : branch-target ALU operand selection
+  //   - use_rs3_d, alu_multicycle_o                            : ternary / multi-cycle bit-manip ops
+  //   - mult_sel_o, div_sel_o                                  : RV32M unit selection
+  //   - fp_alu_operator_o, fp_alu_op_mod_o                     : floating-point operation and modifier
+  //
+  // Every output is given a default value before the case statement, so encodings
+  // that match no case item simply keep those defaults; nothing in this block
+  // assigns illegal_insn.
+  //
+  // NOTE(review): the integer section reads instr_alu while the floating-point
+  // section reads instr. Presumably both carry the same instruction word (instr_alu
+  // being a fan-out replica) - confirm against the module's signal assignments.
+  always_comb begin
+    alu_operator_o     = ALU_SLTU;
+    alu_op_a_mux_sel_o = OP_A_IMM;
+    alu_op_b_mux_sel_o = OP_B_IMM;
+
+    imm_a_mux_sel_o    = IMM_A_ZERO;
+    imm_b_mux_sel_o    = IMM_B_I;
+
+    bt_a_mux_sel_o     = OP_A_CURRPC;
+    bt_b_mux_sel_o     = IMM_B_I;
+
+
+    opcode_alu         = opcode_e'(instr_alu[6:0]);
+
+    use_rs3_d          = 1'b0;
+    alu_multicycle_o   = 1'b0;
+    mult_sel_o         = 1'b0;
+    div_sel_o          = 1'b0;
+
+    // Floating-point defaults: plain FMADD without modifier.
+    fp_alu_op_mod_o       = 1'b0;
+    fp_alu_operator_o     = FMADD;
+    fp_alu_op_b_mux_sel_o = OP_B_IMM; // op_b_sel_e, OP_B_REG_B
+
+    unique case (opcode_alu)
+
+      ///////////
+      // Jumps //
+      ///////////
+
+      OPCODE_JAL: begin // Jump and Link
+        if (BranchTargetALU) begin
+          bt_a_mux_sel_o = OP_A_CURRPC;
+          bt_b_mux_sel_o = IMM_B_J;
+        end
+
+        // Jumps take two cycles without the BTALU
+        if (instr_first_cycle_i && !BranchTargetALU) begin
+          // Calculate jump target
+          alu_op_a_mux_sel_o  = OP_A_CURRPC;
+          alu_op_b_mux_sel_o  = OP_B_IMM;
+          imm_b_mux_sel_o     = IMM_B_J;
+          alu_operator_o      = ALU_ADD;
+        end else begin
+          // Calculate and store PC+4
+          alu_op_a_mux_sel_o  = OP_A_CURRPC;
+          alu_op_b_mux_sel_o  = OP_B_IMM;
+          imm_b_mux_sel_o     = IMM_B_INCR_PC;
+          alu_operator_o      = ALU_ADD;
+        end
+      end
+
+      OPCODE_JALR: begin // Jump and Link Register
+        if (BranchTargetALU) begin
+          bt_a_mux_sel_o = OP_A_REG_A;
+          bt_b_mux_sel_o = IMM_B_I;
+        end
+
+        // Jumps take two cycles without the BTALU
+        if (instr_first_cycle_i && !BranchTargetALU) begin
+          // Calculate jump target
+          alu_op_a_mux_sel_o  = OP_A_REG_A;
+          alu_op_b_mux_sel_o  = OP_B_IMM;
+          imm_b_mux_sel_o     = IMM_B_I;
+          alu_operator_o      = ALU_ADD;
+        end else begin
+          // Calculate and store PC+4
+          alu_op_a_mux_sel_o  = OP_A_CURRPC;
+          alu_op_b_mux_sel_o  = OP_B_IMM;
+          imm_b_mux_sel_o     = IMM_B_INCR_PC;
+          alu_operator_o      = ALU_ADD;
+        end
+      end
+
+      OPCODE_BRANCH: begin // Branch
+        // Check branch condition selection
+        // (funct3 010 and 011 match no item and keep the default operator)
+        unique case (instr_alu[14:12])
+          3'b000:  alu_operator_o = ALU_EQ;
+          3'b001:  alu_operator_o = ALU_NE;
+          3'b100:  alu_operator_o = ALU_LT;
+          3'b101:  alu_operator_o = ALU_GE;
+          3'b110:  alu_operator_o = ALU_LTU;
+          3'b111:  alu_operator_o = ALU_GEU;
+          default: ;
+        endcase
+
+        if (BranchTargetALU) begin
+          bt_a_mux_sel_o = OP_A_CURRPC;
+          // Not-taken branch will jump to next instruction (used in secure mode)
+          bt_b_mux_sel_o = branch_taken_i ? IMM_B_B : IMM_B_INCR_PC;
+        end
+
+        // Without branch target ALU, a branch is a two-stage operation using the Main ALU in both
+        // stages
+        if (instr_first_cycle_i) begin
+          // First evaluate the branch condition
+          alu_op_a_mux_sel_o  = OP_A_REG_A;
+          alu_op_b_mux_sel_o  = OP_B_REG_B;
+        end else begin
+          // Then calculate jump target
+          alu_op_a_mux_sel_o  = OP_A_CURRPC;
+          alu_op_b_mux_sel_o  = OP_B_IMM;
+          // Not-taken branch will jump to next instruction (used in secure mode)
+          imm_b_mux_sel_o     = branch_taken_i ? IMM_B_B : IMM_B_INCR_PC;
+          alu_operator_o      = ALU_ADD;
+        end
+      end
+
+      ////////////////
+      // Load/store //
+      ////////////////
+
+      OPCODE_STORE: begin
+        alu_op_a_mux_sel_o = OP_A_REG_A;
+        alu_op_b_mux_sel_o = OP_B_REG_B;
+        alu_operator_o     = ALU_ADD;
+
+        // Operand B stays on register B only when instr_alu[14] is set.
+        if (!instr_alu[14]) begin
+          // offset from immediate
+          imm_b_mux_sel_o     = IMM_B_S;
+          alu_op_b_mux_sel_o  = OP_B_IMM;
+        end
+      end
+
+      OPCODE_LOAD: begin
+        alu_op_a_mux_sel_o  = OP_A_REG_A;
+
+        // offset from immediate
+        alu_operator_o      = ALU_ADD;
+        alu_op_b_mux_sel_o  = OP_B_IMM;
+        imm_b_mux_sel_o     = IMM_B_I;
+      end
+
+      /////////
+      // ALU //
+      /////////
+
+      OPCODE_LUI: begin  // Load Upper Immediate
+        // Computed as (zero immediate) + (U immediate)
+        alu_op_a_mux_sel_o  = OP_A_IMM;
+        alu_op_b_mux_sel_o  = OP_B_IMM;
+        imm_a_mux_sel_o     = IMM_A_ZERO;
+        imm_b_mux_sel_o     = IMM_B_U;
+        alu_operator_o      = ALU_ADD;
+      end
+
+      OPCODE_AUIPC: begin  // Add Upper Immediate to PC
+        alu_op_a_mux_sel_o  = OP_A_CURRPC;
+        alu_op_b_mux_sel_o  = OP_B_IMM;
+        imm_b_mux_sel_o     = IMM_B_U;
+        alu_operator_o      = ALU_ADD;
+      end
+
+      OPCODE_OP_IMM: begin // Register-Immediate ALU Operations
+        alu_op_a_mux_sel_o  = OP_A_REG_A;
+        alu_op_b_mux_sel_o  = OP_B_IMM;
+        imm_b_mux_sel_o     = IMM_B_I;
+
+        // All eight funct3 values are listed below, so no default item is needed.
+        unique case (instr_alu[14:12])
+          3'b000: alu_operator_o = ALU_ADD;  // Add Immediate
+          3'b010: alu_operator_o = ALU_SLT;  // Set to one if Lower Than Immediate
+          3'b011: alu_operator_o = ALU_SLTU; // Set to one if Lower Than Immediate Unsigned
+          3'b100: alu_operator_o = ALU_XOR;  // Exclusive Or with Immediate
+          3'b110: alu_operator_o = ALU_OR;   // Or with Immediate
+          3'b111: alu_operator_o = ALU_AND;  // And with Immediate
+
+          3'b001: begin
+            if (RV32B != RV32BNone) begin
+              unique case (instr_alu[31:27])
+                5'b0_0000: alu_operator_o = ALU_SLL;    // Shift Left Logical by Immediate
+                5'b0_0100: alu_operator_o = ALU_SLO;    // Shift Left Ones by Immediate
+                5'b0_1001: alu_operator_o = ALU_SBCLR;  // Clear bit specified by immediate
+                5'b0_0101: alu_operator_o = ALU_SBSET;  // Set bit specified by immediate
+                5'b0_1101: alu_operator_o = ALU_SBINV;  // Invert bit specified by immediate.
+                // Shuffle with Immediate Control Value
+                5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL;
+                5'b0_1100: begin
+                  unique case (instr_alu[26:20])
+                    7'b000_0000: alu_operator_o = ALU_CLZ;   // clz
+                    7'b000_0001: alu_operator_o = ALU_CTZ;   // ctz
+                    7'b000_0010: alu_operator_o = ALU_PCNT;  // pcnt
+                    7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b
+                    7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h
+                    // The CRC32 variants are only decoded for RV32BFull and are multi-cycle.
+                    7'b001_0000: begin
+                      if (RV32B == RV32BFull) begin
+                        alu_operator_o = ALU_CRC32_B;  // crc32.b
+                        alu_multicycle_o = 1'b1;
+                      end
+                    end
+                    7'b001_0001: begin
+                      if (RV32B == RV32BFull) begin
+                        alu_operator_o = ALU_CRC32_H;  // crc32.h
+                        alu_multicycle_o = 1'b1;
+                      end
+                    end
+                    7'b001_0010: begin
+                      if (RV32B == RV32BFull) begin
+                        alu_operator_o = ALU_CRC32_W;  // crc32.w
+                        alu_multicycle_o = 1'b1;
+                      end
+                    end
+                    7'b001_1000: begin
+                      if (RV32B == RV32BFull) begin
+                        alu_operator_o = ALU_CRC32C_B; // crc32c.b
+                        alu_multicycle_o = 1'b1;
+                      end
+                    end
+                    7'b001_1001: begin
+                      if (RV32B == RV32BFull) begin
+                        alu_operator_o = ALU_CRC32C_H; // crc32c.h
+                        alu_multicycle_o = 1'b1;
+                      end
+                    end
+                    7'b001_1010: begin
+                      if (RV32B == RV32BFull) begin
+                        alu_operator_o = ALU_CRC32C_W; // crc32c.w
+                        alu_multicycle_o = 1'b1;
+                      end
+                    end
+                    default: ;
+                  endcase
+                end
+
+                default: ;
+              endcase
+            end else begin
+              alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+            end
+          end
+
+          3'b101: begin
+            if (RV32B != RV32BNone) begin
+              if (instr_alu[26] == 1'b1) begin
+                // Funnel shift right by immediate: multi-cycle, rs3 is requested in the first cycle only
+                alu_operator_o = ALU_FSR;
+                alu_multicycle_o = 1'b1;
+                if (instr_first_cycle_i) begin
+                  use_rs3_d = 1'b1;
+                end else begin
+                  use_rs3_d = 1'b0;
+                end
+              end else begin
+                unique case (instr_alu[31:27])
+                  5'b0_0000: alu_operator_o = ALU_SRL;   // Shift Right Logical by Immediate
+                  5'b0_1000: alu_operator_o = ALU_SRA;   // Shift Right Arithmetically by Immediate
+                  5'b0_0100: alu_operator_o = ALU_SRO;   // Shift Right Ones by Immediate
+                  5'b0_1001: alu_operator_o = ALU_SBEXT; // Extract bit specified by immediate.
+                  5'b0_1100: begin
+                    alu_operator_o = ALU_ROR;            // Rotate Right by Immediate
+                    alu_multicycle_o = 1'b1;
+                  end
+                  5'b0_1101: alu_operator_o = ALU_GREV;  // General Reverse with Imm Control Val
+                  5'b0_0101: alu_operator_o = ALU_GORC;  // General Or-combine with Imm Control Val
+                  // Unshuffle with Immediate Control Value
+                  5'b0_0001: begin
+                    if (RV32B == RV32BFull) begin
+                      if (instr_alu[26] == 1'b0) alu_operator_o = ALU_UNSHFL;
+                    end
+                  end
+                  default: ;
+                endcase
+              end
+
+            end else begin
+              if (instr_alu[31:27] == 5'b0_0000) begin
+                alu_operator_o = ALU_SRL;               // Shift Right Logical by Immediate
+              end else if (instr_alu[31:27] == 5'b0_1000) begin
+                alu_operator_o = ALU_SRA;               // Shift Right Arithmetically by Immediate
+              end
+            end
+          end
+
+        //  default: ;
+        endcase
+      end
+
+      OPCODE_OP: begin  // Register-Register ALU operation
+        alu_op_a_mux_sel_o = OP_A_REG_A;
+        alu_op_b_mux_sel_o = OP_B_REG_B;
+
+        if (instr_alu[26]) begin
+          // Ternary bit-manipulation operations: all multi-cycle, rs3 is requested
+          // in the first cycle only.
+          if (RV32B != RV32BNone) begin
+            unique case ({instr_alu[26:25], instr_alu[14:12]})
+              {2'b11, 3'b001}: begin
+                alu_operator_o   = ALU_CMIX; // cmix
+                alu_multicycle_o = 1'b1;
+                if (instr_first_cycle_i) begin
+                  use_rs3_d = 1'b1;
+                end else begin
+                  use_rs3_d = 1'b0;
+                end
+              end
+              {2'b11, 3'b101}: begin
+                alu_operator_o   = ALU_CMOV; // cmov
+                alu_multicycle_o = 1'b1;
+                if (instr_first_cycle_i) begin
+                  use_rs3_d = 1'b1;
+                end else begin
+                  use_rs3_d = 1'b0;
+                end
+              end
+              {2'b10, 3'b001}: begin
+                alu_operator_o   = ALU_FSL;  // fsl
+                alu_multicycle_o = 1'b1;
+                if (instr_first_cycle_i) begin
+                  use_rs3_d = 1'b1;
+                end else begin
+                  use_rs3_d = 1'b0;
+                end
+              end
+              {2'b10, 3'b101}: begin
+                alu_operator_o   = ALU_FSR;  // fsr
+                alu_multicycle_o = 1'b1;
+                if (instr_first_cycle_i) begin
+                  use_rs3_d = 1'b1;
+                end else begin
+                  use_rs3_d = 1'b0;
+                end
+              end
+              default: ;
+            endcase
+          end
+        end else begin
+          unique case ({instr_alu[31:25], instr_alu[14:12]})
+            // RV32I ALU operations
+            {7'b000_0000, 3'b000}: alu_operator_o = ALU_ADD;   // Add
+            {7'b010_0000, 3'b000}: alu_operator_o = ALU_SUB;   // Sub
+            {7'b000_0000, 3'b010}: alu_operator_o = ALU_SLT;   // Set Lower Than
+            {7'b000_0000, 3'b011}: alu_operator_o = ALU_SLTU;  // Set Lower Than Unsigned
+            {7'b000_0000, 3'b100}: alu_operator_o = ALU_XOR;   // Xor
+            {7'b000_0000, 3'b110}: alu_operator_o = ALU_OR;    // Or
+            {7'b000_0000, 3'b111}: alu_operator_o = ALU_AND;   // And
+            {7'b000_0000, 3'b001}: alu_operator_o = ALU_SLL;   // Shift Left Logical
+            {7'b000_0000, 3'b101}: alu_operator_o = ALU_SRL;   // Shift Right Logical
+            {7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA;   // Shift Right Arithmetic
+
+            // RV32B ALU Operations
+            {7'b001_0000, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SLO;   // slo
+            {7'b001_0000, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_SRO;   // sro
+            {7'b011_0000, 3'b001}: begin
+              if (RV32B != RV32BNone) begin
+                alu_operator_o = ALU_ROL;   // rol
+                alu_multicycle_o = 1'b1;
+              end
+            end
+            {7'b011_0000, 3'b101}: begin
+              if (RV32B != RV32BNone) begin
+                alu_operator_o = ALU_ROR;   // ror
+                alu_multicycle_o = 1'b1;
+              end
+            end
+
+            {7'b000_0101, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_MIN;    // min
+            {7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX;    // max
+            {7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU;   // minu
+            {7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU;   // maxu
+
+            {7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK;   // pack
+            {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU;  // packu
+            {7'b000_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKH;  // packh
+
+            {7'b010_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_XNOR;   // xnor
+            {7'b010_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_ORN;    // orn
+            {7'b010_0000, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_ANDN;   // andn
+
+            // RV32B zbs
+            {7'b010_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBCLR;  // sbclr
+            {7'b001_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBSET;  // sbset
+            {7'b011_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBINV;  // sbinv
+            {7'b010_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBEXT;  // sbext
+
+            // RV32B zbf
+            {7'b010_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_BFP;    // bfp
+
+            // RV32B zbp
+            {7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV;   // grev
+            {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC;   // gorc
+            {7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL;   // shfl
+            {7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
+
+            // RV32B zbc
+            {7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL;  // clmul
+            {7'b000_0101, 3'b010}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMULR; // clmulr
+            {7'b000_0101, 3'b011}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMULH; // clmulh
+
+            // RV32B zbe
+            {7'b010_0100, 3'b110}: begin
+              if (RV32B == RV32BFull) begin
+                alu_operator_o = ALU_BDEP;   // bdep
+                alu_multicycle_o = 1'b1;
+              end
+            end
+            {7'b000_0100, 3'b110}: begin
+              if (RV32B == RV32BFull) begin
+                alu_operator_o = ALU_BEXT;   // bext
+                alu_multicycle_o = 1'b1;
+              end
+            end
+
+            // RV32M instructions, all use the same ALU operation
+            // (the mult/div select is only raised when RV32M is configured)
+            {7'b000_0001, 3'b000}: begin // mul
+              alu_operator_o = ALU_ADD;
+              mult_sel_o     = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+            {7'b000_0001, 3'b001}: begin // mulh
+              alu_operator_o = ALU_ADD;
+              mult_sel_o     = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+            {7'b000_0001, 3'b010}: begin // mulhsu
+              alu_operator_o = ALU_ADD;
+              mult_sel_o     = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+            {7'b000_0001, 3'b011}: begin // mulhu
+              alu_operator_o = ALU_ADD;
+              mult_sel_o     = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+            {7'b000_0001, 3'b100}: begin // div
+              alu_operator_o = ALU_ADD;
+              div_sel_o      = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+            {7'b000_0001, 3'b101}: begin // divu
+              alu_operator_o = ALU_ADD;
+              div_sel_o      = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+            {7'b000_0001, 3'b110}: begin // rem
+              alu_operator_o = ALU_ADD;
+              div_sel_o      = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+            {7'b000_0001, 3'b111}: begin // remu
+              alu_operator_o = ALU_ADD;
+              div_sel_o      = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+            end
+
+            default: ;
+          endcase
+        end
+      end
+
+      /////////////
+      // Special //
+      /////////////
+
+      OPCODE_MISC_MEM: begin
+        unique case (instr_alu[14:12])
+          3'b000: begin
+            // FENCE is treated as a NOP since all memory operations are already strictly ordered.
+            alu_operator_o     = ALU_ADD; // nop
+            alu_op_a_mux_sel_o = OP_A_REG_A;
+            alu_op_b_mux_sel_o = OP_B_IMM;
+          end
+          3'b001: begin
+            // FENCE.I will flush the IF stage, prefetch buffer and ICache if present.
+            // The ALU (or the branch-target ALU) computes the next PC (current PC + increment).
+            if (BranchTargetALU) begin
+              bt_a_mux_sel_o     = OP_A_CURRPC;
+              bt_b_mux_sel_o     = IMM_B_INCR_PC;
+            end else begin
+              alu_op_a_mux_sel_o = OP_A_CURRPC;
+              alu_op_b_mux_sel_o = OP_B_IMM;
+              imm_b_mux_sel_o    = IMM_B_INCR_PC;
+              alu_operator_o     = ALU_ADD;
+            end
+          end
+          default: ;
+        endcase
+      end
+
+      OPCODE_SYSTEM: begin
+        if (instr_alu[14:12] == 3'b000) begin
+          // non CSR related SYSTEM instructions
+          alu_op_a_mux_sel_o = OP_A_REG_A;
+          alu_op_b_mux_sel_o = OP_B_IMM;
+        end else begin
+          // instruction to read/modify CSR
+          alu_op_b_mux_sel_o = OP_B_IMM;
+          imm_a_mux_sel_o    = IMM_A_Z;
+          imm_b_mux_sel_o    = IMM_B_I;  // CSR address is encoded in I imm
+
+          if (instr_alu[14]) begin
+            // rs1 field is used as immediate
+            alu_op_a_mux_sel_o = OP_A_IMM;
+          end else begin
+            alu_op_a_mux_sel_o = OP_A_REG_A;
+          end
+        end
+      end
+      //////////////////////////////////////////
+      //  Floating Point Extension (F and D)  //
+      //////////////////////////////////////////
+
+      OPCODE_STORE_FP: begin
+        alu_op_a_mux_sel_o = OP_A_REG_A;
+        alu_op_b_mux_sel_o = OP_B_REG_B;
+        alu_operator_o     = ALU_ADD;
+
+        // Address = rs1 + S-type immediate for both supported widths; any other
+        // funct3 leaves operand B on register B.
+        unique case(instr[14:12])
+          3'b011: begin // FSD
+            imm_b_mux_sel_o     = IMM_B_S;
+            alu_op_b_mux_sel_o  = OP_B_IMM;
+          end
+          3'b010: begin // FSW
+            imm_b_mux_sel_o     = IMM_B_S;
+            alu_op_b_mux_sel_o  = OP_B_IMM;
+          end
+          default: ;
+        endcase
+      end
+
+      OPCODE_LOAD_FP: begin
+        // Address = rs1 + I-type immediate for both supported widths.
+        unique case(instr[14:12])
+          3'b011: begin // FLD
+            alu_op_a_mux_sel_o    = OP_A_REG_A;
+
+            alu_operator_o      = ALU_ADD;
+            alu_op_b_mux_sel_o  = OP_B_IMM;
+            imm_b_mux_sel_o     = IMM_B_I;
+          end
+          3'b010: begin // FLW
+            alu_op_a_mux_sel_o    = OP_A_REG_A;
+
+            alu_operator_o      = ALU_ADD;
+            alu_op_b_mux_sel_o  = OP_B_IMM;
+            imm_b_mux_sel_o     = IMM_B_I;
+          end
+          default: ;
+        endcase
+      end
+
+      // Fused multiply-add family. instr[26:25] is the format field
+      // (2'b00 = single, 2'b01 = double per the RISC-V F/D encoding); both formats
+      // select the same operator and modifier here.
+      OPCODE_MADD_FP:  begin // FMADD.S, FMADD.D
+        unique case (instr[26:25])
+          2'b01: begin
+            fp_alu_operator_o     = FMADD;
+            fp_alu_op_mod_o       = 1'b0;
+          end
+          2'b00: begin
+            fp_alu_operator_o     = FMADD;
+            fp_alu_op_mod_o       = 1'b0;
+          end
+          default: ;
+        endcase
+      end
+
+      OPCODE_MSUB_FP: begin // FMSUB.S, FMSUB.D
+        unique case (instr[26:25])
+          2'b01: begin
+            fp_alu_operator_o     = FMADD;
+            fp_alu_op_mod_o       = 1'b1;
+          end
+          2'b00: begin
+            fp_alu_operator_o     = FMADD;
+            fp_alu_op_mod_o       = 1'b1;
+          end
+          default: ;
+        endcase
+      end
+
+      OPCODE_NMSUB_FP: begin // FNMSUB.S, FNMSUB.D
+        unique case (instr[26:25])
+          2'b01: begin
+            fp_alu_operator_o     = FNMSUB;
+          end
+          2'b00: begin
+            fp_alu_operator_o     = FNMSUB;
+          end
+          default: ;
+        endcase
+      end
+
+      OPCODE_NMADD_FP: begin // FNMADD.S, FNMADD.D
+        unique case (instr[26:25])
+          2'b01: begin
+            fp_alu_operator_o     = FNMSUB;
+            fp_alu_op_mod_o       = 1'b1;
+          end
+          2'b00: begin
+            fp_alu_operator_o     = FNMSUB;
+            fp_alu_op_mod_o       = 1'b1;
+          end
+          default: ;
+        endcase
+      end
+
+      OPCODE_OP_FP: begin
+        // funct7 (instr[31:25]) selects the operation; encodings that fail the
+        // additional field checks below keep the default operator (FMADD).
+        unique case (instr[31:25])
+          7'b0000001: begin // FADD.D
+            fp_alu_operator_o     = ADD;
+          end
+          7'b0000101: begin // FSUB.D
+            fp_alu_operator_o     = ADD;
+            fp_alu_op_mod_o       = 1'b1;
+          end
+          7'b0001001: begin // FMUL.D
+            fp_alu_operator_o     = MUL;
+          end
+          7'b0001101: begin // FDIV.D
+            fp_alu_operator_o     = DIV;
+          end
+          7'b0000000: begin // FADD.S
+            fp_alu_operator_o     = ADD;
+          end
+          7'b0000100: begin // FSUB.S
+            fp_alu_operator_o     = ADD;
+            fp_alu_op_mod_o       = 1'b1;
+          end
+          7'b0001000: begin // FMUL.S
+            fp_alu_operator_o     = MUL;
+          end
+          7'b0001100: begin // FDIV.S
+            fp_alu_operator_o     = DIV;
+          end
+          7'b0101101: begin
+            if (~|instr[24:20]) begin // FSQRT.D
+              fp_alu_operator_o     = SQRT;
+            end
+          end
+          7'b0101100: begin // FSQRT.S
+            if (~|instr[24:20]) begin
+              fp_alu_operator_o     = SQRT;
+            end
+          end
+          7'b0010001: begin // FSGNJ.D, FSGNJN.D, FSGNJX.D
+            // funct3 must be 000, 001 or 010
+            if (~(instr[14] | (&instr[13:12]))) begin
+              fp_alu_operator_o     = SGNJ;
+            end
+          end
+          7'b0010000: begin // FSGNJ.S, FSGNJN.S, FSGNJX.S
+            if (~(instr[14] | (&instr[13:12]))) begin
+              fp_alu_operator_o     = SGNJ;
+            end
+          end
+          7'b0010101: begin // FMIN.D, FMAX.D
+            // funct3 must be 000 or 001
+            if (~|instr[14:13]) begin
+              fp_alu_operator_o     = MINMAX;
+            end
+          end
+          7'b0010100: begin // FMIN.S, FMAX.S
+            if (~|instr[14:13]) begin
+              fp_alu_operator_o     = MINMAX;
+            end
+          end
+          7'b0100000: begin // FCVT.S.D
+            // rs2 field must be 5'b00001
+            if (~(|instr[24:21] | (~instr[20]))) begin
+              fp_alu_operator_o     = F2F;
+            end
+          end
+          7'b1100000: begin // FCVT.W.S, FCVT.WU.S
+            if (~|instr[24:21]) begin
+              fp_alu_operator_o     = F2I;
+
+              // instr[20] set (the WU form) raises the operation modifier
+              if (instr[20])
+                fp_alu_op_mod_o       = 1'b1;
+            end
+          end
+          7'b0100001: begin // FCVT.D.S
+            if (~|instr[24:20]) begin
+              fp_alu_operator_o     = F2F;
+            end
+          end
+          7'b1110000: begin // FMV.X.W , FCLASS.S
+            // Only FCLASS.S selects an operator here; FMV.X.W keeps the default.
+            unique case ({instr[24:20],instr[14:12]})
+              // {5'b00000,3'b000}: begin
+              //   fp_alu_operator_o     = ADD;   // to be decided YET
+              // end
+              {5'b00000,3'b001}: begin
+                fp_alu_operator_o     = CLASSIFY;
+              end
+              default: ;
+            endcase
+          end
+          // NOTE(review): the two compare conditions below are true for funct3
+          // 000, 001, 010 and also for 011 and 111, whereas the RISC-V spec defines
+          // only 000 (FLE), 001 (FLT) and 010 (FEQ). The FSGNJ items use
+          // ~(instr[14] | (&instr[13:12])) for the same three values. Left unchanged
+          // to stay in step with the main decoder - confirm intent.
+          7'b1010001: begin // FEQ.D, FLT.D, FLE.D
+            if ((~instr[14]) | (&instr[13:12])) begin
+              fp_alu_operator_o     = CMP;
+            end
+          end
+          7'b1010000: begin // FEQ.S, FLT.S, FLE.S
+            if ((~instr[14]) | (&instr[13:12])) begin
+              fp_alu_operator_o     = CMP;
+            end
+          end
+          7'b1110001: begin // FCLASS.D
+            unique case ({instr[24:20],instr[14:12]})
+              {5'b00000,3'b001}: begin
+                fp_alu_operator_o     = CLASSIFY;
+              end
+              default: ;
+            endcase
+          end
+          7'b1100001: begin // FCVT.W.D, FCVT.WU.D
+            if (~|instr[24:21]) begin
+              fp_alu_operator_o     = F2I;
+
+              if (instr[20])
+                fp_alu_op_mod_o     = 1'b1;
+            end
+          end
+          7'b1101000: begin // FCVT.S.W, FCVT.S.WU
+            if (~(|instr[24:21])) begin
+              fp_alu_operator_o     = I2F;
+
+              if (instr[20])
+                fp_alu_op_mod_o     = 1'b1;
+            end
+          end
+          // NOTE(review): the RISC-V spec encodes FCVT.D.W / FCVT.D.WU with funct7
+          // 7'b1101001; 7'b1111001 is FMV.D.X. The main decoder uses the same value,
+          // so it is left unchanged here - confirm which encoding is intended.
+          7'b1111001: begin // FCVT.D.W, FCVT.D.WU
+            if (~|instr[24:21]) begin
+              fp_alu_operator_o     = I2F;
+
+              if (instr[20])
+                fp_alu_op_mod_o     = 1'b1;
+            end
+          end
+          // 7'b1111000: begin // FMV.W.X
+          //   if ((|instr[24:20]) | (|instr[14:12])) begin
+          //     fp_alu_operator_o     = FMADD;  // to be decided
+          //   end
+          // end
+          default: ;
+        endcase
+      end
+      default: ;
+    endcase
+  end
+
+  // Gate the multiplier/divider enables with the illegal-instruction flag so that
+  // multdiv is never enabled for an instruction the decoder flagged as illegal.
+  assign mult_en_o = mult_sel_o & ~illegal_insn;
+  assign div_en_o  = div_sel_o  & ~illegal_insn;
+
+  // Report an illegal instruction when either the decoder flagged it or it
+  // accesses a register that is not available in RV32E.
+  assign illegal_insn_o = illegal_reg_rv32e | illegal_insn;
+
+  // Do not propagate the regfile write enable if non-available registers are accessed in RV32E.
+  assign rf_we_o = illegal_reg_rv32e ? 1'b0 : rf_we;
+
+  ////////////////
+  // Assertions //
+  ////////////////
+
+//   // Selectors must be known/valid.
+//  `ASSERT(buraqRegImmAluOpKnown, (opcode == OPCODE_OP_IMM) |->
+//      !$isunknown(instr[14:12]))
+endmodule 
\ No newline at end of file
diff --git a/verilog/rtl/brq_ifu.sv b/verilog/rtl/brq_ifu.sv
new file mode 100644
index 0000000..4331423
--- /dev/null
+++ b/verilog/rtl/brq_ifu.sv
@@ -0,0 +1,501 @@
+
+
+/**
+ * Instruction Fetch Stage
+ *
+ * Instruction fetch unit: Selection of the next PC, and buffering (sampling) of
+ * the read instruction.
+ */
+
+
+
+module brq_ifu #(
+    parameter int unsigned DmHaltAddr        = 32'h1A110800,
+    parameter int unsigned DmExceptionAddr   = 32'h1A110808,
+    parameter bit          DummyInstructions = 1'b0,
+    parameter bit          ICache            = 1'b0,
+    parameter bit          ICacheECC         = 1'b0,
+    parameter bit          PCIncrCheck       = 1'b0,
+    parameter bit          BranchPredictor   = 1'b0
+) (
+    input  logic                   clk_i,
+    input  logic                   rst_ni,
+
+    input  logic [31:0]            boot_addr_i,              // also used for mtvec
+    input  logic                   req_i,                    // instruction request control
+
+    // instruction cache interface
+    output logic                  instr_req_o,
+    output logic [31:0]           instr_addr_o,
+    input  logic                  instr_gnt_i,
+    input  logic                  instr_rvalid_i,
+    input  logic [31:0]           instr_rdata_i,
+    input  logic                  instr_err_i,
+    input  logic                  instr_pmp_err_i,
+
+    // output of ID stage
+    output logic                  instr_valid_id_o,         // instr in IF-ID is valid
+    output logic                  instr_new_id_o,           // instr in IF-ID is new
+    output logic [31:0]           instr_rdata_id_o,         // instr for ID stage
+    output logic [31:0]           instr_rdata_alu_id_o,     // replicated instr for ID stage
+                                                            // to reduce fan-out
+    output logic [15:0]           instr_rdata_c_id_o,       // compressed instr for ID stage
+                                                            // (mtval), meaningful only if
+                                                            // instr_is_compressed_id_o = 1'b1
+    output logic                  instr_is_compressed_id_o, // compressed decoder thinks this
+                                                            // is a compressed instr
+    output logic                  instr_bp_taken_o,         // instruction was predicted to be
+                                                            // a taken branch
+    output logic                  instr_fetch_err_o,        // bus error on fetch
+    output logic                  instr_fetch_err_plus2_o,  // bus error misaligned
+    output logic                  illegal_c_insn_id_o,      // compressed decoder thinks this
+                                                            // is an invalid instr
+    output logic                  dummy_instr_id_o,         // Instruction is a dummy
+    output logic [31:0]           pc_if_o,
+    output logic [31:0]           pc_id_o,
+
+    // control signals
+    input  logic                  instr_valid_clear_i,      // clear instr valid bit in IF-ID
+    input  logic                  pc_set_i,                 // set the PC to a new value
+    input  logic                  pc_set_spec_i,
+    input  brq_pkg::pc_sel_e      pc_mux_i,                 // selector for PC multiplexer
+    input  logic                  nt_branch_mispredict_i,   // Not-taken branch in ID/EX was
+                                                            // mispredicted (predicted taken)
+    input  brq_pkg::exc_pc_sel_e exc_pc_mux_i,             // selects ISR address
+    input  brq_pkg::exc_cause_e  exc_cause,                // selects ISR address for
+                                                            // vectorized interrupt lines
+    input logic                   dummy_instr_en_i,
+    input logic [2:0]             dummy_instr_mask_i,
+    input logic                   dummy_instr_seed_en_i,
+    input logic [31:0]            dummy_instr_seed_i,
+    input logic                   icache_enable_i,
+    input logic                   icache_inval_i,
+
+    // jump and branch target
+    input  logic [31:0]           branch_target_ex_i,       // branch/jump target address
+
+    // CSRs
+    input  logic [31:0]           csr_mepc_i,               // PC to restore after handling
+                                                            // the interrupt/exception
+    input  logic [31:0]           csr_depc_i,               // PC to restore after handling
+                                                            // the debug request
+    input  logic [31:0]           csr_mtvec_i,              // base PC to jump to on exception
+    output logic                  csr_mtvec_init_o,         // tell CS regfile to init mtvec
+
+    // pipeline stall
+    input  logic                  id_in_ready_i,            // ID stage is ready for new instr
+
+    // misc signals
+    output logic                  pc_mismatch_alert_o,
+    output logic                  if_busy_o                 // IF stage is busy fetching instr
+);
+
+  import brq_pkg::*;
+
+  logic              instr_valid_id_d, instr_valid_id_q;
+  logic              instr_new_id_d, instr_new_id_q;
+
+  // prefetch buffer related signals
+  logic              prefetch_busy;
+  logic              branch_req;
+  logic              branch_spec;
+  logic              predicted_branch;
+  logic       [31:0] fetch_addr_n;
+  logic              unused_fetch_addr_n0;
+
+  logic              fetch_valid;
+  logic              fetch_ready;
+  logic       [31:0] fetch_rdata;
+  logic       [31:0] fetch_addr;
+  logic              fetch_err;
+  logic              fetch_err_plus2;
+
+  logic              if_instr_valid;
+  logic       [31:0] if_instr_rdata;
+  logic       [31:0] if_instr_addr;
+  logic              if_instr_err;
+
+  logic       [31:0] exc_pc;
+
+  logic        [5:0] irq_id;
+  logic              unused_irq_bit;
+
+  logic              if_id_pipe_reg_we; // IF-ID pipeline reg write enable
+
+  // Dummy instruction signals
+  logic              stall_dummy_instr;
+  logic [31:0]       instr_out;
+  logic              instr_is_compressed_out;
+  logic              illegal_c_instr_out;
+  logic              instr_err_out;
+
+  logic              predict_branch_taken;
+  logic       [31:0] predict_branch_pc;
+
+  brq_pkg::pc_sel_e pc_mux_internal;
+
+  // Lint sinks: only bits [1:0] of these inputs are unconsumed, bits [31:2] feed the PC muxes.
+  logic        [1:0] unused_boot_addr;
+  logic        [1:0] unused_csr_mtvec;
+  assign unused_boot_addr = boot_addr_i[1:0];
+  assign unused_csr_mtvec = csr_mtvec_i[1:0];
+
+  // extract interrupt ID from exception cause
+  assign irq_id         = {exc_cause};
+  assign unused_irq_bit = irq_id[5];   // MSB distinguishes interrupts from exceptions
+
+  // Trap target selection: produces the address fetched when the PC mux selects PC_EXC.
+  always_comb begin : exc_pc_mux
+    unique case (exc_pc_mux_i)
+      // Exceptions and interrupts enter at the same address, the word-aligned mtvec base.
+      EXC_PC_EXC, EXC_PC_IRQ: exc_pc = {csr_mtvec_i[31:2], 2'b00};
+      // Debug entry points come straight from the module parameters.
+      EXC_PC_DBD:             exc_pc = DmHaltAddr;
+      EXC_PC_DBG_EXC:         exc_pc = DmExceptionAddr;
+    endcase
+  end
+
+  // The Branch predictor can provide a new PC which is internal to ifu. Only override the mux
+  // select to choose this if the core isn't already trying to set a PC.
+  assign pc_mux_internal =
+    (BranchPredictor && predict_branch_taken && !pc_set_i) ? PC_BP : pc_mux_i;
+
+  // Next fetch address: one source per PC selector value, boot address for anything else.
+  always_comb begin : fetch_addr_mux
+    unique case (pc_mux_internal)
+      PC_JUMP: fetch_addr_n = branch_target_ex_i;           // branch/jump target address
+      PC_EXC:  fetch_addr_n = exc_pc;                       // output of the exception PC mux
+      PC_ERET: fetch_addr_n = csr_mepc_i;                   // PC restored after an exception
+      PC_DRET: fetch_addr_n = csr_depc_i;                   // PC restored after a debug request
+      // pc_mux_internal is never forced to PC_BP without a branch predictor. The constant
+      // alternative is still spelled out so that no redundant mux logic is synthesised.
+      PC_BP:   fetch_addr_n = BranchPredictor ? predict_branch_pc : {boot_addr_i[31:2], 2'b00};
+      PC_BOOT: fetch_addr_n = {boot_addr_i[31:2], 2'b00};   // word-aligned boot address
+      default: fetch_addr_n = {boot_addr_i[31:2], 2'b00};
+    endcase
+  end
+
+  // tell CS register file to initialize mtvec on boot
+  assign csr_mtvec_init_o = (pc_mux_i == PC_BOOT) & pc_set_i;
+
+  if (ICache) begin : gen_ifu_icache
+    // Full I-Cache option
+    brq_ifu_icache #(
+      .BranchPredictor (BranchPredictor),
+      .ICacheECC       (ICacheECC)
+    ) icache_i (
+        .clk_i               ( clk_i                      ),
+        .rst_ni              ( rst_ni                     ),
+
+        .req_i               ( req_i                      ),
+
+        .branch_i            ( branch_req                 ),
+        .branch_spec_i       ( branch_spec                ),
+        .predicted_branch_i  ( predicted_branch           ),
+        .branch_mispredict_i ( nt_branch_mispredict_i     ),
+        .addr_i              ( {fetch_addr_n[31:1], 1'b0} ),
+
+        .ready_i             ( fetch_ready                ),
+        .valid_o             ( fetch_valid                ),
+        .rdata_o             ( fetch_rdata                ),
+        .addr_o              ( fetch_addr                 ),
+        .err_o               ( fetch_err                  ),
+        .err_plus2_o         ( fetch_err_plus2            ),
+
+        .instr_req_o         ( instr_req_o                ),
+        .instr_addr_o        ( instr_addr_o               ),
+        .instr_gnt_i         ( instr_gnt_i                ),
+        .instr_rvalid_i      ( instr_rvalid_i             ),
+        .instr_rdata_i       ( instr_rdata_i              ),
+        .instr_err_i         ( instr_err_i                ),
+        .instr_pmp_err_i     ( instr_pmp_err_i            ),
+
+        .icache_enable_i     ( icache_enable_i            ),
+        .icache_inval_i      ( icache_inval_i             ),
+        .busy_o              ( prefetch_busy              )
+    );
+  end else begin : gen_ifu_prefetch_buffer
+    // prefetch buffer, caches a fixed number of instructions
+    brq_ifu_prefetch_buffer #(
+      .BranchPredictor (BranchPredictor)
+    ) ifu_prefetch_buffer_i (
+        .clk_i               ( clk_i                      ),
+        .rst_ni              ( rst_ni                     ),
+
+        .req_i               ( req_i                      ),
+
+        .branch_i            ( branch_req                 ),
+        .branch_spec_i       ( branch_spec                ),
+        .predicted_branch_i  ( predicted_branch           ),
+        .branch_mispredict_i ( nt_branch_mispredict_i     ),
+        .addr_i              ( {fetch_addr_n[31:1], 1'b0} ),
+
+        .ready_i             ( fetch_ready                ),
+        .valid_o             ( fetch_valid                ),
+        .rdata_o             ( fetch_rdata                ),
+        .addr_o              ( fetch_addr                 ),
+        .err_o               ( fetch_err                  ),
+        .err_plus2_o         ( fetch_err_plus2            ),
+
+        .instr_req_o         ( instr_req_o                ),
+        .instr_addr_o        ( instr_addr_o               ),
+        .instr_gnt_i         ( instr_gnt_i                ),
+        .instr_rvalid_i      ( instr_rvalid_i             ),
+        .instr_rdata_i       ( instr_rdata_i              ),
+        .instr_err_i         ( instr_err_i                ),
+        .instr_pmp_err_i     ( instr_pmp_err_i            ),
+
+        .busy_o              ( prefetch_busy              )
+    );
+    // ICache tieoffs
+    logic unused_icen, unused_icinv;
+    assign unused_icen  = icache_enable_i;
+    assign unused_icinv = icache_inval_i;
+  end
+
+  assign unused_fetch_addr_n0 = fetch_addr_n[0];
+
+  assign branch_req  = pc_set_i | predict_branch_taken;
+  assign branch_spec = pc_set_spec_i | predict_branch_taken;
+
+  assign pc_if_o     = if_instr_addr;
+  assign if_busy_o   = prefetch_busy;
+
+  // compressed instruction decoding, or more precisely compressed instruction
+  // expander
+  //
+  // since it does not matter where we decompress instructions, we do it here
+  // to ease timing closure
+  logic [31:0] instr_decompressed;
+  logic        illegal_c_insn;
+  logic        instr_is_compressed;
+
+  brq_ifu_compressed_decoder ifu_compressed_decoder_i (
+      .clk_i           ( clk_i                    ),
+      .rst_ni          ( rst_ni                   ),
+      .valid_i         ( fetch_valid & ~fetch_err ),
+      .instr_i         ( if_instr_rdata           ),
+      .instr_o         ( instr_decompressed       ),
+      .is_compressed_o ( instr_is_compressed      ),
+      .illegal_instr_o ( illegal_c_insn           )
+  );
+
+  // Dummy instruction insertion
+  if (DummyInstructions) begin : gen_dummy_instr
+    logic        insert_dummy_instr;
+    logic [31:0] dummy_instr_data;
+
+    brq_ifu_dummy_instr dummy_instr_i (
+      .clk_i                 ( clk_i                 ),
+      .rst_ni                ( rst_ni                ),
+      .dummy_instr_en_i      ( dummy_instr_en_i      ),
+      .dummy_instr_mask_i    ( dummy_instr_mask_i    ),
+      .dummy_instr_seed_en_i ( dummy_instr_seed_en_i ),
+      .dummy_instr_seed_i    ( dummy_instr_seed_i    ),
+      .fetch_valid_i         ( fetch_valid           ),
+      .id_in_ready_i         ( id_in_ready_i         ),
+      .insert_dummy_instr_o  ( insert_dummy_instr    ),
+      .dummy_instr_data_o    ( dummy_instr_data      )
+    );
+
+    // Mux between actual instructions and dummy instructions
+    assign instr_out               = insert_dummy_instr ? dummy_instr_data : instr_decompressed;
+    assign instr_is_compressed_out = insert_dummy_instr ? 1'b0 : instr_is_compressed;
+    assign illegal_c_instr_out     = insert_dummy_instr ? 1'b0 : illegal_c_insn;
+    assign instr_err_out           = insert_dummy_instr ? 1'b0 : if_instr_err;
+
+    // Stall the IF stage if we insert a dummy instruction. The dummy will execute between whatever
+    // is currently in the ID stage and whatever is valid from the prefetch buffer this cycle. The
+    // PC of the dummy instruction will match whatever is next from the prefetch buffer.
+    assign stall_dummy_instr = insert_dummy_instr;
+
+    // Register the dummy instruction indication into the ID stage
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        dummy_instr_id_o <= 1'b0;
+      end else if (if_id_pipe_reg_we) begin
+        dummy_instr_id_o <= insert_dummy_instr;
+      end
+    end
+
+  end else begin : gen_no_dummy_instr
+    logic        unused_dummy_en;
+    logic [2:0]  unused_dummy_mask;
+    logic        unused_dummy_seed_en;
+    logic [31:0] unused_dummy_seed;
+
+    assign unused_dummy_en         = dummy_instr_en_i;
+    assign unused_dummy_mask       = dummy_instr_mask_i;
+    assign unused_dummy_seed_en    = dummy_instr_seed_en_i;
+    assign unused_dummy_seed       = dummy_instr_seed_i;
+    assign instr_out               = instr_decompressed;
+    assign instr_is_compressed_out = instr_is_compressed;
+    assign illegal_c_instr_out     = illegal_c_insn;
+    assign instr_err_out           = if_instr_err;
+    assign stall_dummy_instr       = 1'b0;
+    assign dummy_instr_id_o        = 1'b0;
+  end
+
+  // ID-stage valid tracking.
+  // instr_new_id_d is high when IF hands an instruction to a ready ID stage. The valid bit is set
+  // by that handover unless pc_set_i squashes it, and then holds until instr_valid_clear_i.
+  assign instr_new_id_d   = id_in_ready_i & if_instr_valid;
+  assign instr_valid_id_d = (instr_new_id_d & ~pc_set_i) |
+                            (~instr_valid_clear_i & instr_valid_id_q);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      instr_new_id_q   <= 1'b0;
+      instr_valid_id_q <= 1'b0;
+    end else begin
+      instr_new_id_q   <= instr_new_id_d;
+      instr_valid_id_q <= instr_valid_id_d;
+    end
+  end
+
+  // instr_new_id_o marks a new instruction entering the ID stage (only used for RVFI signalling).
+  assign instr_new_id_o   = instr_new_id_q;
+  assign instr_valid_id_o = instr_valid_id_q;
+
+  // IF-ID pipeline registers: loaded only when a new instruction is handed to the ID stage, so
+  // they keep their contents while the ID stage is stalled. These flops have no reset.
+  assign if_id_pipe_reg_we = instr_new_id_d;
+  always_ff @(posedge clk_i) begin
+    if (if_id_pipe_reg_we) begin
+      pc_id_o                  <= pc_if_o;
+      // Two copies of the instruction word are kept to limit fan-out from these flops.
+      instr_rdata_id_o         <= instr_out;
+      instr_rdata_alu_id_o     <= instr_out;
+      instr_rdata_c_id_o       <= if_instr_rdata[15:0];
+      instr_is_compressed_id_o <= instr_is_compressed_out;
+      illegal_c_insn_id_o      <= illegal_c_instr_out;
+      instr_fetch_err_o        <= instr_err_out;
+      instr_fetch_err_plus2_o  <= fetch_err_plus2;
+    end
+  end
+
+  // Optional hardening: flag a fetch PC that is not the previous PC plus the instruction size.
+  if (PCIncrCheck) begin : g_secure_pc
+    logic        prev_instr_seq_q, prev_instr_seq_d;
+    logic [31:0] prev_instr_addr_incr;
+
+    // The sequential-flow flag is armed once an instruction is handed to ID and is dropped by
+    // anything that sets branch_req or by a dummy instruction. It is clear after reset.
+    assign prev_instr_seq_d = ~(branch_req | stall_dummy_instr) &
+        (instr_new_id_d | prev_instr_seq_q);
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        prev_instr_seq_q <= 1'b0;
+      end else begin
+        prev_instr_seq_q <= prev_instr_seq_d;
+      end
+    end
+
+    // Expected next PC: +2 after an error-free compressed instruction, +4 otherwise.
+    assign prev_instr_addr_incr = pc_id_o +
+        ((!instr_fetch_err_o && instr_is_compressed_id_o) ? 32'd2 : 32'd4);
+    // Alert when the flow is marked sequential but the IF address is not the expected one.
+    assign pc_mismatch_alert_o  = (pc_if_o != prev_instr_addr_incr) & prev_instr_seq_q;
+
+  end else begin : g_no_secure_pc
+    assign pc_mismatch_alert_o = 1'b0;
+  end
+
+  if (BranchPredictor) begin : g_ifu_branch_predictor
+    logic [31:0] instr_skid_data_q;      // fetch_rdata captured into the skid buffer
+    logic [31:0] instr_skid_addr_q;      // fetch_addr captured into the skid buffer
+    logic        instr_skid_bp_taken_q;  // prediction captured alongside the instruction
+    logic        instr_skid_valid_q, instr_skid_valid_d;
+    logic        instr_skid_en;          // write enable of the skid data registers
+    logic        instr_bp_taken_q, instr_bp_taken_d;
+
+    logic        predict_branch_taken_raw; // predictor output before skid/error gating
+
+    // The ID stage needs to know if the branch was predicted taken so it can signal mispredicts
+    always_ff @(posedge clk_i) begin
+      if (if_id_pipe_reg_we) begin
+        instr_bp_taken_q <= instr_bp_taken_d;
+      end
+    end
+
+    // When branch prediction is enabled a skid buffer between the IF and ID/EX stage is introduced.
+    // If an instruction in IF is predicted to be a taken branch and ID/EX is not ready the
+    // instruction in IF is moved to the skid buffer which becomes the output of the IF stage until
+    // the ID/EX stage accepts the instruction. The skid buffer is required as otherwise the ID/EX
+    // ready signal is coupled to the instr_req_o output which produces a feedthrough path from
+    // data_gnt_i -> instr_req_o (which needs to be avoided as for some interconnects this will
+    // result in a combinational loop).
+
+    assign instr_skid_en = predicted_branch & ~id_in_ready_i & ~instr_skid_valid_q;
+
+    assign instr_skid_valid_d = (instr_skid_valid_q & ~id_in_ready_i & ~stall_dummy_instr) |
+                                instr_skid_en; // a new capture always sets valid
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        instr_skid_valid_q <= 1'b0;
+      end else begin
+        instr_skid_valid_q <= instr_skid_valid_d;
+      end
+    end
+
+    always_ff @(posedge clk_i) begin
+      if (instr_skid_en) begin
+        instr_skid_bp_taken_q <= predict_branch_taken;
+        instr_skid_data_q     <= fetch_rdata;
+        instr_skid_addr_q     <= fetch_addr;
+      end
+    end
+
+    brq_ifu_branch_predict branch_predict_i (
+      .clk_i                  ( clk_i                    ),
+      .rst_ni                 ( rst_ni                   ),
+      .fetch_rdata_i          ( fetch_rdata              ),
+      .fetch_pc_i             ( fetch_addr               ),
+      .fetch_valid_i          ( fetch_valid              ),
+
+      .predict_branch_taken_o ( predict_branch_taken_raw ),
+      .predict_branch_pc_o    ( predict_branch_pc        )
+    );
+
+    // If there is an instruction in the skid buffer there must be no branch prediction.
+    // Instructions are only placed in the skid after they have been predicted to be a taken branch
+    // so with the skid valid any prediction has already occurred.
+    // Do not branch predict on instruction errors.
+    assign predict_branch_taken = predict_branch_taken_raw & ~instr_skid_valid_q & ~fetch_err;
+
+    // pc_set_i takes precedence over branch prediction
+    assign predicted_branch = predict_branch_taken & ~pc_set_i;
+
+    assign if_instr_valid   = fetch_valid | instr_skid_valid_q;
+    assign if_instr_rdata   = instr_skid_valid_q ? instr_skid_data_q : fetch_rdata;
+    assign if_instr_addr    = instr_skid_valid_q ? instr_skid_addr_q : fetch_addr;
+
+    // Don't branch predict on instruction error so only instructions without errors end up in the
+    // skid buffer.
+    assign if_instr_err     = ~instr_skid_valid_q & fetch_err;
+    assign instr_bp_taken_d = instr_skid_valid_q ? instr_skid_bp_taken_q : predict_branch_taken;
+
+    assign fetch_ready = id_in_ready_i & ~stall_dummy_instr & ~instr_skid_valid_q;
+
+    assign instr_bp_taken_o = instr_bp_taken_q;
+
+
+  end else begin : g_no_ifu_branch_predictor
+    assign instr_bp_taken_o     = 1'b0;
+    assign predict_branch_taken = 1'b0;
+    assign predicted_branch     = 1'b0;
+    assign predict_branch_pc    = 32'b0;
+
+    assign if_instr_valid = fetch_valid; // no skid buffer: IF outputs mirror the fetch outputs
+    assign if_instr_rdata = fetch_rdata;
+    assign if_instr_addr  = fetch_addr;
+    assign if_instr_err   = fetch_err;
+    assign fetch_ready = id_in_ready_i & ~stall_dummy_instr;
+  end
+
+  
+
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/brq_ifu_compressed_decoder.sv b/verilog/rtl/brq_ifu_compressed_decoder.sv
new file mode 100644
index 0000000..1a6fc57
--- /dev/null
+++ b/verilog/rtl/brq_ifu_compressed_decoder.sv
@@ -0,0 +1,276 @@
+
+/**
+ * Compressed instruction decoder
+ *
+ * Decodes RISC-V compressed instructions into their RV32 equivalent.
+ * This module is fully combinatorial, clock and reset are used for
+ * assertions only.
+ */
+
+
+module brq_ifu_compressed_decoder (
+    input  logic        clk_i,           // not referenced by the logic in this module
+    input  logic        rst_ni,          // not referenced by the logic in this module
+    input  logic        valid_i,         // only sunk into unused_valid below
+    input  logic [31:0] instr_i,         // fetched word; compressed forms use bits [15:0]
+    output logic [31:0] instr_o,         // expanded instruction, instr_i if nothing is expanded
+    output logic        is_compressed_o, // instr_i[1:0] != 2'b11
+    output logic        illegal_instr_o  // set for reserved or unsupported compressed encodings
+);
+  import brq_pkg::*;
+
+  // valid_i indicates if instr_i is valid and is used for assertions only.
+  // The following signal is used to avoid possible lint errors.
+  logic unused_valid;
+  assign unused_valid = valid_i;
+
+  ////////////////////////
+  // Compressed decoder //
+  ////////////////////////
+
+  always_comb begin
+    // By default, forward incoming instruction, mark it as legal.
+    instr_o         = instr_i;
+    illegal_instr_o = 1'b0;
+
+    // Check if incoming instruction is compressed.
+    unique case (instr_i[1:0]) // all four values are listed; the default arm is commented out
+      // C0
+      2'b00: begin
+        unique case (instr_i[15:13])
+          3'b000: begin
+            // c.addi4spn -> addi rd', x2, imm
+            instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5],
+                       instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], {OPCODE_OP_IMM}};
+            if (instr_i[12:5] == 8'b0)  illegal_instr_o = 1'b1; // nzuimm == 0 is reserved
+          end
+
+          3'b010: begin
+            // c.lw -> lw rd', imm(rs1')
+            instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6],
+                       2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], {OPCODE_LOAD}};
+          end
+
+          3'b110: begin
+            // c.sw -> sw rs2', imm(rs1')
+            instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2],
+                       2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6],
+                       2'b00, {OPCODE_STORE}};
+          end
+
+          3'b001, // c.fld in the RV32 RVC encoding
+          3'b011, // c.flw
+          3'b100, // reserved
+          3'b101, // c.fsd
+          3'b111: begin // c.fsw
+            illegal_instr_o = 1'b1; // compressed FP loads/stores are not expanded here
+          end
+
+          //default: begin
+          //  illegal_instr_o = 1'b1;
+          //end
+        endcase
+      end
+
+      // C1
+      //
+      // Register address checks for RV32E are performed in the regular instruction decoder.
+      // If this check fails, an illegal instruction exception is triggered and the controller
+      // writes the actual faulting instruction to mtval.
+      2'b01: begin
+        unique case (instr_i[15:13])
+          3'b000: begin
+            // c.addi -> addi rd, rd, nzimm
+            // c.nop
+            instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2],
+                       instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+          end
+
+          3'b001, 3'b101: begin
+            // 001: c.jal -> jal x1, imm
+            // 101: c.j   -> jal x0, imm
+            instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6],
+                       instr_i[7], instr_i[2], instr_i[11], instr_i[5:3],
+                       {9 {instr_i[12]}}, 4'b0, ~instr_i[15], {OPCODE_JAL}};
+          end
+
+          3'b010: begin
+            // c.li -> addi rd, x0, nzimm
+            // (c.li hints are translated into an addi hint)
+            instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0,
+                       3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+          end
+
+          3'b011: begin
+            // c.lui -> lui rd, imm
+            // (c.lui hints are translated into a lui hint)
+            instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], {OPCODE_LUI}};
+
+            if (instr_i[11:7] == 5'h02) begin
+              // c.addi16sp -> addi x2, x2, nzimm
+              instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2],
+                         instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, {OPCODE_OP_IMM}};
+            end
+
+            if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; // nzimm == 0 reserved
+          end
+
+          3'b100: begin
+            unique case (instr_i[11:10])
+              2'b00,
+              2'b01: begin
+                // 00: c.srli -> srli rd, rd, shamt
+                // 01: c.srai -> srai rd, rd, shamt
+                // (c.srli/c.srai hints are translated into a srli/srai hint)
+                instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7],
+                           3'b101, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+                if (instr_i[12] == 1'b1)  illegal_instr_o = 1'b1; // shamt[5] must be 0 on RV32
+              end
+
+              2'b10: begin
+                // c.andi -> andi rd, rd, imm
+                instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7],
+                           3'b111, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+              end
+
+              2'b11: begin
+                unique case ({instr_i[12], instr_i[6:5]})
+                  3'b000: begin
+                    // c.sub -> sub rd', rd', rs2'
+                    instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7],
+                               3'b000, 2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b001: begin
+                    // c.xor -> xor rd', rd', rs2'
+                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100,
+                               2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b010: begin
+                    // c.or  -> or  rd', rd', rs2'
+                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110,
+                               2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b011: begin
+                    // c.and -> and rd', rd', rs2'
+                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111,
+                               2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b100,
+                  3'b101,
+                  3'b110,
+                  3'b111: begin
+                    // 100: c.subw (RV64/RV128 only)
+                    // 101: c.addw (RV64/RV128 only); 110 and 111 are reserved
+                    illegal_instr_o = 1'b1;
+                  end
+
+               //   default: begin
+               //     illegal_instr_o = 1'b1;
+               //   end
+                endcase
+              end
+
+             // default: begin
+             //   illegal_instr_o = 1'b1;
+             // end
+            endcase
+          end
+
+          3'b110, 3'b111: begin
+            // 0: c.beqz -> beq rs1', x0, imm
+            // 1: c.bnez -> bne rs1', x0, imm
+            instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01,
+                       instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3],
+                       instr_i[12], {OPCODE_BRANCH}};
+          end
+
+        //  default: begin
+        //    illegal_instr_o = 1'b1;
+        //  end
+        endcase
+      end
+
+      // C2
+      //
+      // Register address checks for RV32E are performed in the regular instruction decoder.
+      // If this check fails, an illegal instruction exception is triggered and the controller
+      // writes the actual faulting instruction to mtval.
+      2'b10: begin
+        unique case (instr_i[15:13])
+          3'b000: begin
+            // c.slli -> slli rd, rd, shamt
+            // (c.slli hints are translated into a slli hint)
+            instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], {OPCODE_OP_IMM}};
+            if (instr_i[12] == 1'b1)  illegal_instr_o = 1'b1; // reserved for custom extensions
+          end
+
+          3'b010: begin
+            // c.lwsp -> lw rd, imm(x2)
+            instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02,
+                       3'b010, instr_i[11:7], OPCODE_LOAD};
+            if (instr_i[11:7] == 5'b0)  illegal_instr_o = 1'b1; // rd == x0 is reserved
+          end
+
+          3'b100: begin
+            if (instr_i[12] == 1'b0) begin
+              if (instr_i[6:2] != 5'b0) begin
+                // c.mv -> add rd/rs1, x0, rs2
+                // (c.mv hints are translated into an add hint)
+                instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], {OPCODE_OP}};
+              end else begin
+                // c.jr -> jalr x0, rd/rs1, 0
+                instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, {OPCODE_JALR}};
+                if (instr_i[11:7] == 5'b0)  illegal_instr_o = 1'b1; // rs1 == x0 is reserved
+              end
+            end else begin
+              if (instr_i[6:2] != 5'b0) begin
+                // c.add -> add rd, rd, rs2
+                // (c.add hints are translated into an add hint)
+                instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP}};
+              end else begin
+                if (instr_i[11:7] == 5'b0) begin
+                  // c.ebreak -> ebreak
+                  instr_o = {32'h00_10_00_73};
+                end else begin
+                  // c.jalr -> jalr x1, rs1, 0
+                  instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, {OPCODE_JALR}};
+                end
+              end
+            end
+          end
+
+          3'b110: begin
+            // c.swsp -> sw rs2, imm(x2)
+            instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010,
+                       instr_i[11:9], 2'b00, {OPCODE_STORE}};
+          end
+
+          3'b001, // c.fldsp in the RV32 RVC encoding
+          3'b011, // c.flwsp
+          3'b101, // c.fsdsp
+          3'b111: begin // c.fswsp
+            illegal_instr_o = 1'b1; // compressed FP stack loads/stores are not expanded here
+          end
+
+         // default: begin
+         //   illegal_instr_o = 1'b1;
+         // end
+        endcase
+      end
+
+      // Incoming instruction is not compressed.
+      2'b11:;
+
+     // default: begin
+     //   illegal_instr_o = 1'b1;
+     // end
+    endcase
+  end
+
+  assign is_compressed_o = (instr_i[1:0] != 2'b11);
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/brq_ifu_dummy_instr.sv b/verilog/rtl/brq_ifu_dummy_instr.sv
new file mode 100644
index 0000000..91bec36
--- /dev/null
+++ b/verilog/rtl/brq_ifu_dummy_instr.sv
@@ -0,0 +1,141 @@
+
+
+/**
+ * Dummy instruction module
+ *
+ * Provides pseudo-randomly inserted fake instructions for secure code obfuscation
+ */
+
+module brq_ifu_dummy_instr (
+    // Clock and reset (rst_ni is an asynchronous, active-low reset)
+    input  logic        clk_i,
+    input  logic        rst_ni,
+
+    // Interface to CSRs
+    input  logic        dummy_instr_en_i,       // gates the interval counter and insertion
+    input  logic [2:0]  dummy_instr_mask_i,     // ANDed with the upper 3 threshold bits
+    input  logic        dummy_instr_seed_en_i,  // updates the seed register, drives LFSR seed_en_i
+    input  logic [31:0] dummy_instr_seed_i,     // XORed with the stored seed
+
+    // Interface to IF stage
+    input  logic        fetch_valid_i,          // counts towards the interval with id_in_ready_i
+    input  logic        id_in_ready_i,          // qualifies counter and LFSR updates
+    output logic        insert_dummy_instr_o,   // enabled and counter equals threshold
+    output logic [31:0] dummy_instr_data_o      // encoded dummy instruction word
+);
+
+  localparam int unsigned TIMEOUT_CNT_W = 5;  // width of the insertion interval counter
+  localparam int unsigned OP_W          = 5;  // width of each register-index field (rs1/rs2)
+
+  typedef enum logic [1:0] {
+    DUMMY_ADD = 2'b00,
+    DUMMY_MUL = 2'b01,
+    DUMMY_DIV = 2'b10,
+    DUMMY_AND = 2'b11
+  } dummy_instr_e;
+
+  typedef struct packed {
+    dummy_instr_e             instr_type;  // most significant field: operation to emit
+    logic [OP_W-1:0]          op_b;        // placed in the rs2 field of the dummy instruction
+    logic [OP_W-1:0]          op_a;        // placed in the rs1 field of the dummy instruction
+    logic [TIMEOUT_CNT_W-1:0] cnt;         // least significant field: raw insertion threshold
+  } lfsr_data_t;
+  localparam int unsigned LFSR_OUT_W = $bits(lfsr_data_t);  // 2 + 5 + 5 + 5 = 17 bits
+
+  lfsr_data_t               lfsr_data;
+  logic [TIMEOUT_CNT_W-1:0] dummy_cnt_incr, dummy_cnt_threshold;
+  logic [TIMEOUT_CNT_W-1:0] dummy_cnt_d, dummy_cnt_q;
+  logic                     dummy_cnt_en;
+  logic                     lfsr_en;
+  logic [LFSR_OUT_W-1:0]    lfsr_state;
+  logic                     insert_dummy_instr;
+  logic [6:0]               dummy_set;     // bits [31:25] of the dummy instruction
+  logic [2:0]               dummy_opcode;  // bits [14:12] of the dummy instruction
+  logic [31:0]              dummy_instr;
+  logic [31:0]              dummy_instr_seed_q, dummy_instr_seed_d;
+
+  // Shift the LFSR every time an inserted dummy instruction coincides with id_in_ready_i
+  assign lfsr_en = insert_dummy_instr & id_in_ready_i;
+
+  assign dummy_instr_seed_d = dummy_instr_seed_q ^ dummy_instr_seed_i;  // next seed = old ^ new
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      dummy_instr_seed_q <= '0;
+    end else if (dummy_instr_seed_en_i) begin
+      dummy_instr_seed_q <= dummy_instr_seed_d;
+    end
+  end
+
+  prim_lfsr #(  // NOTE(review): prim_lfsr is defined elsewhere; port semantics assumed from names
+      .LfsrDw      ( 32         ),
+      .StateOutDw  ( LFSR_OUT_W )
+  ) lfsr_i (
+      .clk_i     ( clk_i                 ),
+      .rst_ni    ( rst_ni                ),
+      .seed_en_i ( dummy_instr_seed_en_i ),
+      .seed_i    ( dummy_instr_seed_d    ),  // same value that is stored in dummy_instr_seed_q
+      .lfsr_en_i ( lfsr_en               ),
+      .entropy_i ( '0                    ),  // tied to zero
+      .state_o   ( lfsr_state            )   // LFSR_OUT_W bits, recast to lfsr_data_t below
+  );
+
+  // Extract fields from LFSR
+  assign lfsr_data = lfsr_data_t'(lfsr_state);
+
+  // Threshold for inserting a new instruction: the pseudo-random LFSR count with the CSR mask
+  // applied to its upper 3 bits (the lower TIMEOUT_CNT_W-3 bits always pass) to shorten the period.
+  assign dummy_cnt_threshold = lfsr_data.cnt & {dummy_instr_mask_i,{TIMEOUT_CNT_W-3{1'b1}}};
+  assign dummy_cnt_incr      = dummy_cnt_q + {{TIMEOUT_CNT_W-1{1'b0}},1'b1};
+  // Clear the counter every time a new instruction is inserted
+  assign dummy_cnt_d         = insert_dummy_instr ? '0 : dummy_cnt_incr;
+  // Update the counter whenever id_in_ready_i is set and either a fetched instruction is valid
+  // or a dummy instruction is being inserted, but only while dummy instructions are enabled.
+  assign dummy_cnt_en        = dummy_instr_en_i & id_in_ready_i &
+                               (fetch_valid_i | insert_dummy_instr);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      dummy_cnt_q <= '0;
+    end else if (dummy_cnt_en) begin
+      dummy_cnt_q <= dummy_cnt_d;
+    end
+  end
+
+  // Insert a dummy instruction each time the counter hits the threshold
+  assign insert_dummy_instr = dummy_instr_en_i & (dummy_cnt_q == dummy_cnt_threshold);
+
+  // Encode instruction (R-type, opcode 7'h33, rd = x0): dummy_set -> funct7, dummy_opcode -> funct3
+  always_comb begin
+    unique case (lfsr_data.instr_type)
+      DUMMY_ADD : begin
+        dummy_set    = 7'b0000000;
+        dummy_opcode = 3'b000;
+      end
+      DUMMY_MUL : begin
+        dummy_set    = 7'b0000001;
+        dummy_opcode = 3'b000;
+      end
+      DUMMY_DIV : begin
+        dummy_set    = 7'b0000001;
+        dummy_opcode = 3'b100;
+      end
+      DUMMY_AND : begin
+        dummy_set    = 7'b0000000;
+        dummy_opcode = 3'b111;
+      end
+      default : begin  // all four 2-bit encodings are listed above; fallback mirrors DUMMY_ADD
+        dummy_set    = 7'b0000000;
+        dummy_opcode = 3'b000;
+      end
+    endcase
+  end
+
+  //                    SET       RS2            RS1            OP           RD
+  assign dummy_instr = {dummy_set,lfsr_data.op_b,lfsr_data.op_a,dummy_opcode,5'h00,7'h33};
+
+  // Assign outputs
+  assign insert_dummy_instr_o = insert_dummy_instr;
+  assign dummy_instr_data_o   = dummy_instr;
+
+endmodule
diff --git a/verilog/rtl/brq_ifu_fifo.sv b/verilog/rtl/brq_ifu_fifo.sv
new file mode 100644
index 0000000..09bb06c
--- /dev/null
+++ b/verilog/rtl/brq_ifu_fifo.sv
@@ -0,0 +1,237 @@
+
+
+/**
+ * Fetch Fifo for 32 bit memory interface
+ *
+ * input port: send address and data to the FIFO
+ * clear_i clears the FIFO for the following cycle, including any new request
+ */
+
+
+
+module brq_ifu_fifo #(
+  parameter int unsigned NUM_REQS = 2  // FIFO depth is NUM_REQS+1; also the width of busy_o
+) (
+    input  logic                clk_i,
+    input  logic                rst_ni,    // asynchronous, active-low; resets only valid_q
+
+    // control signals
+    input  logic                clear_i,   // clears the contents of the FIFO
+    output logic [NUM_REQS-1:0] busy_o,    // valid bits of every entry except entry 0
+
+    // input port
+    input  logic                in_valid_i,
+    input  logic [31:0]         in_addr_i,   // only sampled (bits [31:1]) while clear_i is set
+    input  logic [31:0]         in_rdata_i,
+    input  logic                in_err_i,
+
+    // output port
+    output logic                out_valid_o,
+    input  logic                out_ready_i,
+    output logic [31:0]         out_addr_o,       // address of the instruction being output
+    output logic [31:0]         out_addr_next_o,  // out_addr_o plus 2 or 4
+    output logic [31:0]         out_rdata_o,
+    output logic                out_err_o,
+    output logic                out_err_plus2_o   // always 0 when out_addr_o[1] is 0
+);
+
+  localparam int unsigned DEPTH = NUM_REQS+1;
+
+  // index 0 is used for output; a pop shifts every higher entry down by one
+  logic [DEPTH-1:0] [31:0]  rdata_d,   rdata_q;
+  logic [DEPTH-1:0]         err_d,     err_q;
+  logic [DEPTH-1:0]         valid_d,   valid_q;
+  logic [DEPTH-1:0]         lowest_free_entry;
+  logic [DEPTH-1:0]         valid_pushed, valid_popped;
+  logic [DEPTH-1:0]         entry_en;
+
+  logic                     pop_fifo;
+  logic             [31:0]  rdata, rdata_unaligned;
+  logic                     err,   err_unaligned, err_plus2;
+  logic                     valid, valid_unaligned;
+
+  logic                     aligned_is_compressed, unaligned_is_compressed;
+
+  logic                     addr_incr_two;
+  logic [31:1]              instr_addr_next;
+  logic [31:1]              instr_addr_d, instr_addr_q;
+  logic                     instr_addr_en;
+  logic                     unused_addr_in;
+
+  /////////////////
+  // Output port //
+  /////////////////
+
+  assign rdata = valid_q[0] ? rdata_q[0] : in_rdata_i;  // bypass incoming data when entry 0 empty
+  assign err   = valid_q[0] ? err_q[0]   : in_err_i;
+  assign valid = valid_q[0] | in_valid_i;
+
+  // The FIFO contains word aligned memory fetches, but the instructions contained in each entry
+  // might be half-word aligned (due to compressed instructions)
+  // e.g.
+  //              | 31               16 | 15               0 |
+  // FIFO entry 0 | Instr 1 [15:0]      | Instr 0 [15:0]     |
+  // FIFO entry 1 | Instr 2 [15:0]      | Instr 1 [31:16]    |
+  //
+  // The FIFO also has a direct bypass path, so a complete instruction might be made up of data
+  // from the FIFO and new incoming data.
+  //
+
+  // Construct the output data for an unaligned instruction
+  assign rdata_unaligned = valid_q[1] ? {rdata_q[1][15:0], rdata[31:16]} :
+                                        {in_rdata_i[15:0], rdata[31:16]};
+
+  // If entry[1] is valid, an error can come from entry[0] or entry[1], unless the
+  // instruction in entry[0] is compressed (entry[1] is a new instruction)
+  // If entry[1] is not valid, and entry[0] is, an error can come from entry[0] or the incoming
+  // data, unless the instruction in entry[0] is compressed
+  // If entry[0] is not valid, the error must come from the incoming data
+  assign err_unaligned   = valid_q[1] ? ((err_q[1] & ~unaligned_is_compressed) | err_q[0]) :
+                                        ((valid_q[0] & err_q[0]) |
+                                         (in_err_i & (~valid_q[0] | ~unaligned_is_compressed)));
+
+  // Record when an error is caused by the second half of an unaligned 32bit instruction.
+  // Only needs to be correct when unaligned and if err_unaligned is set
+  assign err_plus2       = valid_q[1] ? (err_q[1] & ~err_q[0]) :
+                                        (in_err_i & valid_q[0] & ~err_q[0]);
+
+  // An uncompressed unaligned instruction is only valid if both parts are available
+  assign valid_unaligned = valid_q[1] ? 1'b1 :
+                                        (valid_q[0] & in_valid_i);
+
+  // If there is an error, rdata is unknown, so the instruction is treated as not compressed
+  assign unaligned_is_compressed = (rdata[17:16] != 2'b11) & ~err;
+  assign aligned_is_compressed   = (rdata[ 1: 0] != 2'b11) & ~err;
+
+  ////////////////////////////////////////
+  // Instruction aligner (if unaligned) //
+  ////////////////////////////////////////
+
+  always_comb begin
+    if (out_addr_o[1]) begin
+      // unaligned case: instruction starts in the upper halfword of the current word
+      out_rdata_o     = rdata_unaligned;
+      out_err_o       = err_unaligned;
+      out_err_plus2_o = err_plus2;
+
+      if (unaligned_is_compressed) begin
+        out_valid_o = valid;  // a single halfword is enough
+      end else begin
+        out_valid_o = valid_unaligned;  // needs the following word as well
+      end
+    end else begin
+      // aligned case: instruction starts at bit 0 of the current word
+      out_rdata_o     = rdata;
+      out_err_o       = err;
+      out_err_plus2_o = 1'b0;
+      out_valid_o     = valid;
+    end
+  end
+
+  /////////////////////////
+  // Instruction address //
+  /////////////////////////
+
+  // Update the address on clear_i (new address from in_addr_i) and every accepted instruction
+  assign instr_addr_en = clear_i | (out_ready_i & out_valid_o);
+
+  // Increment the address by two every time a compressed instruction is popped
+  assign addr_incr_two = instr_addr_q[1] ? unaligned_is_compressed :
+                                           aligned_is_compressed;
+
+  assign instr_addr_next = (instr_addr_q[31:1] +
+                            // Increment address by 4 or 2 (i.e. 2 or 1 in halfword units)
+                            {29'd0,~addr_incr_two,addr_incr_two});
+
+  assign instr_addr_d = clear_i ? in_addr_i[31:1] :
+                                  instr_addr_next;
+
+  always_ff @(posedge clk_i) begin  // no reset term; loaded from in_addr_i when clear_i is set
+    if (instr_addr_en) begin
+      instr_addr_q <= instr_addr_d;
+    end
+  end
+
+  // Output both PC of current instruction and instruction following. PC of instruction following is
+  // required for the branch predictor. It's used to fetch the instruction following a branch that
+  // was not-taken but (mis)predicted taken.
+  assign out_addr_next_o = {instr_addr_next, 1'b0};
+  assign out_addr_o      = {instr_addr_q, 1'b0};
+
+  // The LSB of the address is unused, since all addresses are halfword aligned
+  assign unused_addr_in = in_addr_i[0];
+
+  /////////////////
+  // FIFO status //
+  /////////////////
+
+  // Indicate the fill level of fifo-entries. This is used to determine when a new request can be
+  // made on the bus. The prefetch buffer only needs to know about the upper entries which overlap
+  // with NUM_REQS.
+  assign busy_o = valid_q[DEPTH-1:DEPTH-NUM_REQS];
+
+  /////////////////////
+  // FIFO management //
+  /////////////////////
+
+  // Pop entry 0 unless an aligned compressed instruction was taken (upper halfword still pending)
+  assign pop_fifo = out_ready_i & out_valid_o & (~aligned_is_compressed | out_addr_o[1]);
+
+  for (genvar i = 0; i < (DEPTH - 1); i++) begin : g_fifo_next
+    // Calculate lowest free entry (write pointer)
+    if (i == 0) begin : g_ent0
+      assign lowest_free_entry[i] = ~valid_q[i];
+    end else begin : g_ent_others
+      assign lowest_free_entry[i] = ~valid_q[i] & valid_q[i-1];
+    end
+
+    // An entry is set when an incoming request chooses the lowest available entry
+    assign valid_pushed[i] = (in_valid_i & lowest_free_entry[i]) |
+                             valid_q[i];
+    // Popping the FIFO shifts all entries down
+    assign valid_popped[i] = pop_fifo ? valid_pushed[i+1] : valid_pushed[i];
+    // All entries are wiped out on a clear
+    assign valid_d[i] = valid_popped[i] & ~clear_i;
+
+    // data flops are enabled if there is new data to shift into it, or
+    assign entry_en[i] = (valid_pushed[i+1] & pop_fifo) |
+                         // a new request is incoming and this is the lowest free entry
+                         (in_valid_i & lowest_free_entry[i] & ~pop_fifo);
+
+    // take the next entry or the incoming data
+    assign rdata_d[i]  = valid_q[i+1] ? rdata_q[i+1] : in_rdata_i;
+    assign err_d  [i]  = valid_q[i+1] ? err_q  [i+1] : in_err_i;
+  end
+  // The top entry is similar but with simpler muxing (nothing above it to shift in)
+  assign lowest_free_entry[DEPTH-1] = ~valid_q[DEPTH-1] & valid_q[DEPTH-2];
+  assign valid_pushed     [DEPTH-1] = valid_q[DEPTH-1] | (in_valid_i & lowest_free_entry[DEPTH-1]);
+  assign valid_popped     [DEPTH-1] = pop_fifo ? 1'b0 : valid_pushed[DEPTH-1];
+  assign valid_d [DEPTH-1]          = valid_popped[DEPTH-1] & ~clear_i;
+  assign entry_en[DEPTH-1]          = in_valid_i & lowest_free_entry[DEPTH-1];
+  assign rdata_d [DEPTH-1]          = in_rdata_i;
+  assign err_d   [DEPTH-1]          = in_err_i;
+
+  ////////////////////
+  // FIFO registers //
+  ////////////////////
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      valid_q <= '0;
+    end else begin
+      valid_q <= valid_d;
+    end
+  end
+
+  for (genvar i = 0; i < DEPTH; i++) begin : g_fifo_regs
+    always_ff @(posedge clk_i) begin  // data/error flops have no reset; valid_q qualifies them
+      if (entry_en[i]) begin
+        rdata_q[i]   <= rdata_d[i];
+        err_q[i]     <= err_d[i];
+      end
+    end
+  end
+
+
+
+endmodule
diff --git a/verilog/rtl/brq_ifu_icache.sv b/verilog/rtl/brq_ifu_icache.sv
new file mode 100644
index 0000000..51a998c
--- /dev/null
+++ b/verilog/rtl/brq_ifu_icache.sv
@@ -0,0 +1,1052 @@
+
+
+/**
+ * Instruction cache
+ *
+ * Provides an instruction cache along with cache management, instruction buffering and prefetching
+ */
+
+module brq_ifu_icache #(
+  parameter bit          BranchPredictor = 1'b0,
+  // Cache arrangement parameters
+  parameter int unsigned BusWidth        = 32,
+  parameter int unsigned CacheSizeBytes  = 4*1024,
+  parameter bit          ICacheECC       = 1'b0,
+  parameter int unsigned LineSize        = 64,
+  parameter int unsigned NumWays         = 2,
+  // Only cache branch targets
+  parameter bit          BranchCache     = 1'b0
+) (
+    // Clock and reset
+    input  logic                clk_i,
+    input  logic                rst_ni,
+
+    // Signal that the core would like instructions
+    input  logic                req_i,
+
+    // Set the cache's address counter
+    input  logic                branch_i,
+    input  logic                branch_spec_i,
+    input  logic                predicted_branch_i,
+    input  logic                branch_mispredict_i,
+    input  logic [31:0]         addr_i,
+
+    // IF stage interface: Pass fetched instructions to the core
+    input  logic                ready_i,
+    output logic                valid_o,
+    output logic [31:0]         rdata_o,
+    output logic [31:0]         addr_o,
+    output logic                err_o,
+    output logic                err_plus2_o,
+
+    // Instruction memory / interconnect interface: Fetch instruction data from memory
+    output logic                instr_req_o,
+    input  logic                instr_gnt_i,
+    output logic [31:0]         instr_addr_o,
+    input  logic [BusWidth-1:0] instr_rdata_i,
+    input  logic                instr_err_i,
+    input  logic                instr_pmp_err_i,
+    input  logic                instr_rvalid_i,
+
+    // Cache status
+    input  logic                icache_enable_i,
+    input  logic                icache_inval_i,
+    output logic                busy_o
+);
+  // Local constants
+  localparam int unsigned ADDR_W       = 32;
+  // Number of fill buffers (must be >= 2)
+  localparam int unsigned NUM_FB       = 4;
+  // Request throttling threshold
+  localparam int unsigned FB_THRESHOLD = NUM_FB - 2;
+  // Derived parameters
+  localparam int unsigned LINE_SIZE_ECC   = ICacheECC ? (LineSize + 8) : LineSize;
+  localparam int unsigned LINE_SIZE_BYTES = LineSize/8;
+  localparam int unsigned LINE_W          = $clog2(LINE_SIZE_BYTES);
+  localparam int unsigned BUS_BYTES       = BusWidth/8;
+  localparam int unsigned BUS_W           = $clog2(BUS_BYTES);
+  localparam int unsigned LINE_BEATS      = LINE_SIZE_BYTES / BUS_BYTES;
+  localparam int unsigned LINE_BEATS_W    = $clog2(LINE_BEATS);
+  localparam int unsigned NUM_LINES       = CacheSizeBytes / NumWays / LINE_SIZE_BYTES;
+  localparam int unsigned INDEX_W         = $clog2(NUM_LINES);
+  localparam int unsigned INDEX_HI        = INDEX_W + LINE_W - 1;
+  localparam int unsigned TAG_SIZE        = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
+  localparam int unsigned TAG_SIZE_ECC    = ICacheECC ? (TAG_SIZE + 6) : TAG_SIZE;
+  localparam int unsigned OUTPUT_BEATS    = (BUS_BYTES / 2); // number of halfwords
+
+  // Prefetch signals
+  logic [ADDR_W-1:0]                   lookup_addr_aligned;
+  logic [ADDR_W-1:0]                   branch_mispredict_addr;
+  logic [ADDR_W-1:0]                   prefetch_addr_d, prefetch_addr_q;
+  logic                                prefetch_addr_en;
+  logic                                branch_or_mispredict;
+  // Cache pipeline IC0 signals
+  logic                                branch_suppress;
+  logic                                lookup_throttle;
+  logic                                lookup_req_ic0;
+  logic [ADDR_W-1:0]                   lookup_addr_ic0;
+  logic [INDEX_W-1:0]                  lookup_index_ic0;
+  logic                                fill_req_ic0;
+  logic [INDEX_W-1:0]                  fill_index_ic0;
+  logic [TAG_SIZE-1:0]                 fill_tag_ic0;
+  logic [LineSize-1:0]                 fill_wdata_ic0;
+  logic                                lookup_grant_ic0;
+  logic                                lookup_actual_ic0;
+  logic                                fill_grant_ic0;
+  logic                                tag_req_ic0;
+  logic [INDEX_W-1:0]                  tag_index_ic0;
+  logic [NumWays-1:0]                  tag_banks_ic0;
+  logic                                tag_write_ic0;
+  logic [TAG_SIZE_ECC-1:0]             tag_wdata_ic0;
+  logic                                data_req_ic0;
+  logic [INDEX_W-1:0]                  data_index_ic0;
+  logic [NumWays-1:0]                  data_banks_ic0;
+  logic                                data_write_ic0;
+  logic [LINE_SIZE_ECC-1:0]            data_wdata_ic0;
+  // Cache pipeline IC1 signals
+  logic [TAG_SIZE_ECC-1:0]             tag_rdata_ic1  [NumWays];
+  logic [LINE_SIZE_ECC-1:0]            data_rdata_ic1 [NumWays];
+  logic [LINE_SIZE_ECC-1:0]            hit_data_ic1;
+  logic                                lookup_valid_ic1;
+  logic [ADDR_W-1:INDEX_HI+1]          lookup_addr_ic1;
+  logic [NumWays-1:0]                  tag_match_ic1;
+  logic                                tag_hit_ic1;
+  logic [NumWays-1:0]                  tag_invalid_ic1;
+  logic [NumWays-1:0]                  lowest_invalid_way_ic1;
+  logic [NumWays-1:0]                  round_robin_way_ic1, round_robin_way_q;
+  logic [NumWays-1:0]                  sel_way_ic1;
+  logic                                ecc_err_ic1;
+  logic                                ecc_write_req;
+  logic [NumWays-1:0]                  ecc_write_ways;
+  logic [INDEX_W-1:0]                  ecc_write_index;
+  // Fill buffer signals
+  logic                                gnt_or_pmp_err, gnt_not_pmp_err;
+  logic [$clog2(NUM_FB)-1:0]           fb_fill_level;
+  logic                                fill_cache_new;
+  logic                                fill_new_alloc;
+  logic                                fill_spec_req, fill_spec_done, fill_spec_hold;
+  logic [NUM_FB-1:0][NUM_FB-1:0]       fill_older_d, fill_older_q;
+  logic [NUM_FB-1:0]                   fill_alloc_sel, fill_alloc;
+  logic [NUM_FB-1:0]                   fill_busy_d, fill_busy_q;
+  logic [NUM_FB-1:0]                   fill_done;
+  logic [NUM_FB-1:0]                   fill_in_ic1;
+  logic [NUM_FB-1:0]                   fill_stale_d, fill_stale_q;
+  logic [NUM_FB-1:0]                   fill_cache_d, fill_cache_q;
+  logic [NUM_FB-1:0]                   fill_hit_ic1, fill_hit_d, fill_hit_q;
+  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_ext_cnt_d, fill_ext_cnt_q;
+  logic [NUM_FB-1:0]                   fill_ext_hold_d, fill_ext_hold_q;
+  logic [NUM_FB-1:0]                   fill_ext_done_d, fill_ext_done_q;
+  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_rvd_cnt_d, fill_rvd_cnt_q;
+  logic [NUM_FB-1:0]                   fill_rvd_done;
+  logic [NUM_FB-1:0]                   fill_ram_done_d, fill_ram_done_q;
+  logic [NUM_FB-1:0]                   fill_out_grant;
+  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_out_cnt_d, fill_out_cnt_q;
+  logic [NUM_FB-1:0]                   fill_out_done;
+  logic [NUM_FB-1:0]                   fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req;
+  logic [NUM_FB-1:0]                   fill_data_sel, fill_data_reg, fill_data_hit, fill_data_rvd;
+  logic [NUM_FB-1:0][LINE_BEATS_W-1:0] fill_ext_off, fill_rvd_off;
+  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_ext_beat, fill_rvd_beat;
+  logic [NUM_FB-1:0]                   fill_ext_arb, fill_ram_arb, fill_out_arb;
+  logic [NUM_FB-1:0]                   fill_rvd_arb;
+  logic [NUM_FB-1:0]                   fill_entry_en;
+  logic [NUM_FB-1:0]                   fill_addr_en;
+  logic [NUM_FB-1:0]                   fill_way_en;
+  logic [NUM_FB-1:0][LINE_BEATS-1:0]   fill_data_en;
+  logic [NUM_FB-1:0][LINE_BEATS-1:0]   fill_err_d, fill_err_q;
+  logic [ADDR_W-1:0]                   fill_addr_q [NUM_FB];
+  logic [NumWays-1:0]                  fill_way_q  [NUM_FB];
+  logic [LineSize-1:0]                 fill_data_d [NUM_FB];
+  logic [LineSize-1:0]                 fill_data_q [NUM_FB];
+  logic [ADDR_W-1:BUS_W]               fill_ext_req_addr;
+  logic [ADDR_W-1:0]                   fill_ram_req_addr;
+  logic [NumWays-1:0]                  fill_ram_req_way;
+  logic [LineSize-1:0]                 fill_ram_req_data;
+  logic [LineSize-1:0]                 fill_out_data;
+  logic [LINE_BEATS-1:0]               fill_out_err;
+  // External req signals
+  logic                                instr_req;
+  logic [ADDR_W-1:BUS_W]               instr_addr;
+  // Data output signals
+  logic                                skid_complete_instr;
+  logic                                skid_ready;
+  logic                                output_compressed;
+  logic                                skid_valid_d, skid_valid_q, skid_en;
+  logic [15:0]                         skid_data_d, skid_data_q;
+  logic                                skid_err_q;
+  logic                                output_valid;
+  logic                                addr_incr_two;
+  logic                                output_addr_en;
+  logic [ADDR_W-1:1]                   output_addr_incr;
+  logic [ADDR_W-1:1]                   output_addr_d, output_addr_q;
+  logic [15:0]                         output_data_lo, output_data_hi;
+  logic                                data_valid, output_ready;
+  logic [LineSize-1:0]                 line_data;
+  logic [LINE_BEATS-1:0]               line_err;
+  logic [31:0]                         line_data_muxed;
+  logic                                line_err_muxed;
+  logic [31:0]                         output_data;
+  logic                                output_err;
+  // Invalidations
+  logic                                start_inval, inval_done;
+  logic                                reset_inval_q;
+  logic                                inval_prog_d, inval_prog_q;
+  logic [INDEX_W-1:0]                  inval_index_d, inval_index_q;
+
+  //////////////////////////
+  // Instruction prefetch //
+  //////////////////////////
+
+  if (BranchPredictor) begin : g_branch_predictor
+    // Where the branch predictor is present record what address followed a predicted branch.  If
+    // that branch is predicted taken but mispredicted (so not-taken) this is used to resume on
+    // the not-taken code path.
+    logic [31:0] branch_mispredict_addr_q;
+    logic        branch_mispredict_addr_en;
+
+    assign branch_mispredict_addr_en = branch_i & predicted_branch_i;
+
+    always_ff @(posedge clk_i) begin
+      if (branch_mispredict_addr_en) begin
+        branch_mispredict_addr_q <= {output_addr_incr, 1'b0};
+      end
+    end
+
+    assign branch_mispredict_addr = branch_mispredict_addr_q;
+
+  end else begin : g_no_branch_predictor
+    logic        unused_predicted_branch;
+
+    assign unused_predicted_branch   = predicted_branch_i;
+
+    assign branch_mispredict_addr = '0;
+  end
+
+  assign branch_or_mispredict = branch_i | branch_mispredict_i;
+
+  assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:LINE_W],{LINE_W{1'b0}}};
+
+  // The prefetch address increments by one cache line for each granted request.
+  // This address is also updated if there is a branch that is not granted, since the target
+  // address (addr_i) is only valid for one cycle while branch_i is high.
+
+  // The captured branch target address is not forced to be aligned since the offset in the cache
+  // line must also be recorded for later use by the fill buffers.
+  assign prefetch_addr_d     =
+      lookup_grant_ic0 ? (lookup_addr_aligned + {{ADDR_W-LINE_W-1{1'b0}},1'b1,{LINE_W{1'b0}}}) :
+      branch_i         ? addr_i :
+                         branch_mispredict_addr;
+
+  assign prefetch_addr_en    = branch_or_mispredict | lookup_grant_ic0;
+
+  always_ff @(posedge clk_i) begin
+    if (prefetch_addr_en) begin
+      prefetch_addr_q <= prefetch_addr_d;
+    end
+  end
+
+  ////////////////////////
+  // Pipeline stage IC0 //
+  ////////////////////////
+
+  // Cache lookup
+  assign lookup_throttle  = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]);
+
+  assign lookup_req_ic0   = req_i & ~&fill_busy_q & (branch_or_mispredict | ~lookup_throttle) &
+                            ~ecc_write_req;
+  assign lookup_addr_ic0  = branch_spec_i       ? addr_i :
+                            branch_mispredict_i ? branch_mispredict_addr :
+                                                  prefetch_addr_q;
+  assign lookup_index_ic0 = lookup_addr_ic0[INDEX_HI:LINE_W];
+
+  // Cache write
+  assign fill_req_ic0   = (|fill_ram_req);
+  assign fill_index_ic0 = fill_ram_req_addr[INDEX_HI:LINE_W];
+  assign fill_tag_ic0   = {(~inval_prog_q & ~ecc_write_req),fill_ram_req_addr[ADDR_W-1:INDEX_HI+1]};
+  assign fill_wdata_ic0 = fill_ram_req_data;
+
+  // Suppress a new lookup on a not-taken branch (as the address will be incorrect)
+  assign branch_suppress   = branch_spec_i & ~branch_i;
+
+  // Arbitrated signals - lookups have highest priority
+  assign lookup_grant_ic0  = lookup_req_ic0 & ~branch_suppress;
+  assign fill_grant_ic0    = fill_req_ic0 & (~lookup_req_ic0 | branch_suppress) & ~inval_prog_q &
+                             ~ecc_write_req;
+  // Qualified lookup grant to mask ram signals in IC1 if access was not made
+  assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_prog_q & ~start_inval;
+
+  // Tagram
+  assign tag_req_ic0   = lookup_req_ic0 | fill_req_ic0 | inval_prog_q | ecc_write_req;
+  assign tag_index_ic0 = inval_prog_q   ? inval_index_q :
+                         ecc_write_req  ? ecc_write_index :
+                         fill_grant_ic0 ? fill_index_ic0 :
+                                          lookup_index_ic0;
+  assign tag_banks_ic0 = ecc_write_req  ? ecc_write_ways :
+                         fill_grant_ic0 ? fill_ram_req_way :
+                                          {NumWays{1'b1}};
+  assign tag_write_ic0 = fill_grant_ic0 | inval_prog_q | ecc_write_req;
+
+  // Dataram
+  assign data_req_ic0   = lookup_req_ic0 | fill_req_ic0;
+  assign data_index_ic0 = tag_index_ic0;
+  assign data_banks_ic0 = tag_banks_ic0;
+  assign data_write_ic0 = tag_write_ic0;
+
+  // Append ECC checkbits to write data if required
+  if (ICacheECC) begin : gen_ecc_wdata
+
+    // Tagram ECC
+    // Reuse the same ecc encoding module for larger cache sizes by padding with zeros
+    logic [21:0]          tag_ecc_input_padded;
+    logic [27:0]          tag_ecc_output_padded;
+    logic [22-TAG_SIZE:0] tag_ecc_output_unused;
+
+    assign tag_ecc_input_padded  = {{22-TAG_SIZE{1'b0}},fill_tag_ic0};
+    assign tag_ecc_output_unused = tag_ecc_output_padded[21:TAG_SIZE-1];
+
+    prim_secded_28_22_enc tag_ecc_enc (
+      .in  (tag_ecc_input_padded),
+      .out (tag_ecc_output_padded)
+    );
+
+    assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[TAG_SIZE-1:0]};
+
+    // Dataram ECC
+    prim_secded_72_64_enc data_ecc_enc (
+      .in  (fill_wdata_ic0),
+      .out (data_wdata_ic0)
+    );
+
+  end else begin : gen_noecc_wdata
+    assign tag_wdata_ic0  = fill_tag_ic0;
+    assign data_wdata_ic0 = fill_wdata_ic0;
+  end
+
+  ////////////////
+  // IC0 -> IC1 //
+  ////////////////
+
+  for (genvar way = 0; way < NumWays; way++) begin : gen_rams
+    // Tag RAM instantiation
+    prim_ram_1p #(
+      .Width           (TAG_SIZE_ECC),
+      .Depth           (NUM_LINES),
+      .DataBitsPerMask (TAG_SIZE_ECC)
+    ) tag_bank (
+      .clk_i    (clk_i),
+      .req_i    (tag_req_ic0 & tag_banks_ic0[way]),
+      .write_i  (tag_write_ic0),
+      .wmask_i  ({TAG_SIZE_ECC{1'b1}}),
+      .addr_i   (tag_index_ic0),
+      .wdata_i  (tag_wdata_ic0),
+      .rdata_o  (tag_rdata_ic1[way])
+    );
+    // Data RAM instantiation
+    prim_ram_1p #(
+      .Width           (LINE_SIZE_ECC),
+      .Depth           (NUM_LINES),
+      .DataBitsPerMask (LINE_SIZE_ECC)
+    ) data_bank (
+      .clk_i    (clk_i),
+      .req_i    (data_req_ic0 & data_banks_ic0[way]),
+      .write_i  (data_write_ic0),
+      .wmask_i  ({LINE_SIZE_ECC{1'b1}}),
+      .addr_i   (data_index_ic0),
+      .wdata_i  (data_wdata_ic0),
+      .rdata_o  (data_rdata_ic1[way])
+    );
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      lookup_valid_ic1 <= 1'b0;
+    end else begin
+      lookup_valid_ic1 <= lookup_actual_ic0;
+    end
+  end
+
+  always_ff @(posedge clk_i) begin
+    if (lookup_grant_ic0) begin
+      lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:INDEX_HI+1];
+      fill_in_ic1     <= fill_alloc_sel;
+    end
+  end
+
+  ////////////////////////
+  // Pipeline stage IC1 //
+  ////////////////////////
+
+  // Tag matching
+  for (genvar way = 0; way < NumWays; way++) begin : gen_tag_match
+    assign tag_match_ic1[way]   = (tag_rdata_ic1[way][TAG_SIZE-1:0] ==
+                                   {1'b1,lookup_addr_ic1[ADDR_W-1:INDEX_HI+1]});
+    assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_SIZE-1];
+  end
+
+  assign tag_hit_ic1 = |tag_match_ic1;
+
+  // Hit data mux
+  always_comb begin
+    hit_data_ic1 = 'b0;
+    for (int way = 0; way < NumWays; way++) begin
+      if (tag_match_ic1[way]) begin
+        hit_data_ic1 |= data_rdata_ic1[way];
+      end
+    end
+  end
+
+  // Way selection for allocations to the cache (each vector has at most one bit set)
+  // 1 first invalid way: the lowest-numbered way whose tag valid bit is clear
+  // 2 global round-robin (pseudorandom) way: round_robin_way_q rotated by one position
+  assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
+  assign round_robin_way_ic1[0]    = round_robin_way_q[NumWays-1];
+  for (genvar way = 1; way < NumWays; way++) begin : gen_lowest_way
+    assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
+    assign round_robin_way_ic1[way]    = round_robin_way_q[way-1];
+  end
+
+  // Round-robin replacement pointer.
+  // Reset selects way 0 (one-hot); afterwards the pointer is loaded with
+  // round_robin_way_ic1 in every cycle in which an IC1 lookup is valid.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni)               round_robin_way_q <= {{NumWays-1{1'b0}},1'b1};
+    else if (lookup_valid_ic1) round_robin_way_q <= round_robin_way_ic1;
+  end
+
+  assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
+                                          round_robin_way_q;
+
+  // ECC checking logic (generated only when ICacheECC is set; otherwise ecc_* are tied to zero)
+  if (ICacheECC) begin : gen_data_ecc_checking
+    logic [NumWays-1:0] tag_err_ic1;   // per-way tag ECC error flag
+    logic [1:0]         data_err_ic1;  // error outputs of the data decoder on hit_data_ic1
+    logic               ecc_correction_write_d, ecc_correction_write_q;
+    logic [NumWays-1:0] ecc_correction_ways_d, ecc_correction_ways_q;
+    logic [INDEX_W-1:0] lookup_index_ic1, ecc_correction_index_q;
+
+    // Tag ECC checking
+    for (genvar way = 0; way < NumWays; way++) begin : gen_tag_ecc
+      logic [1:0]  tag_err_bank_ic1;
+      logic [27:0] tag_rdata_padded_ic1;
+
+      // Expand the tag rdata with extra padding if the tag size is less than the maximum
+      assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TAG_SIZE_ECC-1-:6],  // top 6 stored bits
+                                     {22-TAG_SIZE{1'b0}},                    // zero padding
+                                     tag_rdata_ic1[way][TAG_SIZE-1:0]};      // tag incl. valid bit
+
+      prim_secded_28_22_dec data_ecc_dec (
+        .in         (tag_rdata_padded_ic1),
+        .d_o        (),  // left unconnected: only err_o is consumed here
+        .syndrome_o (),  // left unconnected
+        .err_o      (tag_err_bank_ic1)
+      );
+      assign tag_err_ic1[way] = |tag_err_bank_ic1;  // any error bit flags this way
+    end
+
+    // Data ECC checking
+    // Note - could generate for all ways and mux after
+    prim_secded_72_64_dec data_ecc_dec (
+      .in         (hit_data_ic1),
+      .d_o        (),  // left unconnected: only err_o is consumed here
+      .syndrome_o (),  // left unconnected
+      .err_o      (data_err_ic1)
+    );
+
+    assign ecc_err_ic1 = lookup_valid_ic1 & ((|data_err_ic1) | (|tag_err_ic1));
+
+    // Error correction
+    // All ways will be invalidated on a tag error to prevent X-propagation from data_err_ic1 on
+    // spurious hits. Also prevents the same line being allocated twice when there was a true
+    // hit and a spurious hit.
+    assign ecc_correction_ways_d  = {NumWays{|tag_err_ic1}} |
+                                    (tag_match_ic1 & {NumWays{|data_err_ic1}});
+    assign ecc_correction_write_d = ecc_err_ic1;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin  // write request, one cycle after the error
+      if (!rst_ni) begin
+        ecc_correction_write_q <= 1'b0;
+      end else begin
+        ecc_correction_write_q <= ecc_correction_write_d;
+      end
+    end
+
+    // The index is required in IC1 only when ECC is configured so is registered here
+    always_ff @(posedge clk_i) begin
+      if (lookup_grant_ic0) begin
+        lookup_index_ic1 <= lookup_addr_ic0[INDEX_HI-:INDEX_W];
+      end
+    end
+
+    // Store the ways with errors to be invalidated
+    always_ff @(posedge clk_i) begin
+      if (ecc_err_ic1) begin
+        ecc_correction_ways_q  <= ecc_correction_ways_d;
+        ecc_correction_index_q <= lookup_index_ic1;  // index of the lookup that saw the error
+      end
+    end
+
+    assign ecc_write_req   = ecc_correction_write_q;
+    assign ecc_write_ways  = ecc_correction_ways_q;
+    assign ecc_write_index = ecc_correction_index_q;
+
+  end else begin : gen_no_data_ecc
+    assign ecc_err_ic1     = 1'b0;  // no ECC: never report an error
+    assign ecc_write_req   = 1'b0;  // no ECC: never request a correction write
+    assign ecc_write_ways  = '0;
+    assign ecc_write_index = '0;
+  end
+
+  ///////////////////////////////
+  // Cache allocation decision //
+  ///////////////////////////////
+
+  if (BranchCache) begin : gen_caching_logic
+
+    // Cache branch target + a number of subsequent lines
+    localparam int unsigned CACHE_AHEAD = 2;  // value loaded into the counter on a branch
+    localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
+    logic                   cache_cnt_dec;
+    logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;
+
+    assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);  // granted lookup, count non-zero
+    assign cache_cnt_d   = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :  // reload on a branch
+                                      (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin  // counter register, zero after reset
+      if (!rst_ni) begin
+        cache_cnt_q <= '0;
+      end else begin
+        cache_cnt_q <= cache_cnt_d;
+      end
+    end
+
+    assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i &
+                            ~icache_inval_i & ~inval_prog_q;
+
+  end else begin : gen_cache_all
+
+    // Cache all missing fetches (while enabled and no invalidation is starting or in progress)
+    assign fill_cache_new = icache_enable_i & ~start_inval & ~inval_prog_q;
+  end
+
+  //////////////////////////
+  // Fill buffer tracking //
+  //////////////////////////
+
+  always_comb begin  // count the fill buffers that are busy and not stale
+    fb_fill_level = '0;
+    for (int k = 0; k < NUM_FB; k++) begin
+      if (fill_busy_q[k] & ~fill_stale_q[k]) begin
+        fb_fill_level = fb_fill_level + {{$clog2(NUM_FB)-1{1'b0}},1'b1};
+      end
+    end
+  end
+
+  // PMP errors might not / don't need to be granted (since the external request is masked)
+  assign gnt_or_pmp_err  = instr_gnt_i | instr_pmp_err_i;
+  assign gnt_not_pmp_err = instr_gnt_i & ~instr_pmp_err_i;
+  // Allocate a new buffer for every granted lookup
+  assign fill_new_alloc = lookup_grant_ic0;
+  // Track whether a speculative external request was made from IC0, and whether it was granted
+  // Speculative requests are only made for branches, or if the cache is disabled
+  assign fill_spec_req  = (~icache_enable_i | branch_or_mispredict) & ~|fill_ext_req;
+  assign fill_spec_done = fill_spec_req & gnt_not_pmp_err;
+  assign fill_spec_hold = fill_spec_req & ~gnt_or_pmp_err;
+
+  for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs
+
+    /////////////////////////////
+    // Fill buffer allocations //
+    /////////////////////////////
+
+    // Allocate the lowest available buffer
+    if (fb == 0) begin : gen_fb_zero
+      assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
+    end else begin : gen_fb_rest
+      assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
+    end
+
+    assign fill_alloc[fb]      = fill_alloc_sel[fb] & fill_new_alloc;
+    assign fill_busy_d[fb]     = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);
+
+    // Track which other fill buffers are older than this one (for age-based arbitration)
+    // TODO sparsify
+    assign fill_older_d[fb]    = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;
+
+    // A fill buffer can release once all its actions are completed
+                                 // all data written to the cache (unless hit or error)
+    assign fill_done[fb]       = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
+                                  (|fill_err_q[fb])) &
+                                 // all data output unless stale due to intervening branch
+                                 (fill_out_done[fb] | fill_stale_q[fb] | branch_or_mispredict) &
+                                 // all external requests completed
+                                 fill_rvd_done[fb];
+
+    /////////////////////////////////
+    // Fill buffer status tracking //
+    /////////////////////////////////
+
+    // Track staleness (requests become stale when a branch intervenes)
+    assign fill_stale_d[fb]    = fill_busy_q[fb] & (branch_or_mispredict | fill_stale_q[fb]);
+    // Track whether or not this request should allocate to the cache
+    // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
+    assign fill_cache_d[fb]    = (fill_alloc[fb] & fill_cache_new) |
+                                 (fill_cache_q[fb] & fill_busy_q[fb] &
+                                  icache_enable_i & ~icache_inval_i);
+    // Record whether the request hit in the cache
+    assign fill_hit_ic1[fb]    = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1 & ~ecc_err_ic1;
+    assign fill_hit_d[fb]      = fill_hit_ic1[fb] | (fill_hit_q[fb] & fill_busy_q[fb]);
+
+    ///////////////////////////////////////////
+    // Fill buffer external request tracking //
+    ///////////////////////////////////////////
+
+    // Make an external request
+    assign fill_ext_req[fb]    = fill_busy_q[fb] & ~fill_ext_done_d[fb];
+
+    // Count the number of completed external requests (each line requires LINE_BEATS requests)
+    // Don't count fake PMP error grants here since they will never receive an rvalid response
+    assign fill_ext_cnt_d[fb]  = fill_alloc[fb] ?
+                                   {{LINE_BEATS_W{1'b0}},fill_spec_done} :
+                                   (fill_ext_cnt_q[fb] + {{LINE_BEATS_W{1'b0}},
+                                                          fill_ext_arb[fb] & gnt_not_pmp_err});
+    // External request must be held until granted
+    assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
+                                 (fill_ext_arb[fb] & ~gnt_or_pmp_err);
+    // External requests are completed when the counter is filled or when the request is cancelled
+    assign fill_ext_done_d[fb] = (fill_ext_cnt_q[fb][LINE_BEATS_W] |
+                                  // external requests are considered complete if the request hit
+                                  fill_hit_ic1[fb] | fill_hit_q[fb] |
+                                  // external requests will stop once any PMP error is received
+                                  fill_err_q[fb][fill_ext_off[fb]] |
+                                  // cancel if the line won't be cached and it is stale,
+                                  (~fill_cache_q[fb] & (branch_or_mispredict | fill_stale_q[fb] |
+                                   // or we're already at the end of the line
+                                                        fill_ext_beat[fb][LINE_BEATS_W]))) &
+                                 // can't cancel while we are waiting for a grant on the bus
+                                 ~fill_ext_hold_q[fb] & fill_busy_q[fb];
+    // Track whether this fill buffer expects to receive beats of data
+    assign fill_rvd_exp[fb]    = fill_busy_q[fb] & ~fill_rvd_done[fb];
+    // Count the number of rvalid beats received
+    assign fill_rvd_cnt_d[fb]  = fill_alloc[fb] ? '0 :
+                                                  (fill_rvd_cnt_q[fb] +
+                                                   {{LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
+    // External data is complete when all issued external requests have received their data
+    assign fill_rvd_done[fb]   = (fill_ext_done_q[fb] & ~fill_ext_hold_q[fb]) &
+                                 (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);
+
+    //////////////////////////////////////
+    // Fill buffer data output tracking //
+    //////////////////////////////////////
+
+    // Send data to the IF stage for requests that are not stale, have not completed their
+    // data output, and have data available to send.
+    // Data is available if:
+    // - The request hit in the cache
+    // - The current beat is an error (since a PMP error might not actually receive any data)
+    // - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
+    // - Data is available from the bus this cycle (fill_rvd_arb)
+    assign fill_out_req[fb]    = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
+                                 (fill_hit_ic1[fb] | fill_hit_q[fb] |
+                                  (fill_err_q[fb][fill_out_cnt_q[fb][LINE_BEATS_W-1:0]]) |
+                                  (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);
+
+    // Calculate when a beat of data is output (an ECC error is squashed via fill_hit_ic1).
+    assign fill_out_grant[fb]  = fill_out_arb[fb] & output_ready;
+
+    // Count the beats of data output to the IF stage
+    assign fill_out_cnt_d[fb]  = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[LINE_W-1:BUS_W]} :
+                                                  (fill_out_cnt_q[fb] +
+                                                   {{LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
+    // Data output complete when the counter fills
+    assign fill_out_done[fb]   = fill_out_cnt_q[fb][LINE_BEATS_W];
+
+    //////////////////////////////////////
+    // Fill buffer ram request tracking //
+    //////////////////////////////////////
+
+                                 // make a fill request once all data beats received
+    assign fill_ram_req[fb]    = fill_busy_q[fb] & fill_rvd_cnt_q[fb][LINE_BEATS_W] &
+                                 // unless the request hit, was non-allocating or got an error
+                                 ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
+                                 // or the request was already completed
+                                 ~fill_ram_done_q[fb];
+
+    // Record when a cache allocation request has been completed
+    assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);
+
+    //////////////////////////////
+    // Fill buffer line offsets //
+    //////////////////////////////
+
+    // When we branch into the middle of a line, the output count will not start from zero. This
+    // beat count is used to know which incoming rdata beats are relevant.
+    assign fill_ext_beat[fb]   = {1'b0,fill_addr_q[fb][LINE_W-1:BUS_W]} +
+                                 fill_ext_cnt_q[fb][LINE_BEATS_W:0];
+    assign fill_ext_off[fb]    = fill_ext_beat[fb][LINE_BEATS_W-1:0];
+    assign fill_rvd_beat[fb]   = {1'b0,fill_addr_q[fb][LINE_W-1:BUS_W]} +
+                                 fill_rvd_cnt_q[fb][LINE_BEATS_W:0];
+    assign fill_rvd_off[fb]    = fill_rvd_beat[fb][LINE_BEATS_W-1:0];
+
+    /////////////////////////////
+    // Fill buffer arbitration //
+    /////////////////////////////
+
+    // Age based arbitration - all these signals are one-hot
+    assign fill_ext_arb[fb]    = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
+    assign fill_ram_arb[fb]    = fill_ram_req[fb] & fill_grant_ic0 & ~|(fill_ram_req & fill_older_q[fb]);
+    // Calculate which fill buffer is the oldest one which still needs to output data to IF
+    assign fill_data_sel[fb]   = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
+                                    fill_older_q[fb]);
+    // Arbitrate the request which has data available to send, and is the oldest outstanding
+    assign fill_out_arb[fb]    = fill_out_req[fb] & fill_data_sel[fb];
+    // Assign incoming rvalid data to the oldest fill buffer expecting it
+    assign fill_rvd_arb[fb]    = instr_rvalid_i & fill_rvd_exp[fb] & ~|(fill_rvd_exp & fill_older_q[fb]);
+
+    /////////////////////////////
+    // Fill buffer data muxing //
+    /////////////////////////////
+
+    // Output data muxing controls
+    // 1. Select data from the fill buffer data register
+    assign fill_data_reg[fb]   = fill_busy_q[fb] & ~fill_stale_q[fb] &
+                                 ~fill_out_done[fb] & fill_data_sel[fb] &
+    //                           The incoming data is already ahead of the output count
+                                 ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
+                                  (|fill_err_q[fb]));
+    // 2. Select IC1 hit data
+    assign fill_data_hit[fb]   = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
+    // 3. Select incoming instr_rdata_i
+    assign fill_data_rvd[fb]   = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
+                                 ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
+    //                           The incoming data lines up with the output count
+                                 (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];
+
+
+    ///////////////////////////
+    // Fill buffer registers //
+    ///////////////////////////
+
+    // Fill buffer general enable
+    assign fill_entry_en[fb]   = fill_alloc[fb] | fill_busy_q[fb];
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin  // per-buffer state, all cleared in reset
+      if (!rst_ni) begin
+        fill_busy_q[fb]     <= 1'b0;
+        fill_older_q[fb]    <= '0;
+        fill_stale_q[fb]    <= 1'b0;
+        fill_cache_q[fb]    <= 1'b0;
+        fill_hit_q[fb]      <= 1'b0;
+        fill_ext_cnt_q[fb]  <= '0;
+        fill_ext_hold_q[fb] <= 1'b0;
+        fill_ext_done_q[fb] <= 1'b0;
+        fill_rvd_cnt_q[fb]  <= '0;
+        fill_ram_done_q[fb] <= 1'b0;
+        fill_out_cnt_q[fb]  <= '0;
+      end else if (fill_entry_en[fb]) begin  // update only while allocating or busy
+        fill_busy_q[fb]     <= fill_busy_d[fb];      // set on allocation, cleared by fill_done
+        fill_older_q[fb]    <= fill_older_d[fb];     // buffers busy at allocation, not yet done
+        fill_stale_q[fb]    <= fill_stale_d[fb];     // a branch intervened while busy
+        fill_cache_q[fb]    <= fill_cache_d[fb];     // request should allocate to the cache
+        fill_hit_q[fb]      <= fill_hit_d[fb];       // request hit in the cache
+        fill_ext_cnt_q[fb]  <= fill_ext_cnt_d[fb];   // granted external requests
+        fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];  // external request waiting for a grant
+        fill_ext_done_q[fb] <= fill_ext_done_d[fb];  // external requests complete or cancelled
+        fill_rvd_cnt_q[fb]  <= fill_rvd_cnt_d[fb];   // rvalid beats received
+        fill_ram_done_q[fb] <= fill_ram_done_d[fb];  // cache allocation request completed
+        fill_out_cnt_q[fb]  <= fill_out_cnt_d[fb];   // beats output to the IF stage
+      end
+    end
+
+    ////////////////////////////////////////
+    // Fill buffer address / data storage //
+    ////////////////////////////////////////
+
+    // The address is captured at allocation; the way is captured when the IC1 lookup
+    // tagged with this buffer (fill_in_ic1) is valid.
+    assign fill_addr_en[fb]    = fill_alloc[fb];
+    assign fill_way_en[fb]     = fill_in_ic1[fb] & lookup_valid_ic1;
+
+    // Lookup address handled by this fill buffer (no reset)
+    always_ff @(posedge clk_i) begin
+      if (fill_addr_en[fb]) fill_addr_q[fb] <= lookup_addr_ic0;
+    end
+
+    // Way selected in IC1 for this fill buffer (no reset)
+    always_ff @(posedge clk_i) begin
+      if (fill_way_en[fb]) fill_way_q[fb] <= sel_way_ic1;
+    end
+
+    // Data either comes from the cache or the bus. If there was an ECC error, we must take
+    // the incoming bus data since the cache hit data is corrupted.
+    assign fill_data_d[fb] = fill_hit_ic1[fb] ? hit_data_ic1[LineSize-1:0] :
+                                                {LINE_BEATS{instr_rdata_i}};
+
+    for (genvar b = 0; b < LINE_BEATS; b++) begin : gen_data_buf
+      // Error tracking (per beat)
+      //                           Either a PMP error on a speculative request,
+      assign fill_err_d[fb][b]   = (instr_pmp_err_i & fill_alloc[fb] & fill_spec_req &
+                                    (lookup_addr_ic0[LINE_W-1:BUS_W] == b[LINE_BEATS_W-1:0])) |
+      //                           a PMP error on a fill buffer ext req
+                                   (instr_pmp_err_i & fill_ext_arb[fb] &
+                                    (fill_ext_off[fb] == b[LINE_BEATS_W-1:0])) |
+      //                           Or a data error with instr_rvalid_i
+                                   (fill_rvd_arb[fb] & instr_err_i &
+                                    (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0])) |
+      //                           Hold the error once recorded
+                                   (fill_busy_q[fb] & fill_err_q[fb][b]);
+
+      always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+          fill_err_q[fb][b] <= '0;
+        end else if (fill_entry_en[fb]) begin
+          fill_err_q[fb][b] <= fill_err_d[fb][b];
+        end
+      end
+
+      // Enable the relevant part of the data register (or all for cache hits)
+      // Ignore incoming rvalid data when we already have cache hit data
+      assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
+                                   (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
+                                    (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0]));
+
+      always_ff @(posedge clk_i) begin
+        if (fill_data_en[fb][b]) begin
+          fill_data_q[fb][b*BusWidth+:BusWidth] <= fill_data_d[fb][b*BusWidth+:BusWidth];
+        end
+      end
+
+    end
+  end
+
+  ////////////////////////////////
+  // Fill buffer one-hot muxing //
+  ////////////////////////////////
+
+  // External request address: line address and beat offset of the arbitrated fill buffer
+  always_comb begin
+    fill_ext_req_addr = '0;
+    for (int k = 0; k < NUM_FB; k++) begin
+      if (fill_ext_arb[k]) begin
+        fill_ext_req_addr = fill_ext_req_addr | {fill_addr_q[k][ADDR_W-1:LINE_W], fill_ext_off[k]};
+      end
+    end
+  end
+
+  // Cache write request: address, way and data of the fill buffer that won RAM arbitration
+  always_comb begin
+    fill_ram_req_addr = '0;
+    fill_ram_req_way  = '0;
+    fill_ram_req_data = '0;
+    for (int k = 0; k < NUM_FB; k++) begin
+      if (fill_ram_arb[k]) begin
+        fill_ram_req_addr = fill_ram_req_addr | fill_addr_q[k];
+        fill_ram_req_way  = fill_ram_req_way  | fill_way_q[k];
+        fill_ram_req_data = fill_ram_req_data | fill_data_q[k];
+      end
+    end
+  end
+
+  // IF stage output: data and per-beat errors of the buffer selected by fill_data_reg
+  always_comb begin
+    fill_out_data = '0;
+    fill_out_err  = '0;
+    for (int k = 0; k < NUM_FB; k++) begin
+      if (fill_data_reg[k]) begin
+        fill_out_data = fill_out_data | fill_data_q[k];
+        // Errors recorded speculatively are masked out when the request hit in the cache
+        fill_out_err  = fill_out_err | (fill_err_q[k] & {LINE_BEATS{~fill_hit_q[k]}});
+      end
+    end
+  end
+
+  ///////////////////////
+  // External requests //
+  ///////////////////////
+
+  assign instr_req  = ((~icache_enable_i | branch_or_mispredict) & lookup_grant_ic0) |
+                      (|fill_ext_req);
+
+  assign instr_addr = |fill_ext_req ? fill_ext_req_addr :
+                                      lookup_addr_ic0[ADDR_W-1:BUS_W];
+
+  assign instr_req_o  = instr_req;
+  assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}};
+
+  ////////////////////////
+  // Output data muxing //
+  ////////////////////////
+
+  // Mux between line-width data sources
+  assign line_data = |fill_data_hit ? hit_data_ic1[LineSize-1:0] : fill_out_data;
+  assign line_err  = |fill_data_hit ? {LINE_BEATS{1'b0}} : fill_out_err;
+
+  // Mux the relevant beat of line data, based on the output address (beats are BusWidth wide)
+  always_comb begin
+    line_data_muxed = '0;
+    line_err_muxed  = 1'b0;
+    for (int i = 0; i < LINE_BEATS; i++) begin
+      // When data has been skidded, the output address is behind by one
+      if ((output_addr_q[LINE_W-1:BUS_W] + {{LINE_BEATS_W-1{1'b0}},skid_valid_q}) ==
+          i[LINE_BEATS_W-1:0]) begin
+        line_data_muxed |= line_data[i*BusWidth+:BusWidth];  // same stride as the fill data regs
+        line_err_muxed  |= line_err[i];
+      end
+    end
+  end
+
+  // Mux between incoming rdata and the muxed line data
+  assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed;
+  assign output_err  = |fill_data_rvd ? instr_err_i   : line_err_muxed;
+
+  // Output data is valid (from any of the three possible sources). Note that fill_out_arb
+  // must be used here rather than fill_out_req because data can become valid out of order
+  // (e.g. cache hit data can become available ahead of an older outstanding miss).
+  assign data_valid = |fill_out_arb;
+
+  // Skid buffer data
+  assign skid_data_d = output_data[31:16];
+
+  assign skid_en     = data_valid & (ready_i | skid_ready);
+
+  // Skid buffer payload (no reset): the upper halfword of output_data and the
+  // matching error flag, both captured only in cycles where skid_en is set.
+  always_ff @(posedge clk_i) begin
+    if (skid_en) skid_data_q <= skid_data_d;
+    if (skid_en) skid_err_q  <= output_err;
+  end
+
+  // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
+  // an error (no need to wait for the second half)
+  assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);
+
+  // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
+  assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);
+
+  assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;
+
+  assign output_compressed = (rdata_o[1:0] != 2'b11);
+
+  assign skid_valid_d =
+      // Branches invalidate the skid buffer
+      branch_or_mispredict ? 1'b0 :
+      // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
+      (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
+      // The skid buffer becomes valid when:
+                        // - we branch to an unaligned uncompressed instruction
+                      (((output_addr_q[1] & (~output_compressed | output_err)) |
+                        // - a compressed instruction misaligns the stream
+                        (~output_addr_q[1] & output_compressed & ~output_err & ready_i)) & data_valid));
+
+  // Skid buffer valid flag.
+  // Asynchronously cleared in reset; otherwise loaded with skid_valid_d on every
+  // rising clock edge (skid_valid_d itself is forced low on a branch or mispredict).
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) skid_valid_q <= 1'b0;
+    else         skid_valid_q <= skid_valid_d;
+  end
+
+  // Signal that valid data is available to the IF stage
+  // Note that if the first half of an unaligned instruction reports an error, we do not need
+  // to wait for the second half (and for PMP errors we might not have fetched the second half)
+                        // Compressed instruction completely satisfied by skid buffer
+  assign output_valid = skid_complete_instr |
+                        // Output data is available and: the stream is aligned, or skid data is available,
+                        (data_valid & (~output_addr_q[1] | skid_valid_q |
+                                       // or this is an error or an unaligned compressed instruction
+                                       output_err | (output_data[17:16] != 2'b11)));
+
+  // Update the address on branches and every time an instruction is driven
+  assign output_addr_en = branch_or_mispredict | (ready_i & valid_o);
+
+  // Increment the address by two every time a compressed instruction is popped
+  assign addr_incr_two = output_compressed & ~err_o;
+
+  // Next IF stage PC
+  assign output_addr_incr = (output_addr_q[31:1] +
+                             // Increment address by 4 or 2
+                             {29'd0, ~addr_incr_two, addr_incr_two});
+
+  // Redirect the address on branches or mispredicts
+  assign output_addr_d = branch_i            ? addr_i[31:1] :
+                         branch_mispredict_i ? branch_mispredict_addr[31:1] :
+                                               output_addr_incr;
+
+  // Output PC register, holding address bits [31:1] (no reset).
+  // Updated only in cycles where output_addr_en is set.
+  always_ff @(posedge clk_i) begin
+    if (output_addr_en) output_addr_q <= output_addr_d;
+  end
+
+  // Mux the data from BusWidth to halfword
+  // This muxing realigns data when instruction words are split across BUS_W e.g.
+  // word 1 |----|*h1*|
+  // word 0 |*h0*|----| --> |*h1*|*h0*|
+  //        31   15   0     31   15   0
+  always_comb begin  // low halfword: the 16-bit slice selected by output_addr_q[BUS_W-1:1]
+    output_data_lo = '0;
+    for (int hw = 0; hw < OUTPUT_BEATS; hw++) begin
+      if (hw[BUS_W-2:0] == output_addr_q[BUS_W-1:1]) begin
+        output_data_lo = output_data_lo | output_data[hw*16+:16];
+      end
+    end
+  end
+
+  always_comb begin  // high halfword: the slice after the low one, wrapping to bits [15:0]
+    output_data_hi = '0;
+    for (int hw = 0; hw < OUTPUT_BEATS-1; hw++) begin
+      if (hw[BUS_W-2:0] == output_addr_q[BUS_W-1:1]) begin
+        output_data_hi = output_data_hi | output_data[(hw+1)*16+:16];
+      end
+    end
+    if (&output_addr_q[BUS_W-1:1]) begin
+      output_data_hi = output_data_hi | output_data[15:0];
+    end
+  end
+
+  assign valid_o     = output_valid & ~branch_mispredict_i;
+  assign rdata_o     = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
+  assign addr_o      = {output_addr_q, 1'b0};
+  assign err_o       = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
+  // Error caused by the second half of a misaligned uncompressed instruction
+  // (only relevant when err_o is set)
+  assign err_plus2_o = skid_valid_q & ~skid_err_q;
+
+  ///////////////////
+  // Invalidations //
+  ///////////////////
+
+  // Invalidate on reset, or when instructed. If an invalidation request is received while a
+  // previous invalidation is ongoing, it does not need to be restarted.
+  assign start_inval   = (~reset_inval_q | icache_inval_i) & ~inval_prog_q;
+  assign inval_prog_d  = start_inval | (inval_prog_q & ~inval_done);
+  assign inval_done    = &inval_index_q;
+  assign inval_index_d = start_inval ? '0 :
+                                       (inval_index_q + {{INDEX_W-1{1'b0}},1'b1});
+
+  // inval_prog_q tracks an ongoing invalidation. reset_inval_q is low only until the
+  // first clock edge after reset is released, which lets start_inval fire after reset.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      {inval_prog_q, reset_inval_q} <= 2'b00;
+    end else begin
+      {inval_prog_q, reset_inval_q} <= {inval_prog_d, 1'b1};
+    end
+  end
+
+  // Invalidation index counter (no reset): loaded with zero when an invalidation
+  // starts and incremented while it is in progress; held when inval_prog_d is low.
+  always_ff @(posedge clk_i) begin
+    if (inval_prog_d) inval_index_q <= inval_index_d;
+  end
+
+  /////////////////
+  // Busy status //
+  /////////////////
+
+  // Only busy (for WFI purposes) while an invalidation is in-progress, or external requests are
+  // outstanding.
+  assign busy_o = inval_prog_q | (|(fill_busy_q & ~fill_rvd_done));
+
+
+
+endmodule
diff --git a/verilog/rtl/brq_ifu_prefetch_buffer.sv b/verilog/rtl/brq_ifu_prefetch_buffer.sv
new file mode 100644
index 0000000..f366518
--- /dev/null
+++ b/verilog/rtl/brq_ifu_prefetch_buffer.sv
@@ -0,0 +1,316 @@
+
+/**
+ * Prefetch buffer for a 32-bit memory interface
+ *
+ * Prefetch Buffer that caches instructions. This cuts overly long critical
+ * paths to the instruction cache.
+ */
+module brq_ifu_prefetch_buffer #(
+  parameter bit BranchPredictor = 1'b0 // 1: record the address that follows a predicted branch
+) (
+    input  logic        clk_i,
+    input  logic        rst_ni,              // active-low asynchronous reset
+
+    input  logic        req_i,               // allows new fetch requests to be made
+
+    input  logic        branch_i,            // clears the FIFO; addr_i is the new fetch address
+    input  logic        branch_spec_i,       // selects addr_i as the request address
+    input  logic        predicted_branch_i,  // only used when BranchPredictor is set
+    input  logic        branch_mispredict_i, // clears the FIFO and masks valid_o
+    input  logic [31:0] addr_i,
+
+
+    input  logic        ready_i,
+    output logic        valid_o,
+    output logic [31:0] rdata_o,
+    output logic [31:0] addr_o,
+    output logic        err_o,
+    output logic        err_plus2_o,
+
+
+    // goes to instruction memory / instruction cache
+    output logic        instr_req_o,
+    input  logic        instr_gnt_i,
+    output logic [31:0] instr_addr_o,        // always word aligned (bits [1:0] forced to zero)
+    input  logic [31:0] instr_rdata_i,
+    input  logic        instr_err_i,
+    input  logic        instr_pmp_err_i,
+    input  logic        instr_rvalid_i,
+
+    // Prefetch Buffer Status
+    output logic        busy_o
+);
+
+  localparam int unsigned NUM_REQS  = 2; // outstanding-request queue depth, also passed to the FIFO
+
+  logic                branch_suppress;
+  logic                valid_new_req, valid_req;
+  logic                valid_req_d, valid_req_q;
+  logic                discard_req_d, discard_req_q;
+  logic                gnt_or_pmp_err, rvalid_or_pmp_err;
+  logic [NUM_REQS-1:0] rdata_outstanding_n, rdata_outstanding_s, rdata_outstanding_q;
+  logic [NUM_REQS-1:0] branch_discard_n, branch_discard_s, branch_discard_q;
+  logic [NUM_REQS-1:0] rdata_pmp_err_n, rdata_pmp_err_s, rdata_pmp_err_q;
+  logic [NUM_REQS-1:0] rdata_outstanding_rev;
+
+  logic [31:0]         stored_addr_d, stored_addr_q;
+  logic                stored_addr_en;
+  logic [31:0]         fetch_addr_d, fetch_addr_q;
+  logic                fetch_addr_en;
+  logic [31:0]         branch_mispredict_addr;
+  logic [31:0]         instr_addr, instr_addr_w_aligned;
+  logic                instr_or_pmp_err;
+
+  logic                fifo_valid;
+  logic [31:0]         fifo_addr;
+  logic                fifo_ready;
+  logic                fifo_clear;
+  logic [NUM_REQS-1:0] fifo_busy;
+
+  logic                valid_raw;
+
+  logic [31:0]         addr_next;
+
+  logic                branch_or_mispredict;
+
+  ////////////////////////////
+  // Prefetch buffer status //
+  ////////////////////////////
+
+  assign busy_o = (|rdata_outstanding_q) | instr_req_o; // responses pending or a request active
+
+  assign branch_or_mispredict = branch_i | branch_mispredict_i;
+
+  //////////////////////////////////////////////
+  // Fetch fifo - consumes addresses and data //
+  //////////////////////////////////////////////
+
+  // Instruction fetch errors are valid on the data phase of a request
+  // PMP errors are generated in the address phase, and registered into a fake data phase
+  assign instr_or_pmp_err = instr_err_i | rdata_pmp_err_q[0];
+
+  // A branch will invalidate any previously fetched instructions.
+  // Note that the FENCE.I instruction relies on this flushing behaviour on branch. If it is
+  // altered the FENCE.I implementation may require changes.
+  assign fifo_clear = branch_or_mispredict;
+
+  // Reversed version of rdata_outstanding_q which can be overlaid with fifo fill state
+  for (genvar i = 0; i < NUM_REQS; i++) begin : gen_rd_rev
+    assign rdata_outstanding_rev[i] = rdata_outstanding_q[NUM_REQS-1-i];
+  end
+
+  // The fifo is ready to accept a new request if it is not full - including space reserved for
+  // requests already outstanding.
+  // Overlay the fifo fill state with the outstanding requests to see if there is space.
+  assign fifo_ready = ~&(fifo_busy | rdata_outstanding_rev);
+
+  brq_ifu_fifo #(
+    .NUM_REQS (NUM_REQS)
+  ) fifo_i (
+      .clk_i                 ( clk_i             ),
+      .rst_ni                ( rst_ni            ),
+
+      .clear_i               ( fifo_clear        ),
+      .busy_o                ( fifo_busy         ),
+
+      .in_valid_i            ( fifo_valid        ),
+      .in_addr_i             ( fifo_addr         ),
+      .in_rdata_i            ( instr_rdata_i     ),
+      .in_err_i              ( instr_or_pmp_err  ),
+
+      .out_valid_o           ( valid_raw         ),
+      .out_ready_i           ( ready_i           ),
+      .out_rdata_o           ( rdata_o           ),
+      .out_addr_o            ( addr_o            ),
+      .out_addr_next_o       ( addr_next         ),
+      .out_err_o             ( err_o             ),
+      .out_err_plus2_o       ( err_plus2_o       )
+  );
+
+  //////////////
+  // Requests //
+  //////////////
+
+  // Suppress a new request on a not-taken branch (as the external address will be incorrect)
+  assign branch_suppress = branch_spec_i & ~branch_i;
+
+  // Make a new request any time there is space in the FIFO, and space in the request queue
+  assign valid_new_req = ~branch_suppress & req_i & (fifo_ready | branch_or_mispredict) &
+                         ~rdata_outstanding_q[NUM_REQS-1];
+
+  assign valid_req = valid_req_q | valid_new_req;
+
+  // If a request address triggers a PMP error, the external bus request is suppressed. We might
+  // therefore never receive a grant for such a request. The grant is faked in this case to make
+  // sure the request proceeds and the error is pushed to the FIFO.
+  assign gnt_or_pmp_err = instr_gnt_i | instr_pmp_err_i;
+
+  // As with the grant, the rvalid must be faked for a PMP error, since the request was suppressed.
+  assign rvalid_or_pmp_err = rdata_outstanding_q[0] & (instr_rvalid_i | rdata_pmp_err_q[0]);
+
+  // Hold the request stable for requests that didn't get granted
+  assign valid_req_d = valid_req & ~gnt_or_pmp_err;
+
+  // Record whether an outstanding bus request is cancelled by a branch
+  assign discard_req_d = valid_req_q & (branch_or_mispredict | discard_req_q);
+
+  ////////////////
+  // Fetch addr //
+  ////////////////
+
+  // Two addresses are tracked in the prefetch buffer:
+  // 1. stored_addr_q - This is the address issued on the bus. It stays stable until
+  //                    the request is granted.
+  // 2. fetch_addr_q  - This is our next address to fetch from. It is updated on branches to
+  //                    capture the new address, and then for each new request issued.
+  // A third address is tracked in the fetch FIFO itself:
+  // 3. instr_addr_q  - This is the address at the head of the FIFO, effectively our oldest fetched
+  //                    address. This address is updated on branches, and does its own increment
+  //                    each time the FIFO is popped.
+
+  // 1. stored_addr_q
+
+  // Only update stored_addr_q for new ungranted requests
+  assign stored_addr_en = valid_new_req & ~valid_req_q & ~gnt_or_pmp_err;
+
+  // Store whatever address was issued on the bus
+  assign stored_addr_d = instr_addr;
+
+  // CPU resets with a branch, so no need to reset these addresses
+  always_ff @(posedge clk_i) begin
+    if (stored_addr_en) begin
+      stored_addr_q <= stored_addr_d;
+    end
+  end
+
+  if (BranchPredictor) begin : g_branch_predictor
+    // Where the branch predictor is present record what address followed a predicted branch.  If
+    // that branch is predicted taken but mispredicted (so not-taken) this is used to resume on
+    // the not-taken code path.
+    logic [31:0] branch_mispredict_addr_q;
+    logic        branch_mispredict_addr_en;
+
+    assign branch_mispredict_addr_en = branch_i & predicted_branch_i;
+
+    always_ff @(posedge clk_i) begin
+      if (branch_mispredict_addr_en) begin
+        branch_mispredict_addr_q <= addr_next;
+      end
+    end
+
+    assign branch_mispredict_addr = branch_mispredict_addr_q;
+  end else begin : g_no_branch_predictor
+    logic        unused_predicted_branch;
+    logic [31:0] unused_addr_next;
+
+    assign unused_predicted_branch = predicted_branch_i;
+    assign unused_addr_next        = addr_next;
+
+    assign branch_mispredict_addr = '0;
+  end
+
+  // 2. fetch_addr_q
+
+  // Update on a branch or as soon as a request is issued
+  assign fetch_addr_en = branch_or_mispredict | (valid_new_req & ~valid_req_q);
+
+  assign fetch_addr_d = (branch_i            ? addr_i :
+                         branch_mispredict_i ? {branch_mispredict_addr[31:2], 2'b00} :
+                                               {fetch_addr_q[31:2], 2'b00}) +
+                        // Add 4 only when a new request is made (valid_new_req & ~valid_req_q)
+                        {{29{1'b0}},(valid_new_req & ~valid_req_q),2'b00};
+
+  always_ff @(posedge clk_i) begin
+    if (fetch_addr_en) begin
+      fetch_addr_q <= fetch_addr_d;
+    end
+  end
+
+  // Address mux (priority: held request, then addr_i, then mispredict address, then next fetch)
+  assign instr_addr = valid_req_q         ? stored_addr_q :
+                      branch_spec_i       ? addr_i :
+                      branch_mispredict_i ? branch_mispredict_addr :
+                                            fetch_addr_q;
+
+  assign instr_addr_w_aligned = {instr_addr[31:2], 2'b00};
+
+  ///////////////////////////////
+  // Request outstanding queue //
+  ///////////////////////////////
+
+  for (genvar i = 0; i < NUM_REQS; i++) begin : g_outstanding_reqs
+    // Request 0 (always the oldest outstanding request)
+    if (i == 0) begin : g_req0
+      // A request becomes outstanding once granted, and is cleared once the rvalid is received.
+      // Outstanding requests shift down the queue towards entry 0.
+      assign rdata_outstanding_n[i] = (valid_req & gnt_or_pmp_err) |
+                                      rdata_outstanding_q[i];
+      // If a branch is received at any point while a request is outstanding, it must be tracked
+      // to ensure we discard the data once received
+      assign branch_discard_n[i]    = (valid_req & gnt_or_pmp_err & discard_req_d) |
+                                      (branch_or_mispredict & rdata_outstanding_q[i]) |
+                                      branch_discard_q[i];
+      // Record whether this request received a PMP error
+      assign rdata_pmp_err_n[i]     = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i) |
+                                      rdata_pmp_err_q[i];
+
+    end else begin : g_reqtop
+    // Entries > 0 only take a newly granted request when the entry below them is already
+    // outstanding (rdata_outstanding_q[i-1]), so the queue fills from entry 0 upwards
+
+      assign rdata_outstanding_n[i] = (valid_req & gnt_or_pmp_err &
+                                       rdata_outstanding_q[i-1]) |
+                                      rdata_outstanding_q[i];
+      assign branch_discard_n[i]    = (valid_req & gnt_or_pmp_err & discard_req_d &
+                                       rdata_outstanding_q[i-1]) |
+                                      (branch_or_mispredict & rdata_outstanding_q[i]) |
+                                      branch_discard_q[i];
+      assign rdata_pmp_err_n[i]     = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i &
+                                       rdata_outstanding_q[i-1]) |
+                                      rdata_pmp_err_q[i];
+    end
+  end
+
+  // Shift the entries down on each response (instr_rvalid_i, or the faked rvalid of a PMP error)
+  assign rdata_outstanding_s = rvalid_or_pmp_err ? {1'b0,rdata_outstanding_n[NUM_REQS-1:1]} :
+                                                   rdata_outstanding_n;
+  assign branch_discard_s    = rvalid_or_pmp_err ? {1'b0,branch_discard_n[NUM_REQS-1:1]} :
+                                                   branch_discard_n;
+  assign rdata_pmp_err_s     = rvalid_or_pmp_err ? {1'b0,rdata_pmp_err_n[NUM_REQS-1:1]} :
+                                                   rdata_pmp_err_n;
+
+  // Push a new entry to the FIFO once complete (and not cancelled by a branch)
+  assign fifo_valid = rvalid_or_pmp_err & ~branch_discard_q[0];
+
+  assign fifo_addr = branch_i ? addr_i : branch_mispredict_addr;
+
+  ///////////////
+  // Registers //
+  ///////////////
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      valid_req_q          <= 1'b0;
+      discard_req_q        <= 1'b0;
+      rdata_outstanding_q  <= 'b0;
+      branch_discard_q     <= 'b0;
+      rdata_pmp_err_q      <= 'b0;
+    end else begin
+      valid_req_q          <= valid_req_d;
+      discard_req_q        <= discard_req_d;
+      rdata_outstanding_q  <= rdata_outstanding_s;
+      branch_discard_q     <= branch_discard_s;
+      rdata_pmp_err_q      <= rdata_pmp_err_s;
+    end
+  end
+
+  /////////////
+  // Outputs //
+  /////////////
+
+  assign instr_req_o  = valid_req;
+  assign instr_addr_o = instr_addr_w_aligned;
+
+  assign valid_o = valid_raw & ~branch_mispredict_i; // FIFO output is masked during a mispredict
+
+endmodule
diff --git a/verilog/rtl/brq_lsu.sv b/verilog/rtl/brq_lsu.sv
new file mode 100644
index 0000000..a87191a
--- /dev/null
+++ b/verilog/rtl/brq_lsu.sv
@@ -0,0 +1,489 @@
+/**
+ * Load Store Unit
+ *
+ * Load Store Unit, used to eliminate multiple accesses during processor stalls,
+ * and to align bytes and halfwords.
+ */
+
+module brq_lsu
+(
+    input  logic         clk_i,
+    input  logic         rst_ni,               // active-low asynchronous reset
+
+    // data interface
+    output logic         data_req_o,
+    input  logic         data_gnt_i,
+    input  logic         data_rvalid_i,
+    input  logic         data_err_i,
+    input  logic         data_pmp_err_i,
+
+    output logic [31:0]  data_addr_o,
+    output logic         data_we_o,
+    output logic [3:0]   data_be_o,
+    output logic [31:0]  data_wdata_o,
+    input  logic [31:0]  data_rdata_i,
+
+    // signals to/from ID/EX stage
+    input  logic         lsu_we_i,             // write enable                     -> from ID/EX
+    input  logic [1:0]   lsu_type_i,           // data type: word, half word, byte -> from ID/EX
+    input  logic [31:0]  lsu_wdata_i,          // data to write to memory          -> from ID/EX
+    input  logic         lsu_sign_ext_i,       // sign extension                   -> from ID/EX
+
+    output logic [31:0]  lsu_rdata_o,          // requested data                   -> to ID/EX
+    output logic         lsu_rdata_valid_o,
+    input  logic         lsu_req_i,            // data request                     -> from ID/EX
+
+    input  logic [31:0]  adder_result_ex_i,    // address computed in ALU          -> from ID/EX
+
+    output logic         addr_incr_req_o,      // request address increment for
+                                               // misaligned accesses              -> to ID/EX
+    output logic [31:0]  addr_last_o,          // address of last transaction      -> to controller
+                                               // -> mtval
+                                               // -> AGU for misaligned accesses
+
+    output logic         lsu_req_done_o,       // Signals that data request is complete
+                                               // (only need to await final data
+                                               // response)                        -> to ID/EX
+
+    output logic         lsu_resp_valid_o,     // LSU has response from transaction -> to ID/EX
+
+    // exception signals
+    output logic         load_err_o,
+    output logic         store_err_o,
+
+    output logic         busy_o,
+
+    output logic         perf_load_o,
+    output logic         perf_store_o
+);
+
+  logic [31:0]  data_addr;
+  logic [31:0]  data_addr_w_aligned;
+  logic [31:0]  addr_last_q;
+
+  logic         addr_update;     // enable for addr_last_q
+  logic         ctrl_update;     // enable for the transaction control registers
+  logic         rdata_update;    // enable for rdata_q
+  logic [31:8]  rdata_q;         // upper three bytes of a captured response (see rdata_update)
+  logic [1:0]   rdata_offset_q;
+  logic [1:0]   data_type_q;
+  logic         data_sign_ext_q;
+  logic         data_we_q;
+
+  logic [1:0]   data_offset;   // mux control for data to be written to memory
+
+  logic [3:0]   data_be;
+  logic [31:0]  data_wdata;
+
+  logic [31:0]  data_rdata_ext;
+
+  logic [31:0]  rdata_w_ext; // word realignment for misaligned loads
+  logic [31:0]  rdata_h_ext; // sign or zero extension for half words
+  logic [31:0]  rdata_b_ext; // sign or zero extension for bytes
+
+  logic         split_misaligned_access;
+  logic         handle_misaligned_q, handle_misaligned_d; // high after receiving grant for first
+                                                          // part of a misaligned access
+  logic         pmp_err_q, pmp_err_d;
+  logic         lsu_err_q, lsu_err_d;
+  logic         data_or_pmp_err;
+
+  typedef enum logic [2:0]  {
+    IDLE, WAIT_GNT_MIS, WAIT_RVALID_MIS, WAIT_GNT,
+    WAIT_RVALID_MIS_GNTS_DONE
+  } ls_fsm_e;
+
+  ls_fsm_e ls_fsm_cs, ls_fsm_ns;
+
+  assign data_addr   = adder_result_ex_i;
+  assign data_offset = data_addr[1:0];
+
+  ///////////////////
+  // BE generation //
+  ///////////////////
+
+  always_comb begin
+    unique case (lsu_type_i) // Data type 00 Word, 01 Half word, 11,10 byte
+      2'b00: begin // Word access (data_be drives data_be_o regardless of lsu_we_i)
+        if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
+          unique case (data_offset)
+            2'b00:   data_be = 4'b1111;
+            2'b01:   data_be = 4'b1110;
+            2'b10:   data_be = 4'b1100;
+            2'b11:   data_be = 4'b1000;
+           // default: data_be = 4'b1111;
+          endcase // case (data_offset)
+        end else begin // second part of misaligned transaction
+          unique case (data_offset)
+            2'b00:   data_be = 4'b0000; // this is not used, but included for completeness
+            2'b01:   data_be = 4'b0001;
+            2'b10:   data_be = 4'b0011;
+            2'b11:   data_be = 4'b0111;
+           // default: data_be = 4'b1111;
+          endcase // case (data_offset)
+        end
+      end
+
+      2'b01: begin // Half-word access
+        if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
+          unique case (data_offset)
+            2'b00:   data_be = 4'b0011;
+            2'b01:   data_be = 4'b0110;
+            2'b10:   data_be = 4'b1100;
+            2'b11:   data_be = 4'b1000;
+           // default: data_be = 4'b1111;
+          endcase // case (data_offset)
+        end else begin // second part of misaligned transaction
+          data_be = 4'b0001; // half words only split at offset 2'b11, leaving one byte
+        end
+      end
+
+      2'b10,
+      2'b11: begin // Byte access
+        unique case (data_offset)
+          2'b00:   data_be = 4'b0001;
+          2'b01:   data_be = 4'b0010;
+          2'b10:   data_be = 4'b0100;
+          2'b11:   data_be = 4'b1000;
+         // default: data_be = 4'b1111;
+        endcase // case (data_offset)
+      end
+
+     // default:     data_be = 4'b1111;
+    endcase // case (lsu_type_i)
+  end
+
+  /////////////////////
+  // WData alignment //
+  /////////////////////
+
+  // prepare data to be written to the memory: lsu_wdata_i is rotated left by data_offset bytes,
+  // which covers misaligned, half word and byte accesses alike
+  always_comb begin
+    unique case (data_offset)
+      2'b00:   data_wdata =  lsu_wdata_i[31:0];
+      2'b01:   data_wdata = {lsu_wdata_i[23:0], lsu_wdata_i[31:24]};
+      2'b10:   data_wdata = {lsu_wdata_i[15:0], lsu_wdata_i[31:16]};
+      2'b11:   data_wdata = {lsu_wdata_i[ 7:0], lsu_wdata_i[31: 8]};
+     // default: data_wdata =  lsu_wdata_i[31:0];
+    endcase // case (data_offset)
+  end
+
+  /////////////////////
+  // RData alignment //
+  /////////////////////
+
+  // register for unaligned rdata
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rdata_q <= '0;
+    end else if (rdata_update) begin
+      rdata_q <= data_rdata_i[31:8];
+    end
+  end
+
+  // registers for transaction control
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rdata_offset_q  <= 2'h0;
+      data_type_q     <= 2'h0;
+      data_sign_ext_q <= 1'b0;
+      data_we_q       <= 1'b0;
+    end else if (ctrl_update) begin
+      rdata_offset_q  <= data_offset;
+      data_type_q     <= lsu_type_i;
+      data_sign_ext_q <= lsu_sign_ext_i;
+      data_we_q       <= lsu_we_i;
+    end
+  end
+
+  // Store last address for mtval + AGU for misaligned transactions.
+  // Do not update in case of errors, mtval needs the (first) failing address
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      addr_last_q <= '0;
+    end else if (addr_update) begin
+      addr_last_q <= data_addr;
+    end
+  end
+
+  // realign misaligned words: low bytes from the stored rdata_q, high bytes from data_rdata_i
+  always_comb begin
+    unique case (rdata_offset_q)
+      2'b00:   rdata_w_ext =  data_rdata_i[31:0];
+      2'b01:   rdata_w_ext = {data_rdata_i[ 7:0], rdata_q[31:8]};
+      2'b10:   rdata_w_ext = {data_rdata_i[15:0], rdata_q[31:16]};
+      2'b11:   rdata_w_ext = {data_rdata_i[23:0], rdata_q[31:24]};
+     // default: rdata_w_ext =  data_rdata_i[31:0];
+    endcase
+  end
+
+  ////////////////////
+  // Sign extension //
+  ////////////////////
+
+  // sign extension (or zero extension when data_sign_ext_q is clear) for half words
+  always_comb begin
+    unique case (rdata_offset_q)
+      2'b00: begin
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[15:0]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[15]}}, data_rdata_i[15:0]};
+        end
+      end
+
+      2'b01: begin
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[23:8]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[23]}}, data_rdata_i[23:8]};
+        end
+      end
+
+      2'b10: begin
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[31:16]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[31]}}, data_rdata_i[31:16]};
+        end
+      end
+
+      2'b11: begin
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[7:0], rdata_q[31:24]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[7]}}, data_rdata_i[7:0], rdata_q[31:24]};
+        end
+      end
+
+     // default: rdata_h_ext = {16'h0000, data_rdata_i[15:0]};
+    endcase // case (rdata_offset_q)
+  end
+
+  // sign extension (or zero extension when data_sign_ext_q is clear) for bytes
+  always_comb begin
+    unique case (rdata_offset_q)
+      2'b00: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[7]}}, data_rdata_i[7:0]};
+        end
+      end
+
+      2'b01: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[15:8]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[15]}}, data_rdata_i[15:8]};
+        end
+      end
+
+      2'b10: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[23:16]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[23]}}, data_rdata_i[23:16]};
+        end
+      end
+
+      2'b11: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[31:24]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[31]}}, data_rdata_i[31:24]};
+        end
+      end
+
+     // default: rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]};
+    endcase // case (rdata_offset_q)
+  end
+
+  // select word, half word or byte extended version according to the registered data type
+  always_comb begin
+    unique case (data_type_q)
+      2'b00:       data_rdata_ext = rdata_w_ext;
+      2'b01:       data_rdata_ext = rdata_h_ext;
+      2'b10,2'b11: data_rdata_ext = rdata_b_ext;
+    //  default:     data_rdata_ext = rdata_w_ext;
+    endcase // case (data_type_q)
+  end
+
+  /////////////
+  // LSU FSM //
+  /////////////
+
+  // check for misaligned accesses that need to be split into two word-aligned accesses
+  assign split_misaligned_access =
+      ((lsu_type_i == 2'b00) && (data_offset != 2'b00)) || // misaligned word access
+      ((lsu_type_i == 2'b01) && (data_offset == 2'b11));   // misaligned half-word access
+
+  // FSM
+  always_comb begin
+    ls_fsm_ns       = ls_fsm_cs;
+
+    data_req_o          = 1'b0;
+    addr_incr_req_o     = 1'b0;
+    handle_misaligned_d = handle_misaligned_q;
+    pmp_err_d           = pmp_err_q;
+    lsu_err_d           = lsu_err_q;
+
+    addr_update         = 1'b0;
+    ctrl_update         = 1'b0;
+    rdata_update        = 1'b0;
+
+    perf_load_o         = 1'b0;
+    perf_store_o        = 1'b0;
+
+    unique case (ls_fsm_cs)
+
+      IDLE: begin
+        pmp_err_d = 1'b0;
+        if (lsu_req_i) begin
+          data_req_o   = 1'b1;
+          pmp_err_d    = data_pmp_err_i;
+          lsu_err_d    = 1'b0;
+          perf_load_o  = ~lsu_we_i;
+          perf_store_o = lsu_we_i;
+
+          if (data_gnt_i) begin
+            ctrl_update         = 1'b1;
+            addr_update         = 1'b1;
+            handle_misaligned_d = split_misaligned_access;
+            ls_fsm_ns           = split_misaligned_access ? WAIT_RVALID_MIS : IDLE;
+          end else begin
+            ls_fsm_ns           = split_misaligned_access ? WAIT_GNT_MIS    : WAIT_GNT;
+          end
+        end
+      end
+
+      WAIT_GNT_MIS: begin
+        data_req_o = 1'b1;
+        // data_pmp_err_i is valid during the address phase of a request. An error will block the
+        // external request and so a data_gnt_i might never be signalled. The registered version
+        // pmp_err_q is only updated for new address phases and so can be used in WAIT_GNT* and
+        // WAIT_RVALID* states
+        if (data_gnt_i || pmp_err_q) begin
+          addr_update         = 1'b1;
+          ctrl_update         = 1'b1;
+          handle_misaligned_d = 1'b1;
+          ls_fsm_ns           = WAIT_RVALID_MIS;
+        end
+      end
+
+      WAIT_RVALID_MIS: begin
+        // push out second request
+        data_req_o = 1'b1;
+        // tell ID/EX stage to update the address
+        addr_incr_req_o = 1'b1;
+
+        // first part rvalid is received, or gets a PMP error
+        if (data_rvalid_i || pmp_err_q) begin
+          // Update the PMP error for the second part
+          pmp_err_d = data_pmp_err_i;
+          // Record the error status of the first part
+          lsu_err_d = data_err_i | pmp_err_q;
+          // Capture the first rdata for loads
+          rdata_update = ~data_we_q;
+          // If already granted, wait for second rvalid
+          ls_fsm_ns = data_gnt_i ? IDLE : WAIT_GNT;
+          // Update the address for the second part, if no error
+          addr_update = data_gnt_i & ~(data_err_i | pmp_err_q);
+          // clear handle_misaligned if second request is granted
+          handle_misaligned_d = ~data_gnt_i;
+        end else begin
+          // first part rvalid is NOT received
+          if (data_gnt_i) begin
+            // second grant is received
+            ls_fsm_ns = WAIT_RVALID_MIS_GNTS_DONE;
+            handle_misaligned_d = 1'b0;
+          end
+        end
+      end
+
+      WAIT_GNT: begin
+        // tell ID/EX stage to update the address
+        addr_incr_req_o = handle_misaligned_q;
+        data_req_o      = 1'b1;
+        if (data_gnt_i || pmp_err_q) begin
+          ctrl_update         = 1'b1;
+          // Update the address, unless there was an error
+          addr_update         = ~lsu_err_q;
+          ls_fsm_ns           = IDLE;
+          handle_misaligned_d = 1'b0;
+        end
+      end
+
+      WAIT_RVALID_MIS_GNTS_DONE: begin
+        // tell ID/EX stage to update the address (to make sure the
+        // second address can be captured correctly for mtval and PMP checking)
+        addr_incr_req_o = 1'b1;
+        // Wait for the first rvalid, second request is already granted
+        if (data_rvalid_i) begin
+          // Update the pmp error for the second part
+          pmp_err_d = data_pmp_err_i;
+          // The first part cannot see a PMP error in this state
+          lsu_err_d = data_err_i;
+          // Now we can update the address for the second part if no error
+          addr_update = ~data_err_i;
+          // Capture the first rdata for loads
+          rdata_update = ~data_we_q;
+          // Wait for second rvalid
+          ls_fsm_ns = IDLE;
+        end
+      end
+
+      default: begin
+        ls_fsm_ns = IDLE;
+      end
+    endcase
+  end
+
+  assign lsu_req_done_o = (lsu_req_i | (ls_fsm_cs != IDLE)) & (ls_fsm_ns == IDLE);
+
+  // registers for FSM
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ls_fsm_cs           <= IDLE;
+      handle_misaligned_q <= '0;
+      pmp_err_q           <= '0;
+      lsu_err_q           <= '0;
+    end else begin
+      ls_fsm_cs           <= ls_fsm_ns;
+      handle_misaligned_q <= handle_misaligned_d;
+      pmp_err_q           <= pmp_err_d;
+      lsu_err_q           <= lsu_err_d;
+    end
+  end
+
+  /////////////
+  // Outputs //
+  /////////////
+
+  assign data_or_pmp_err    = lsu_err_q | data_err_i | pmp_err_q;
+  assign lsu_resp_valid_o   = (data_rvalid_i | pmp_err_q) & (ls_fsm_cs == IDLE);
+  assign lsu_rdata_valid_o  = (ls_fsm_cs == IDLE) & data_rvalid_i & ~data_or_pmp_err & ~data_we_q;
+
+  // output to register file
+  assign lsu_rdata_o = data_rdata_ext;
+
+  // output data address must be word aligned
+  assign data_addr_w_aligned = {data_addr[31:2], 2'b00};
+
+  // output to data interface
+  assign data_addr_o   = data_addr_w_aligned;
+  assign data_wdata_o  = data_wdata;
+  assign data_we_o     = lsu_we_i;
+  assign data_be_o     = data_be;
+
+  // output to ID stage: mtval + AGU for misaligned transactions
+  assign addr_last_o   = addr_last_q;
+
+  // Signal a load or store error depending on the transaction type outstanding
+  assign load_err_o    = data_or_pmp_err & ~data_we_q & lsu_resp_valid_o;
+  assign store_err_o   = data_or_pmp_err &  data_we_q & lsu_resp_valid_o;
+
+  assign busy_o = (ls_fsm_cs != IDLE); // busy whenever the FSM is outside IDLE
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/brq_pkg.sv b/verilog/rtl/brq_pkg.sv
new file mode 100644
index 0000000..fa44b1d
--- /dev/null
+++ b/verilog/rtl/brq_pkg.sv
@@ -0,0 +1,535 @@
+// Copyright lowRISC contributors.
+// Copyright 2017 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Shared enums, packed structs and parameters for the brq core modules (Ibex-derived constants)
+ */
+package brq_pkg;
+
+/////////////////////
+// Parameter Enums //
+/////////////////////
+
+typedef enum integer {
+  RegFileFF    = 0,
+  RegFileFPGA  = 1,
+  RegFileLatch = 2
+} regfile_e;
+
+typedef enum integer {
+  RV32MNone        = 0,
+  RV32MSlow        = 1,
+  RV32MFast        = 2,
+  RV32MSingleCycle = 3
+} rv32m_e;
+
+typedef enum integer {
+  RV32BNone     = 0,
+  RV32BBalanced = 1,
+  RV32BFull     = 2
+} rv32b_e;
+
+// Floating-point extension selection
+typedef enum integer { 
+  RV32FNone     = 0,
+  RV32FSingle   = 1,
+  RV64FDouble   = 2
+  // RV32FQuad     = 3
+} rvfloat_e;
+
+/////////////
+// Opcodes //
+/////////////
+
+typedef enum logic [6:0] {
+  OPCODE_LOAD     = 7'h03,
+  OPCODE_MISC_MEM = 7'h0f,
+  OPCODE_OP_IMM   = 7'h13,
+  OPCODE_AUIPC    = 7'h17,
+  OPCODE_STORE    = 7'h23,
+  OPCODE_OP       = 7'h33,
+  OPCODE_LUI      = 7'h37,
+  OPCODE_BRANCH   = 7'h63,
+  OPCODE_JALR     = 7'h67,
+  OPCODE_JAL      = 7'h6f,
+  OPCODE_SYSTEM   = 7'h73,
+  // Floating Point
+  OPCODE_LOAD_FP  = 7'h07,
+  OPCODE_STORE_FP = 7'h27,
+  OPCODE_MADD_FP  = 7'h43,
+  OPCODE_MSUB_FP  = 7'h47,
+  OPCODE_NMSUB_FP = 7'h4b,
+  OPCODE_NMADD_FP = 7'h4f,
+  OPCODE_OP_FP    = 7'h53
+} opcode_e;
+
+
+////////////////////
+// ALU operations //
+////////////////////
+
+typedef enum logic [5:0] { // no explicit values: encoded in declaration order, starting at 0
+  // Arithmetics
+  ALU_ADD,
+  ALU_SUB,
+
+  // Logics
+  ALU_XOR,
+  ALU_OR,
+  ALU_AND,
+  // RV32B
+  ALU_XNOR,
+  ALU_ORN,
+  ALU_ANDN,
+
+  // Shifts
+  ALU_SRA,
+  ALU_SRL,
+  ALU_SLL,
+  // RV32B
+  ALU_SRO,
+  ALU_SLO,
+  ALU_ROR,
+  ALU_ROL,
+  ALU_GREV,
+  ALU_GORC,
+  ALU_SHFL,
+  ALU_UNSHFL,
+
+  // Comparisons
+  ALU_LT,
+  ALU_LTU,
+  ALU_GE,
+  ALU_GEU,
+  ALU_EQ,
+  ALU_NE,
+  // RV32B
+  ALU_MIN,
+  ALU_MINU,
+  ALU_MAX,
+  ALU_MAXU,
+
+  // Pack
+  // RV32B
+  ALU_PACK,
+  ALU_PACKU,
+  ALU_PACKH,
+
+  // Sign-Extend
+  // RV32B
+  ALU_SEXTB,
+  ALU_SEXTH,
+
+  // Bitcounting
+  // RV32B
+  ALU_CLZ,
+  ALU_CTZ,
+  ALU_PCNT,
+
+  // Set lower than
+  ALU_SLT,
+  ALU_SLTU,
+
+  // Ternary Bitmanip Operations
+  // RV32B
+  ALU_CMOV,
+  ALU_CMIX,
+  ALU_FSL,
+  ALU_FSR,
+
+  // Single-Bit Operations
+  // RV32B
+  ALU_SBSET,
+  ALU_SBCLR,
+  ALU_SBINV,
+  ALU_SBEXT,
+
+  // Bit Extract / Deposit
+  // RV32B
+  ALU_BEXT,
+  ALU_BDEP,
+
+  // Bit Field Place
+  // RV32B
+  ALU_BFP,
+
+  // Carry-less Multiply
+  // RV32B
+  ALU_CLMUL,
+  ALU_CLMULR,
+  ALU_CLMULH,
+
+  // Cyclic Redundancy Check
+  ALU_CRC32_B,
+  ALU_CRC32C_B,
+  ALU_CRC32_H,
+  ALU_CRC32C_H,
+  ALU_CRC32_W,
+  ALU_CRC32C_W
+} alu_op_e;
+
+typedef enum logic [1:0] {
+  // Multiplier/divider
+  MD_OP_MULL,
+  MD_OP_MULH,
+  MD_OP_DIV,
+  MD_OP_REM
+} md_op_e;
+
+// Floating-point precision an instruction operates on
+// (single or double)
+typedef enum logic {
+  SINGLE_FP,
+  DOUBLE_FP
+} fp_type_e;
+
+//////////////////////////////////
+// Control and status registers //
+//////////////////////////////////
+
+// CSR operations
+typedef enum logic [1:0] {
+  CSR_OP_READ,
+  CSR_OP_WRITE,
+  CSR_OP_SET,
+  CSR_OP_CLEAR
+} csr_op_e;
+
+// Privilege levels with their 2-bit RISC-V encodings
+typedef enum logic[1:0] {
+  PRIV_LVL_M = 2'b11,
+  PRIV_LVL_H = 2'b10,
+  PRIV_LVL_S = 2'b01,
+  PRIV_LVL_U = 2'b00
+} priv_lvl_e;
+
+// Constants for the dcsr.xdebugver fields
+typedef enum logic[3:0] {
+   XDEBUGVER_NO     = 4'd0, // no external debug support
+   XDEBUGVER_STD    = 4'd4, // external debug according to RISC-V debug spec
+   XDEBUGVER_NONSTD = 4'd15 // debug not conforming to RISC-V debug spec
+} x_debug_ver_e;
+
+//////////////
+// WB stage //
+//////////////
+
+// Type of instruction present in writeback stage
+typedef enum logic[1:0] {
+  WB_INSTR_LOAD,  // Instruction is awaiting load data
+  WB_INSTR_STORE, // Instruction is awaiting store response
+  WB_INSTR_OTHER  // Instruction doesn't fit into above categories
+} wb_instr_type_e;
+
+//////////////
+// ID stage //
+//////////////
+
+// Operand a selection
+typedef enum logic[1:0] {
+  OP_A_REG_A,
+  OP_A_FWD,
+  OP_A_CURRPC,
+  OP_A_IMM
+} op_a_sel_e;
+
+// Immediate a selection
+typedef enum logic {
+  IMM_A_Z,
+  IMM_A_ZERO
+} imm_a_sel_e;
+
+// Operand b selection
+typedef enum logic {
+  OP_B_REG_B,
+  OP_B_IMM
+} op_b_sel_e;
+
+// Immediate b selection
+typedef enum logic [2:0] {
+  IMM_B_I,
+  IMM_B_S,
+  IMM_B_B,
+  IMM_B_U,
+  IMM_B_J,
+  IMM_B_INCR_PC,
+  IMM_B_INCR_ADDR
+} imm_b_sel_e;
+
+// Regfile write data selection
+typedef enum logic {
+  RF_WD_EX,
+  RF_WD_CSR
+} rf_wd_sel_e;
+
+//////////////
+// IF stage //
+//////////////
+
+// PC mux selection
+typedef enum logic [2:0] {
+  PC_BOOT,
+  PC_JUMP,
+  PC_EXC,
+  PC_ERET,
+  PC_DRET,
+  PC_BP
+} pc_sel_e;
+
+// Exception PC mux selection
+typedef enum logic [1:0] {
+  EXC_PC_EXC,
+  EXC_PC_IRQ,
+  EXC_PC_DBD,
+  EXC_PC_DBG_EXC // Exception while in debug mode
+} exc_pc_sel_e;
+
+// Interrupt requests
+typedef struct packed {
+  logic        irq_software;
+  logic        irq_timer;
+  logic        irq_external;
+  logic [14:0] irq_fast; // 15 fast interrupts,
+                         // one interrupt is reserved for NMI (not visible through mip/mie)
+} irqs_t;
+
+// Exception cause: bit 5 is set for interrupts and clear for exceptions, bits [4:0] hold the code
+typedef enum logic [5:0] {
+  EXC_CAUSE_IRQ_SOFTWARE_M     = {1'b1, 5'd03},
+  EXC_CAUSE_IRQ_TIMER_M        = {1'b1, 5'd07},
+  EXC_CAUSE_IRQ_EXTERNAL_M     = {1'b1, 5'd11},
+  // EXC_CAUSE_IRQ_FAST_0      = {1'b1, 5'd16},
+  // EXC_CAUSE_IRQ_FAST_14     = {1'b1, 5'd30},
+  EXC_CAUSE_IRQ_NM             = {1'b1, 5'd31}, // == EXC_CAUSE_IRQ_FAST_15
+  EXC_CAUSE_INSN_ADDR_MISA     = {1'b0, 5'd00},
+  EXC_CAUSE_INSTR_ACCESS_FAULT = {1'b0, 5'd01},
+  EXC_CAUSE_ILLEGAL_INSN       = {1'b0, 5'd02},
+  EXC_CAUSE_BREAKPOINT         = {1'b0, 5'd03},
+  EXC_CAUSE_LOAD_ACCESS_FAULT  = {1'b0, 5'd05},
+  EXC_CAUSE_STORE_ACCESS_FAULT = {1'b0, 5'd07},
+  EXC_CAUSE_ECALL_UMODE        = {1'b0, 5'd08},
+  EXC_CAUSE_ECALL_MMODE        = {1'b0, 5'd11}
+} exc_cause_e;
+
+// Debug cause
+typedef enum logic [2:0] {
+  DBG_CAUSE_NONE    = 3'h0,
+  DBG_CAUSE_EBREAK  = 3'h1,
+  DBG_CAUSE_TRIGGER = 3'h2,
+  DBG_CAUSE_HALTREQ = 3'h3,
+  DBG_CAUSE_STEP    = 3'h4
+} dbg_cause_e;
+
+// PMP constants
+parameter int unsigned PMP_MAX_REGIONS      = 16;
+parameter int unsigned PMP_CFG_W            = 8;
+
+// PMP access type (presumably the channel index of the instruction / data side - TODO confirm)
+parameter int unsigned PMP_I = 0;
+parameter int unsigned PMP_D = 1;
+
+typedef enum logic [1:0] {
+  PMP_ACC_EXEC    = 2'b00,
+  PMP_ACC_WRITE   = 2'b01,
+  PMP_ACC_READ    = 2'b10
+} pmp_req_e;
+
+// PMP cfg structures
+typedef enum logic [1:0] {
+  PMP_MODE_OFF   = 2'b00,
+  PMP_MODE_TOR   = 2'b01,
+  PMP_MODE_NA4   = 2'b10,
+  PMP_MODE_NAPOT = 2'b11
+} pmp_cfg_mode_e;
+
+typedef struct packed { // 6 bits, MSB to LSB: lock, mode[1:0], exec, write, read
+  logic          lock;
+  pmp_cfg_mode_e mode;
+  logic          exec;
+  logic          write;
+  logic          read;
+} pmp_cfg_t;
+
+// CSRs
+typedef enum logic[11:0] {
+  // Machine information
+  CSR_MHARTID   = 12'hF14,
+
+  // Machine trap setup
+  CSR_MSTATUS   = 12'h300,
+  CSR_MISA      = 12'h301,
+  CSR_MIE       = 12'h304,
+  CSR_MTVEC     = 12'h305,
+
+  // Machine trap handling
+  CSR_MSCRATCH  = 12'h340,
+  CSR_MEPC      = 12'h341,
+  CSR_MCAUSE    = 12'h342,
+  CSR_MTVAL     = 12'h343,
+  CSR_MIP       = 12'h344,
+
+  // Physical memory protection
+  CSR_PMPCFG0   = 12'h3A0,
+  CSR_PMPCFG1   = 12'h3A1,
+  CSR_PMPCFG2   = 12'h3A2,
+  CSR_PMPCFG3   = 12'h3A3,
+  CSR_PMPADDR0  = 12'h3B0,
+  CSR_PMPADDR1  = 12'h3B1,
+  CSR_PMPADDR2  = 12'h3B2,
+  CSR_PMPADDR3  = 12'h3B3,
+  CSR_PMPADDR4  = 12'h3B4,
+  CSR_PMPADDR5  = 12'h3B5,
+  CSR_PMPADDR6  = 12'h3B6,
+  CSR_PMPADDR7  = 12'h3B7,
+  CSR_PMPADDR8  = 12'h3B8,
+  CSR_PMPADDR9  = 12'h3B9,
+  CSR_PMPADDR10 = 12'h3BA,
+  CSR_PMPADDR11 = 12'h3BB,
+  CSR_PMPADDR12 = 12'h3BC,
+  CSR_PMPADDR13 = 12'h3BD,
+  CSR_PMPADDR14 = 12'h3BE,
+  CSR_PMPADDR15 = 12'h3BF,
+
+  // Debug trigger
+  CSR_TSELECT   = 12'h7A0,
+  CSR_TDATA1    = 12'h7A1,
+  CSR_TDATA2    = 12'h7A2,
+  CSR_TDATA3    = 12'h7A3,
+  CSR_MCONTEXT  = 12'h7A8,
+  CSR_SCONTEXT  = 12'h7AA,
+
+  // Debug/trace
+  CSR_DCSR      = 12'h7b0,
+  CSR_DPC       = 12'h7b1,
+
+  // Debug
+  CSR_DSCRATCH0 = 12'h7b2, // optional
+  CSR_DSCRATCH1 = 12'h7b3, // optional
+
+  // Machine Counter/Timers
+  CSR_MCOUNTINHIBIT  = 12'h320,
+  CSR_MHPMEVENT3     = 12'h323,
+  CSR_MHPMEVENT4     = 12'h324,
+  CSR_MHPMEVENT5     = 12'h325,
+  CSR_MHPMEVENT6     = 12'h326,
+  CSR_MHPMEVENT7     = 12'h327,
+  CSR_MHPMEVENT8     = 12'h328,
+  CSR_MHPMEVENT9     = 12'h329,
+  CSR_MHPMEVENT10    = 12'h32A,
+  CSR_MHPMEVENT11    = 12'h32B,
+  CSR_MHPMEVENT12    = 12'h32C,
+  CSR_MHPMEVENT13    = 12'h32D,
+  CSR_MHPMEVENT14    = 12'h32E,
+  CSR_MHPMEVENT15    = 12'h32F,
+  CSR_MHPMEVENT16    = 12'h330,
+  CSR_MHPMEVENT17    = 12'h331,
+  CSR_MHPMEVENT18    = 12'h332,
+  CSR_MHPMEVENT19    = 12'h333,
+  CSR_MHPMEVENT20    = 12'h334,
+  CSR_MHPMEVENT21    = 12'h335,
+  CSR_MHPMEVENT22    = 12'h336,
+  CSR_MHPMEVENT23    = 12'h337,
+  CSR_MHPMEVENT24    = 12'h338,
+  CSR_MHPMEVENT25    = 12'h339,
+  CSR_MHPMEVENT26    = 12'h33A,
+  CSR_MHPMEVENT27    = 12'h33B,
+  CSR_MHPMEVENT28    = 12'h33C,
+  CSR_MHPMEVENT29    = 12'h33D,
+  CSR_MHPMEVENT30    = 12'h33E,
+  CSR_MHPMEVENT31    = 12'h33F,
+  CSR_MCYCLE         = 12'hB00,
+  CSR_MINSTRET       = 12'hB02,
+  CSR_MHPMCOUNTER3   = 12'hB03,
+  CSR_MHPMCOUNTER4   = 12'hB04,
+  CSR_MHPMCOUNTER5   = 12'hB05,
+  CSR_MHPMCOUNTER6   = 12'hB06,
+  CSR_MHPMCOUNTER7   = 12'hB07,
+  CSR_MHPMCOUNTER8   = 12'hB08,
+  CSR_MHPMCOUNTER9   = 12'hB09,
+  CSR_MHPMCOUNTER10  = 12'hB0A,
+  CSR_MHPMCOUNTER11  = 12'hB0B,
+  CSR_MHPMCOUNTER12  = 12'hB0C,
+  CSR_MHPMCOUNTER13  = 12'hB0D,
+  CSR_MHPMCOUNTER14  = 12'hB0E,
+  CSR_MHPMCOUNTER15  = 12'hB0F,
+  CSR_MHPMCOUNTER16  = 12'hB10,
+  CSR_MHPMCOUNTER17  = 12'hB11,
+  CSR_MHPMCOUNTER18  = 12'hB12,
+  CSR_MHPMCOUNTER19  = 12'hB13,
+  CSR_MHPMCOUNTER20  = 12'hB14,
+  CSR_MHPMCOUNTER21  = 12'hB15,
+  CSR_MHPMCOUNTER22  = 12'hB16,
+  CSR_MHPMCOUNTER23  = 12'hB17,
+  CSR_MHPMCOUNTER24  = 12'hB18,
+  CSR_MHPMCOUNTER25  = 12'hB19,
+  CSR_MHPMCOUNTER26  = 12'hB1A,
+  CSR_MHPMCOUNTER27  = 12'hB1B,
+  CSR_MHPMCOUNTER28  = 12'hB1C,
+  CSR_MHPMCOUNTER29  = 12'hB1D,
+  CSR_MHPMCOUNTER30  = 12'hB1E,
+  CSR_MHPMCOUNTER31  = 12'hB1F,
+  CSR_MCYCLEH        = 12'hB80,
+  CSR_MINSTRETH      = 12'hB82,
+  CSR_MHPMCOUNTER3H  = 12'hB83,
+  CSR_MHPMCOUNTER4H  = 12'hB84,
+  CSR_MHPMCOUNTER5H  = 12'hB85,
+  CSR_MHPMCOUNTER6H  = 12'hB86,
+  CSR_MHPMCOUNTER7H  = 12'hB87,
+  CSR_MHPMCOUNTER8H  = 12'hB88,
+  CSR_MHPMCOUNTER9H  = 12'hB89,
+  CSR_MHPMCOUNTER10H = 12'hB8A,
+  CSR_MHPMCOUNTER11H = 12'hB8B,
+  CSR_MHPMCOUNTER12H = 12'hB8C,
+  CSR_MHPMCOUNTER13H = 12'hB8D,
+  CSR_MHPMCOUNTER14H = 12'hB8E,
+  CSR_MHPMCOUNTER15H = 12'hB8F,
+  CSR_MHPMCOUNTER16H = 12'hB90,
+  CSR_MHPMCOUNTER17H = 12'hB91,
+  CSR_MHPMCOUNTER18H = 12'hB92,
+  CSR_MHPMCOUNTER19H = 12'hB93,
+  CSR_MHPMCOUNTER20H = 12'hB94,
+  CSR_MHPMCOUNTER21H = 12'hB95,
+  CSR_MHPMCOUNTER22H = 12'hB96,
+  CSR_MHPMCOUNTER23H = 12'hB97,
+  CSR_MHPMCOUNTER24H = 12'hB98,
+  CSR_MHPMCOUNTER25H = 12'hB99,
+  CSR_MHPMCOUNTER26H = 12'hB9A,
+  CSR_MHPMCOUNTER27H = 12'hB9B,
+  CSR_MHPMCOUNTER28H = 12'hB9C,
+  CSR_MHPMCOUNTER29H = 12'hB9D,
+  CSR_MHPMCOUNTER30H = 12'hB9E,
+  CSR_MHPMCOUNTER31H = 12'hB9F,
+  CSR_CPUCTRL        = 12'h7C0,
+  CSR_SECURESEED     = 12'h7C1,
+
+  // Floating point fcsr
+  CSR_FCSR           = 12'h003,
+  CSR_FRM            = 12'h002,
+  CSR_FFLAG          = 12'h001 // address 0x001 is named fflags in the RISC-V spec
+} csr_num_e;
+
+// CSR pmp-related offsets
+parameter logic [11:0] CSR_OFF_PMP_CFG  = 12'h3A0; // pmp_cfg  @ 12'h3a0 - 12'h3a3
+parameter logic [11:0] CSR_OFF_PMP_ADDR = 12'h3B0; // pmp_addr @ 12'h3b0 - 12'h3bf
+
+// CSR status bits
+parameter int unsigned CSR_MSTATUS_MIE_BIT      = 3;
+parameter int unsigned CSR_MSTATUS_MPIE_BIT     = 7;
+parameter int unsigned CSR_MSTATUS_MPP_BIT_LOW  = 11;
+parameter int unsigned CSR_MSTATUS_MPP_BIT_HIGH = 12;
+parameter int unsigned CSR_MSTATUS_MPRV_BIT     = 17;
+parameter int unsigned CSR_MSTATUS_TW_BIT       = 21;
+
+// CSR machine ISA
+parameter logic [1:0] CSR_MISA_MXL = 2'd1; // M-XLEN: XLEN in M-Mode for RV32
+
+// CSR interrupt pending/enable bits
+parameter int unsigned CSR_MSIX_BIT      = 3;
+parameter int unsigned CSR_MTIX_BIT      = 7;
+parameter int unsigned CSR_MEIX_BIT      = 11;
+parameter int unsigned CSR_MFIX_BIT_LOW  = 16;
+parameter int unsigned CSR_MFIX_BIT_HIGH = 30; // bits 16..30 span 15 positions, as wide as irq_fast
+
+endpackage
diff --git a/verilog/rtl/brq_pmp.sv b/verilog/rtl/brq_pmp.sv
new file mode 100644
index 0000000..0a5e800
--- /dev/null
+++ b/verilog/rtl/brq_pmp.sv
@@ -0,0 +1,125 @@
+
+// PMP checker: reports an access fault per channel by matching each request against all regions
+module brq_pmp #(
+    // Granularity of NAPOT access,
+    // 0 = No restriction, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte, etc.
+    parameter int unsigned PMPGranularity = 0,
+    // Number of access channels (e.g. i-side + d-side)
+    parameter int unsigned PMPNumChan     = 2,
+    // Number of implemented regions
+    parameter int unsigned PMPNumRegions  = 4
+) (
+    // Clock and Reset (not referenced: all checks in this module are combinational)
+    input  logic                    clk_i,
+    input  logic                    rst_ni,
+
+    // Interface to CSRs
+    input  brq_pkg::pmp_cfg_t      csr_pmp_cfg_i  [PMPNumRegions],
+    input  logic [33:0]             csr_pmp_addr_i [PMPNumRegions],
+
+    input  brq_pkg::priv_lvl_e     priv_mode_i    [PMPNumChan],
+    // Access checking channels
+    input  logic [33:0]             pmp_req_addr_i [PMPNumChan],
+    input  brq_pkg::pmp_req_e      pmp_req_type_i [PMPNumChan],
+    output logic                    pmp_req_err_o  [PMPNumChan]
+);
+
+  import brq_pkg::*;
+
+  // Lowest csr_pmp_addr_i bit used for NAPOT trailing-ones decoding. Bits [1:0] are the byte offset
+  // and not part of pmpaddr, so with granularity 0 decoding starts at bit 2, otherwise at bit G+1.
+  localparam int unsigned NapotLsb = (PMPGranularity == 0) ? 2 : PMPGranularity + 1;
+
+  // Access Checking Signals
+  logic [33:0]                                region_start_addr [PMPNumRegions];
+  logic [33:PMPGranularity+2]                 region_addr_mask  [PMPNumRegions];
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_gt;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_lt;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_eq;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_all;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_perm_check;
+  logic [PMPNumChan-1:0]                      access_fault;
+
+  // Access checking: per-region start address and NAPOT address mask
+  for (genvar r = 0; r < PMPNumRegions; r++) begin : g_addr_exp
+    // Start address for TOR matching
+    if (r == 0) begin : g_entry0
+      assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? 34'h000000000 :
+                                                                              csr_pmp_addr_i[r];
+    end else begin : g_oth
+      assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? csr_pmp_addr_i[r-1] :
+                                                                              csr_pmp_addr_i[r];
+    end
+    // Address mask for NA matching
+    for (genvar b = PMPGranularity+2; b < 34; b++) begin : g_bitmask
+      if (b == 2) begin : g_bit0
+        // Always mask bit 2 for NAPOT
+        assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT);
+      end else begin : g_others
+        // We will mask this bit if it is within the programmed granule
+        // i.e. addr = yyyy 0111
+        //                  ^
+        //                  | This bit pos is the top of the mask, all lower bits set
+        // thus mask = 1111 0000
+        assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT) |
+                                        ~&csr_pmp_addr_i[r][b-1:NapotLsb];
+      end
+    end
+  end
+
+  for (genvar c = 0; c < PMPNumChan; c++) begin : g_access_check
+    for (genvar r = 0; r < PMPNumRegions; r++) begin : g_regions
+      // Comparators are sized according to granularity
+      assign region_match_eq[c][r] = (pmp_req_addr_i[c][33:PMPGranularity+2] &
+                                      region_addr_mask[r]) ==
+                                     (region_start_addr[r][33:PMPGranularity+2] &
+                                      region_addr_mask[r]);
+      assign region_match_gt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] >
+                                     region_start_addr[r][33:PMPGranularity+2];
+      assign region_match_lt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] <
+                                     csr_pmp_addr_i[r][33:PMPGranularity+2];
+
+      always_comb begin
+        region_match_all[c][r] = 1'b0;
+        unique case (csr_pmp_cfg_i[r].mode)
+          PMP_MODE_OFF   : region_match_all[c][r] = 1'b0;
+          PMP_MODE_NA4   : region_match_all[c][r] = region_match_eq[c][r];
+          PMP_MODE_NAPOT : region_match_all[c][r] = region_match_eq[c][r];
+          PMP_MODE_TOR   : begin
+            region_match_all[c][r] = (region_match_eq[c][r] | region_match_gt[c][r]) &
+                                     region_match_lt[c][r];
+          end
+          default        : region_match_all[c][r] = 1'b0;
+        endcase
+      end
+
+      // Check specific required permissions
+      assign region_perm_check[c][r] =
+          ((pmp_req_type_i[c] == PMP_ACC_EXEC)  & csr_pmp_cfg_i[r].exec) |
+          ((pmp_req_type_i[c] == PMP_ACC_WRITE) & csr_pmp_cfg_i[r].write) |
+          ((pmp_req_type_i[c] == PMP_ACC_READ)  & csr_pmp_cfg_i[r].read);
+    end
+
+    // Access fault determination / prioritization
+    always_comb begin
+      // Default is allow for M-mode, deny for other modes
+      access_fault[c] = (priv_mode_i[c] != PRIV_LVL_M);
+
+      // PMP entries are statically prioritized, from 0 to N-1
+      // The lowest-numbered PMP entry which matches an address determines accessability
+      for (int r = PMPNumRegions-1; r >= 0; r--) begin
+        if (region_match_all[c][r]) begin
+          access_fault[c] = (priv_mode_i[c] == PRIV_LVL_M) ?
+              // For M-mode, any region which matches with the L-bit clear, or with sufficient
+              // access permissions will be allowed
+              (csr_pmp_cfg_i[r].lock & ~region_perm_check[c][r]) :
+              // For other modes, the lock bit doesn't matter
+              ~region_perm_check[c][r];
+        end
+      end
+    end
+
+    assign pmp_req_err_o[c] = access_fault[c];
+  end
+
+endmodule
diff --git a/verilog/rtl/brq_register_file_ff.sv b/verilog/rtl/brq_register_file_ff.sv
new file mode 100644
index 0000000..9aa643e
--- /dev/null
+++ b/verilog/rtl/brq_register_file_ff.sv
@@ -0,0 +1,98 @@
+
+/**
+ * RISC-V register file, flip-flop implementation
+ *
+ * Provides 31 (or 15 when RV32E is set) registers of DataWidth bits plus register 0, which reads
+ * as zero. Two combinational read ports, one write port clocked on clk_i. When DummyInstructions
+ * is set, register 0 is backed by a flop whose value is only visible while dummy_instr_id_i is
+ * high. Intended for FPGA synthesis or Verilator simulation.
+ */
+module brq_register_file_ff #(
+    parameter bit          RV32E             = 0,
+    parameter int unsigned DataWidth         = 32,
+    parameter bit          DummyInstructions = 0
+) (
+    // Clock and Reset
+    input  logic                 clk_i,
+    input  logic                 rst_ni,
+
+    input  logic                 test_en_i,
+    input  logic                 dummy_instr_id_i,
+
+    // Read port R1
+    input  logic [4:0]           raddr_a_i,
+    output logic [DataWidth-1:0] rdata_a_o,
+
+    // Read port R2
+    input  logic [4:0]           raddr_b_i,
+    output logic [DataWidth-1:0] rdata_b_o,
+
+    // Write port W1
+    input  logic [4:0]           waddr_a_i,
+    input  logic [DataWidth-1:0] wdata_a_i,
+    input  logic                 we_a_i
+);
+
+  // Number of addressable words: 16 when RV32E is set, 32 otherwise
+  localparam int unsigned ADDR_WIDTH = RV32E ? 4 : 5;
+  localparam int unsigned NUM_WORDS  = 1 << ADDR_WIDTH;
+
+  logic [NUM_WORDS-1:0][DataWidth-1:0] rf_reg;    // read view, entry 0 is R0
+  logic [NUM_WORDS-1:1][DataWidth-1:0] rf_reg_q;  // storage for entries 1 and up
+  logic [NUM_WORDS-1:1]                we_a_dec;  // per-word write strobes
+
+  // Write-address decode: strobe word w only when it is addressed and the port is enabled
+  always_comb begin : we_a_decoder
+    for (int unsigned w = 1; w < NUM_WORDS; w++) begin
+      we_a_dec[w] = we_a_i & (waddr_a_i == 5'(w));
+    end
+  end
+
+  // Storage flops and their read-side wiring; there is no flop for R0 in this loop
+  for (genvar w = 1; w < NUM_WORDS; w++) begin : g_rf_flops
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        rf_reg_q[w] <= '0;
+      end else begin
+        if (we_a_dec[w]) rf_reg_q[w] <= wdata_a_i;
+      end
+    end
+
+    assign rf_reg[w] = rf_reg_q[w];
+  end
+
+  // Entry 0: a real register for dummy instructions only, constant zero otherwise
+  if (DummyInstructions) begin : g_dummy_r0
+    logic                 we_r0_dummy;
+    logic [DataWidth-1:0] rf_r0_q;
+
+    // waddr_a_i is expected to be 0 for dummy instructions, so it is not decoded here
+    assign we_r0_dummy = dummy_instr_id_i & we_a_i;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        rf_r0_q <= '0;
+      end else begin
+        if (we_r0_dummy) rf_r0_q <= wdata_a_i;
+      end
+    end
+
+    // Real instructions still read zero from R0
+    assign rf_reg[0] = dummy_instr_id_i ? rf_r0_q : '0;
+  end else begin : g_normal_r0
+    // Tie-off for the input that has no function in this configuration
+    logic unused_dummy_instr_id;
+    assign unused_dummy_instr_id = dummy_instr_id_i;
+
+    assign rf_reg[0] = '0;
+  end
+
+  // Combinational read ports
+  assign rdata_a_o = rf_reg[raddr_a_i];
+  assign rdata_b_o = rf_reg[raddr_b_i];
+
+  // test_en_i has no function in the flip-flop implementation
+  logic unused_test_en;
+  assign unused_test_en = test_en_i;
+
+endmodule
diff --git a/verilog/rtl/brq_wbu.sv b/verilog/rtl/brq_wbu.sv
new file mode 100644
index 0000000..95e801c
--- /dev/null
+++ b/verilog/rtl/brq_wbu.sv
@@ -0,0 +1,219 @@
+
+/**
+ * Writeback Stage
+ *
+ * Writeback is an optional third pipeline stage. It writes data back to the register file that was
+ * produced in the ID/EX stage or awaits a response to a load/store (LSU writes direct to register
+ * file for load data). If the writeback stage is not present (WritebackStage == 0) this acts as
+ * a simple passthrough to write data direct to the register file.
+ *
+ * The integer and floating-point register files share the rd address and write data paths; only
+ * their write enables differ. fp_load_i steers returning LSU load data to the FP register file.
+ */
+
+module brq_wbu #(
+  parameter bit WritebackStage = 1'b0
+) (
+  input  logic                     clk_i,
+  input  logic                     rst_ni,
+
+  input  logic                     en_wb_i,
+  input  brq_pkg::wb_instr_type_e  instr_type_wb_i,
+  input  logic [31:0]              pc_id_i,
+  input  logic                     instr_is_compressed_id_i,
+  input  logic                     instr_perf_count_id_i,
+
+  output logic                     ready_wb_o,
+  output logic                     rf_write_wb_o,
+  output logic                     outstanding_load_wb_o,
+  output logic                     outstanding_store_wb_o,
+  output logic [31:0]              pc_wb_o,
+  output logic                     perf_instr_ret_wb_o,
+  output logic                     perf_instr_ret_compressed_wb_o,
+
+  input  logic [4:0]               rf_waddr_id_i,
+  input  logic [31:0]              rf_wdata_id_i,
+  input  logic                     rf_we_id_i,
+
+  input  logic [31:0]              rf_wdata_lsu_i,
+  input  logic                     rf_we_lsu_i,
+
+  output logic [31:0]              rf_wdata_fwd_wb_o,
+
+  output logic [4:0]               rf_waddr_wb_o,
+  output logic [31:0]              rf_wdata_wb_o,
+  output logic                     rf_we_wb_o,
+
+  input logic                      lsu_resp_valid_i,
+  input logic                      lsu_resp_err_i,
+
+  output logic                     instr_done_wb_o,
+
+  // floating point
+  output logic                     fp_rf_write_wb_o,
+  output logic                     fp_rf_wen_wb_o,
+  output logic [4:0]               fp_rf_waddr_wb_o,
+  input  logic [4:0]               fp_rf_waddr_id_i, // unused here: FP rd follows rf_waddr_id_i
+  input  logic                     fp_rf_wen_id_i,
+  output logic [31:0]              fp_rf_wdata_wb_o,
+  input  logic                     fp_load_i         // high when LSU load data is for the FP regfile
+);
+
+  import brq_pkg::*;
+
+  // 0 == RF write from ID
+  // 1 == RF write from LSU
+  logic [31:0] rf_wdata_wb_mux[2];
+  logic [1:0]  rf_wdata_wb_mux_we;
+
+  logic [31:0] fp_rf_wdata_wb_mux[2];
+  logic [1:0]  fp_rf_wdata_wb_mux_we;
+
+  if(WritebackStage) begin : g_writeback_stage
+    logic [31:0]    rf_wdata_wb_q;
+    logic           rf_we_wb_q;
+    logic [4:0]     rf_waddr_wb_q;
+
+    logic           wb_done;
+
+    logic           wb_valid_q;
+    logic [31:0]    wb_pc_q;
+    logic           wb_compressed_q;
+    logic           wb_count_q;
+    wb_instr_type_e wb_instr_type_q;
+
+    logic           wb_valid_d;
+
+    // floating point: only the enables are separate, address and data reuse the flops above
+    logic           fp_rf_we_wb_q;
+    logic           fp_load_q;
+
+    // Stage becomes valid if an instruction enters for ID/EX and valid is cleared when instruction
+    // is done
+    assign wb_valid_d = (en_wb_i & ready_wb_o) | (wb_valid_q & ~wb_done);
+
+    // Writeback for non load/store instructions always completes in a cycle (so instantly done)
+    // Writeback for load/store must wait for response to be received by the LSU
+    // Signal only relevant if wb_valid_q set
+    assign wb_done = (wb_instr_type_q == WB_INSTR_OTHER) | lsu_resp_valid_i;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if(~rst_ni) begin
+        wb_valid_q <= 1'b0;
+      end else begin
+        wb_valid_q <= wb_valid_d;
+      end
+    end
+
+    always_ff @(posedge clk_i) begin
+      if(en_wb_i) begin
+        rf_we_wb_q       <= rf_we_id_i;
+        rf_waddr_wb_q    <= rf_waddr_id_i;
+        rf_wdata_wb_q    <= rf_wdata_id_i;
+        wb_instr_type_q  <= instr_type_wb_i;
+        wb_pc_q          <= pc_id_i;
+        wb_compressed_q  <= instr_is_compressed_id_i;
+        wb_count_q       <= instr_perf_count_id_i;
+
+        // added for floating point registers for wb stage
+        fp_rf_we_wb_q    <= fp_rf_wen_id_i;
+        fp_load_q        <= fp_load_i;
+      end
+    end
+
+    assign rf_waddr_wb_o         = rf_waddr_wb_q;
+    assign rf_wdata_wb_mux[0]    = rf_wdata_wb_q;
+    assign rf_wdata_wb_mux_we[0] = rf_we_wb_q & wb_valid_q;
+
+    assign fp_rf_waddr_wb_o         = rf_waddr_wb_q; // no separate datapath for rd address
+    assign fp_rf_wdata_wb_mux[0]    = rf_wdata_wb_q; // no separate datapath for data bus
+    assign fp_rf_wdata_wb_mux_we[0] = fp_rf_we_wb_q & wb_valid_q;
+
+    assign ready_wb_o = ~wb_valid_q | wb_done;
+
+    // Instruction in writeback will be writing to register file if either rf_we is set or writeback
+    // is awaiting load data. This is used for determining RF read hazards in ID/EX
+    assign rf_write_wb_o = wb_valid_q & (rf_we_wb_q | (wb_instr_type_q == WB_INSTR_LOAD));
+    assign fp_rf_write_wb_o = wb_valid_q & (fp_rf_we_wb_q | (wb_instr_type_q == WB_INSTR_LOAD));
+
+    assign outstanding_load_wb_o  = wb_valid_q & (wb_instr_type_q == WB_INSTR_LOAD);
+    assign outstanding_store_wb_o = wb_valid_q & (wb_instr_type_q == WB_INSTR_STORE);
+
+    assign pc_wb_o = wb_pc_q;
+
+    assign instr_done_wb_o = wb_valid_q & wb_done;
+
+    // Increment instruction retire counters for valid instructions which are not lsu errors
+    assign perf_instr_ret_wb_o            = instr_done_wb_o & wb_count_q &
+                                            ~(lsu_resp_valid_i & lsu_resp_err_i);
+    assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & wb_compressed_q;
+
+    // Forward data that will be written to the RF back to ID to resolve data hazards. The flopped
+    // rf_wdata_wb_q is used rather than rf_wdata_wb_o as the latter includes read data from memory
+    // that returns too late to be used on the forwarding path.
+    assign rf_wdata_fwd_wb_o = rf_wdata_wb_q;
+
+    assign rf_wdata_wb_mux[1]     = rf_wdata_lsu_i;
+    assign rf_wdata_wb_mux_we[1]  = rf_we_lsu_i & ~fp_load_q;
+
+    assign fp_rf_wdata_wb_mux[1]    = rf_wdata_lsu_i;
+    assign fp_rf_wdata_wb_mux_we[1] = rf_we_lsu_i & fp_load_q;
+  end else begin : g_bypass_wb
+    // without writeback stage just pass through register write signals
+    assign rf_waddr_wb_o         = rf_waddr_id_i;
+    assign rf_wdata_wb_mux[0]    = rf_wdata_id_i;
+    assign rf_wdata_wb_mux_we[0] = rf_we_id_i;
+
+    // for floating point unit
+    assign fp_rf_waddr_wb_o          = rf_waddr_id_i;  // no separate datapath for rd address
+    assign fp_rf_wdata_wb_mux[0]     = rf_wdata_id_i;  // no separate datapath for data bus
+    assign fp_rf_wdata_wb_mux_we[0]  = fp_rf_wen_id_i;
+
+    // Increment instruction retire counters for valid instructions which are not lsu errors
+    assign perf_instr_ret_wb_o            = instr_perf_count_id_i & en_wb_i &
+                                            ~(lsu_resp_valid_i & lsu_resp_err_i);
+    assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & instr_is_compressed_id_i;
+
+    // ready needs to be constant 1 without writeback stage (otherwise ID/EX stage will stall)
+    assign ready_wb_o    = 1'b1;
+
+    // Unused Writeback stage only IO & wiring
+    // Assign inputs and internal wiring to unused signals to satisfy lint checks
+    // Tie-off outputs to constant values
+    logic           unused_clk;
+    logic           unused_rst;
+    wb_instr_type_e unused_instr_type_wb;
+    logic [31:0]    unused_pc_id;
+
+    assign unused_clk            = clk_i;
+    assign unused_rst            = rst_ni;
+    assign unused_instr_type_wb  = instr_type_wb_i;
+    assign unused_pc_id          = pc_id_i;
+
+    assign outstanding_load_wb_o  = 1'b0;
+    assign outstanding_store_wb_o = 1'b0;
+    assign pc_wb_o                = '0;
+    assign rf_write_wb_o          = 1'b0;
+    // Without a writeback stage no FP register write is ever pending here either
+    assign fp_rf_write_wb_o       = 1'b0;
+    assign rf_wdata_fwd_wb_o      = 32'b0;
+    assign instr_done_wb_o        = 1'b0;
+
+    assign rf_wdata_wb_mux[1]     = rf_wdata_lsu_i;
+    assign rf_wdata_wb_mux_we[1]  = rf_we_lsu_i & ~fp_load_i;
+
+    assign fp_rf_wdata_wb_mux[1]    = rf_wdata_lsu_i;
+    assign fp_rf_wdata_wb_mux_we[1] = rf_we_lsu_i & fp_load_i;
+  end
+
+  // RF write data can come from ID results (all RF writes that aren't because of loads will come
+  // from here) or the LSU (RF writes for load data)
+  assign rf_wdata_wb_o  = (rf_wdata_wb_mux_we[0]) ? rf_wdata_wb_mux[0] :
+                          rf_wdata_wb_mux[1];
+  assign rf_we_wb_o     = |rf_wdata_wb_mux_we;
+
+  assign fp_rf_wdata_wb_o = fp_rf_wdata_wb_mux_we[0] ? fp_rf_wdata_wb_mux[0] :
+                            fp_rf_wdata_wb_mux[1];
+  assign fp_rf_wen_wb_o   = |fp_rf_wdata_wb_mux_we;
+
+endmodule
diff --git a/verilog/rtl/cf_math_pkg.sv b/verilog/rtl/cf_math_pkg.sv
new file mode 100644
index 0000000..9f35a44
--- /dev/null
+++ b/verilog/rtl/cf_math_pkg.sv
@@ -0,0 +1,61 @@
+// Copyright 2016 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
+///
+/// This package contains a collection of mathematical functions that are commonly used when defining
+/// the value of constants in HDL code.  These functions are implemented as Verilog constant
+/// functions.  Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
+/// function whose value can be evaluated at compile time or during elaboration.  A constant function
+/// must be called with arguments that are constants.
+package cf_math_pkg;
+
+    /// Ceiled Division of Two Natural Numbers
+    ///
+    /// Returns the quotient of two natural numbers, rounded towards plus infinity.
+    /// Evaluated in closed form, so elaboration effort does not grow with the quotient.
+    /// A non-positive divisor is fatal in simulation; where those checks are compiled
+    /// out (Verilator, synthesis) it yields 0 instead of iterating without end.
+    function automatic integer ceil_div (input longint dividend, input longint divisor);
+        // pragma translate_off
+        `ifndef VERILATOR
+        if (dividend < 0) begin
+            $fatal(1, "Dividend %0d is not a natural number!", dividend);
+        end
+        if (divisor < 0) begin
+            $fatal(1, "Divisor %0d is not a natural number!", divisor);
+        end
+        if (divisor == 0) begin
+            $fatal(1, "Division by zero!");
+        end
+        `endif
+        // pragma translate_on
+
+        if ((dividend <= 0) || (divisor <= 0)) begin
+            ceil_div = 0;  // nothing to divide, or operands outside the natural-number contract
+        end else begin
+            ceil_div = (dividend / divisor) + (((dividend % divisor) != 0) ? 1 : 0);
+        end
+    endfunction
+
+    /// Index width required to be able to represent up to `num_idx` indices as a binary
+    /// encoded signal.
+    /// Ensures that the minimum width of an index signal is `1`, regardless of parametrization.
+    ///
+    /// Sample usage in type definition:
+    /// As parameter:
+    ///   `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]`
+    /// As typedef:
+    ///   `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t`
+    function automatic integer unsigned idx_width (input integer unsigned num_idx);
+        return (num_idx > 32'd1) ? unsigned'($clog2(num_idx)) : 32'd1;
+    endfunction
+
+endpackage
diff --git a/verilog/rtl/control_mvp.sv b/verilog/rtl/control_mvp.sv
new file mode 100644
index 0000000..4c5364d
--- /dev/null
+++ b/verilog/rtl/control_mvp.sv
@@ -0,0 +1,3413 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li                    lile@iis.ee.ethz.ch              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    04/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    control_mvp.sv                                             //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    the control logic  of div and sqrt                         //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+// Revision Date:  13/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To fix some bug found in Control FSM                       //
+//                 when Iteration_unit_num_S  = 2'b10                         //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module control_mvp
+
+  (//Input
+   input logic                                        Clk_CI,
+   input logic                                        Rst_RBI,
+   input logic                                        Div_start_SI ,
+   input logic                                        Sqrt_start_SI,
+   input logic                                        Start_SI,
+   input logic                                        Kill_SI,
+   input logic                                        Special_case_SBI,
+   input logic                                        Special_case_dly_SBI,
+   input logic [C_PC-1:0]                             Precision_ctl_SI,
+   input logic [1:0]                                  Format_sel_SI,
+   input logic [C_MANT_FP64:0]                        Numerator_DI,
+   input logic [C_EXP_FP64:0]                         Exp_num_DI,
+   input logic [C_MANT_FP64:0]                        Denominator_DI,
+   input logic [C_EXP_FP64:0]                         Exp_den_DI,
+
+
+   output logic                                       Div_start_dly_SO ,
+   output logic                                       Sqrt_start_dly_SO,
+   output logic                                       Div_enable_SO,
+   output logic                                       Sqrt_enable_SO,
+
+
+   //To next stage
+   output logic                                       Full_precision_SO,
+   output logic                                       FP32_SO,
+   output logic                                       FP64_SO,
+   output logic                                       FP16_SO,
+   output logic                                       FP16ALT_SO,
+
+   output logic                                       Ready_SO,
+   output logic                                       Done_SO,
+
+   output logic [C_MANT_FP64+4:0]                     Mant_result_prenorm_DO,
+ //  output logic [3:0]                                 Round_bit_DO,
+   output logic [C_EXP_FP64+1:0]                      Exp_result_prenorm_DO
+ );
+
+   logic  [C_MANT_FP64+1+4:0]                         Partial_remainder_DN,Partial_remainder_DP; //58bits,r=q+2
+   logic  [C_MANT_FP64+4:0]                           Quotient_DP; //57bits
+   /////////////////////////////////////////////////////////////////////////////
+   // Assign Inputs                                                          //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [C_MANT_FP64+1:0]                            Numerator_se_D;  //sign extension and hidden bit
+   logic [C_MANT_FP64+1:0]                            Denominator_se_D; //signa extension and hidden bit
+   logic [C_MANT_FP64+1:0]                            Denominator_se_DB;  //1's complement
+
+   assign  Numerator_se_D={1'b0,Numerator_DI};
+
+   assign  Denominator_se_D={1'b0,Denominator_DI};
+
+  always_comb
+   begin
+     if(FP32_SO)
+       begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
+       end
+     else if(FP64_SO) begin
+         Denominator_se_DB=~Denominator_se_D;
+     end
+     else if(FP16_SO) begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+     end
+     else begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+     end
+   end
+
+
+   logic [C_MANT_FP64+1:0]                            Mant_D_sqrt_Norm;
+
+   assign Mant_D_sqrt_Norm=Exp_num_DI[0]?{1'b0,Numerator_DI}:{Numerator_DI,1'b0}; //for sqrt
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Format Selection                                                       //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [1:0]                                      Format_sel_S;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Format_sel_S<='b0;
+          end
+        else if(Start_SI&&Ready_SO)
+          begin
+            Format_sel_S<=Format_sel_SI;
+          end
+        else
+          begin
+            Format_sel_S<=Format_sel_S;
+          end
+    end
+
+   assign FP32_SO = (Format_sel_S==2'b00);
+   assign FP64_SO = (Format_sel_S==2'b01);
+   assign FP16_SO = (Format_sel_S==2'b10);
+   assign FP16ALT_SO = (Format_sel_S==2'b11);
+
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Precision Control                                                       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic [C_PC-1:0]                                   Precision_ctl_S;
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Precision_ctl_S<='b0;
+          end
+        else if(Start_SI&&Ready_SO)
+          begin
+            Precision_ctl_S<=Precision_ctl_SI;
+          end
+        else
+          begin
+            Precision_ctl_S<=Precision_ctl_S;
+          end
+    end
+  assign Full_precision_SO = (Precision_ctl_S==6'h00);
+
+
+
+     logic [5:0]                                     State_ctl_S;
+     logic [5:0]                                     State_Two_iteration_unit_S;
+     logic [5:0]                                     State_Four_iteration_unit_S;
+
+    assign State_Two_iteration_unit_S = Precision_ctl_S[C_PC-1:1];  //Two iteration units
+    assign State_Four_iteration_unit_S = Precision_ctl_S[C_PC-1:2];  //Four iteration units
+     always_comb
+       begin
+         case(Iteration_unit_num_S)
+//////////////////////one iteration unit, start///////////////////////////////////////
+           2'b00:  //one iteration unit
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h1b;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h38;  //53+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0e;  //11+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0b;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////one iteration unit, end///////////////////////////////////////
+
+//////////////////////two iteration units, start///////////////////////////////////////
+           2'b01:  //two iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0d;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h1b;  //53+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h06;  //11+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h05;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////two iteration units, end///////////////////////////////////////
+
+//////////////////////three iteration units, start///////////////////////////////////////
+           2'b10:  //three iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h08;  //24+3 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       6'h0c,6'h0d,6'h0e:
+                         begin
+                           State_ctl_S = 6'h04;
+                         end
+                       6'h0f,6'h10,6'h11:
+                         begin
+                           State_ctl_S = 6'h05;
+                         end
+                       6'h12,6'h13,6'h14:
+                         begin
+                           State_ctl_S = 6'h06;
+                         end
+                       6'h15,6'h16,6'h17:
+                         begin
+                           State_ctl_S = 6'h07;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h08;  //24+3 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b01: //FP64
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h12;  //53+4 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       6'h0c,6'h0d,6'h0e:
+                         begin
+                           State_ctl_S = 6'h04;
+                         end
+                       6'h0f,6'h10,6'h11:
+                         begin
+                           State_ctl_S = 6'h05;
+                         end
+                       6'h12,6'h13,6'h14:
+                         begin
+                           State_ctl_S = 6'h06;
+                         end
+                       6'h15,6'h16,6'h17:
+                         begin
+                           State_ctl_S = 6'h07;
+                         end
+                       6'h18,6'h19,6'h1a:
+                         begin
+                           State_ctl_S = 6'h08;
+                         end
+                       6'h1b,6'h1c,6'h1d:
+                         begin
+                           State_ctl_S = 6'h09;
+                         end
+                       6'h1e,6'h1f,6'h20:
+                         begin
+                           State_ctl_S = 6'h0a;
+                         end
+                       6'h21,6'h22,6'h23:
+                         begin
+                           State_ctl_S = 6'h0b;
+                         end
+                       6'h24,6'h25,6'h26:
+                         begin
+                           State_ctl_S = 6'h0c;
+                         end
+                       6'h27,6'h28,6'h29:
+                         begin
+                           State_ctl_S = 6'h0d;
+                         end
+                       6'h2a,6'h2b,6'h2c:
+                         begin
+                           State_ctl_S = 6'h0e;
+                         end
+                       6'h2d,6'h2e,6'h2f:
+                         begin
+                           State_ctl_S = 6'h0f;
+                         end
+                       6'h30,6'h31,6'h32:
+                         begin
+                           State_ctl_S = 6'h10;
+                         end
+                       6'h33,6'h34,6'h35:
+                         begin
+                           State_ctl_S = 6'h11;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h12;  //53+4 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b10: //FP16
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h04;  //12+3 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h04;  //12+3 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h03;  //8+4 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h03;  //8+4 more iterations for rounding bits
+                         end
+                     endcase
+                  end
+                endcase
+              end
+//////////////////////three iteration units, end///////////////////////////////////////
+
+//////////////////////four iteration units, start///////////////////////////////////////
+           2'b11:  //four iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h06;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0d;  //53+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h03;  //11+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h02;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////four iteration units, end///////////////////////////////////////
+
+           endcase
+        end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // control logic                                                           //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic                                               Div_start_dly_S;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)   //  generate Div_start_dly_S signal
+     begin
+        if(~Rst_RBI)
+          begin
+            Div_start_dly_S<=1'b0;
+          end
+        else if(Div_start_SI&&Ready_SO)
+         begin
+           Div_start_dly_S<=1'b1;
+         end
+        else
+          begin
+            Div_start_dly_S<=1'b0;
+          end
+    end
+
+   assign Div_start_dly_SO=Div_start_dly_S;
+
+  always_ff @(posedge Clk_CI, negedge Rst_RBI) begin  //  generate Div_enable_SO signal
+    if(~Rst_RBI)
+      Div_enable_SO<=1'b0;
+    // Synchronous rst_ni with Flush
+    else if (Kill_SI)
+      Div_enable_SO <= 1'b0;
+    else if(Div_start_SI&&Ready_SO)
+      Div_enable_SO<=1'b1;
+    else if(Done_SO)
+      Div_enable_SO<=1'b0;
+    else
+      Div_enable_SO<=Div_enable_SO;
+  end
+
+   logic                                                Sqrt_start_dly_S;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)   //  generate Sqrt_start_dly_SI signal
+     begin
+        if(~Rst_RBI)
+          begin
+            Sqrt_start_dly_S<=1'b0;
+          end
+        else if(Sqrt_start_SI&&Ready_SO)
+         begin
+           Sqrt_start_dly_S<=1'b1;
+         end
+        else
+          begin
+            Sqrt_start_dly_S<=1'b0;
+          end
+      end
+    assign Sqrt_start_dly_SO=Sqrt_start_dly_S;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) begin   //  generate Sqrt_enable_SO signal
+    if(~Rst_RBI)
+      Sqrt_enable_SO<=1'b0;
+    else if (Kill_SI)
+      Sqrt_enable_SO <= 1'b0;
+    else if(Sqrt_start_SI&&Ready_SO)
+      Sqrt_enable_SO<=1'b1;
+    else if(Done_SO)
+      Sqrt_enable_SO<=1'b0;
+    else
+      Sqrt_enable_SO<=Sqrt_enable_SO;
+  end
+
+   logic [5:0]                                                  Crtl_cnt_S;
+   logic                                                        Start_dly_S;
+
+   assign   Start_dly_S=Div_start_dly_S |Sqrt_start_dly_S;
+
+   logic       Fsm_enable_S;
+   assign      Fsm_enable_S=( (Start_dly_S | (| Crtl_cnt_S)) && (~Kill_SI) && Special_case_dly_SBI);
+
+   logic                                                        Final_state_S;
+   assign     Final_state_S= (Crtl_cnt_S==State_ctl_S);
+
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) //control_FSM
+     begin
+        if (~Rst_RBI)
+          begin
+             Crtl_cnt_S    <= '0;
+          end
+          else if (Final_state_S | Kill_SI)
+            begin
+              Crtl_cnt_S    <= '0;
+            end
+          else if(Fsm_enable_S) // one cycle Start_SI
+            begin
+              Crtl_cnt_S    <= Crtl_cnt_S+1;
+            end
+          else
+            begin
+              Crtl_cnt_S    <= '0;
+            end
+     end // always_ff
+
+
+
+    always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate  Done_SO,  they can share this Done_SO.
+      begin
+        if(~Rst_RBI)
+          begin
+            Done_SO<=1'b0;
+          end
+        else if(Start_SI&&Ready_SO)
+          begin
+            if(~Special_case_SBI)
+              begin
+                Done_SO<=1'b1;
+              end
+            else
+              begin
+                Done_SO<=1'b0;
+              end
+          end
+        else if(Final_state_S)
+          begin
+            Done_SO<=1'b1;
+          end
+        else
+          begin
+            Done_SO<=1'b0;
+          end
+       end
+
+
+
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate  Ready_SO
+     begin
+       if(~Rst_RBI)
+         begin
+           Ready_SO<=1'b1;
+         end
+
+       else if(Start_SI&&Ready_SO)
+         begin
+            if(~Special_case_SBI)
+              begin
+                Ready_SO<=1'b1;
+              end
+            else
+              begin
+                Ready_SO<=1'b0;
+              end
+         end
+       else if(Final_state_S | Kill_SI)
+         begin
+           Ready_SO<=1'b1;
+         end
+       else
+         begin
+           Ready_SO<=Ready_SO;
+         end
+     end
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b00, start  //
+   ////////////////////////////////////////////////////////////////////////////
+
+  logic                                    Qcnt_one_0;
+  logic                                    Qcnt_one_1;
+  logic [1:0]                              Qcnt_one_2;
+  logic [2:0]                              Qcnt_one_3;
+  logic [3:0]                              Qcnt_one_4;
+  logic [4:0]                              Qcnt_one_5;
+  logic [5:0]                              Qcnt_one_6;
+  logic [6:0]                              Qcnt_one_7;
+  logic [7:0]                              Qcnt_one_8;
+  logic [8:0]                              Qcnt_one_9;
+  logic [9:0]                              Qcnt_one_10;
+  logic [10:0]                             Qcnt_one_11;
+  logic [11:0]                             Qcnt_one_12;
+  logic [12:0]                             Qcnt_one_13;
+  logic [13:0]                             Qcnt_one_14;
+  logic [14:0]                             Qcnt_one_15;
+  logic [15:0]                             Qcnt_one_16;
+  logic [16:0]                             Qcnt_one_17;
+  logic [17:0]                             Qcnt_one_18;
+  logic [18:0]                             Qcnt_one_19;
+  logic [19:0]                             Qcnt_one_20;
+  logic [20:0]                             Qcnt_one_21;
+  logic [21:0]                             Qcnt_one_22;  // Qcnt_one_N is declared N bits wide ([N-1:0]) throughout this run
+  logic [22:0]                             Qcnt_one_23;
+  logic [23:0]                             Qcnt_one_24;
+  logic [24:0]                             Qcnt_one_25;
+  logic [25:0]                             Qcnt_one_26;
+  logic [26:0]                             Qcnt_one_27;
+  logic [27:0]                             Qcnt_one_28;
+  logic [28:0]                             Qcnt_one_29;
+  logic [29:0]                             Qcnt_one_30;
+  logic [30:0]                             Qcnt_one_31;
+  logic [31:0]                             Qcnt_one_32;
+  logic [32:0]                             Qcnt_one_33;
+  logic [33:0]                             Qcnt_one_34;
+  logic [34:0]                             Qcnt_one_35;
+  logic [35:0]                             Qcnt_one_36;
+  logic [36:0]                             Qcnt_one_37;
+  logic [37:0]                             Qcnt_one_38;
+  logic [38:0]                             Qcnt_one_39;
+  logic [39:0]                             Qcnt_one_40;
+  logic [40:0]                             Qcnt_one_41;
+  logic [41:0]                             Qcnt_one_42;
+  logic [42:0]                             Qcnt_one_43;
+  logic [43:0]                             Qcnt_one_44;
+  logic [44:0]                             Qcnt_one_45;
+  logic [45:0]                             Qcnt_one_46;
+  logic [46:0]                             Qcnt_one_47;
+  logic [47:0]                             Qcnt_one_48;
+  logic [48:0]                             Qcnt_one_49;
+  logic [49:0]                             Qcnt_one_50;
+  logic [50:0]                             Qcnt_one_51;
+  logic [51:0]                             Qcnt_one_52;
+  logic [52:0]                             Qcnt_one_53;
+  logic [53:0]                             Qcnt_one_54;
+  logic [54:0]                             Qcnt_one_55;
+  logic [55:0]                             Qcnt_one_56;
+  logic [56:0]                             Qcnt_one_57;  // last Qcnt_one_* driven by the Qcnt_one assign list further down
+  logic [57:0]                             Qcnt_one_58;  // NOTE(review): Qcnt_one_58..60 are not driven by that assign list (it ends at Qcnt_one_57) - confirm they are used elsewhere
+  logic [58:0]                             Qcnt_one_59;
+  logic [59:0]                             Qcnt_one_60;
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b00, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b01, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [1:0]                              Qcnt_two_0;   // 2 bits: driven as {1'b0, Sqrt_quotinent_S[3]}
+  logic [2:0]                              Qcnt_two_1;   // 3 bits; from here on each entry is 2 bits wider than the previous one
+  logic [4:0]                              Qcnt_two_2;
+  logic [6:0]                              Qcnt_two_3;
+  logic [8:0]                              Qcnt_two_4;
+  logic [10:0]                             Qcnt_two_5;
+  logic [12:0]                             Qcnt_two_6;
+  logic [14:0]                             Qcnt_two_7;
+  logic [16:0]                             Qcnt_two_8;
+  logic [18:0]                             Qcnt_two_9;
+  logic [20:0]                             Qcnt_two_10;
+  logic [22:0]                             Qcnt_two_11;
+  logic [24:0]                             Qcnt_two_12;
+  logic [26:0]                             Qcnt_two_13;
+  logic [28:0]                             Qcnt_two_14;
+  logic [30:0]                             Qcnt_two_15;
+  logic [32:0]                             Qcnt_two_16;
+  logic [34:0]                             Qcnt_two_17;
+  logic [36:0]                             Qcnt_two_18;
+  logic [38:0]                             Qcnt_two_19;
+  logic [40:0]                             Qcnt_two_20;
+  logic [42:0]                             Qcnt_two_21;
+  logic [44:0]                             Qcnt_two_22;
+  logic [46:0]                             Qcnt_two_23;
+  logic [48:0]                             Qcnt_two_24;
+  logic [50:0]                             Qcnt_two_25;
+  logic [52:0]                             Qcnt_two_26;
+  logic [54:0]                             Qcnt_two_27;
+  logic [56:0]                             Qcnt_two_28;  // 57 bits: driven as {Quotient_DP[55:0], Sqrt_quotinent_S[3]}
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b01, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b10, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [2:0]                              Qcnt_three_0;   // 3 bits: driven as {1'b0, Sqrt_quotinent_S[3], Sqrt_quotinent_S[2]}
+  logic [4:0]                              Qcnt_three_1;   // 5 bits; from here on each entry is 3 bits wider than the previous one
+  logic [7:0]                              Qcnt_three_2;
+  logic [10:0]                             Qcnt_three_3;
+  logic [13:0]                             Qcnt_three_4;
+  logic [16:0]                             Qcnt_three_5;
+  logic [19:0]                             Qcnt_three_6;
+  logic [22:0]                             Qcnt_three_7;
+  logic [25:0]                             Qcnt_three_8;
+  logic [28:0]                             Qcnt_three_9;
+  logic [31:0]                             Qcnt_three_10;
+  logic [34:0]                             Qcnt_three_11;
+  logic [37:0]                             Qcnt_three_12;
+  logic [40:0]                             Qcnt_three_13;
+  logic [43:0]                             Qcnt_three_14;
+  logic [46:0]                             Qcnt_three_15;
+  logic [49:0]                             Qcnt_three_16;
+  logic [52:0]                             Qcnt_three_17;
+  logic [55:0]                             Qcnt_three_18;
+  logic [58:0]                             Qcnt_three_19;  // last Qcnt_three_* driven by the Qcnt_three assign list further down
+  logic [61:0]                             Qcnt_three_20;  // NOTE(review): no assign for Qcnt_three_20 in that list - confirm it is used elsewhere
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b10, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b11, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [3:0]                              Qcnt_four_0;   // 4 bits: driven as {1'b0, Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]}
+  logic [6:0]                              Qcnt_four_1;   // 7 bits; from here on each entry is 4 bits wider than the previous one
+  logic [10:0]                             Qcnt_four_2;
+  logic [14:0]                             Qcnt_four_3;
+  logic [18:0]                             Qcnt_four_4;
+  logic [22:0]                             Qcnt_four_5;
+  logic [26:0]                             Qcnt_four_6;
+  logic [30:0]                             Qcnt_four_7;
+  logic [34:0]                             Qcnt_four_8;
+  logic [38:0]                             Qcnt_four_9;
+  logic [42:0]                             Qcnt_four_10;
+  logic [46:0]                             Qcnt_four_11;
+  logic [50:0]                             Qcnt_four_12;
+  logic [54:0]                             Qcnt_four_13;
+  logic [58:0]                             Qcnt_four_14;  // 59 bits: driven as {Quotient_DP[55:0], Sqrt_quotinent_S[3:1]} bit for bit
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b11, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R0,Sqrt_Q0,Q_sqrt0,Q_sqrt_com_0;  // Q_sqrt_com_N is built from the bitwise inverse of Q_sqrtN in the Format_sel_S always_comb below
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R1,Sqrt_Q1,Q_sqrt1,Q_sqrt_com_1;
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R2,Sqrt_Q2,Q_sqrt2,Q_sqrt_com_2;
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R3,Sqrt_Q3,Q_sqrt3,Q_sqrt_com_3,Sqrt_R4; //Sqrt_Q4;  (Sqrt_Q4 declaration is left commented out)
+
+
+   logic [1:0]                                                    Sqrt_DI  [3:0];  // four 2-bit entries, loaded with 2-bit slices of Mant_D_sqrt_Norm (or 2'b00) in the operand case below
+   logic [1:0]                                                    Sqrt_DO  [3:0];
+   logic                                                          Sqrt_carry_DO;
+
+
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_a_D [3:0];  // presumably one entry per iteration cell - confirm against the cell instances
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_b_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_a_BMASK_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_b_BMASK_D [3:0];
+  logic                                                           Iteration_cell_carry_D [3:0];  // supplies Sqrt_quotinent_S when Format_sel_S == 2'b01
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_sum_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_sum_AMASK_D [3:0];  // one bit of each entry is inverted to form Sqrt_quotinent_S for Format_sel_S 2'b00/2'b10/2'b11
+
+
+  logic [3:0]                                                     Sqrt_quotinent_S;  // bit 3 is taken from entry [0], bit 0 from entry [3] of the source arrays
+
+
+   always_comb  // per Format_sel_S: pick the four sqrt quotient bits and build the inverted Q_sqrt* operands
+    begin
+      case (Format_sel_S)
+        2'b00:  // bit index and slice width derived from C_MANT_FP32
+          begin
+            Sqrt_quotinent_S = ~{Iteration_cell_sum_AMASK_D[0][C_MANT_FP32+5], Iteration_cell_sum_AMASK_D[1][C_MANT_FP32+5], Iteration_cell_sum_AMASK_D[2][C_MANT_FP32+5], Iteration_cell_sum_AMASK_D[3][C_MANT_FP32+5]};
+            Q_sqrt_com_0     = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt0[C_MANT_FP32+5:0]};  // upper bits forced to zero, low slice inverted
+            Q_sqrt_com_1     = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt1[C_MANT_FP32+5:0]};
+            Q_sqrt_com_2     = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt2[C_MANT_FP32+5:0]};
+            Q_sqrt_com_3     = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt3[C_MANT_FP32+5:0]};
+          end
+        2'b01:  // full width: quotient bits come from the cell carries, whole Q_sqrt* vectors are inverted
+          begin
+            Sqrt_quotinent_S = {Iteration_cell_carry_D[0], Iteration_cell_carry_D[1], Iteration_cell_carry_D[2], Iteration_cell_carry_D[3]};
+            Q_sqrt_com_0     = ~Q_sqrt0;
+            Q_sqrt_com_1     = ~Q_sqrt1;
+            Q_sqrt_com_2     = ~Q_sqrt2;
+            Q_sqrt_com_3     = ~Q_sqrt3;
+          end
+        2'b10:  // bit index and slice width derived from C_MANT_FP16
+          begin
+            Sqrt_quotinent_S = ~{Iteration_cell_sum_AMASK_D[0][C_MANT_FP16+5], Iteration_cell_sum_AMASK_D[1][C_MANT_FP16+5], Iteration_cell_sum_AMASK_D[2][C_MANT_FP16+5], Iteration_cell_sum_AMASK_D[3][C_MANT_FP16+5]};
+            Q_sqrt_com_0     = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt0[C_MANT_FP16+5:0]};
+            Q_sqrt_com_1     = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt1[C_MANT_FP16+5:0]};
+            Q_sqrt_com_2     = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt2[C_MANT_FP16+5:0]};
+            Q_sqrt_com_3     = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt3[C_MANT_FP16+5:0]};
+          end
+        2'b11:  // bit index and slice width derived from C_MANT_FP16ALT
+          begin
+            Sqrt_quotinent_S = ~{Iteration_cell_sum_AMASK_D[0][C_MANT_FP16ALT+5], Iteration_cell_sum_AMASK_D[1][C_MANT_FP16ALT+5], Iteration_cell_sum_AMASK_D[2][C_MANT_FP16ALT+5], Iteration_cell_sum_AMASK_D[3][C_MANT_FP16ALT+5]};
+            Q_sqrt_com_0     = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt0[C_MANT_FP16ALT+5:0]};
+            Q_sqrt_com_1     = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt1[C_MANT_FP16ALT+5:0]};
+            Q_sqrt_com_2     = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt2[C_MANT_FP16ALT+5:0]};
+            Q_sqrt_com_3     = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt3[C_MANT_FP16ALT+5:0]};
+          end
+      endcase
+    end
+
+
+
+  assign  Qcnt_one_0  = 1'b0;               // qk for each feedback; Qcnt_one_N (N >= 1) is Quotient_DP[N-1:0]
+  assign  Qcnt_one_1  = Quotient_DP[0];
+  assign  Qcnt_one_2  = Quotient_DP[1:0];
+  assign  Qcnt_one_3  = Quotient_DP[2:0];
+  assign  Qcnt_one_4  = Quotient_DP[3:0];
+  assign  Qcnt_one_5  = Quotient_DP[4:0];
+  assign  Qcnt_one_6  = Quotient_DP[5:0];
+  assign  Qcnt_one_7  = Quotient_DP[6:0];
+  assign  Qcnt_one_8  = Quotient_DP[7:0];
+  assign  Qcnt_one_9  = Quotient_DP[8:0];
+  assign  Qcnt_one_10 = Quotient_DP[9:0];
+  assign  Qcnt_one_11 = Quotient_DP[10:0];
+  assign  Qcnt_one_12 = Quotient_DP[11:0];
+  assign  Qcnt_one_13 = Quotient_DP[12:0];
+  assign  Qcnt_one_14 = Quotient_DP[13:0];
+  assign  Qcnt_one_15 = Quotient_DP[14:0];
+  assign  Qcnt_one_16 = Quotient_DP[15:0];
+  assign  Qcnt_one_17 = Quotient_DP[16:0];
+  assign  Qcnt_one_18 = Quotient_DP[17:0];
+  assign  Qcnt_one_19 = Quotient_DP[18:0];
+  assign  Qcnt_one_20 = Quotient_DP[19:0];
+  assign  Qcnt_one_21 = Quotient_DP[20:0];
+  assign  Qcnt_one_22 = Quotient_DP[21:0];
+  assign  Qcnt_one_23 = Quotient_DP[22:0];
+  assign  Qcnt_one_24 = Quotient_DP[23:0];
+  assign  Qcnt_one_25 = Quotient_DP[24:0];
+  assign  Qcnt_one_26 = Quotient_DP[25:0];
+  assign  Qcnt_one_27 = Quotient_DP[26:0];
+  assign  Qcnt_one_28 = Quotient_DP[27:0];
+  assign  Qcnt_one_29 = Quotient_DP[28:0];
+  assign  Qcnt_one_30 = Quotient_DP[29:0];
+  assign  Qcnt_one_31 = Quotient_DP[30:0];
+  assign  Qcnt_one_32 = Quotient_DP[31:0];
+  assign  Qcnt_one_33 = Quotient_DP[32:0];
+  assign  Qcnt_one_34 = Quotient_DP[33:0];
+  assign  Qcnt_one_35 = Quotient_DP[34:0];
+  assign  Qcnt_one_36 = Quotient_DP[35:0];
+  assign  Qcnt_one_37 = Quotient_DP[36:0];
+  assign  Qcnt_one_38 = Quotient_DP[37:0];
+  assign  Qcnt_one_39 = Quotient_DP[38:0];
+  assign  Qcnt_one_40 = Quotient_DP[39:0];
+  assign  Qcnt_one_41 = Quotient_DP[40:0];
+  assign  Qcnt_one_42 = Quotient_DP[41:0];
+  assign  Qcnt_one_43 = Quotient_DP[42:0];
+  assign  Qcnt_one_44 = Quotient_DP[43:0];
+  assign  Qcnt_one_45 = Quotient_DP[44:0];
+  assign  Qcnt_one_46 = Quotient_DP[45:0];
+  assign  Qcnt_one_47 = Quotient_DP[46:0];
+  assign  Qcnt_one_48 = Quotient_DP[47:0];
+  assign  Qcnt_one_49 = Quotient_DP[48:0];
+  assign  Qcnt_one_50 = Quotient_DP[49:0];
+  assign  Qcnt_one_51 = Quotient_DP[50:0];
+  assign  Qcnt_one_52 = Quotient_DP[51:0];
+  assign  Qcnt_one_53 = Quotient_DP[52:0];
+  assign  Qcnt_one_54 = Quotient_DP[53:0];
+  assign  Qcnt_one_55 = Quotient_DP[54:0];
+  assign  Qcnt_one_56 = Quotient_DP[55:0];
+  assign  Qcnt_one_57 = Quotient_DP[56:0];
+
+
+  assign  Qcnt_two_0  = {1'b0,              Sqrt_quotinent_S[3]};  // qk for each feedback; Qcnt_two_k (k >= 1) appends Sqrt_quotinent_S[3] below Quotient_DP[2k-1:0]
+  assign  Qcnt_two_1  = {Quotient_DP[1:0],  Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_2  = {Quotient_DP[3:0],  Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_3  = {Quotient_DP[5:0],  Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_4  = {Quotient_DP[7:0],  Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_5  = {Quotient_DP[9:0],  Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_6  = {Quotient_DP[11:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_7  = {Quotient_DP[13:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_8  = {Quotient_DP[15:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_9  = {Quotient_DP[17:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_10 = {Quotient_DP[19:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_11 = {Quotient_DP[21:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_12 = {Quotient_DP[23:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_13 = {Quotient_DP[25:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_14 = {Quotient_DP[27:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_15 = {Quotient_DP[29:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_16 = {Quotient_DP[31:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_17 = {Quotient_DP[33:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_18 = {Quotient_DP[35:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_19 = {Quotient_DP[37:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_20 = {Quotient_DP[39:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_21 = {Quotient_DP[41:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_22 = {Quotient_DP[43:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_23 = {Quotient_DP[45:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_24 = {Quotient_DP[47:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_25 = {Quotient_DP[49:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_26 = {Quotient_DP[51:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_27 = {Quotient_DP[53:0], Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_28 = {Quotient_DP[55:0], Sqrt_quotinent_S[3]};
+
+
+  assign  Qcnt_three_0  = {1'b0,              Sqrt_quotinent_S[3:2]};  // qk for each feedback; Qcnt_three_k (k >= 1) appends Sqrt_quotinent_S[3:2] below Quotient_DP[3k-1:0]
+  assign  Qcnt_three_1  = {Quotient_DP[2:0],  Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_2  = {Quotient_DP[5:0],  Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_3  = {Quotient_DP[8:0],  Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_4  = {Quotient_DP[11:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_5  = {Quotient_DP[14:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_6  = {Quotient_DP[17:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_7  = {Quotient_DP[20:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_8  = {Quotient_DP[23:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_9  = {Quotient_DP[26:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_10 = {Quotient_DP[29:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_11 = {Quotient_DP[32:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_12 = {Quotient_DP[35:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_13 = {Quotient_DP[38:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_14 = {Quotient_DP[41:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_15 = {Quotient_DP[44:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_16 = {Quotient_DP[47:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_17 = {Quotient_DP[50:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_18 = {Quotient_DP[53:0], Sqrt_quotinent_S[3:2]};
+  assign  Qcnt_three_19 = {Quotient_DP[56:0], Sqrt_quotinent_S[3:2]};
+
+
+  assign  Qcnt_four_0  = {1'b0,              Sqrt_quotinent_S[3:1]};  // Qcnt_four_k (k >= 1) appends Sqrt_quotinent_S[3:1] below Quotient_DP[4k-1:0]
+  assign  Qcnt_four_1  = {Quotient_DP[3:0],  Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_2  = {Quotient_DP[7:0],  Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_3  = {Quotient_DP[11:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_4  = {Quotient_DP[15:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_5  = {Quotient_DP[19:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_6  = {Quotient_DP[23:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_7  = {Quotient_DP[27:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_8  = {Quotient_DP[31:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_9  = {Quotient_DP[35:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_10 = {Quotient_DP[39:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_11 = {Quotient_DP[43:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_12 = {Quotient_DP[47:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_13 = {Quotient_DP[51:0], Sqrt_quotinent_S[3:1]};
+  assign  Qcnt_four_14 = {Quotient_DP[55:0], Sqrt_quotinent_S[3:1]};
+
+
+
+
+  always_comb begin  // the intermediate operands for sqrt
+
+  case(Iteration_unit_num_S)
+    2'b00:
+      begin
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b00, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+
+
+        case(Crtl_cnt_S)
+
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_0};
+              Sqrt_Q0=Q_sqrt_com_0;
+            end
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_1};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_one_2};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt0={{(C_MANT_FP64+3){1'b0}},Qcnt_one_3};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_one_4};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_one_5};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_one_6};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_one_7};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_one_8};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_one_9};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_one_10};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt0={{(C_MANT_FP64-5){1'b0}},Qcnt_one_11};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_one_12};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_one_13};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_one_14};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt0={{(C_MANT_FP64-9){1'b0}},Qcnt_one_15};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_one_16};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_one_17};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_one_18};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_one_19};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_one_20};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_one_21};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_one_22};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt0={{(C_MANT_FP64-17){1'b0}},Qcnt_one_23};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_one_24};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_one_25};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_one_26};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-21){1'b0}},Qcnt_one_27};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_one_28};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_one_29};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_one_30};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_one_31};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_one_32};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_one_33};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_one_34};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-29){1'b0}},Qcnt_one_35};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_one_36};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_one_37};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_one_38};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-33){1'b0}},Qcnt_one_39};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_one_40};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_one_41};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_one_42};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_one_43};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_one_44};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_one_45};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_one_46};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-41){1'b0}},Qcnt_one_47};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_one_48};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_one_49};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_one_50};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-45){1'b0}},Qcnt_one_51};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_one_52};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_one_53};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_one_54};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_one_55};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b111000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_one_56};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+
+          default:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0='0;
+              Sqrt_Q0='0;
+            end
+        endcase
+      end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b00, end         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b01:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b01, start       //
+   /////////////////////////////////////////////////////////////////////////////
+        case(Crtl_cnt_S)
+          // Per Crtl_cnt_S value this table drives two operand slots: Sqrt_DI[i] is a 2-bit slice of Mant_D_sqrt_Norm, Q_sqrt<i> is a Qcnt_two_<n> slice left-padded with zeros, Sqrt_Q<i> picks Q_sqrt_com_<i> or Q_sqrt<i>.
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+              Sqrt_Q0=Q_sqrt_com_0; // only item where slot 0 is not selected by Quotient_DP[0]
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+          // Each following item takes the next four bits of Mant_D_sqrt_Norm; from Qcnt_two_1 on, the Qcnt_two_<n> slices widen by two bits per item while the zero padding shrinks by two.
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_two_1[2:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt1={{(C_MANT_FP64+3){1'b0}},Qcnt_two_1[2:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_two_2[4:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_two_2[4:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_two_3[6:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt1={{(C_MANT_FP64-1){1'b0}},Qcnt_two_3[6:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_two_4[8:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt1={{(C_MANT_FP64-3){1'b0}},Qcnt_two_4[8:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+            6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_two_5[10:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_two_5[10:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_two_6[12:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt1={{(C_MANT_FP64-7){1'b0}},Qcnt_two_6[12:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_two_7[14:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt1={{(C_MANT_FP64-9){1'b0}},Qcnt_two_7[14:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_two_8[16:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_two_8[16:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_two_9[18:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt1={{(C_MANT_FP64-13){1'b0}},Qcnt_two_9[18:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_two_10[20:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt1={{(C_MANT_FP64-15){1'b0}},Qcnt_two_10[20:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_two_11[22:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_two_11[22:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_two_12[24:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt1={{(C_MANT_FP64-19){1'b0}},Qcnt_two_12[24:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_two_13[26:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00; // no slice below [C_MANT_FP64-51:C_MANT_FP64-52] is read; zeros are fed from here on (presumably the mantissa LSB - confirm C_MANT_FP64)
+              Q_sqrt1={{(C_MANT_FP64-21){1'b0}},Qcnt_two_13[26:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_two_14[28:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_two_14[28:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_two_15[30:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-25){1'b0}},Qcnt_two_15[30:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_two_16[32:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-27){1'b0}},Qcnt_two_16[32:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_two_17[34:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_two_17[34:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_two_18[36:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-31){1'b0}},Qcnt_two_18[36:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_two_19[38:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-33){1'b0}},Qcnt_two_19[38:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_two_20[40:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_two_20[40:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_two_21[42:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-37){1'b0}},Qcnt_two_21[42:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_two_22[44:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-39){1'b0}},Qcnt_two_22[44:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_two_23[46:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_two_23[46:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_two_24[48:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-43){1'b0}},Qcnt_two_24[48:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_two_25[50:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-45){1'b0}},Qcnt_two_25[50:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_two_26[52:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_two_26[52:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_two_27[54:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-49){1'b0}},Qcnt_two_27[54:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_two_28[56:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-51){1'b0}},Qcnt_two_28[56:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          default: // any other Crtl_cnt_S value: same assignments as the 6'b000000 item
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+        endcase
+      end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b01, end         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b10:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b10, start       //
+   /////////////////////////////////////////////////////////////////////////////
+   // Per Crtl_cnt_S value this table drives three operand slots: Sqrt_DI[i] is a 2-bit slice of Mant_D_sqrt_Norm, Q_sqrt<i> is a Qcnt_three_<n> slice left-padded with zeros, Sqrt_Q<i> picks Q_sqrt_com_<i> or Q_sqrt<i>.
+        case(Crtl_cnt_S)
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+              Sqrt_Q0=Q_sqrt_com_0; // only item where slot 0 is not selected by Quotient_DP[0]
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+          // Each following item takes the next six bits of Mant_D_sqrt_Norm; from Qcnt_three_1 on, the Qcnt_three_<n> slices widen by three bits per item while the zero padding shrinks by three.
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_three_1[4:2]}; // NOTE(review): from this item on the literals are C_MANT_FP64+5 bits wide vs C_MANT_FP64+6 in item 6'b000000; presumably relies on implicit zero-extension - confirm Q_sqrt* declared width
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_three_1[4:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt2={{(C_MANT_FP64){1'b0}},Qcnt_three_1[4:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_three_2[7:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt1={{(C_MANT_FP64-2){1'b0}},Qcnt_three_2[7:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt2={{(C_MANT_FP64-3){1'b0}},Qcnt_three_2[7:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_three_3[10:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_three_3[10:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt2={{(C_MANT_FP64-6){1'b0}},Qcnt_three_3[10:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_three_4[13:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_three_4[13:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_three_4[13:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_three_5[16:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_three_5[16:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt2={{(C_MANT_FP64-12){1'b0}},Qcnt_three_5[16:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_three_6[19:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt1={{(C_MANT_FP64-14){1'b0}},Qcnt_three_6[19:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt2={{(C_MANT_FP64-15){1'b0}},Qcnt_three_6[19:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_three_7[22:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_three_7[22:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt2={{(C_MANT_FP64-18){1'b0}},Qcnt_three_7[22:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_three_8[25:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_three_8[25:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_three_8[25:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001001:
+            begin
+              Sqrt_DI[0]=2'b00; // the previous item read the last slice, [C_MANT_FP64-51:C_MANT_FP64-52]; zeros are fed to every slot from here on
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_three_9[28:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_three_9[28:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-24){1'b0}},Qcnt_three_9[28:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_three_10[31:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-26){1'b0}},Qcnt_three_10[31:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-27){1'b0}},Qcnt_three_10[31:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_three_11[34:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_three_11[34:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-30){1'b0}},Qcnt_three_11[34:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_three_12[37:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_three_12[37:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_three_12[37:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_three_13[40:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_three_13[40:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-36){1'b0}},Qcnt_three_13[40:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_three_14[43:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-38){1'b0}},Qcnt_three_14[43:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-39){1'b0}},Qcnt_three_14[43:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_three_15[46:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_three_15[46:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-42){1'b0}},Qcnt_three_15[46:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_three_16[49:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_three_16[49:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_three_16[49:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_three_17[52:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_three_17[52:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-48){1'b0}},Qcnt_three_17[52:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_three_18[55:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-50){1'b0}},Qcnt_three_18[55:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-51){1'b0}},Qcnt_three_18[55:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          default : // any other Crtl_cnt_S value: same assignments as the 6'b000000 item
+              begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+        endcase
+
+      end
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b10, end         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b11:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b11, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+              case(Crtl_cnt_S)
+
+                6'b000000:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+                    Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+                    Sqrt_Q0=Q_sqrt_com_0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+                    Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+                    Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+                    Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000001:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+                    Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_four_1[6:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+                    Q_sqrt1={{(C_MANT_FP64){1'b0}},Qcnt_four_1[6:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+                    Q_sqrt2={{(C_MANT_FP64-1){1'b0}},Qcnt_four_1[6:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+                    Q_sqrt3={{(C_MANT_FP64-2){1'b0}},Qcnt_four_1[6:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000010:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+                    Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_four_2[10:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+                    Q_sqrt1={{(C_MANT_FP64-4){1'b0}},Qcnt_four_2[10:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+                    Q_sqrt2={{(C_MANT_FP64-5){1'b0}},Qcnt_four_2[10:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+                    Q_sqrt3={{(C_MANT_FP64-6){1'b0}},Qcnt_four_2[10:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000011:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+                    Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_four_3[14:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+                    Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_four_3[14:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+                    Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_four_3[14:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+                    Q_sqrt3={{(C_MANT_FP64-10){1'b0}},Qcnt_four_3[14:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000100:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+                    Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_four_4[18:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+                    Q_sqrt1={{(C_MANT_FP64-12){1'b0}},Qcnt_four_4[18:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+                    Q_sqrt2={{(C_MANT_FP64-13){1'b0}},Qcnt_four_4[18:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+                    Q_sqrt3={{(C_MANT_FP64-14){1'b0}},Qcnt_four_4[18:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000101:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+                    Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_four_5[22:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+                    Q_sqrt1={{(C_MANT_FP64-16){1'b0}},Qcnt_four_5[22:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+                    Q_sqrt2={{(C_MANT_FP64-17){1'b0}},Qcnt_four_5[22:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+                    Q_sqrt3={{(C_MANT_FP64-18){1'b0}},Qcnt_four_5[22:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000110:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+                    Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_four_6[26:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+                    Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_four_6[26:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+                    Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_four_6[26:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-22){1'b0}},Qcnt_four_6[26:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000111:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_four_7[30:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-24){1'b0}},Qcnt_four_7[30:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-25){1'b0}},Qcnt_four_7[30:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-26){1'b0}},Qcnt_four_7[30:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001000:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_four_8[34:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-28){1'b0}},Qcnt_four_8[34:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-29){1'b0}},Qcnt_four_8[34:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-30){1'b0}},Qcnt_four_8[34:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001001:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_four_9[38:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_four_9[38:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_four_9[38:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-34){1'b0}},Qcnt_four_9[38:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001010:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_four_10[42:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-36){1'b0}},Qcnt_four_10[42:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-37){1'b0}},Qcnt_four_10[42:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-38){1'b0}},Qcnt_four_10[42:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001011:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_four_11[46:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-40){1'b0}},Qcnt_four_11[46:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-41){1'b0}},Qcnt_four_11[46:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-42){1'b0}},Qcnt_four_11[46:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001100:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_four_12[50:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_four_12[50:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_four_12[50:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-46){1'b0}},Qcnt_four_12[50:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001101:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_four_13[54:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-48){1'b0}},Qcnt_four_13[54:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-49){1'b0}},Qcnt_four_13[54:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-50){1'b0}},Qcnt_four_13[54:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                default:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+                    Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+                    Sqrt_Q0=Q_sqrt_com_0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+                    Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+                    Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+                    Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+              endcase
+            end
+      endcase
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b11, end         //
+   /////////////////////////////////////////////////////////////////////////////
+ end
+
+
+
+  // Square-root remainder operands.
+  // Sqrt_R0 is the A operand of the first iteration cell in square-root mode: all zeros while
+  // Sqrt_start_dly_S is set, otherwise the stored partial remainder.
+  assign Sqrt_R0 = Sqrt_start_dly_S ? '0
+                                    : Partial_remainder_DP[C_MANT_FP64+5:0];
+
+  // Sqrt_R<k+1> is assembled from the masked sum of iteration cell k: its top bit, then its
+  // bits [C_MANT_FP64+2:0], then Sqrt_DO[k] appended at the LSB end.
+  assign Sqrt_R1 = { Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5],
+                     Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+2:0],
+                     Sqrt_DO[0] };
+  assign Sqrt_R2 = { Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5],
+                     Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+2:0],
+                     Sqrt_DO[1] };
+  assign Sqrt_R3 = { Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5],
+                     Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+2:0],
+                     Sqrt_DO[2] };
+  assign Sqrt_R4 = { Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+5],
+                     Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+2:0],
+                     Sqrt_DO[3] };
+
+  // Denominator_se_DB re-packed into the (C_MANT_FP64+6)-bit layout consumed by the iteration cells.
+  // From MSB to LSB the vector is built as:
+  //   - Denominator_se_DB down to the end of the FP16ALT mantissa field,
+  //   - one bit that is 1 when FP16ALT_SO is set, else the next Denominator_se_DB bit,
+  //   - Denominator_se_DB down to the end of the FP16 mantissa field,
+  //   - one bit that is 1 when FP16_SO is set, else the next Denominator_se_DB bit,
+  //   - Denominator_se_DB down to the end of the FP32 mantissa field,
+  //   - one bit that is 1 when FP32_SO is set, else the next Denominator_se_DB bit,
+  //   - the remaining Denominator_se_DB bits down to bit 0,
+  //   - FP64_SO, followed by three zero LSBs.
+  // NOTE(review): the injected 1 looks like the "+1" completing a two's-complement negation of the
+  // denominator at the active format's LSB position -- confirm against the definition of Denominator_se_DB.
+  logic [C_MANT_FP64+5:0]                               Denominator_se_format_DB;  // format-aware denominator operand
+
+  assign Denominator_se_format_DB={Denominator_se_DB[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} ;
+  // Operands for iteration cell U0 (index 0 of Iteration_cell_a_BMASK_D / Iteration_cell_b_BMASK_D).
+  //
+  // Division, A operand (First_iteration_cell_div_a_D):
+  //   - Div_start_dly_S set : Numerator_se_D laid out per format -- the bit below the FP16ALT/FP16/FP32
+  //                           mantissa field is forced to 1 when that format's select is set, FP64_SO
+  //                           follows the FP64 field, and the three LSBs are zero.
+  //   - otherwise           : Partial_remainder_DP moved up by one bit position (its MSB is dropped),
+  //                           with Quotient_DP[0] placed below the mantissa field of the selected format
+  //                           (for FP64 the bit is FP64_SO && Quotient_DP[0]); three zero LSBs.
+  // Division, B operand (First_iteration_cell_div_b_D):
+  //   Denominator_se_format_DB when Sel_b_for_first_S is 1, else {Denominator_se_D,4'b0}.
+  //   Sel_b_for_first_S is 1 while Div_start_dly_S is set and Quotient_DP[0] afterwards.
+  // When Sqrt_enable_SO is set both operands are replaced by the square-root operands Sqrt_R0 / Sqrt_Q0.
+  logic [C_MANT_FP64+5:0]                           First_iteration_cell_div_a_D,First_iteration_cell_div_b_D;
+  logic                                             Sel_b_for_first_S;
+
+
+  assign First_iteration_cell_div_a_D=(Div_start_dly_S)?{Numerator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP32-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0}
+                                                        :{Partial_remainder_DP[C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Quotient_DP[0],3'b0};
+  assign Sel_b_for_first_S=(Div_start_dly_S)?1:Quotient_DP[0];
+  assign First_iteration_cell_div_b_D=Sel_b_for_first_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};
+  assign Iteration_cell_a_BMASK_D[0]=Sqrt_enable_SO?Sqrt_R0:{First_iteration_cell_div_a_D};
+  assign Iteration_cell_b_BMASK_D[0]=Sqrt_enable_SO?Sqrt_Q0:{First_iteration_cell_div_b_D};
+
+
+
+  // Operands for iteration cell U1 (index 1). Generated only when Iteration_unit_num_S is non-zero.
+  //   Sel_b_for_sec_S            : inverted top bit of cell 0's masked sum.
+  //   Sec_iteration_cell_div_a_D : cell 0's masked sum moved up by one bit position (MSB dropped), with
+  //                                Sel_b_for_sec_S placed below the mantissa field of the selected format
+  //                                (FP64: FP64_SO && Sel_b_for_sec_S) and three zero LSBs.
+  //   Sec_iteration_cell_div_b_D : Denominator_se_format_DB when Sel_b_for_sec_S is 1,
+  //                                else {Denominator_se_D,4'b0}.
+  // When Sqrt_enable_SO is set the cell is fed Sqrt_R1 / Sqrt_Q1 instead.
+  logic [C_MANT_FP64+5:0]                          Sec_iteration_cell_div_a_D,Sec_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_sec_S;
+  generate
+    if(|Iteration_unit_num_S)
+      begin
+        assign Sel_b_for_sec_S=~Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5];
+        assign Sec_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_sec_S,3'b0};
+        assign Sec_iteration_cell_div_b_D=Sel_b_for_sec_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};
+        assign Iteration_cell_a_BMASK_D[1]=Sqrt_enable_SO?Sqrt_R1:{Sec_iteration_cell_div_a_D};
+        assign Iteration_cell_b_BMASK_D[1]=Sqrt_enable_SO?Sqrt_Q1:{Sec_iteration_cell_div_b_D};
+      end
+    endgenerate
+
+  // Operands for iteration cell U2 (index 2). Generated only when Iteration_unit_num_S is 2'b10 or 2'b11.
+  //   Sel_b_for_thi_S            : inverted top bit of cell 1's masked sum.
+  //   Thi_iteration_cell_div_a_D : cell 1's masked sum moved up by one bit position (MSB dropped), with
+  //                                Sel_b_for_thi_S placed below the mantissa field of the selected format
+  //                                (FP64: FP64_SO && Sel_b_for_thi_S) and three zero LSBs.
+  //   Thi_iteration_cell_div_b_D : Denominator_se_format_DB when Sel_b_for_thi_S is 1,
+  //                                else {Denominator_se_D,4'b0}.
+  // When Sqrt_enable_SO is set the cell is fed Sqrt_R2 / Sqrt_Q2 instead.
+  logic [C_MANT_FP64+5:0]                          Thi_iteration_cell_div_a_D,Thi_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_thi_S;
+  generate
+    if((Iteration_unit_num_S==2'b10) | (Iteration_unit_num_S==2'b11))
+      begin
+        assign Sel_b_for_thi_S=~Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5];
+        assign Thi_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_thi_S,3'b0};
+        assign Thi_iteration_cell_div_b_D=Sel_b_for_thi_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};
+        assign Iteration_cell_a_BMASK_D[2]=Sqrt_enable_SO?Sqrt_R2:{Thi_iteration_cell_div_a_D};
+        assign Iteration_cell_b_BMASK_D[2]=Sqrt_enable_SO?Sqrt_Q2:{Thi_iteration_cell_div_b_D};
+      end
+  endgenerate
+
+  // Operands for iteration cell U3 (index 3). Generated only when Iteration_unit_num_S is 2'b11.
+  //   Sel_b_for_fou_S            : inverted top bit of cell 2's masked sum.
+  //   Fou_iteration_cell_div_a_D : cell 2's masked sum moved up by one bit position (MSB dropped), with
+  //                                Sel_b_for_fou_S placed below the mantissa field of the selected format
+  //                                (FP64: FP64_SO && Sel_b_for_fou_S) and three zero LSBs.
+  //   Fou_iteration_cell_div_b_D : Denominator_se_format_DB when Sel_b_for_fou_S is 1,
+  //                                else {Denominator_se_D,4'b0}.
+  // When Sqrt_enable_SO is set the cell is fed Sqrt_R3 / Sqrt_Q3 instead.
+  logic [C_MANT_FP64+5:0]                          Fou_iteration_cell_div_a_D,Fou_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_fou_S;
+
+  generate
+    if(Iteration_unit_num_S==2'b11)
+      begin
+        assign Sel_b_for_fou_S=~Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5];
+        assign Fou_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_fou_S,3'b0};
+        assign Fou_iteration_cell_div_b_D=Sel_b_for_fou_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};
+        assign Iteration_cell_a_BMASK_D[3]=Sqrt_enable_SO?Sqrt_R3:{Fou_iteration_cell_div_a_D};
+        assign Iteration_cell_b_BMASK_D[3]=Sqrt_enable_SO?Sqrt_Q3:{Fou_iteration_cell_div_b_D};
+      end
+  endgenerate
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Masking Control                                                         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+  // Per-bit mask that is ANDed with every iteration-cell operand and sum.
+  // It is held at all ones, i.e. masking is effectively disabled; the format-specific handling is
+  // done where the operands are assembled.
+  logic [C_MANT_FP64+1+4:0]                          Mask_bits_ctl_S;  //For extension
+
+  // '1 fills the full declared width, so the mask stays all ones for any C_MANT_FP64
+  // (a fixed 58-bit literal is only all ones across the vector when C_MANT_FP64 <= 52).
+  assign Mask_bits_ctl_S = '1;   //It is not needed. The corresponding process is handled the above codes
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Iteration Instances  with masking control                               //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+  // Per-cell copies of the control signals handed to each iteration cell instance.
+  logic                                             Div_enable_SI   [3:0];
+  logic                                             Div_start_dly_SI   [3:0];
+  logic                                             Sqrt_enable_SI   [3:0];
+  // Instantiates one iteration_div_sqrt_mvp cell for every i in 0..Iteration_unit_num_S (inclusive).
+  // For each cell:
+  //   - the A/B operands and the sum are ANDed bit by bit with Mask_bits_ctl_S,
+  //   - Div_enable_SO, Div_start_dly_S and Sqrt_enable_SO are fanned out unchanged,
+  //   - Sqrt_DI[i] is passed in on D_DI and Sqrt_DO[i] is returned on D_DO.
+  generate
+    genvar i,j;
+      for (i=0; i <= Iteration_unit_num_S ; i++)
+        begin
+          // Bitwise masking of operands (before the cell) and of the sum (after the cell).
+          for (j = 0; j <= C_MANT_FP64+5; j++) begin
+              assign Iteration_cell_a_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_a_BMASK_D[i][j];
+              assign Iteration_cell_b_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_b_BMASK_D[i][j];
+              assign Iteration_cell_sum_AMASK_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_sum_D[i][j];
+          end
+
+          assign  Div_enable_SI[i] = Div_enable_SO;
+          assign  Div_start_dly_SI[i] = Div_start_dly_S;
+          assign  Sqrt_enable_SI[i] = Sqrt_enable_SO;
+          // The single positional parameter is C_MANT_FP64+6, matching the operand vectors' width
+          // (presumably the cell's data width -- confirm against iteration_div_sqrt_mvp).
+          iteration_div_sqrt_mvp #(C_MANT_FP64+6) iteration_div_sqrt
+          (
+          .A_DI                                    (Iteration_cell_a_D[i]            ),
+          .B_DI                                    (Iteration_cell_b_D[i]            ),
+          .Div_enable_SI                           (Div_enable_SI[i]                 ),
+          .Div_start_dly_SI                        (Div_start_dly_SI[i]              ),
+          .Sqrt_enable_SI                          (Sqrt_enable_SI[i]                ),
+          .D_DI                                    (Sqrt_DI[i]                       ),
+          .D_DO                                    (Sqrt_DO[i]                       ),
+          .Sum_DO                                  (Iteration_cell_sum_D[i]          ),
+          .Carry_out_DO                            (Iteration_cell_carry_D[i]        )
+         );
+
+        end
+
+  endgenerate
+
+
+
+  // Next value of the partial-remainder register.
+  // The stored remainder is held unless Fsm_enable_S is set. When enabled, the value comes from the
+  // iteration cell whose index equals Iteration_unit_num_S: the square-root remainder Sqrt_R<index+1>
+  // if Sqrt_enable_SO is set, otherwise that cell's masked sum.
+  always_comb
+    begin
+      Partial_remainder_DN = Partial_remainder_DP;   // default: hold
+      if (Fsm_enable_S)
+        begin
+          case (Iteration_unit_num_S)
+            2'b00: Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R1 : Iteration_cell_sum_AMASK_D[0];
+            2'b01: Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R2 : Iteration_cell_sum_AMASK_D[1];
+            2'b10: Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R3 : Iteration_cell_sum_AMASK_D[2];
+            2'b11: Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R4 : Iteration_cell_sum_AMASK_D[3];
+          endcase
+        end
+    end
+
+
+
+   // Partial-remainder register: cleared to zero by the asynchronous active-low reset Rst_RBI,
+   // otherwise loads Partial_remainder_DN on each rising edge of Clk_CI.
+   always_ff @(posedge Clk_CI or negedge Rst_RBI)
+     if (~Rst_RBI)
+       Partial_remainder_DP <= '0;
+     else
+       Partial_remainder_DP <= Partial_remainder_DN;
+
+   // Next value of the quotient register.
+   logic [C_MANT_FP64+4:0] Quotient_DN;
+
+  // The stored quotient is held unless Fsm_enable_S is set. When enabled, Quotient_DP is shifted up by
+  // (Iteration_unit_num_S + 1) positions and the new low bits are filled with either the square-root
+  // quotient bits (Sqrt_enable_SO set) or the carry-outs of the iteration cells, cell 0 first.
+  always_comb
+    begin
+      Quotient_DN = Quotient_DP;   // default: hold
+      if (Fsm_enable_S)
+        begin
+          case (Iteration_unit_num_S)
+            2'b00:
+              Quotient_DN = Sqrt_enable_SO
+                          ? {Quotient_DP[C_MANT_FP64+3:0], Sqrt_quotinent_S[3]}
+                          : {Quotient_DP[C_MANT_FP64+3:0], Iteration_cell_carry_D[0]};
+            2'b01:
+              Quotient_DN = Sqrt_enable_SO
+                          ? {Quotient_DP[C_MANT_FP64+2:0], Sqrt_quotinent_S[3:2]}
+                          : {Quotient_DP[C_MANT_FP64+2:0], Iteration_cell_carry_D[0],
+                                                           Iteration_cell_carry_D[1]};
+            2'b10:
+              Quotient_DN = Sqrt_enable_SO
+                          ? {Quotient_DP[C_MANT_FP64+1:0], Sqrt_quotinent_S[3:1]}
+                          : {Quotient_DP[C_MANT_FP64+1:0], Iteration_cell_carry_D[0],
+                                                           Iteration_cell_carry_D[1],
+                                                           Iteration_cell_carry_D[2]};
+            2'b11:
+              Quotient_DN = Sqrt_enable_SO
+                          ? {Quotient_DP[C_MANT_FP64:0], Sqrt_quotinent_S}
+                          : {Quotient_DP[C_MANT_FP64:0], Iteration_cell_carry_D[0],
+                                                         Iteration_cell_carry_D[1],
+                                                         Iteration_cell_carry_D[2],
+                                                         Iteration_cell_carry_D[3]};
+          endcase
+        end
+    end
+
+   // Quotient register: cleared to zero by the asynchronous active-low reset Rst_RBI,
+   // otherwise loads Quotient_DN on each rising edge of Clk_CI.
+   always_ff @(posedge Clk_CI or negedge Rst_RBI)
+     if (~Rst_RBI)
+       Quotient_DP <= '0;
+     else
+       Quotient_DP <= Quotient_DN;
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Precision Control for outputs                                          //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////one iteration unit, start///////////////////////////////////////
+   generate
+     if(Iteration_unit_num_S==2'b00)
+       begin
+        always_comb
+          begin
+            case (Format_sel_S)
+              2'b00:
+                begin
+                  case (Precision_ctl_S)
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-1:0],{(C_MANT_FP64-C_MANT_FP32+4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-5:0],{(C_MANT_FP64-C_MANT_FP32+4+5){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-7:0],{(C_MANT_FP64-C_MANT_FP32+4+7){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-11:0],{(C_MANT_FP64-C_MANT_FP32+4+11){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-13:0],{(C_MANT_FP64-C_MANT_FP32+4+13){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b01:
+                begin
+                  case (Precision_ctl_S)
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                    6'h34:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64:0],{(4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h33:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h32:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h31:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-4:0],{(4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-6:0],{(4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-10:0],{(4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h29:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-12:0],{(4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h27:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h26:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h25:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-16:0],{(4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h23:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h22:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-18:0],{(4+18){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h21:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-22:0],{(4+22){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-24:0],{(4+24){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h19:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-28:0],{(4+28){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h17:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-30:0],{(4+30){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-34:0],{(4+34){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-36:0],{(4+36){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-40:0],{(4+40){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-42:0],{(4+42){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                  endcase
+                end
+
+              2'b10:
+                begin
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16:0],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b11:
+                begin
+
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////one iteration unit, end//////////////////////////////////////////
+
+//////////////////////two iteration units, start: Mant_result_prenorm_DO selection for Iteration_unit_num_S==2'b01//////////
+   generate // each branch concatenates a Quotient_DP slice (MSBs) with zero fill (LSBs); every concatenation is C_MANT_FP64+5 bits wide
+     if(Iteration_unit_num_S==2'b01)
+       begin
+        always_comb
+          begin
+            case (Format_sel_S) // outer case: format; inner cases: Precision_ctl_S
+              2'b00: // slices sized with C_MANT_FP32
+                begin
+                  case (Precision_ctl_S) // 6'h00 and default use the same slice; listed values come in pairs, each pair keeping 2 fewer quotient bits
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17,6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+              2'b01: // slices sized with C_MANT_FP64
+                begin
+                  case (Precision_ctl_S) // 6'h00 and default use the same slice; 6'h34 and 6'h07 stand alone, the other values are paired
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3
+                      end
+                    6'h34:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h33,6'h32:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h31,6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2d,6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2b,6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h29,6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h27,6'h26:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h21,6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1f,6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1d,6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1b,6'h1a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16
+                begin
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} }; //+3
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4  NOTE(review): differs from the 6'b00 item above (+3 slice, one more pad bit) - confirm this is intended
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+       end
+     endgenerate
+//////////////////////two iteration units, end//////////////////////////////////////////
+
+//////////////////////three iteration units, start: Mant_result_prenorm_DO selection for Iteration_unit_num_S==2'b10//////////
+   generate // each branch concatenates a Quotient_DP slice (MSBs) with zero fill (LSBs); every concatenation is C_MANT_FP64+5 bits wide
+     if(Iteration_unit_num_S==2'b10)
+       begin
+        always_comb
+          begin
+            case (Format_sel_S) // outer case: format; inner cases: Precision_ctl_S
+              2'b00: // slices sized with C_MANT_FP32
+                begin
+                  case (Precision_ctl_S) // 6'h00 and default use the same slice; listed values come in threes, each group keeping 3 fewer quotient bits
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3
+                      end
+                    6'h17,6'h16,6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14,6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10,6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3
+                      end
+                  endcase
+                end
+
+              2'b01: // slices sized with C_MANT_FP64
+                begin
+                  case (Precision_ctl_S) // 6'h00 and default use the same slice (no zero fill); 6'h34,6'h33 form a pair, the other values are grouped in threes
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                    6'h34,6'h33:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h32,6'h31,6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e,6'h2d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2c,6'h2b,6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h29,6'h28,6'h27:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h26,6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22,6'h21:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h20,6'h1f,6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1d,6'h1c,6'h1b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1a,6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16,6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h14,6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10,6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16
+                begin
+                  case (Precision_ctl_S) // 6'b00 and default use the same slice
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4
+                      end
+                    6'h0a,6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S) // 6'b00 and default use the same slice
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+1:1],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////three iteration units, end//////////////////////////////////////////
+
+//////////////////////four iteration units, start: Mant_result_prenorm_DO selection for Iteration_unit_num_S==2'b11//////////
+   generate // each branch concatenates a Quotient_DP slice (MSBs) with zero fill (LSBs); every concatenation is C_MANT_FP64+5 bits wide
+     if(Iteration_unit_num_S==2'b11)
+       begin
+        always_comb
+          begin
+            case (Format_sel_S) // outer case: format; inner cases: Precision_ctl_S
+              2'b00: // slices sized with C_MANT_FP32
+                begin
+                  case (Precision_ctl_S) // 6'h00 and default use the same slice; listed values come in fours (last group has two), each group keeping 4 fewer quotient bits
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17,6'h16,6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12,6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b01: // slices sized with C_MANT_FP64
+                begin
+                  case (Precision_ctl_S) // 6'h00, 6'h34 and default all use the same expression; the other values are grouped in fours (last group has two)
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3
+                      end
+                    6'h34:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}} }; //Precision_ctl_S+1 (same expression as the 6'h00 and default items)
+                      end
+                    6'h33,6'h32,6'h31,6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e,6'h2d,6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(9){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2b,6'h2a,6'h29,6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(13){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h27,6'h26,6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22,6'h21,6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(21){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1f,6'h1e,6'h1d,6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(25){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1b,6'h1a,6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16,6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(33){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12,6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(37){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(45){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(49){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16
+                begin
+                  case (Precision_ctl_S) // 6'b00 and default use the same slice
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5
+                      end
+                    6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1-4:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S) // 6'b00 and default use the same slice
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////four iteration units, end///////////////////////////////////////
+
+
+
+
+
+// Resultant exponent before normalization: sum of three addends, recomputed while Start_dly_S is high and held in a register otherwise.
+   logic   [C_EXP_FP64+1:0]    Exp_result_prenorm_DN,Exp_result_prenorm_DP; // next-state value / registered value
+   // The three addends of the exponent sum, each assigned once in this section.
+   logic   [C_EXP_FP64+1:0]                                Exp_add_a_D;
+   logic   [C_EXP_FP64+1:0]                                Exp_add_b_D;
+   logic   [C_EXP_FP64+1:0]                                Exp_add_c_D;
+  // Constants for the third addend, picked per format by Format_sel_S.
+  integer                                                 C_BIAS_AONE, C_HALF_BIAS;
+  always_comb
+    begin  // no default item: only the four listed 2-bit encodings assign the constants
+      case (Format_sel_S)
+        2'b00: // FP32 constants
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP32;
+            C_HALF_BIAS =C_HALF_BIAS_FP32;
+          end
+        2'b01: // FP64 constants
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP64;
+            C_HALF_BIAS =C_HALF_BIAS_FP64;
+          end
+        2'b10: // FP16 constants
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP16;
+            C_HALF_BIAS =C_HALF_BIAS_FP16;
+          end
+        2'b11: // FP16ALT constants
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP16ALT;
+            C_HALF_BIAS =C_HALF_BIAS_FP16ALT;
+          end
+        endcase
+    end
+
+//For division, exponent=(Exp_a_D-LZ1)-(Exp_b_D-LZ2)+BIAS
+//For square root, exponent=(Exp_a_D-LZ1)/2+(Exp_a_D-LZ1)%2+C_HALF_BIAS
+//For the exponent, (Exp_a_D-LZ1) and (Exp_b_D-LZ2) have already been processed in the preprocess module, including the corresponding handling of denormal numbers.
+// NOTE(review): the division path adds the bitwise inverse of Exp_den_DI; the +1 of a two's-complement subtraction is presumably folded into C_BIAS_AONE - confirm against the package constants.
+  assign Exp_add_a_D = {Sqrt_start_dly_S?{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64:1]}:{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI}}; // sqrt: Exp_num_DI without bit 0, top bit replicated (the /2 term); otherwise: Exp_num_DI with its top bit replicated
+  assign Exp_add_b_D = {Sqrt_start_dly_S?{1'b0,{C_EXP_ZERO_FP64},Exp_num_DI[0]}:{~Exp_den_DI[C_EXP_FP64],~Exp_den_DI[C_EXP_FP64],~Exp_den_DI}}; // sqrt: Exp_num_DI[0] in the LSB (the %2 term) above C_EXP_ZERO_FP64 (presumably all zeros - confirm); otherwise: inverted, extended Exp_den_DI
+  assign Exp_add_c_D = {Div_start_dly_S?{{C_BIAS_AONE}}:{{C_HALF_BIAS}}}; // division start selects C_BIAS_AONE, anything else C_HALF_BIAS
+  assign Exp_result_prenorm_DN  = (Start_dly_S)?{Exp_add_a_D + Exp_add_b_D + Exp_add_c_D}:Exp_result_prenorm_DP; // new sum while Start_dly_S is high, else hold the registered value
+
+  // Result register: cleared to zero by the asynchronous active-low Rst_RBI, otherwise loads Exp_result_prenorm_DN on every rising Clk_CI edge.
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+   begin
+      if(~Rst_RBI)
+        begin
+          Exp_result_prenorm_DP <= '0;
+        end
+      else
+        begin
+          Exp_result_prenorm_DP<=  Exp_result_prenorm_DN;
+        end
+   end
+
+  assign Exp_result_prenorm_DO = Exp_result_prenorm_DP;
+
+endmodule
diff --git a/verilog/rtl/data_mem_top.sv b/verilog/rtl/data_mem_top.sv
new file mode 100644
index 0000000..19fb93e
--- /dev/null
+++ b/verilog/rtl/data_mem_top.sv
@@ -0,0 +1,67 @@
+// TL-UL device front-end for a 32-bit data SRAM: wraps tlul_sram_adapter and
+// turns its request, write-enable and per-bit mask into csb / we_o / wmask_o.
+module data_mem_top (
+  input clk_i,
+  input rst_ni,                    // sampled synchronously by the rvalid register
+  // TL-UL interface
+  input tlul_pkg::tl_h2d_t tl_d_i,
+  output tlul_pkg::tl_d2h_t tl_d_o,
+
+  // SRAM interface
+  output  logic        csb,        // complement of the adapter request
+  output  logic [11:0] addr_o,
+  output  logic [31:0] wdata_o,
+  output  logic [3:0]  wmask_o,    // one enable bit per byte lane
+  output  logic        we_o,       // complement of the adapter write enable
+  input   logic [31:0] rdata_i
+);
+
+  logic        tl_req;    // adapter request (req_o)
+  logic [31:0] tl_wmask;  // adapter per-bit write mask (wmask_o)
+  logic        we_i;      // adapter write enable (we_o)
+  logic        rvalid_o;  // read-valid returned to the adapter (rvalid_i)
+
+  // A byte lane is enabled when any bit of its 8-bit mask slice is set.
+  // All four lanes are 1-bit expressions (lanes 2 and 3 used to mix in 2'b0).
+  assign wmask_o[0] = |tl_wmask[7:0];
+  assign wmask_o[1] = |tl_wmask[15:8];
+  assign wmask_o[2] = |tl_wmask[23:16];
+  assign wmask_o[3] = |tl_wmask[31:24];
+
+  // The SRAM pins carry the complement of the adapter strobes.
+  assign we_o    = ~we_i;
+  assign csb     = ~tl_req;
+
+  tlul_sram_adapter #(
+    .SramAw       (12),
+    .SramDw       (32),
+    .Outstanding  (4),
+    .ByteAccess   (1),
+    .ErrOnWrite   (0),  // 1: Writes not allowed, automatically error
+    .ErrOnRead    (0)
+  ) data_mem (
+    .clk_i    (clk_i),
+    .rst_ni   (rst_ni),
+    .tl_i     (tl_d_i),
+    .tl_o     (tl_d_o),
+    .req_o    (tl_req),
+    .gnt_i    (1'b1),                   // every request is granted immediately
+    .we_o     (we_i),
+    .addr_o   (addr_o),
+    .wdata_o  (wdata_o),
+    .wmask_o  (tl_wmask),
+    .rdata_i  (rst_ni? rdata_i: '0),    // read data is forced to zero while rst_ni is low
+    .rvalid_i (rvalid_o),
+    .rerror_i (2'b0)                    // no read errors are ever reported
+  );
+
+  // Read-valid is the request delayed by one clock edge; it is held low while
+  // rst_ni is low and for write requests (we_i set).
+  always_ff @(posedge clk_i) begin
+    if (!rst_ni || we_i) begin
+      rvalid_o <= 1'b0;
+    end else begin
+      rvalid_o <= tl_req;
+    end
+  end
+endmodule
diff --git a/verilog/rtl/debug_rom.sv b/verilog/rtl/debug_rom.sv
new file mode 100644
index 0000000..528c62c
--- /dev/null
+++ b/verilog/rtl/debug_rom.sv
@@ -0,0 +1,65 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: debug_rom.sv
+ *
+ * Description: Auto-generated bootrom
+ */
+
+// Auto-generated code: 19 x 64-bit debug ROM with a registered word index and a combinational data output.
+module debug_rom (
+  input  logic         clk_i,
+  input  logic         req_i,    // the word index is captured on a rising clk_i edge only while this is high
+  input  logic [63:0]  addr_i,   // only bits [$clog2(RomSize)+2:3] are used; the low three bits are dropped
+  output logic [63:0]  rdata_o   // mem word at the captured index, or all zeros when the index is >= RomSize
+);
+
+  localparam int unsigned RomSize = 19;  // number of 64-bit words in the image
+  logic [RomSize-1:0][63:0] mem;
+  assign mem = {  // packed array: the first word listed is mem[RomSize-1], the last one is mem[0]
+    64'h00000000_7b200073,
+    64'h7b202473_7b302573,
+    64'h10852423_f1402473,
+    64'ha85ff06f_7b202473,
+    64'h7b302573_10052223,
+    64'h00100073_7b202473,
+    64'h7b302573_10052623,
+    64'h00c51513_00c55513,
+    64'h00000517_fd5ff06f,
+    64'hfa041ce3_00247413,
+    64'h40044403_00a40433,
+    64'hf1402473_02041c63,
+    64'h00147413_40044403,
+    64'h00a40433_10852023,
+    64'hf1402473_00c51513,
+    64'h00c55513_00000517,
+    64'h7b351073_7b241073,
+    64'h0ff0000f_04c0006f,
+    64'h07c0006f_00c0006f
+  };
+
+  logic [$clog2(RomSize)-1:0] addr_q;  // captured word index; it has no reset and holds while req_i is low
+
+  always_ff @(posedge clk_i) begin
+    if (req_i) begin
+      addr_q <= addr_i[$clog2(RomSize)-1+3:3];
+    end
+  end
+
+  // This prevents spurious Xes from propagating into
+  // the speculative fetch stage of the core.
+  always_comb begin : p_outmux
+    rdata_o = '0;  // default: returned whenever the range check below is not satisfied
+    if (addr_q < $clog2(RomSize)'(RomSize)) begin
+        rdata_o = mem[addr_q];
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/debug_rom_one_scratch.sv b/verilog/rtl/debug_rom_one_scratch.sv
new file mode 100644
index 0000000..3e88406
--- /dev/null
+++ b/verilog/rtl/debug_rom_one_scratch.sv
@@ -0,0 +1,60 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: debug_rom_one_scratch.sv
+ *
+ * Description: Auto-generated bootrom
+ */
+
+// Auto-generated code: 13 x 64-bit debug ROM with a registered word index and a combinational data output.
+module debug_rom_one_scratch (
+  input  logic         clk_i,
+  input  logic         req_i,    // the word index is captured on a rising clk_i edge only while this is high
+  input  logic [63:0]  addr_i,   // only bits [$clog2(RomSize)+2:3] are used; the low three bits are dropped
+  output logic [63:0]  rdata_o   // mem word at the captured index, or all zeros when the index is >= RomSize
+);
+
+  localparam int unsigned RomSize = 13;  // number of 64-bit words in the image
+  logic [RomSize-1:0][63:0] mem;
+
+  assign mem = {  // packed array: the first word listed is mem[RomSize-1], the last one is mem[0]
+    64'h00000000_7b200073,
+    64'h7b202473_10802423,
+    64'hf1402473_ab1ff06f,
+    64'h7b202473_10002223,
+    64'h00100073_7b202473,
+    64'h10002623_fddff06f,
+    64'hfc0418e3_00247413,
+    64'h40044403_f1402473,
+    64'h02041263_00147413,
+    64'h40044403_10802023,
+    64'hf1402473_7b241073,
+    64'h0ff0000f_0340006f,
+    64'h0500006f_00c0006f
+  };
+
+  logic [$clog2(RomSize)-1:0] addr_q;  // captured word index; it has no reset and holds while req_i is low
+
+  always_ff @(posedge clk_i) begin
+    if (req_i) begin
+      addr_q <= addr_i[$clog2(RomSize)-1+3:3];
+    end
+  end
+
+  // This prevents spurious Xes from propagating into
+  // the speculative fetch stage of the core.
+  always_comb begin : p_outmux
+    rdata_o = '0;  // default: returned whenever the range check below is not satisfied
+    if (addr_q < $clog2(RomSize)'(RomSize)) begin
+        rdata_o = mem[addr_q];
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/defs_div_sqrt_mvp.sv b/verilog/rtl/defs_div_sqrt_mvp.sv
new file mode 100644
index 0000000..b3f41fe
--- /dev/null
+++ b/verilog/rtl/defs_div_sqrt_mvp.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// This file contains all div_sqrt_top_mvp parameters
+// Authors    : Lei Li  (lile@iis.ee.ethz.ch)
+
+package defs_div_sqrt_mvp;
+   // Field widths, encodings and bit patterns shared by the div/sqrt modules.
+   // op command
+   localparam C_RM                  = 3;    // width of the rounding-mode field (RM_SI)
+   localparam C_RM_NEAREST          = 3'h0;
+   localparam C_RM_TRUNC            = 3'h1;
+   localparam C_RM_PLUSINF          = 3'h2; // NOTE(review): RISC-V frm uses 3'b010 for RDN (toward -inf); confirm RM_SI is remapped
+   localparam C_RM_MINUSINF         = 3'h3; // NOTE(review): RISC-V frm uses 3'b011 for RUP (toward +inf); confirm RM_SI is remapped
+   localparam C_PC                  = 6; // Precision Control (width of Precision_ctl_SI)
+   localparam C_FS                  = 2; // Format Selection (width of Format_sel_SI)
+   localparam C_IUNC                = 2; // Iteration Unit Number Control
+   localparam Iteration_unit_num_S  = 2'b10; // NOTE(review): presumably the selected number of iteration units; its use is outside this package
+
+   // FP64
+   localparam C_OP_FP64             = 64;       // operand width in bits
+   localparam C_MANT_FP64           = 52;       // mantissa (fraction) bits
+   localparam C_EXP_FP64            = 11;       // exponent bits
+   localparam C_BIAS_FP64           = 1023;     // exponent bias
+   localparam C_BIAS_AONE_FP64      = 11'h400;  // bias plus one (1024)
+   localparam C_HALF_BIAS_FP64      = 511;      // bias divided by two, rounded down
+   localparam C_EXP_ZERO_FP64       = 11'h000;  // all-zeros exponent field
+   localparam C_EXP_ONE_FP64        = 13'h001; // Bit width is in agreement with in norm
+   localparam C_EXP_INF_FP64        = 11'h7FF;  // all-ones exponent field
+   localparam C_MANT_ZERO_FP64      = 52'h0;
+   localparam C_MANT_NAN_FP64       = 52'h8_0000_0000_0000;        // mantissa with only its top bit set
+   localparam C_PZERO_FP64          = 64'h0000_0000_0000_0000;     // +0
+   localparam C_MZERO_FP64          = 64'h8000_0000_0000_0000;     // -0 (only the sign bit set)
+   localparam C_QNAN_FP64           = 64'h7FF8_0000_0000_0000;     // all-ones exponent, top mantissa bit set
+
+   // FP32
+   localparam C_OP_FP32             = 32;       // operand width in bits
+   localparam C_MANT_FP32           = 23;       // mantissa (fraction) bits
+   localparam C_EXP_FP32            = 8;        // exponent bits
+   localparam C_BIAS_FP32           = 127;      // exponent bias
+   localparam C_BIAS_AONE_FP32      = 8'h80;    // bias plus one (128)
+   localparam C_HALF_BIAS_FP32      = 63;       // bias divided by two, rounded down
+   localparam C_EXP_ZERO_FP32       = 8'h00;    // all-zeros exponent field
+   localparam C_EXP_INF_FP32        = 8'hFF;    // all-ones exponent field
+   localparam C_MANT_ZERO_FP32      = 23'h0;
+   localparam C_PZERO_FP32          = 32'h0000_0000;  // +0
+   localparam C_MZERO_FP32          = 32'h8000_0000;  // -0 (only the sign bit set)
+   localparam C_QNAN_FP32           = 32'h7FC0_0000;  // all-ones exponent, top mantissa bit set
+
+   // FP16
+   localparam C_OP_FP16             = 16;       // operand width in bits
+   localparam C_MANT_FP16           = 10;       // mantissa (fraction) bits
+   localparam C_EXP_FP16            = 5;        // exponent bits
+   localparam C_BIAS_FP16           = 15;       // exponent bias
+   localparam C_BIAS_AONE_FP16      = 5'h10;    // bias plus one (16)
+   localparam C_HALF_BIAS_FP16      = 7;        // bias divided by two, rounded down
+   localparam C_EXP_ZERO_FP16       = 5'h00;    // all-zeros exponent field
+   localparam C_EXP_INF_FP16        = 5'h1F;    // all-ones exponent field
+   localparam C_MANT_ZERO_FP16      = 10'h0;
+   localparam C_PZERO_FP16          = 16'h0000; // +0
+   localparam C_MZERO_FP16          = 16'h8000; // -0 (only the sign bit set)
+   localparam C_QNAN_FP16           = 16'h7E00; // all-ones exponent, top mantissa bit set
+
+   // FP16alt (same exponent width and bias as FP32, 7-bit mantissa; no PZERO/MZERO constants are defined)
+   localparam C_OP_FP16ALT           = 16;      // operand width in bits
+   localparam C_MANT_FP16ALT         = 7;       // mantissa (fraction) bits
+   localparam C_EXP_FP16ALT          = 8;       // exponent bits
+   localparam C_BIAS_FP16ALT         = 127;     // exponent bias
+   localparam C_BIAS_AONE_FP16ALT    = 8'h80;   // bias plus one (128)
+   localparam C_HALF_BIAS_FP16ALT    = 63;      // bias divided by two, rounded down
+   localparam C_EXP_ZERO_FP16ALT     = 8'h00;   // all-zeros exponent field
+   localparam C_EXP_INF_FP16ALT      = 8'hFF;   // all-ones exponent field
+   localparam C_MANT_ZERO_FP16ALT    = 7'h0;
+   localparam C_QNAN_FP16ALT         = 16'h7FC0; // all-ones exponent, top mantissa bit set
+
+endpackage : defs_div_sqrt_mvp
diff --git a/verilog/rtl/div_sqrt_top_mvp.sv b/verilog/rtl/div_sqrt_top_mvp.sv
new file mode 100644
index 0000000..3af6081
--- /dev/null
+++ b/verilog/rtl/div_sqrt_top_mvp.sv
@@ -0,0 +1,180 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li -- lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    03/03/2018                                                 //
+// Design Name:    div_sqrt_top_mvp                                           //
+// Module Name:    div_sqrt_top_mvp.sv                                        //
+// Project Name:   The shared divisor and square root                         //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    The top of div and sqrt                                    //
+//                                                                            //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+// Top level of the shared floating-point divider / square-root unit.
+// The module holds no logic of its own: it instantiates preprocess_mvp,
+// nrbd_nrsc_mvp and norm_div_sqrt_mvp and wires them together. Sub-module port
+// directions are not visible here; comments go by the _SI/_DI and _SO/_DO suffixes.
+module div_sqrt_top_mvp (
+  // Clock, reset and start strobes
+  input  logic                 Clk_CI,
+  input  logic                 Rst_RBI,
+  input  logic                 Div_start_SI,
+  input  logic                 Sqrt_start_SI,
+
+  // Operands
+  input  logic [C_OP_FP64-1:0] Operand_a_DI,
+  input  logic [C_OP_FP64-1:0] Operand_b_DI,
+
+  // Control
+  input  logic [C_RM-1:0]      RM_SI,             // Rounding Mode
+  input  logic [C_PC-1:0]      Precision_ctl_SI,  // Precision Control
+  input  logic [C_FS-1:0]      Format_sel_SI,     // Format Selection
+  input  logic                 Kill_SI,
+
+  // Result
+  output logic [C_OP_FP64-1:0] Result_DO,
+
+  // Flags and handshake
+  output logic [4:0]           Fflags_SO,         // {NV,DZ,OF,UF,NX}
+  output logic                 Ready_SO,
+  output logic                 Done_SO
+);
+
+  // Operand fields: preprocess_U0 -> nrbd_nrsc_U0
+  logic [C_EXP_FP64:0]    Exp_a_D, Exp_b_D;
+  logic [C_MANT_FP64:0]   Mant_a_D, Mant_b_D;
+
+  // Result fields: nrbd_nrsc_U0 -> fpu_norm_U0
+  logic [C_EXP_FP64+1:0]  Exp_z_D;
+  logic [C_MANT_FP64+4:0] Mant_z_D;
+
+  // Result sign, start strobe and forwarded rounding mode (preprocess_U0)
+  logic                   Sign_z_D;
+  logic                   Start_S;
+  logic [C_RM-1:0]        RM_dly_S;
+
+  // Operand classification and special-case flags (preprocess_U0)
+  logic                   Inf_a_S, Inf_b_S;
+  logic                   Zero_a_S, Zero_b_S;
+  logic                   NaN_a_S, NaN_b_S;
+  logic                   SNaN_S;
+  logic                   Special_case_SB, Special_case_dly_SB;
+
+  // Operation and format indications (nrbd_nrsc_U0 -> fpu_norm_U0)
+  logic                   Div_enable_S, Sqrt_enable_S;
+  logic                   Full_precision_S;
+  logic                   FP32_S, FP64_S, FP16_S, FP16ALT_S;
+
+  // Operand preparation: receives the raw operands and the unit's own Ready_SO.
+  preprocess_mvp preprocess_U0 (
+    // clock, reset, handshake
+    .Clk_CI               (Clk_CI),
+    .Rst_RBI              (Rst_RBI),
+    .Div_start_SI         (Div_start_SI),
+    .Sqrt_start_SI        (Sqrt_start_SI),
+    .Ready_SI             (Ready_SO),
+    .Start_SO             (Start_S),
+    // operands and control
+    .Operand_a_DI         (Operand_a_DI),
+    .Operand_b_DI         (Operand_b_DI),
+    .Format_sel_SI        (Format_sel_SI),
+    .RM_SI                (RM_SI),
+    .RM_dly_SO            (RM_dly_S),
+    // operand fields, sign and flags
+    .Exp_a_DO_norm        (Exp_a_D),
+    .Exp_b_DO_norm        (Exp_b_D),
+    .Mant_a_DO_norm       (Mant_a_D),
+    .Mant_b_DO_norm       (Mant_b_D),
+    .Sign_z_DO            (Sign_z_D),
+    .Inf_a_SO             (Inf_a_S),
+    .Inf_b_SO             (Inf_b_S),
+    .Zero_a_SO            (Zero_a_S),
+    .Zero_b_SO            (Zero_b_S),
+    .NaN_a_SO             (NaN_a_S),
+    .NaN_b_SO             (NaN_b_S),
+    .SNaN_SO              (SNaN_S),
+    .Special_case_SBO     (Special_case_SB),
+    .Special_case_dly_SBO (Special_case_dly_SB)
+  );
+
+  // Core unit: drives Ready_SO / Done_SO and the un-normalised result fields.
+  nrbd_nrsc_mvp nrbd_nrsc_U0 (
+    // clock, reset, control
+    .Clk_CI               (Clk_CI),
+    .Rst_RBI              (Rst_RBI),
+    .Div_start_SI         (Div_start_SI),
+    .Sqrt_start_SI        (Sqrt_start_SI),
+    .Start_SI             (Start_S),
+    .Kill_SI              (Kill_SI),
+    .Precision_ctl_SI     (Precision_ctl_SI),
+    .Format_sel_SI        (Format_sel_SI),
+    .Special_case_SBI     (Special_case_SB),
+    .Special_case_dly_SBI (Special_case_dly_SB),
+    // operand fields in, result fields and status out
+    .Exp_a_DI             (Exp_a_D),
+    .Exp_b_DI             (Exp_b_D),
+    .Mant_a_DI            (Mant_a_D),
+    .Mant_b_DI            (Mant_b_D),
+    .Exp_z_DO             (Exp_z_D),
+    .Mant_z_DO            (Mant_z_D),
+    .Div_enable_SO        (Div_enable_S),
+    .Sqrt_enable_SO       (Sqrt_enable_S),
+    .Full_precision_SO    (Full_precision_S),
+    .FP32_SO              (FP32_S),
+    .FP64_SO              (FP64_S),
+    .FP16_SO              (FP16_S),
+    .FP16ALT_SO           (FP16ALT_S),
+    .Ready_SO             (Ready_SO),
+    .Done_SO              (Done_SO)
+  );
+
+  // Output stage: drives Result_DO and Fflags_SO of this module.
+  norm_div_sqrt_mvp fpu_norm_U0 (
+    // un-normalised result
+    .Mant_in_DI           (Mant_z_D),
+    .Exp_in_DI            (Exp_z_D),
+    .Sign_in_DI           (Sign_z_D),
+    // operation, rounding mode, operand flags and format
+    .Div_enable_SI        (Div_enable_S),
+    .Sqrt_enable_SI       (Sqrt_enable_S),
+    .RM_SI                (RM_dly_S),
+    .Inf_a_SI             (Inf_a_S),
+    .Inf_b_SI             (Inf_b_S),
+    .Zero_a_SI            (Zero_a_S),
+    .Zero_b_SI            (Zero_b_S),
+    .NaN_a_SI             (NaN_a_S),
+    .NaN_b_SI             (NaN_b_S),
+    .SNaN_SI              (SNaN_S),
+    .Full_precision_SI    (Full_precision_S),
+    .FP32_SI              (FP32_S),
+    .FP64_SI              (FP64_S),
+    .FP16_SI              (FP16_S),
+    .FP16ALT_SI           (FP16ALT_S),
+    .Result_DO            (Result_DO),
+    .Fflags_SO            (Fflags_SO)
+  );
+
+endmodule
diff --git a/verilog/rtl/dm_csrs.sv b/verilog/rtl/dm_csrs.sv
new file mode 100644
index 0000000..e74d320
--- /dev/null
+++ b/verilog/rtl/dm_csrs.sv
@@ -0,0 +1,630 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:  dm_csrs.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   30.6.2018
+ *
+ * Description: Debug CSRs. Communication over Debug Transport Module (DTM)
+ */
+
+module dm_csrs #(
+  parameter int unsigned        NrHarts          = 1,
+  parameter int unsigned        BusWidth         = 32,
+  parameter logic [NrHarts-1:0] SelectableHarts  = {NrHarts{1'b1}}
+) (
+  input  logic                              clk_i,           // Clock
+  input  logic                              rst_ni,          // Asynchronous reset active low
+  input  logic                              testmode_i,
+  input  logic                              dmi_rst_ni,      // Debug Module Intf reset active-low
+  input  logic                              dmi_req_valid_i,
+  output logic                              dmi_req_ready_o,
+  input  dm::dmi_req_t                      dmi_req_i,
+  // every request needs a response one cycle later
+  output logic                              dmi_resp_valid_o,
+  input  logic                              dmi_resp_ready_i,
+  output dm::dmi_resp_t                     dmi_resp_o,
+  // global ctrl
+  output logic                              ndmreset_o,      // non-debug module reset active-high
+  output logic                              dmactive_o,      // 1 -> debug-module is active,
+                                                             // 0 -> synchronous re-set
+  // hart status
+  input  dm::hartinfo_t [NrHarts-1:0]       hartinfo_i,      // static hartinfo
+  input  logic [NrHarts-1:0]                halted_i,        // hart is halted
+  input  logic [NrHarts-1:0]                unavailable_i,   // e.g.: powered down
+  input  logic [NrHarts-1:0]                resumeack_i,     // hart acknowledged resume request
+  // hart control
+  output logic [19:0]                       hartsel_o,       // hartselect to ctrl module
+  output logic [NrHarts-1:0]                haltreq_o,       // request to halt a hart
+  output logic [NrHarts-1:0]                resumereq_o,     // request hart to resume
+  output logic                              clear_resumeack_o,
+
+  output logic                              cmd_valid_o,       // debugger writing to cmd field
+  output dm::command_t                      cmd_o,             // abstract command
+  input  logic                              cmderror_valid_i,  // an error occurred
+  input  dm::cmderr_e                       cmderror_i,        // this error occurred
+  input  logic                              cmdbusy_i,         // cmd is currently busy executing
+
+  output logic [dm::ProgBufSize-1:0][31:0]  progbuf_o, // to system bus
+  output logic [dm::DataCount-1:0][31:0]    data_o,
+
+  input  logic [dm::DataCount-1:0][31:0]    data_i,
+  input  logic                              data_valid_i,
+  // system bus access module (SBA)
+  output logic [BusWidth-1:0]               sbaddress_o,
+  input  logic [BusWidth-1:0]               sbaddress_i,
+  output logic                              sbaddress_write_valid_o,
+  // control signals in
+  output logic                              sbreadonaddr_o,
+  output logic                              sbautoincrement_o,
+  output logic [2:0]                        sbaccess_o,
+  // data out
+  output logic                              sbreadondata_o,
+  output logic [BusWidth-1:0]               sbdata_o,
+  output logic                              sbdata_read_valid_o,
+  output logic                              sbdata_write_valid_o,
+  // read data in
+  input  logic [BusWidth-1:0]               sbdata_i,
+  input  logic                              sbdata_valid_i,
+  // control signals
+  input  logic                              sbbusy_i,
+  input  logic                              sberror_valid_i, // bus error occurred
+  input  logic [2:0]                        sberror_i // bus error occurred
+);
+
+  // the amount of bits we need to represent all harts
+  localparam int unsigned HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts);
+  localparam int unsigned NrHartsAligned = 2**HartSelLen;
+
+  dm::dtm_op_e dtm_op;
+  assign dtm_op = dm::dtm_op_e'(dmi_req_i.op);
+
+  logic [31:0] resp_queue_data;
+
+  localparam dm::dm_csr_e DataEnd = dm::dm_csr_e'(dm::Data0 + {4'b0, dm::DataCount} - 8'h1);
+  localparam dm::dm_csr_e ProgBufEnd = dm::dm_csr_e'(dm::ProgBuf0 + {4'b0, dm::ProgBufSize} - 8'h1);
+
+  logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3;
+  logic [((NrHarts-1)/2**5 + 1) * 32 - 1 : 0] halted;
+  logic [(NrHarts-1)/2**5:0][31:0] halted_reshaped0;
+  logic [(NrHarts-1)/2**10:0][31:0] halted_reshaped1;
+  logic [(NrHarts-1)/2**15:0][31:0] halted_reshaped2;
+  logic [((NrHarts-1)/2**10+1)*32-1:0] halted_flat1;
+  logic [((NrHarts-1)/2**15+1)*32-1:0] halted_flat2;
+  logic [31:0] halted_flat3;
+
+  // haltsum0
+  logic [14:0] hartsel_idx0;
+  always_comb begin : p_haltsum0
+    halted              = '0;
+    haltsum0            = '0;
+    hartsel_idx0        = hartsel_o[19:5];
+    halted[NrHarts-1:0] = halted_i;
+    halted_reshaped0    = halted;
+    if (hartsel_idx0 < 15'((NrHarts-1)/2**5+1)) begin
+      haltsum0 = halted_reshaped0[hartsel_idx0];
+    end
+  end
+
+  // haltsum1
+  logic [9:0] hartsel_idx1;
+  always_comb begin : p_reduction1
+    halted_flat1 = '0;
+    haltsum1     = '0;
+    hartsel_idx1 = hartsel_o[19:10];
+
+    for (int unsigned k = 0; k < (NrHarts-1)/2**5+1; k++) begin
+      halted_flat1[k] = |halted_reshaped0[k];
+    end
+    halted_reshaped1 = halted_flat1;
+
+    if (hartsel_idx1 < 10'(((NrHarts-1)/2**10+1))) begin
+      haltsum1 = halted_reshaped1[hartsel_idx1];
+    end
+  end
+
+  // haltsum2
+  logic [4:0] hartsel_idx2;
+  always_comb begin : p_reduction2
+    halted_flat2 = '0;
+    haltsum2     = '0;
+    hartsel_idx2 = hartsel_o[19:15];
+
+    for (int unsigned k = 0; k < (NrHarts-1)/2**10+1; k++) begin
+      halted_flat2[k] = |halted_reshaped1[k];
+    end
+    halted_reshaped2 = halted_flat2;
+
+    if (hartsel_idx2 < 5'(((NrHarts-1)/2**15+1))) begin
+      haltsum2         = halted_reshaped2[hartsel_idx2];
+    end
+  end
+
+  // haltsum3
+  always_comb begin : p_reduction3
+    halted_flat3 = '0;
+    for (int unsigned k = 0; k < NrHarts/2**15+1; k++) begin
+      halted_flat3[k] = |halted_reshaped2[k];
+    end
+    haltsum3 = halted_flat3;
+  end
+
+
+  dm::dmstatus_t      dmstatus;
+  dm::dmcontrol_t     dmcontrol_d, dmcontrol_q;
+  dm::abstractcs_t    abstractcs;
+  dm::cmderr_e        cmderr_d, cmderr_q;
+  dm::command_t       command_d, command_q;
+  logic               cmd_valid_d, cmd_valid_q;
+  dm::abstractauto_t  abstractauto_d, abstractauto_q;
+  dm::sbcs_t          sbcs_d, sbcs_q;
+  logic [63:0]        sbaddr_d, sbaddr_q;
+  logic [63:0]        sbdata_d, sbdata_q;
+
+  logic [NrHarts-1:0] havereset_d, havereset_q;
+  // program buffer
+  logic [dm::ProgBufSize-1:0][31:0] progbuf_d, progbuf_q;
+  logic [dm::DataCount-1:0][31:0] data_d, data_q;
+
+  logic [HartSelLen-1:0] selected_hart;
+
+  // a successful response returns zero
+  assign dmi_resp_o.resp = dm::DTM_SUCCESS;
+  // SBA
+  assign sbautoincrement_o = sbcs_q.sbautoincrement;
+  assign sbreadonaddr_o    = sbcs_q.sbreadonaddr;
+  assign sbreadondata_o    = sbcs_q.sbreadondata;
+  assign sbaccess_o        = sbcs_q.sbaccess;
+  assign sbdata_o          = sbdata_q[BusWidth-1:0];
+  assign sbaddress_o       = sbaddr_q[BusWidth-1:0];
+
+  assign hartsel_o         = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello};
+
+  // needed to avoid lint warnings
+  logic [NrHartsAligned-1:0] havereset_d_aligned, havereset_q_aligned,
+                             resumeack_aligned, unavailable_aligned,
+                             halted_aligned;
+  assign resumeack_aligned   = NrHartsAligned'(resumeack_i);
+  assign unavailable_aligned = NrHartsAligned'(unavailable_i);
+  assign halted_aligned      = NrHartsAligned'(halted_i);
+
+  assign havereset_d         = NrHarts'(havereset_d_aligned);
+  assign havereset_q_aligned = NrHartsAligned'(havereset_q);
+
+  dm::hartinfo_t [NrHartsAligned-1:0] hartinfo_aligned;
+  always_comb begin : p_hartinfo_align
+    hartinfo_aligned = '0;
+    hartinfo_aligned[NrHarts-1:0] = hartinfo_i;
+  end
+
+  // helper variables
+  dm::sbcs_t sbcs;
+  dm::dmcontrol_t dmcontrol;
+  dm::abstractcs_t a_abstractcs;
+  logic [4:0] autoexecdata_idx;
+  always_comb begin : csr_read_write
+    // --------------------
+    // Static Values (R/O)
+    // --------------------
+    // dmstatus
+    dmstatus    = '0;
+    dmstatus.version = dm::DbgVersion013;
+    // no authentication implemented
+    dmstatus.authenticated = 1'b1;
+    // we do not support halt-on-reset sequence
+    dmstatus.hasresethaltreq = 1'b0;
+    // TODO(zarubaf) things need to change here if we implement the array mask
+    dmstatus.allhavereset = havereset_q_aligned[selected_hart];
+    dmstatus.anyhavereset = havereset_q_aligned[selected_hart];
+
+    dmstatus.allresumeack = resumeack_aligned[selected_hart];
+    dmstatus.anyresumeack = resumeack_aligned[selected_hart];
+
+    dmstatus.allunavail   = unavailable_aligned[selected_hart];
+    dmstatus.anyunavail   = unavailable_aligned[selected_hart];
+
+    // as soon as we are out of the legal Hart region tell the debugger
+    // that there are only non-existent harts
+    dmstatus.allnonexistent = logic'(32'(hartsel_o) > (NrHarts - 1));
+    dmstatus.anynonexistent = logic'(32'(hartsel_o) > (NrHarts - 1));
+
+    // We are not allowed to be in multiple states at once. This is a to
+    // make the running/halted and unavailable states exclusive.
+    dmstatus.allhalted    = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+    dmstatus.anyhalted    = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+
+    dmstatus.allrunning   = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+    dmstatus.anyrunning   = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+
+    // abstractcs
+    abstractcs = '0;
+    abstractcs.datacount = dm::DataCount;
+    abstractcs.progbufsize = dm::ProgBufSize;
+    abstractcs.busy = cmdbusy_i;
+    abstractcs.cmderr = cmderr_q;
+
+    // abstractautoexec
+    abstractauto_d = abstractauto_q;
+    abstractauto_d.zero0 = '0;
+
+    // default assignments
+    havereset_d_aligned = NrHartsAligned'(havereset_q);
+    dmcontrol_d         = dmcontrol_q;
+    cmderr_d            = cmderr_q;
+    command_d           = command_q;
+    progbuf_d           = progbuf_q;
+    data_d              = data_q;
+    sbcs_d              = sbcs_q;
+    sbaddr_d            = 64'(sbaddress_i);
+    sbdata_d            = sbdata_q;
+
+    resp_queue_data         = 32'b0;
+    cmd_valid_d             = 1'b0;
+    sbaddress_write_valid_o = 1'b0;
+    sbdata_read_valid_o     = 1'b0;
+    sbdata_write_valid_o    = 1'b0;
+    clear_resumeack_o       = 1'b0;
+
+    // helper variables
+    sbcs         = '0;
+    dmcontrol    = '0;
+    a_abstractcs = '0;
+
+    autoexecdata_idx    = dmi_req_i.addr[4:0] - 5'(dm::Data0);
+
+    // localparam int unsigned DataCountAlign = $clog2(dm::DataCount);
+    // reads
+    if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin
+      unique case ({1'b0, dmi_req_i.addr}) inside
+        [(dm::Data0):DataEnd]: begin
+          // logic [$clog2(dm::DataCount)-1:0] resp_queue_idx;
+          // resp_queue_idx = dmi_req_i.addr[4:0] - int'(dm::Data0);
+          resp_queue_data = data_q[$clog2(dm::DataCount)'(autoexecdata_idx)];
+          if (!cmdbusy_i) begin
+            // check whether we need to re-execute the command (just give a cmd_valid)
+            if (autoexecdata_idx < $bits(abstractauto_q.autoexecdata)) begin
+              cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx];
+            end
+          end
+        end
+        dm::DMControl:    resp_queue_data = dmcontrol_q;
+        dm::DMStatus:     resp_queue_data = dmstatus;
+        dm::Hartinfo:     resp_queue_data = hartinfo_aligned[selected_hart];
+        dm::AbstractCS:   resp_queue_data = abstractcs;
+        dm::AbstractAuto: resp_queue_data = abstractauto_q;
+        // command is read-only
+        dm::Command:    resp_queue_data = '0;
+        [(dm::ProgBuf0):ProgBufEnd]: begin
+          resp_queue_data = progbuf_q[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]];
+          if (!cmdbusy_i) begin
+            // check whether we need to re-execute the command (just give a cmd_valid)
+            // range of autoexecprogbuf is 31:16
+            cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}];
+          end
+        end
+        dm::HaltSum0: resp_queue_data = haltsum0;
+        dm::HaltSum1: resp_queue_data = haltsum1;
+        dm::HaltSum2: resp_queue_data = haltsum2;
+        dm::HaltSum3: resp_queue_data = haltsum3;
+        dm::SBCS: begin
+          resp_queue_data = sbcs_q;
+        end
+        dm::SBAddress0: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            resp_queue_data = sbaddr_q[31:0];
+          end
+        end
+        dm::SBAddress1: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            resp_queue_data = sbaddr_q[63:32];
+          end
+        end
+        dm::SBData0: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbdata_read_valid_o = (sbcs_q.sberror == '0);
+            resp_queue_data = sbdata_q[31:0];
+          end
+        end
+        dm::SBData1: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            resp_queue_data = sbdata_q[63:32];
+          end
+        end
+        default:;
+      endcase
+    end
+
+    // write
+    if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_WRITE) begin
+      unique case (dm::dm_csr_e'({1'b0, dmi_req_i.addr})) inside
+        [(dm::Data0):DataEnd]: begin
+          // attempts to write them while busy is set does not change their value
+          if (!cmdbusy_i && dm::DataCount > 0) begin
+            data_d[dmi_req_i.addr[$clog2(dm::DataCount)-1:0]] = dmi_req_i.data;
+            // check whether we need to re-execute the command (just give a cmd_valid)
+            if (autoexecdata_idx < $bits(abstractauto_q.autoexecdata)) begin
+              cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx];
+            end
+          end
+        end
+        dm::DMControl: begin
+          dmcontrol = dm::dmcontrol_t'(dmi_req_i.data);
+          // clear the havreset of the selected hart
+          if (dmcontrol.ackhavereset) begin
+            havereset_d_aligned[selected_hart] = 1'b0;
+          end
+          dmcontrol_d = dmi_req_i.data;
+        end
+        dm::DMStatus:; // write are ignored to R/O register
+        dm::Hartinfo:; // hartinfo is R/O
+        // only command error is write-able
+        dm::AbstractCS: begin // W1C
+          // Gets set if an abstract command fails. The bits in this
+          // field remain set until they are cleared by writing 1 to
+          // them. No abstract command is started until the value is
+          // reset to 0.
+          a_abstractcs = dm::abstractcs_t'(dmi_req_i.data);
+          // writes during abstract command execution are not allowed
+          if (!cmdbusy_i) begin
+            cmderr_d = dm::cmderr_e'(~a_abstractcs.cmderr & cmderr_q);
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        dm::Command: begin
+          // writes are ignored if a command is already busy
+          if (!cmdbusy_i) begin
+            cmd_valid_d = 1'b1;
+            command_d = dm::command_t'(dmi_req_i.data);
+          // if there was an attempted to write during a busy execution
+          // and the cmderror field is zero set the busy error
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        dm::AbstractAuto: begin
+          // this field can only be written legally when there is no command executing
+          if (!cmdbusy_i) begin
+            abstractauto_d                 = 32'b0;
+            abstractauto_d.autoexecdata    = 12'(dmi_req_i.data[dm::DataCount-1:0]);
+            abstractauto_d.autoexecprogbuf = 16'(dmi_req_i.data[dm::ProgBufSize-1+16:16]);
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        [(dm::ProgBuf0):ProgBufEnd]: begin
+          // attempts to write them while busy is set does not change their value
+          if (!cmdbusy_i) begin
+            progbuf_d[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]] = dmi_req_i.data;
+            // check whether we need to re-execute the command (just give a cmd_valid)
+            // this should probably throw an error if executed during another command
+            // was busy
+            // range of autoexecprogbuf is 31:16
+            cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}];
+          end
+        end
+        dm::SBCS: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbcs = dm::sbcs_t'(dmi_req_i.data);
+            sbcs_d = sbcs;
+            // R/W1C
+            sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror);
+            sbcs_d.sberror     = sbcs_q.sberror     & (~sbcs.sberror);
+          end
+        end
+        dm::SBAddress0: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbaddr_d[31:0] = dmi_req_i.data;
+            sbaddress_write_valid_o = (sbcs_q.sberror == '0);
+          end
+        end
+        dm::SBAddress1: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbaddr_d[63:32] = dmi_req_i.data;
+          end
+        end
+        dm::SBData0: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbdata_d[31:0] = dmi_req_i.data;
+            sbdata_write_valid_o = (sbcs_q.sberror == '0);
+          end
+        end
+        dm::SBData1: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbdata_d[63:32] = dmi_req_i.data;
+          end
+        end
+        default:;
+      endcase
+    end
+    // hart threw a command error and has precedence over bus writes
+    if (cmderror_valid_i) begin
+      cmderr_d = cmderror_i;
+    end
+
+    // update data registers
+    if (data_valid_i) begin
+      data_d = data_i;
+    end
+
+    // set the havereset flag when we did a ndmreset
+    if (ndmreset_o) begin
+      havereset_d_aligned[NrHarts-1:0] = '1;
+    end
+    // -------------
+    // System Bus
+    // -------------
+    // set bus error
+    if (sberror_valid_i) begin
+      sbcs_d.sberror = sberror_i;
+    end
+    // update read data
+    if (sbdata_valid_i) begin
+      sbdata_d = 64'(sbdata_i);
+    end
+
+    // dmcontrol
+    // TODO(zarubaf) we currently do not implement the hartarry mask
+    dmcontrol_d.hasel           = 1'b0;
+    // we do not support resetting an individual hart
+    dmcontrol_d.hartreset       = 1'b0;
+    dmcontrol_d.setresethaltreq = 1'b0;
+    dmcontrol_d.clrresethaltreq = 1'b0;
+    dmcontrol_d.zero1           = '0;
+    dmcontrol_d.zero0           = '0;
+    // Non-writeable, clear only
+    dmcontrol_d.ackhavereset    = 1'b0;
+    if (!dmcontrol_q.resumereq && dmcontrol_d.resumereq) begin
+      clear_resumeack_o = 1'b1;
+    end
+    if (dmcontrol_q.resumereq && resumeack_i) begin
+      dmcontrol_d.resumereq = 1'b0;
+    end
+    // static values for dcsr
+    sbcs_d.sbversion            = 3'd1;
+    sbcs_d.sbbusy               = sbbusy_i;
+    sbcs_d.sbasize              = $bits(sbcs_d.sbasize)'(BusWidth);
+    sbcs_d.sbaccess128          = 1'b0;
+    sbcs_d.sbaccess64           = logic'(BusWidth == 32'd64);
+    sbcs_d.sbaccess32           = logic'(BusWidth == 32'd32);
+    sbcs_d.sbaccess16           = 1'b0;
+    sbcs_d.sbaccess8            = 1'b0;
+    sbcs_d.sbaccess             = (BusWidth == 32'd64) ? 3'd3 : 3'd2;
+  end
+
+  // output multiplexer: forward dmcontrol's halt/resume request bits to the selected hart only
+  always_comb begin : p_outmux
+    selected_hart = hartsel_o[HartSelLen-1:0]; // keep only the implemented index bits
+    // default assignment: no hart receives a request
+    haltreq_o = '0;
+    resumereq_o = '0;
+    if (selected_hart < (HartSelLen+1)'(NrHarts)) begin // skip indices beyond NrHarts
+      haltreq_o[selected_hart]   = dmcontrol_q.haltreq;
+      resumereq_o[selected_hart] = dmcontrol_q.resumereq;
+    end
+  end
+
+  assign dmactive_o  = dmcontrol_q.dmactive; // registered CSR state is exported unmodified
+  assign cmd_o       = command_q;
+  assign cmd_valid_o = cmd_valid_q;
+  assign progbuf_o   = progbuf_q;
+  assign data_o      = data_q;
+
+  assign ndmreset_o = dmcontrol_q.ndmreset; // also sets the havereset flags in the CSR logic
+
+  logic unused_testmode; // NOTE(review): presumably only here to silence unused-port lint
+  assign unused_testmode = testmode_i;
+
+  // response FIFO: two entries of 32 bit, no pass-through, queues resp_queue_data per request
+  fifo_sync #(
+    .Width   (32),
+    .Pass    (1'b0),
+    .Depth   (2)
+  ) i_fifo (
+    .clk_i   ( clk_i                ),
+    .rst_ni  ( dmi_rst_ni           ), // reset only when system is re-set
+    .clr_i   ( 1'b0                 ), // never flushed synchronously
+    .wdata_i ( resp_queue_data      ),
+    .wvalid_i( dmi_req_valid_i      ), // every valid DMI request pushes one response word
+    .wready_o( dmi_req_ready_o      ), // FIFO write-ready is the DMI request ready
+    .rdata_o ( dmi_resp_o.data      ),
+    .rvalid_o( dmi_resp_valid_o     ),
+    .rready_i( dmi_resp_ready_i     ),
+    .depth_o (                      )  // fill level is left unconnected
+  );
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    // PoR
+    if (!rst_ni) begin
+      dmcontrol_q    <= '0;
+      // asynchronous reset clears every CSR (dmactive included); only havereset is set
+      cmderr_q       <= dm::CmdErrNone;
+      command_q      <= '0;
+      cmd_valid_q    <= '0;
+      abstractauto_q <= '0;
+      progbuf_q      <= '0;
+      data_q         <= '0;
+      sbcs_q         <= '0;
+      sbaddr_q       <= '0;
+      sbdata_q       <= '0;
+      havereset_q    <= '1;
+    end else begin
+      havereset_q    <= SelectableHarts & havereset_d; // updated even while dmactive is low
+      // synchronous re-set of debug module, active-low, except for dmactive
+      if (!dmcontrol_q.dmactive) begin
+        dmcontrol_q.haltreq          <= '0;
+        dmcontrol_q.resumereq        <= '0;
+        dmcontrol_q.hartreset        <= '0;
+        dmcontrol_q.ackhavereset     <= '0;
+        dmcontrol_q.zero1            <= '0;
+        dmcontrol_q.hasel            <= '0;
+        dmcontrol_q.hartsello        <= '0;
+        dmcontrol_q.hartselhi        <= '0;
+        dmcontrol_q.zero0            <= '0;
+        dmcontrol_q.setresethaltreq  <= '0;
+        dmcontrol_q.clrresethaltreq  <= '0;
+        dmcontrol_q.ndmreset         <= '0;
+        // dmactive is the only field that follows its next-state value while inactive
+        dmcontrol_q.dmactive         <= dmcontrol_d.dmactive;
+        cmderr_q                     <= dm::CmdErrNone;
+        command_q                    <= '0;
+        cmd_valid_q                  <= '0;
+        abstractauto_q               <= '0;
+        progbuf_q                    <= '0;
+        data_q                       <= '0;
+        sbcs_q                       <= '0;
+        sbaddr_q                     <= '0;
+        sbdata_q                     <= '0;
+      end else begin // debug module active: all registers take their next-state values
+        dmcontrol_q                  <= dmcontrol_d;
+        cmderr_q                     <= cmderr_d;
+        command_q                    <= command_d;
+        cmd_valid_q                  <= cmd_valid_d;
+        abstractauto_q               <= abstractauto_d;
+        progbuf_q                    <= progbuf_d;
+        data_q                       <= data_d;
+        sbcs_q                       <= sbcs_d;
+        sbaddr_q                     <= sbaddr_d;
+        sbdata_q                     <= sbdata_d;
+      end
+    end
+  end
+
+
+  //pragma translate_on
+
+endmodule : dm_csrs
diff --git a/verilog/rtl/dm_mem.sv b/verilog/rtl/dm_mem.sv
new file mode 100644
index 0000000..4ef7a26
--- /dev/null
+++ b/verilog/rtl/dm_mem.sv
@@ -0,0 +1,523 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dm_mem.sv
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+* Date:   11.7.2018
+*
+* Description: Memory module for execution-based debug clients
+*
+*/
+
+module dm_mem #(
+  parameter int unsigned        NrHarts          =  1,
+  parameter int unsigned        BusWidth         = 32,
+  parameter logic [NrHarts-1:0] SelectableHarts  = {NrHarts{1'b1}},
+  parameter int unsigned        DmBaseAddress    = '0
+) (
+  input  logic                             clk_i,       // Clock
+  input  logic                             rst_ni,      // debug module reset
+
+  output logic [NrHarts-1:0]               debug_req_o,
+  input  logic [19:0]                      hartsel_i,
+  // from Ctrl and Status register
+  input  logic [NrHarts-1:0]               haltreq_i,
+  input  logic [NrHarts-1:0]               resumereq_i,
+  input  logic                             clear_resumeack_i,
+
+  // state bits
+  output logic [NrHarts-1:0]               halted_o,    // hart acknowledge halt
+  output logic [NrHarts-1:0]               resuming_o,  // hart is resuming
+
+  input  logic [dm::ProgBufSize-1:0][31:0] progbuf_i,    // program buffer to expose
+
+  input  logic [dm::DataCount-1:0][31:0]   data_i,       // data in
+  output logic [dm::DataCount-1:0][31:0]   data_o,       // data out
+  output logic                             data_valid_o, // data out is valid
+  // abstract command interface
+  input  logic                             cmd_valid_i,
+  input  dm::command_t                     cmd_i,
+  output logic                             cmderror_valid_o,
+  output dm::cmderr_e                      cmderror_o,
+  output logic                             cmdbusy_o,
+  // data interface
+
+  // SRAM interface
+  input  logic                             req_i,
+  input  logic                             we_i,
+  input  logic [BusWidth-1:0]              addr_i,
+  input  logic [BusWidth-1:0]              wdata_i,
+  input  logic [BusWidth/8-1:0]            be_i,
+  output logic [BusWidth-1:0]              rdata_o
+);
+  localparam int unsigned DbgAddressBits = 12; // low address bits decoded by this module
+  localparam int unsigned HartSelLen     = (NrHarts == 1) ? 1 : $clog2(NrHarts);
+  localparam int unsigned NrHartsAligned = 2**HartSelLen; // hart count padded to a power of two
+  localparam int unsigned MaxAar         = (BusWidth == 64) ? 4 : 3; // first unsupported aarsize
+  localparam bit          HasSndScratch  = (DmBaseAddress != 0); // a0 must hold the DM base
+  // Depending on whether we are at the zero page or not we either use `x0` or `x10/a0`
+  localparam logic [4:0]  LoadBaseAddr   = (DmBaseAddress == 0) ? 5'd0 : 5'd10;
+
+  localparam logic [DbgAddressBits-1:0] DataBaseAddr        = (dm::DataAddr);
+  localparam logic [DbgAddressBits-1:0] DataEndAddr         = (dm::DataAddr + 4*dm::DataCount - 1);
+  localparam logic [DbgAddressBits-1:0] ProgBufBaseAddr     = (dm::DataAddr - 4*dm::ProgBufSize);
+  localparam logic [DbgAddressBits-1:0] ProgBufEndAddr      = (dm::DataAddr - 1);
+  localparam logic [DbgAddressBits-1:0] AbstractCmdBaseAddr = (ProgBufBaseAddr - 4*10);
+  localparam logic [DbgAddressBits-1:0] AbstractCmdEndAddr  = (ProgBufBaseAddr - 1);
+
+  localparam logic [DbgAddressBits-1:0] WhereToAddr   = 'h300; // read: jump to the next action
+  localparam logic [DbgAddressBits-1:0] FlagsBaseAddr = 'h400; // read: per-hart resume/go flags
+  localparam logic [DbgAddressBits-1:0] FlagsEndAddr  = 'h7FF;
+
+  localparam logic [DbgAddressBits-1:0] HaltedAddr    = 'h100; // write: hart reports halted
+  localparam logic [DbgAddressBits-1:0] GoingAddr     = 'h104; // write: hart started the command
+  localparam logic [DbgAddressBits-1:0] ResumingAddr  = 'h108; // write: hart is resuming
+  localparam logic [DbgAddressBits-1:0] ExceptionAddr = 'h10C; // write: hart hit an exception
+
+  logic [dm::ProgBufSize/2-1:0][63:0]   progbuf; // program buffer viewed as 64 bit words
+  logic [7:0][63:0]   abstract_cmd; // generated abstract command, two instructions per entry
+  logic [NrHarts-1:0] halted_d, halted_q;
+  logic [NrHarts-1:0] resuming_d, resuming_q;
+  logic               resume, go, going;
+
+  logic exception;
+  logic unsupported_command;
+
+  logic [63:0] rom_rdata;
+  logic [63:0] rdata_d, rdata_q;
+  logic        word_enable32_q; // registered addr_i[2]: selects the upper word on a 32 bit bus
+
+  // this is needed to avoid lint warnings related to array indexing
+  // resize hartsel to valid range
+  logic [HartSelLen-1:0] hartsel, wdata_hartsel;
+
+  assign hartsel       = hartsel_i[HartSelLen-1:0]; // hart chosen by the debugger
+  assign wdata_hartsel = wdata_i[HartSelLen-1:0];   // hart id taken from the bus write data
+
+  logic [NrHartsAligned-1:0] resumereq_aligned, haltreq_aligned,
+                             halted_d_aligned, halted_q_aligned,
+                             halted_aligned, resumereq_wdata_aligned,
+                             resuming_d_aligned, resuming_q_aligned;
+
+  assign resumereq_aligned       = NrHartsAligned'(resumereq_i);
+  assign haltreq_aligned         = NrHartsAligned'(haltreq_i);
+  assign resumereq_wdata_aligned = NrHartsAligned'(resumereq_i); // same value as resumereq_aligned
+
+  assign halted_q_aligned        = NrHartsAligned'(halted_q);
+  assign halted_d                = NrHarts'(halted_d_aligned);
+  assign resuming_q_aligned      = NrHartsAligned'(resuming_q);
+  assign resuming_d              = NrHarts'(resuming_d_aligned);
+
+  // distinguish whether we need to forward data from the ROM or the FSM
+  // latch the address for this
+  logic fwd_rom_d, fwd_rom_q;
+  dm::ac_ar_cmd_t ac_ar;
+
+  // Abstract Command Access Register
+  assign ac_ar       = dm::ac_ar_cmd_t'(cmd_i.control);
+  assign debug_req_o = haltreq_i; // halt requests go straight out as debug requests
+  assign halted_o    = halted_q;
+  assign resuming_o  = resuming_q;
+
+  // reshape progbuf
+  assign progbuf = progbuf_i;
+
+  typedef enum logic [1:0] { Idle, Go, Resume, CmdExecuting } state_e;
+  state_e state_d, state_q;
+
+  // hart ctrl queue: FSM that sequences abstract commands and resume requests for one hart
+  always_comb begin : p_hart_ctrl_queue
+    cmderror_valid_o = 1'b0;
+    cmderror_o       = dm::CmdErrNone;
+    state_d          = state_q;
+    go               = 1'b0;
+    resume           = 1'b0;
+    cmdbusy_o        = 1'b1; // busy in every state except Idle
+
+    unique case (state_q)
+      Idle: begin
+        cmdbusy_o = 1'b0;
+        if (cmd_valid_i && halted_q_aligned[hartsel] && !unsupported_command) begin
+          // give the go signal
+          state_d = Go;
+        end else if (cmd_valid_i) begin
+          // hart must be halted for all requests
+          cmderror_valid_o = 1'b1;
+          cmderror_o = dm::CmdErrorHaltResume;
+        end
+        // CSRs want to resume, the request is ignored when the hart is
+        // requested to halt or it didn't clear the resuming_q bit before
+        if (resumereq_aligned[hartsel] && !resuming_q_aligned[hartsel] &&
+            !haltreq_aligned[hartsel] && halted_q_aligned[hartsel]) begin
+          state_d = Resume; // assigned last, so a resume wins over a simultaneous command
+        end
+      end
+
+      Go: begin
+        // we are already busy here since we scheduled the execution of a program
+        cmdbusy_o = 1'b1;
+        go        = 1'b1;
+        // the thread is now executing the command, track its state
+        if (going) begin
+            state_d = CmdExecuting;
+        end
+      end
+
+      Resume: begin
+        cmdbusy_o = 1'b1;
+        resume = 1'b1;
+        if (resuming_q_aligned[hartsel]) begin // hart acknowledged by writing ResumingAddr
+          state_d = Idle;
+        end
+      end
+
+      CmdExecuting: begin
+        cmdbusy_o = 1'b1;
+        go        = 1'b0;
+        // wait until the hart has halted again
+        if (halted_aligned[hartsel]) begin
+          state_d = Idle;
+        end
+      end
+
+      // no default item: all four encodings of the 2 bit state_e are handled above
+    endcase
+
+    // only signal once that cmd is unsupported so that we can clear cmderr
+    // in subsequent writes to abstractcs
+    if (unsupported_command && cmd_valid_i) begin
+      cmderror_valid_o = 1'b1;
+      cmderror_o = dm::CmdErrNotSupported; // overrides the halt/resume error set in Idle
+    end
+
+    if (exception) begin // evaluated last: an exception report overrides the errors above
+      cmderror_valid_o = 1'b1;
+      cmderror_o = dm::CmdErrorException;
+    end
+  end
+
+  // pick ROM or registered FSM read data, then narrow it to the width of the bus
+  logic [63:0] word_mux;
+  assign word_mux = fwd_rom_q ? rom_rdata : rdata_q;
+
+  if (BusWidth == 64) begin : gen_word_mux64
+    assign rdata_o = word_mux;
+  end else begin : gen_word_mux32
+    assign rdata_o = word_enable32_q ? word_mux[63:32] : word_mux[31:0];
+  end
+
+  // read/write logic: decodes hart bus accesses into state updates and variable read data
+  logic [63:0] data_bits;
+  logic [7:0][7:0] rdata;
+  always_comb begin : p_rw_logic
+    // defaults: hold the registered state and signal no event
+    halted_d_aligned   = NrHartsAligned'(halted_q);
+    resuming_d_aligned = NrHartsAligned'(resuming_q);
+    rdata_d        = rdata_q;
+    // convert the data in bits representation
+    data_bits      = data_i;
+    rdata          = '0;
+
+    // write data in csr register
+    data_valid_o   = 1'b0;
+    exception      = 1'b0;
+    halted_aligned     = '0;
+    going          = 1'b0;
+
+    // The resume ack signal is lowered when the resume request is deasserted
+    if (clear_resumeack_i) begin
+      resuming_d_aligned[hartsel] = 1'b0;
+    end
+    // we've got a new request
+    if (req_i) begin
+      // this is a write
+      if (we_i) begin
+        unique case (addr_i[DbgAddressBits-1:0]) inside
+          HaltedAddr: begin
+            halted_aligned[wdata_hartsel] = 1'b1;
+            halted_d_aligned[wdata_hartsel] = 1'b1;
+          end
+          GoingAddr: begin
+            going = 1'b1;
+          end
+          ResumingAddr: begin
+            // clear the halted flag as the hart resumed execution
+            halted_d_aligned[wdata_hartsel] = 1'b0;
+            // set the resuming flag which needs to be cleared by the debugger
+            resuming_d_aligned[wdata_hartsel] = 1'b1;
+          end
+          // an exception occurred during execution
+          ExceptionAddr: exception = 1'b1;
+          // core can write data registers; on a 32 bit bus addr_i[2] selects the upper data word
+          [DataBaseAddr:DataEndAddr]: begin
+            data_valid_o = 1'b1;
+            for (int i = 0; i < $bits(be_i); i++) begin
+              if (be_i[i]) begin
+                data_bits[((BusWidth == 32 && addr_i[2]) ? 32 : 0) + i*8 +: 8] = wdata_i[i*8+:8];
+              end
+            end
+          end
+          default ;
+        endcase
+
+      // this is a read
+      end else begin
+        unique case (addr_i[DbgAddressBits-1:0]) inside
+          // variable ROM content
+          WhereToAddr: begin
+            // variable jump to abstract cmd, program_buffer or resume
+            if (resumereq_wdata_aligned[wdata_hartsel]) begin
+              rdata_d = {32'b0, dm::jal('0, 21'(dm::ResumeAddress[11:0])-21'(WhereToAddr))};
+            end
+
+            // there is a command active so jump there
+            if (cmdbusy_o) begin
+              // transfer not set is shortcut to the program buffer if postexec is set
+              // keep this statement narrow to not catch invalid commands
+              if (cmd_i.cmdtype == dm::AccessRegister &&
+                  !ac_ar.transfer && ac_ar.postexec) begin
+                rdata_d = {32'b0, dm::jal('0, 21'(ProgBufBaseAddr)-21'(WhereToAddr))};
+              // this is a legit abstract cmd -> execute it
+              end else begin
+                rdata_d = {32'b0, dm::jal('0, 21'(AbstractCmdBaseAddr)-21'(WhereToAddr))};
+              end
+            end
+          end
+
+          [DataBaseAddr:DataEndAddr]: begin
+            rdata_d = {
+                      data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+                          DataBaseAddr[DbgAddressBits-1:3] + 1'b1)],
+                      data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+                          DataBaseAddr[DbgAddressBits-1:3])]
+                      };
+          end
+
+          [ProgBufBaseAddr:ProgBufEndAddr]: begin
+            rdata_d = progbuf[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+                          ProgBufBaseAddr[DbgAddressBits-1:3])];
+          end
+
+          // two slots for abstract command
+          [AbstractCmdBaseAddr:AbstractCmdEndAddr]: begin
+            // return the correct address index
+            rdata_d = abstract_cmd[3'(addr_i[DbgAddressBits-1:3] -
+                           AbstractCmdBaseAddr[DbgAddressBits-1:3])];
+          end
+          // harts are polling for flags here
+          [FlagsBaseAddr:FlagsEndAddr]: begin
+            // release the corresponding hart
+            if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBaseAddr[DbgAddressBits-1:0]) ==
+              (DbgAddressBits'(hartsel) & {{(DbgAddressBits-3){1'b1}}, 3'b0})) begin
+              rdata[DbgAddressBits'(hartsel) & DbgAddressBits'(3'b111)] = {6'b0, resume, go};
+            end
+            rdata_d = rdata;
+          end
+          default: ;
+        endcase
+      end
+    end
+
+    data_o = data_bits;
+  end
+
+  always_comb begin : p_abstract_cmd_rom
+    // this abstract command is currently unsupported
+    unsupported_command = 1'b0;
+    // default memory
+    // if ac_ar.transfer is not set then we can take a shortcut to the program buffer
+    abstract_cmd[0][31:0]  = dm::illegal();
+    // load debug module base address into a0, this is shared among all commands
+    abstract_cmd[0][63:32] = HasSndScratch ? dm::auipc(5'd10, '0) : dm::nop();
+    // clr lowest 12b -> DM base offset
+    abstract_cmd[1][31:0]  = HasSndScratch ? dm::srli(5'd10, 5'd10, 6'd12) : dm::nop();
+    abstract_cmd[1][63:32] = HasSndScratch ? dm::slli(5'd10, 5'd10, 6'd12) : dm::nop();
+    abstract_cmd[2][31:0]  = dm::nop();
+    abstract_cmd[2][63:32] = dm::nop();
+    abstract_cmd[3][31:0]  = dm::nop();
+    abstract_cmd[3][63:32] = dm::nop();
+    abstract_cmd[4][31:0]  = HasSndScratch ? dm::csrr(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+    abstract_cmd[4][63:32] = dm::ebreak();
+    abstract_cmd[7:5]      = '0;
+
+    // this depends on the command being executed
+    unique case (cmd_i.cmdtype)
+      // --------------------
+      // Access Register
+      // --------------------
+      dm::AccessRegister: begin
+        if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && ac_ar.write) begin
+          // store a0 in dscratch1 (csrw: GPR -> CSR), it is restored by abstract_cmd[4][31:0]
+          abstract_cmd[0][31:0] = HasSndScratch ? dm::csrw(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+          // this range is reserved
+          if (ac_ar.regno[15:14] != '0) begin
+            abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+            unsupported_command = 1'b1;
+          // A0 access needs to be handled separately, as we use A0 to load
+          // the DM address offset need to access DSCRATCH1 in this case
+          end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // load from data register
+            abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // and store it in the corresponding CSR
+            abstract_cmd[3][31:0]  = dm::csrw(dm::CSR_DSCRATCH1, 5'd8);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          // GPR/FPR access
+          end else if (ac_ar.regno[12]) begin
+            // determine whether we want to access the floating point register or not
+            if (ac_ar.regno[5]) begin
+              abstract_cmd[2][31:0] =
+                  dm::float_load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end else begin
+              abstract_cmd[2][31:0] =
+                  dm::load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end
+          // CSR access
+          end else begin
+            // data register to CSR
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // load from data register
+            abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // and store it in the corresponding CSR
+            abstract_cmd[3][31:0]  = dm::csrw(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          end
+        end else if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && !ac_ar.write) begin
+          // store a0 in dscratch1 (csrw: GPR -> CSR), it is restored by abstract_cmd[4][31:0]
+          abstract_cmd[0][31:0]  = HasSndScratch ?
+                                   dm::csrw(dm::CSR_DSCRATCH1, LoadBaseAddr) :
+                                   dm::nop();
+          // this range is reserved
+          if (ac_ar.regno[15:14] != '0) begin
+              abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+              unsupported_command = 1'b1;
+          // A0 access needs to be handled separately, as we use A0 to load
+          // the DM address offset need to access DSCRATCH1 in this case
+          end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // read value from CSR into s0
+            abstract_cmd[2][63:32] = dm::csrr(dm::CSR_DSCRATCH1, 5'd8);
+            // and store s0 into data section
+            abstract_cmd[3][31:0]  = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          // GPR/FPR access
+          end else if (ac_ar.regno[12]) begin
+            // determine whether we want to access the floating point register or not
+            if (ac_ar.regno[5]) begin
+              abstract_cmd[2][31:0] =
+                  dm::float_store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end else begin
+              abstract_cmd[2][31:0] =
+                  dm::store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end
+          // CSR access
+          end else begin
+            // CSR register to data
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // read value from CSR into s0
+            abstract_cmd[2][63:32] = dm::csrr(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+            // and store s0 into data section
+            abstract_cmd[3][31:0]  = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          end
+        end else if (32'(ac_ar.aarsize) >= MaxAar || ac_ar.aarpostincrement == 1'b1) begin
+          // this should happen when e.g. ac_ar.aarsize >= MaxAar
+          // Openocd will try to do an access with aarsize=64 bits
+          // first before falling back to 32 bits.
+          abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+          unsupported_command = 1'b1;
+        end
+
+        // Check whether we need to execute the program buffer. When we
+        // get an unsupported command we really should abort instead of
+        // still trying to execute the program buffer, makes it easier
+        // for the debugger to recover
+        if (ac_ar.postexec && !unsupported_command) begin
+          // issue a nop, we will automatically run into the program buffer
+          abstract_cmd[4][63:32] = dm::nop();
+        end
+      end
+      // not supported at the moment
+      // dm::QuickAccess:;
+      // dm::AccessMemory:;
+      default: begin
+        abstract_cmd[0][31:0] = dm::ebreak();
+        unsupported_command = 1'b1;
+      end
+    endcase
+  end
+
+  logic [63:0] rom_addr; // bus address resized to the 64 bit ROM address port
+  assign rom_addr = 64'(addr_i);
+
+  // Depending on whether the debug module is located
+  // at the zero page we can instantiate a simplified version
+  // which only requires one scratch register per hart.
+  // For all other cases we need to set aside
+  // two registers per hart, hence we also need
+  // two scratch registers.
+  if (HasSndScratch) begin : gen_rom_snd_scratch
+    debug_rom i_debug_rom (
+      .clk_i,
+      .req_i,
+      .addr_i  ( rom_addr  ),
+      .rdata_o ( rom_rdata )
+    );
+  end else begin : gen_rom_one_scratch
+    // It uses the zero register (`x0`) as the base
+    // for its loads. The zero register does not need to
+    // be saved.
+    debug_rom_one_scratch i_debug_rom (
+      .clk_i,
+      .req_i,
+      .addr_i  ( rom_addr  ),
+      .rdata_o ( rom_rdata )
+    );
+  end
+
+  // ROM starts at the HaltAddress of the core e.g.: it immediately jumps to
+  // the ROM base address; accesses at or above it are answered with ROM data
+  assign fwd_rom_d = logic'(addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      fwd_rom_q       <= 1'b0;
+      rdata_q         <= '0;
+      state_q         <= Idle;
+      word_enable32_q <= 1'b0;
+    end else begin
+      fwd_rom_q       <= fwd_rom_d;  // remembers whether the last address hit the ROM range
+      rdata_q         <= rdata_d;
+      state_q         <= state_d;
+      word_enable32_q <= addr_i[2];  // remembers which 32 bit half the last address selected
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      halted_q   <= '0; // width-agnostic literal: clears all NrHarts bits
+      resuming_q <= '0;
+    end else begin
+      halted_q   <= SelectableHarts & halted_d;   // non-selectable harts never read as halted
+      resuming_q <= SelectableHarts & resuming_d; // ... nor as resuming
+    end
+  end
+
+endmodule : dm_mem
diff --git a/verilog/rtl/dm_pkg.sv b/verilog/rtl/dm_pkg.sv
new file mode 100644
index 0000000..1b7d0f5
--- /dev/null
+++ b/verilog/rtl/dm_pkg.sv
@@ -0,0 +1,414 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   dm_pkg.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   30.6.2018
+ *
+ * Description: Debug-module package, contains common system definitions.
+ *
+ */
+
+package dm; // debug-module constants, register layouts and RISC-V instruction encoders
+  localparam logic [3:0] DbgVersion013 = 4'h2;
+  // size of program buffer in chunks of 32-bit words
+  localparam logic [4:0] ProgBufSize   = 5'h8;
+
+  // number of data registers implemented
+  localparam logic [3:0] DataCount     = 4'h2;
+
+  // address to which a hart should jump when it was requested to halt
+  localparam logic [63:0] HaltAddress = 64'h800;
+  localparam logic [63:0] ResumeAddress = HaltAddress + 4;
+  localparam logic [63:0] ExceptionAddress = HaltAddress + 8;
+
+  // address where data0-15 is shadowed or if shadowed in a CSR
+  // address of the first CSR used for shadowing the data
+  localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here
+
+  // debug register address encodings
+  typedef enum logic [7:0] {
+    Data0        = 8'h04,
+    Data1        = 8'h05,
+    Data2        = 8'h06,
+    Data3        = 8'h07,
+    Data4        = 8'h08,
+    Data5        = 8'h09,
+    Data6        = 8'h0A,
+    Data7        = 8'h0B,
+    Data8        = 8'h0C,
+    Data9        = 8'h0D,
+    Data10       = 8'h0E,
+    Data11       = 8'h0F,
+    DMControl    = 8'h10,
+    DMStatus     = 8'h11, // r/o
+    Hartinfo     = 8'h12,
+    HaltSum1     = 8'h13,
+    HAWindowSel  = 8'h14,
+    HAWindow     = 8'h15,
+    AbstractCS   = 8'h16,
+    Command      = 8'h17,
+    AbstractAuto = 8'h18,
+    DevTreeAddr0 = 8'h19,
+    DevTreeAddr1 = 8'h1A,
+    DevTreeAddr2 = 8'h1B,
+    DevTreeAddr3 = 8'h1C,
+    NextDM       = 8'h1D,
+    ProgBuf0     = 8'h20,
+    ProgBuf15    = 8'h2F,
+    AuthData     = 8'h30,
+    HaltSum2     = 8'h34,
+    HaltSum3     = 8'h35,
+    SBAddress3   = 8'h37,
+    SBCS         = 8'h38,
+    SBAddress0   = 8'h39,
+    SBAddress1   = 8'h3A,
+    SBAddress2   = 8'h3B,
+    SBData0      = 8'h3C,
+    SBData1      = 8'h3D,
+    SBData2      = 8'h3E,
+    SBData3      = 8'h3F,
+    HaltSum0     = 8'h40
+  } dm_csr_e;
+
+  // debug causes
+  localparam logic [2:0] CauseBreakpoint = 3'h1;
+  localparam logic [2:0] CauseTrigger    = 3'h2;
+  localparam logic [2:0] CauseRequest    = 3'h3;
+  localparam logic [2:0] CauseSingleStep = 3'h4;
+
+  typedef struct packed {
+    logic [31:23] zero1;
+    logic         impebreak;
+    logic [21:20] zero0;
+    logic         allhavereset;
+    logic         anyhavereset;
+    logic         allresumeack;
+    logic         anyresumeack;
+    logic         allnonexistent;
+    logic         anynonexistent;
+    logic         allunavail;
+    logic         anyunavail;
+    logic         allrunning;
+    logic         anyrunning;
+    logic         allhalted;
+    logic         anyhalted;
+    logic         authenticated;
+    logic         authbusy;
+    logic         hasresethaltreq;
+    logic         devtreevalid;
+    logic [3:0]   version;
+  } dmstatus_t; // 32 bits total
+
+  typedef struct packed {
+    logic         haltreq;
+    logic         resumereq;
+    logic         hartreset;
+    logic         ackhavereset;
+    logic         zero1;
+    logic         hasel;
+    logic [25:16] hartsello;
+    logic [15:6]  hartselhi;
+    logic [5:4]   zero0;
+    logic         setresethaltreq;
+    logic         clrresethaltreq;
+    logic         ndmreset;
+    logic         dmactive;
+  } dmcontrol_t; // 32 bits total
+
+  typedef struct packed {
+    logic [31:24] zero1;
+    logic [23:20] nscratch;
+    logic [19:17] zero0;
+    logic         dataaccess;
+    logic [15:12] datasize;
+    logic [11:0]  dataaddr;
+  } hartinfo_t; // 32 bits total
+
+  typedef enum logic [2:0] {
+    CmdErrNone, CmdErrBusy, CmdErrNotSupported,
+    CmdErrorException, CmdErrorHaltResume,
+    CmdErrorBus, CmdErrorOther = 7
+  } cmderr_e; // implicit encodings 0..5, CmdErrorOther is pinned to 7
+
+  typedef struct packed {
+    logic [31:29] zero3;
+    logic [28:24] progbufsize;
+    logic [23:13] zero2;
+    logic         busy;
+    logic         zero1;
+    cmderr_e      cmderr;
+    logic [7:4]   zero0;
+    logic [3:0]   datacount;
+  } abstractcs_t; // 32 bits total
+
+  typedef enum logic [7:0] {
+    AccessRegister = 8'h0,
+    QuickAccess    = 8'h1,
+    AccessMemory   = 8'h2
+  } cmd_e;
+
+  typedef struct packed {
+    cmd_e        cmdtype;
+    logic [23:0] control;
+  } command_t; // 8-bit command type followed by 24 command-specific bits
+
+  typedef struct packed {
+    logic [31:16] autoexecprogbuf;
+    logic [15:12] zero0;
+    logic [11:0]  autoexecdata;
+  } abstractauto_t; // 32 bits total
+
+  typedef struct packed {
+    logic         zero1;
+    logic [22:20] aarsize;
+    logic         aarpostincrement;
+    logic         postexec;
+    logic         transfer;
+    logic         write;
+    logic [15:0]  regno;
+  } ac_ar_cmd_t; // 24 bits total, the same width as command_t.control
+
+  // DTM
+  typedef enum logic [1:0] {
+    DTM_NOP   = 2'h0,
+    DTM_READ  = 2'h1,
+    DTM_WRITE = 2'h2
+  } dtm_op_e;
+
+  typedef struct packed {
+    logic [31:29] sbversion;
+    logic [28:23] zero0;
+    logic         sbbusyerror;
+    logic         sbbusy;
+    logic         sbreadonaddr;
+    logic [19:17] sbaccess;
+    logic         sbautoincrement;
+    logic         sbreadondata;
+    logic [14:12] sberror;
+    logic [11:5]  sbasize;
+    logic         sbaccess128;
+    logic         sbaccess64;
+    logic         sbaccess32;
+    logic         sbaccess16;
+    logic         sbaccess8;
+  } sbcs_t; // 32 bits total
+
+  localparam logic [1:0] DTM_SUCCESS = 2'h0;
+
+  typedef struct packed {
+    logic [6:0]  addr;
+    dtm_op_e     op;
+    logic [31:0] data;
+  } dmi_req_t; // 41 bits: {addr, op, data}
+
+  typedef struct packed  {
+    logic [31:0] data;
+    logic [1:0]  resp;
+  } dmi_resp_t; // 34 bits: {data, resp}
+
+  // privilege levels
+  typedef enum logic[1:0] {
+    PRIV_LVL_M = 2'b11,
+    PRIV_LVL_S = 2'b01,
+    PRIV_LVL_U = 2'b00
+  } priv_lvl_t;
+
+  // debugregs in core
+  typedef struct packed {
+    logic [31:28]     xdebugver;
+    logic [27:16]     zero2;
+    logic             ebreakm;
+    logic             zero1;
+    logic             ebreaks;
+    logic             ebreaku;
+    logic             stepie;
+    logic             stopcount;
+    logic             stoptime;
+    logic [8:6]       cause;
+    logic             zero0;
+    logic             mprven;
+    logic             nmip;
+    logic             step;
+    priv_lvl_t        prv;
+  } dcsr_t; // 32 bits total
+
+  // CSR addresses (12 bit)
+  typedef enum logic [11:0] {
+    // Floating-Point CSRs
+    CSR_FFLAGS         = 12'h001,
+    CSR_FRM            = 12'h002,
+    CSR_FCSR           = 12'h003,
+    CSR_FTRAN          = 12'h800,
+    // Supervisor Mode CSRs
+    CSR_SSTATUS        = 12'h100,
+    CSR_SIE            = 12'h104,
+    CSR_STVEC          = 12'h105,
+    CSR_SCOUNTEREN     = 12'h106,
+    CSR_SSCRATCH       = 12'h140,
+    CSR_SEPC           = 12'h141,
+    CSR_SCAUSE         = 12'h142,
+    CSR_STVAL          = 12'h143,
+    CSR_SIP            = 12'h144,
+    CSR_SATP           = 12'h180,
+    // Machine Mode CSRs
+    CSR_MSTATUS        = 12'h300,
+    CSR_MISA           = 12'h301,
+    CSR_MEDELEG        = 12'h302,
+    CSR_MIDELEG        = 12'h303,
+    CSR_MIE            = 12'h304,
+    CSR_MTVEC          = 12'h305,
+    CSR_MCOUNTEREN     = 12'h306,
+    CSR_MSCRATCH       = 12'h340,
+    CSR_MEPC           = 12'h341,
+    CSR_MCAUSE         = 12'h342,
+    CSR_MTVAL          = 12'h343,
+    CSR_MIP            = 12'h344,
+    CSR_PMPCFG0        = 12'h3A0,
+    CSR_PMPADDR0       = 12'h3B0,
+    CSR_MVENDORID      = 12'hF11,
+    CSR_MARCHID        = 12'hF12,
+    CSR_MIMPID         = 12'hF13,
+    CSR_MHARTID        = 12'hF14,
+    CSR_MCYCLE         = 12'hB00,
+    CSR_MINSTRET       = 12'hB02,
+    CSR_DCACHE         = 12'h701,
+    CSR_ICACHE         = 12'h700,
+
+    CSR_TSELECT        = 12'h7A0,
+    CSR_TDATA1         = 12'h7A1,
+    CSR_TDATA2         = 12'h7A2,
+    CSR_TDATA3         = 12'h7A3,
+    CSR_TINFO          = 12'h7A4,
+
+    // Debug CSR
+    CSR_DCSR           = 12'h7b0,
+    CSR_DPC            = 12'h7b1,
+    CSR_DSCRATCH0      = 12'h7b2, // optional
+    CSR_DSCRATCH1      = 12'h7b3, // optional
+
+    // Counters and Timers
+    CSR_CYCLE          = 12'hC00,
+    CSR_TIME           = 12'hC01,
+    CSR_INSTRET        = 12'hC02
+  } csr_reg_t;
+
+
+  // Instruction Generation Helpers: each returns one 32-bit instruction word
+  function automatic logic [31:0] jal (logic [4:0]  rd,
+                                       logic [20:0] imm);
+    // OpCode Jal: imm[20:1] is reordered into the immediate field, imm[0] is not encoded
+    return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h6f};
+  endfunction
+
+  function automatic logic [31:0] jalr (logic [4:0]  rd,
+                                        logic [4:0]  rs1,
+                                        logic [11:0] offset);
+    // OpCode Jalr (funct3 = 0)
+    return {offset[11:0], rs1, 3'b0, rd, 7'h67};
+  endfunction
+
+  function automatic logic [31:0] andi (logic [4:0]  rd,
+                                        logic [4:0]  rs1,
+                                        logic [11:0] imm);
+    // OpCode andi (funct3 = 7)
+    return {imm[11:0], rs1, 3'h7, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] slli (logic [4:0] rd,
+                                        logic [4:0] rs1,
+                                        logic [5:0] shamt);
+    // OpCode slli (funct3 = 1, 6-bit shift amount)
+    return {6'b0, shamt[5:0], rs1, 3'h1, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] srli (logic [4:0] rd,
+                                        logic [4:0] rs1,
+                                        logic [5:0] shamt);
+    // OpCode srli (funct3 = 5, 6-bit shift amount)
+    return {6'b0, shamt[5:0], rs1, 3'h5, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] load (logic [2:0]  size,
+                                        logic [4:0]  dest,
+                                        logic [4:0]  base,
+                                        logic [11:0] offset);
+    // OpCode Load, size is placed in the funct3 field
+    return {offset[11:0], base, size, dest, 7'h03};
+  endfunction
+
+  function automatic logic [31:0] auipc (logic [4:0]  rd,
+                                         logic [20:0] imm);
+    // OpCode Auipc -- NOTE(review): imm is packed in the same bit order as jal; confirm this is intended for a U-type immediate
+    return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h17};
+  endfunction
+
+  function automatic logic [31:0] store (logic [2:0]  size,
+                                         logic [4:0]  src,
+                                         logic [4:0]  base,
+                                         logic [11:0] offset);
+    // OpCode Store, offset is split into offset[11:5] and offset[4:0] around the register/size fields
+    return {offset[11:5], src, base, size, offset[4:0], 7'h23};
+  endfunction
+
+  function automatic logic [31:0] float_load (logic [2:0]  size,
+                                              logic [4:0]  dest,
+                                              logic [4:0]  base,
+                                              logic [11:0] offset);
+    // OpCode Load-FP, same field layout as load
+    return {offset[11:0], base, size, dest, 7'b00_001_11};
+  endfunction
+
+  function automatic logic [31:0] float_store (logic [2:0]  size,
+                                               logic [4:0]  src,
+                                               logic [4:0]  base,
+                                               logic [11:0] offset);
+    // OpCode Store-FP, same field layout as store
+    return {offset[11:5], src, base, size, offset[4:0], 7'b01_001_11};
+  endfunction
+
+  function automatic logic [31:0] csrw (csr_reg_t   csr,
+                                        logic [4:0] rs1);
+    // CSRRW, rd = x0, OpCode System
+    return {csr, rs1, 3'h1, 5'h0, 7'h73};
+  endfunction
+
+  function automatic logic [31:0] csrr (csr_reg_t   csr,
+                                        logic [4:0] dest);
+    // rs1 = x0, CSRRS, rd, OpCode System
+    return {csr, 5'h0, 3'h2, dest, 7'h73};
+  endfunction
+
+  function automatic logic [31:0] branch(logic [4:0]  src2,
+                                         logic [4:0]  src1,
+                                         logic [2:0]  funct3,
+                                         logic [11:0] offset);
+    // OpCode Branch -- NOTE(review): all 12 offset bits are encoded, so offset appears to be in 2-byte units; confirm against callers
+    return {offset[11], offset[9:4], src2, src1, funct3,
+        offset[3:0], offset[10], 7'b11_000_11};
+  endfunction
+
+  function automatic logic [31:0] ebreak ();
+    return 32'h00100073;
+  endfunction
+
+  function automatic logic [31:0] wfi ();
+    return 32'h10500073;
+  endfunction
+
+  function automatic logic [31:0] nop ();
+    return 32'h00000013; // addi x0, x0, 0
+  endfunction
+
+  function automatic logic [31:0] illegal ();
+    return 32'h00000000; // all-zero instruction word
+  endfunction
+
+endpackage : dm
diff --git a/verilog/rtl/dm_sba.sv b/verilog/rtl/dm_sba.sv
new file mode 100644
index 0000000..c97f956
--- /dev/null
+++ b/verilog/rtl/dm_sba.sv
@@ -0,0 +1,172 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dm_sba.sv
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+* Date:   1.8.2018
+*
+* Description: System Bus Access Module
+*
+*/
+module dm_sba #( // system bus access FSM: turns debugger read/write triggers into bus master transfers
+  parameter int unsigned BusWidth = 32
+) (
+  input  logic                   clk_i,       // Clock
+  input  logic                   rst_ni,      // asynchronous reset, active low
+  input  logic                   dmactive_i,  // synchronous reset active low (only referenced by the assertion in this module)
+
+  output logic                   master_req_o,
+  output logic [BusWidth-1:0]    master_add_o,
+  output logic                   master_we_o,
+  output logic [BusWidth-1:0]    master_wdata_o,
+  output logic [BusWidth/8-1:0]  master_be_o,
+  input  logic                   master_gnt_i,
+  input  logic                   master_r_valid_i,
+  input  logic [BusWidth-1:0]    master_r_rdata_i,
+
+  input  logic [BusWidth-1:0]    sbaddress_i,
+  input  logic                   sbaddress_write_valid_i,
+  // control signals in
+  input  logic                   sbreadonaddr_i,
+  output logic [BusWidth-1:0]    sbaddress_o,
+  input  logic                   sbautoincrement_i,
+  input  logic [2:0]             sbaccess_i,
+  // data in
+  input  logic                   sbreadondata_i,
+  input  logic [BusWidth-1:0]    sbdata_i,
+  input  logic                   sbdata_read_valid_i,
+  input  logic                   sbdata_write_valid_i,
+  // read data out
+  output logic [BusWidth-1:0]    sbdata_o,
+  output logic                   sbdata_valid_o,
+  // control signals
+  output logic                   sbbusy_o,
+  output logic                   sberror_valid_o, // bus error occurred
+  output logic [2:0]             sberror_o // error code, 3 is reported for sbaccess_i > 3
+);
+
+  typedef enum logic [2:0] { Idle, Read, Write, WaitRead, WaitWrite } state_e;
+  state_e state_d, state_q;
+
+  logic [BusWidth-1:0]           address;
+  logic                          req;
+  logic                          gnt;
+  logic                          we;
+  logic [BusWidth/8-1:0]         be;
+  logic [$clog2(BusWidth/8)-1:0] be_idx; // byte offset of the access within one bus word
+
+  assign sbbusy_o = logic'(state_q != Idle);
+
+  always_comb begin : p_fsm
+    req     = 1'b0;
+    address = sbaddress_i;
+    we      = 1'b0;
+    be      = '0;
+    be_idx  = sbaddress_i[$clog2(BusWidth/8)-1:0];
+
+    sberror_o       = '0;
+    sberror_valid_o = 1'b0;
+    sbaddress_o     = sbaddress_i;
+
+    state_d = state_q;
+
+    unique case (state_q)
+      Idle: begin // later assignments win: data-triggered read > write > address-triggered read
+        // debugger requested a read
+        if (sbaddress_write_valid_i && sbreadonaddr_i)  state_d = Read;
+        // debugger requested a write
+        if (sbdata_write_valid_i) state_d = Write;
+        // perform another read
+        if (sbdata_read_valid_i && sbreadondata_i) state_d = Read;
+      end
+
+      Read: begin // hold the request until the bus grants it
+        req = 1'b1;
+        if (gnt) state_d = WaitRead;
+      end
+
+      Write: begin // hold the request until the bus grants it
+        req = 1'b1;
+        we  = 1'b1;
+        // generate byte enable mask (sbaccess_i: 0 = 1 byte, 1 = 2 bytes, 2 = 4 bytes, 3 = full bus word)
+        unique case (sbaccess_i)
+          3'b000: begin
+            be[be_idx] = '1;
+          end
+          3'b001: begin
+            be[int'({be_idx[$high(be_idx):1], 1'b0}) +: 2] = '1;
+          end
+          3'b010: begin
+            if (BusWidth == 32'd64) be[int'({be_idx[$high(be_idx)], 2'h0}) +: 4] = '1;
+            else                    be = '1;
+          end
+          3'b011: be = '1;
+          default: ; // unsupported sizes leave be = '0 and are rejected by the error handling below
+        endcase
+        if (gnt) state_d = WaitWrite;
+      end
+
+      WaitRead: begin
+        if (sbdata_valid_o) begin
+          state_d = Idle;
+          // auto-increment address by the access size in bytes
+          if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i);
+        end
+      end
+
+      WaitWrite: begin // writes also wait for master_r_valid_i (via sbdata_valid_o)
+        if (sbdata_valid_o) begin
+          state_d = Idle;
+          // auto-increment address by the access size in bytes
+          if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i);
+        end
+      end
+
+      default: state_d = Idle; // catch parasitic state
+    endcase
+
+    // handle error case: unsupported access size aborts the transfer and overrides the FSM outputs above
+    if (sbaccess_i > 3 && state_q != Idle) begin
+      req             = 1'b0;
+      state_d         = Idle;
+      sberror_valid_o = 1'b1;
+      sberror_o       = 3'd3;
+    end
+    // further error handling should go here ...
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      state_q <= Idle;
+    end else begin
+      state_q <= state_d;
+    end
+  end
+
+  assign master_req_o    = req;
+  assign master_add_o    = address[BusWidth-1:0];
+  assign master_we_o     = we;
+  assign master_wdata_o  = sbdata_i[BusWidth-1:0];
+  assign master_be_o     = be[BusWidth/8-1:0];
+  assign gnt             = master_gnt_i;
+  assign sbdata_valid_o  = master_r_valid_i;
+  assign sbdata_o        = master_r_rdata_i[BusWidth-1:0];
+
+
+  //pragma translate_off
+  `ifndef VERILATOR
+    // checked only while the debug module is active (dmactive_i == 1); maybe bump severity to $error if not handled at runtime
+    dm_sba_access_size: assert property(@(posedge clk_i) disable iff (dmactive_i !== 1'b1)
+        (state_d != Idle) |-> (sbaccess_i < 4))
+            else $warning ("accesses > 8 byte not supported at the moment");
+  `endif
+  //pragma translate_on
+
+endmodule : dm_sba
diff --git a/verilog/rtl/dmi_cdc.sv b/verilog/rtl/dmi_cdc.sv
new file mode 100644
index 0000000..c1c1de2
--- /dev/null
+++ b/verilog/rtl/dmi_cdc.sv
@@ -0,0 +1,85 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dmi_cdc.sv
+* Author: Andreas Traber <atraber@iis.ee.ethz.ch>
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+*
+* Description: Clock domain crossings for JTAG to DMI very heavily based
+*              on previous work by Andreas Traber for the PULP project.
+*              This is mainly a wrapper around the existing CDCs.
+*/
+module dmi_cdc ( // two fifo_async instances: DMI requests tck_i -> clk_i, DMI responses clk_i -> tck_i
+  // JTAG side (master side)
+  input  logic             tck_i,
+  input  logic             trst_ni,
+
+  input  dm::dmi_req_t     jtag_dmi_req_i,
+  output logic             jtag_dmi_ready_o,
+  input  logic             jtag_dmi_valid_i,
+
+  output dm::dmi_resp_t    jtag_dmi_resp_o,
+  output logic             jtag_dmi_valid_o,
+  input  logic             jtag_dmi_ready_i,
+
+  // core side (slave side)
+  input  logic             clk_i,
+  input  logic             rst_ni,
+
+  output dm::dmi_req_t     core_dmi_req_o,
+  output logic             core_dmi_valid_o,
+  input  logic             core_dmi_ready_i,
+
+  input dm::dmi_resp_t     core_dmi_resp_i,
+  output logic             core_dmi_ready_o,
+  input  logic             core_dmi_valid_i
+);
+
+  // TODO: Make it clean for synthesis.
+
+  fifo_async #(
+    .Width       ( $bits(dm::dmi_req_t) ),
+    .Depth       ( 4 )
+  ) i_cdc_req ( // request path: written in the tck_i domain, read in the clk_i domain
+    .clk_wr_i    ( tck_i            ),
+    .rst_wr_ni   ( trst_ni          ),
+    .wvalid_i    ( jtag_dmi_valid_i ),
+    .wready_o    ( jtag_dmi_ready_o ), // wrclk
+    .wdata_i     ( jtag_dmi_req_i   ),
+    .wdepth_o    (                  ), // left unconnected
+
+    .clk_rd_i    ( clk_i            ),
+    .rst_rd_ni   ( rst_ni           ),
+    .rvalid_o    ( core_dmi_valid_o ),
+    .rready_i    ( core_dmi_ready_i ),
+    .rdata_o     ( core_dmi_req_o   ),
+    .rdepth_o    (                  ) // left unconnected
+  );
+
+  fifo_async #(
+    .Width       ( $bits(dm::dmi_resp_t) ),
+    .Depth       ( 4 )
+  ) i_cdc_resp ( // response path: written in the clk_i domain, read in the tck_i domain
+    .clk_wr_i    ( clk_i            ),
+    .rst_wr_ni   ( rst_ni           ),
+    .wvalid_i    ( core_dmi_valid_i ),
+    .wready_o    ( core_dmi_ready_o ), // wrclk
+    .wdata_i     ( core_dmi_resp_i  ),
+    .wdepth_o    (                  ), // left unconnected
+
+    .clk_rd_i    ( tck_i            ),
+    .rst_rd_ni   ( trst_ni          ),
+    .rvalid_o    ( jtag_dmi_valid_o ),
+    .rready_i    ( jtag_dmi_ready_i ),
+    .rdata_o     ( jtag_dmi_resp_o  ),
+    .rdepth_o    (                  ) // left unconnected
+  );
+
+endmodule : dmi_cdc
diff --git a/verilog/rtl/dmi_jtag.sv b/verilog/rtl/dmi_jtag.sv
new file mode 100644
index 0000000..917cdc6
--- /dev/null
+++ b/verilog/rtl/dmi_jtag.sv
@@ -0,0 +1,264 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dmi_jtag.sv
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+* Date:   19.7.2018
+*
+* Description: JTAG DMI (debug module interface)
+*
+*/
+
+module dmi_jtag #( // JTAG-side DMI front end: DMI data register, request FSM, TAP and clock-domain crossing
+  parameter logic [31:0] IdcodeValue = 32'h00000001
+) (
+  input  logic         clk_i,      // DMI Clock
+  input  logic         rst_ni,     // Asynchronous reset active low
+  input  logic         testmode_i,
+
+  output logic         dmi_rst_no, // hard reset
+  output dm::dmi_req_t dmi_req_o,
+  output logic         dmi_req_valid_o,
+  input  logic         dmi_req_ready_i,
+
+  input dm::dmi_resp_t dmi_resp_i,
+  output logic         dmi_resp_ready_o,
+  input  logic         dmi_resp_valid_i,
+
+  input  logic         tck_i,    // JTAG test clock pad
+  input  logic         tms_i,    // JTAG test mode select pad
+  input  logic         trst_ni,  // JTAG test reset pad
+  input  logic         td_i,     // JTAG test data input pad
+  output logic         td_o,     // JTAG test data output pad
+  output logic         tdo_oe_o  // Data out output enable
+);
+  assign       dmi_rst_no = rst_ni; // rst_ni is passed straight through
+
+  logic        test_logic_reset;
+  logic        shift_dr;
+  logic        update_dr;
+  logic        capture_dr;
+  logic        dmi_access;
+  logic        dtmcs_select;
+  logic        dmi_reset;
+  logic        dmi_tdi;
+  logic        dmi_tdo;
+
+  dm::dmi_req_t  dmi_req;
+  logic          dmi_req_ready;
+  logic          dmi_req_valid;
+
+  dm::dmi_resp_t dmi_resp;
+  logic          dmi_resp_valid;
+  logic          dmi_resp_ready;
+
+  typedef struct packed {
+    logic [6:0]  address;
+    logic [31:0] data;
+    logic [1:0]  op;
+  } dmi_t; // view of the 41-bit DMI data register: {address, data, op}
+
+  typedef enum logic [1:0] {
+    DMINoError = 2'h0, DMIReservedError = 2'h1,
+    DMIOPFailed = 2'h2, DMIBusy = 2'h3
+  } dmi_error_e;
+
+  typedef enum logic [2:0] { Idle, Read, WaitReadValid, Write, WaitWriteValid } state_e;
+  state_e state_d, state_q;
+
+  logic [$bits(dmi_t)-1:0] dr_d, dr_q;
+  logic [6:0] address_d, address_q;
+  logic [31:0] data_d, data_q;
+
+  dmi_t  dmi;
+  assign dmi          = dmi_t'(dr_q);
+  assign dmi_req.addr = address_q;
+  assign dmi_req.data = data_q;
+  assign dmi_req.op   = (state_q == Write) ? dm::DTM_WRITE : dm::DTM_READ; // DTM_READ in every state other than Write
+  // we will always be ready to accept the data we requested
+  assign dmi_resp_ready = 1'b1;
+
+  logic error_dmi_busy;
+  dmi_error_e error_d, error_q;
+
+  always_comb begin : p_fsm
+    error_dmi_busy = 1'b0;
+    // default assignments
+    state_d   = state_q;
+    address_d = address_q;
+    data_d    = data_q;
+    error_d   = error_q;
+
+    dmi_req_valid = 1'b0;
+
+    unique case (state_q)
+      Idle: begin
+        // only accept a new request while no sticky error is pending
+        if (dmi_access && update_dr && (error_q == DMINoError)) begin
+          // save address and value
+          address_d = dmi.address;
+          data_d = dmi.data;
+          if (dm::dtm_op_e'(dmi.op) == dm::DTM_READ) begin
+            state_d = Read;
+          end else if (dm::dtm_op_e'(dmi.op) == dm::DTM_WRITE) begin
+            state_d = Write;
+          end
+          // else this is a nop and we can stay here
+        end
+      end
+
+      Read: begin
+        dmi_req_valid = 1'b1;
+        if (dmi_req_ready) begin
+          state_d = WaitReadValid;
+        end
+      end
+
+      WaitReadValid: begin
+        // load the response data into data_q, it is captured into the shift register on capture_dr
+        if (dmi_resp_valid) begin
+          data_d = dmi_resp.data;
+          state_d = Idle;
+        end
+      end
+
+      Write: begin
+        dmi_req_valid = 1'b1;
+        // request accepted, go back to idle (no response is awaited for writes)
+        if (dmi_req_ready) begin
+          state_d = Idle;
+        end
+      end
+
+      default: begin
+        // covers WaitWriteValid, which no visible transition enters: just wait for idle here
+        if (dmi_resp_valid) begin
+          state_d = Idle;
+        end
+      end
+    endcase
+
+    // update_dr means we got another request but we didn't finish
+    // the one in progress, this state is sticky
+    if (update_dr && state_q != Idle) begin
+      error_dmi_busy = 1'b1;
+    end
+
+    // if capture_dr goes high while we are in the read state
+    // or in the corresponding wait state we are not giving back a valid word
+    // -> throw an error
+    if (capture_dr && state_q inside {Read, WaitReadValid}) begin
+      error_dmi_busy = 1'b1;
+    end
+
+    if (error_dmi_busy) begin
+      error_d = DMIBusy;
+    end
+    // clear sticky error flag (takes priority over setting it in the same cycle)
+    if (dmi_reset && dtmcs_select) begin
+      error_d = DMINoError;
+    end
+  end
+
+  // shift register, shifted out LSB first
+  assign dmi_tdo = dr_q[0];
+
+  always_comb begin : p_shift
+    dr_d    = dr_q;
+
+    if (capture_dr) begin
+      if (dmi_access) begin
+        if (error_q == DMINoError && !error_dmi_busy) begin
+          dr_d = {address_q, data_q, DMINoError};
+        // DMI was busy, report an error
+        end else if (error_q == DMIBusy || error_dmi_busy) begin
+          dr_d = {address_q, data_q, DMIBusy};
+        end // any other error_q value leaves dr_d unchanged
+      end
+    end
+
+    if (shift_dr) begin
+      if (dmi_access) begin
+        dr_d = {dmi_tdi, dr_q[$bits(dr_q)-1:1]}; // shift right, new bit enters at the MSB
+      end
+    end
+
+    if (test_logic_reset) begin // assigned last, so it overrides capture and shift
+      dr_d = '0;
+    end
+  end
+
+  always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs
+    if (!trst_ni) begin
+      dr_q      <= '0;
+      state_q   <= Idle;
+      address_q <= '0;
+      data_q    <= '0;
+      error_q   <= DMINoError;
+    end else begin
+      dr_q      <= dr_d;
+      state_q   <= state_d;
+      address_q <= address_d;
+      data_q    <= data_d;
+      error_q   <= error_d;
+    end
+  end
+
+  // ---------
+  // TAP
+  // ---------
+  dmi_jtag_tap #(
+    .IrLength (5),
+    .IdcodeValue(IdcodeValue)
+  ) i_dmi_jtag_tap (
+    .tck_i (tck_i) ,
+    .tms_i (tms_i),
+    .trst_ni (trst_ni),
+    .td_i    (td_i),
+    .td_o    (td_o),
+    .tdo_oe_o(tdo_oe_o),
+    .testmode_i (testmode_i),
+    .test_logic_reset_o ( test_logic_reset ),
+    .shift_dr_o         ( shift_dr         ),
+    .update_dr_o        ( update_dr        ),
+    .capture_dr_o       ( capture_dr       ),
+    .dmi_access_o       ( dmi_access       ),
+    .dtmcs_select_o     ( dtmcs_select     ),
+    .dmi_reset_o        ( dmi_reset        ),
+    .dmi_error_i        ( error_q          ),
+    .dmi_tdi_o          ( dmi_tdi          ),
+    .dmi_tdo_i          ( dmi_tdo          )
+  );
+
+  // ---------
+  // CDC
+  // ---------
+  dmi_cdc i_dmi_cdc (
+    // JTAG side (master side)
+    .tck_i  (tck_i),
+    .trst_ni (trst_ni),
+    .jtag_dmi_req_i    ( dmi_req          ),
+    .jtag_dmi_ready_o  ( dmi_req_ready    ),
+    .jtag_dmi_valid_i  ( dmi_req_valid    ),
+    .jtag_dmi_resp_o   ( dmi_resp         ),
+    .jtag_dmi_valid_o  ( dmi_resp_valid   ),
+    .jtag_dmi_ready_i  ( dmi_resp_ready   ),
+    // core side
+    .clk_i,
+    .rst_ni,
+    .core_dmi_req_o    ( dmi_req_o        ),
+    .core_dmi_valid_o  ( dmi_req_valid_o  ),
+    .core_dmi_ready_i  ( dmi_req_ready_i  ),
+    .core_dmi_resp_i   ( dmi_resp_i       ),
+    .core_dmi_ready_o  ( dmi_resp_ready_o ),
+    .core_dmi_valid_i  ( dmi_resp_valid_i )
+  );
+
+endmodule : dmi_jtag
diff --git a/verilog/rtl/dmi_jtag_tap.sv b/verilog/rtl/dmi_jtag_tap.sv
new file mode 100644
index 0000000..e90f914
--- /dev/null
+++ b/verilog/rtl/dmi_jtag_tap.sv
@@ -0,0 +1,345 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   dmi_jtag_tap.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   19.7.2018
+ *
+ * Description: JTAG TAP for DMI (according to debug spec 0.13)
+ *
+ */
+
+module dmi_jtag_tap #(  // JTAG TAP: TAP state machine plus IR, BYPASS, IDCODE and DTMCS registers
+  parameter int unsigned IrLength = 5,  // IR width in bits; ir_reg_e below encodes values up to 'h1f
+  // JTAG IDCODE Value
+  parameter logic [31:0] IdcodeValue = 32'h00000001
+  // xxxx             version
+  // xxxxxxxxxxxxxxxx part number
+  // xxxxxxxxxxx      manufacturer id
+  // 1                required by standard
+) (
+  input  logic        tck_i,    // JTAG test clock pad
+  input  logic        tms_i,    // JTAG test mode select pad
+  input  logic        trst_ni,  // JTAG test reset pad
+  input  logic        td_i,     // JTAG test data input pad
+  output logic        td_o,     // JTAG test data output pad
+  output logic        tdo_oe_o, // Data out output enable
+  input  logic        testmode_i,         // forwarded to scanmode_i of the TCK inverter
+  output logic        test_logic_reset_o, // high while the TAP FSM is in TestLogicReset
+  output logic        shift_dr_o,         // high while the TAP FSM is in ShiftDr
+  output logic        update_dr_o,        // high while the TAP FSM is in UpdateDr
+  output logic        capture_dr_o,       // high while the TAP FSM is in CaptureDr
+
+  // high while the active instruction (jtag_ir_q) is DMIACCESS
+  output logic        dmi_access_o,
+  // high while the active instruction (jtag_ir_q) is DTMCSR
+  output logic        dtmcs_select_o,
+  // dmireset field of the DTMCS register (request to clear the error state)
+  output logic        dmi_reset_o,
+  input  logic [1:0]  dmi_error_i,  // loaded into dtmcs.dmistat when DTMCS is captured
+  // test data to submodule
+  output logic        dmi_tdi_o,
+  // test data in from submodule
+  input  logic        dmi_tdo_i
+);
+
+  // TDI is forwarded unmodified to the submodule
+  assign dmi_tdi_o = td_i;
+
+  typedef enum logic [3:0] {  // the 16 TAP controller states
+    TestLogicReset, RunTestIdle, SelectDrScan,
+    CaptureDr, ShiftDr, Exit1Dr, PauseDr, Exit2Dr,
+    UpdateDr, SelectIrScan, CaptureIr, ShiftIr,
+    Exit1Ir, PauseIr, Exit2Ir, UpdateIr
+  } tap_state_e;
+
+  tap_state_e tap_state_q, tap_state_d;  // current / next TAP state
+
+  typedef enum logic [IrLength-1:0] {  // instruction encodings decoded by this TAP
+    BYPASS0   = 'h0,   // selects the 1-bit bypass register
+    IDCODE    = 'h1,   // selects the 32-bit IDCODE register
+    DTMCSR    = 'h10,  // selects the DTMCS register
+    DMIACCESS = 'h11,  // selects the DMI register (dmi_tdo_i is routed to TDO)
+    BYPASS1   = 'h1f   // selects the 1-bit bypass register
+  } ir_reg_e;
+
+  typedef struct packed {  // 32-bit layout of the DTMCS data register
+    logic [31:18] zero1;         // captured as 0
+    logic         dmihardreset;  // captured as 0; not used anywhere else in this module
+    logic         dmireset;      // drives dmi_reset_o
+    logic         zero0;         // captured as 0
+    logic [14:12] idle;          // captured as 1
+    logic [11:10] dmistat;       // captured from dmi_error_i
+    logic [9:4]   abits;         // captured as 7
+    logic [3:0]   version;       // captured as 1; bit 0 is the bit shifted out on TDO
+  } dtmcs_t;
+
+  // ----------------
+  // IR logic
+  // ----------------
+
+  // shift register
+  logic [IrLength-1:0]  jtag_ir_shift_d, jtag_ir_shift_q;
+  // IR register -> this gets captured from shift register upon update_ir
+  ir_reg_e              jtag_ir_d, jtag_ir_q;
+  logic capture_ir, shift_ir, update_ir; // pause_ir
+
+  always_comb begin : p_jtag  // next-state logic of the IR; a later `if` overrides an earlier one
+    jtag_ir_shift_d = jtag_ir_shift_q;
+    jtag_ir_d       = jtag_ir_q;
+
+    // IR shift register: shifts towards bit 0, td_i enters at the MSB
+    if (shift_ir) begin
+      jtag_ir_shift_d = {td_i, jtag_ir_shift_q[IrLength-1:1]};
+    end
+
+    // capture IR register: load the fixed pattern 4'b0101, resized to IrLength bits
+    if (capture_ir) begin
+      jtag_ir_shift_d =  IrLength'(4'b0101);
+    end
+
+    // update IR register: the shifted-in value becomes the active instruction
+    if (update_ir) begin
+      jtag_ir_d = ir_reg_e'(jtag_ir_shift_q);
+    end
+
+    // synchronous test-logic reset: clears the shift register and selects IDCODE
+    if (test_logic_reset_o) begin
+      jtag_ir_shift_d = '0;
+      jtag_ir_d       = IDCODE;
+    end
+  end
+
+  always_ff @(posedge tck_i, negedge trst_ni) begin : p_jtag_ir_reg
+    if (!trst_ni) begin
+      jtag_ir_shift_q <= '0;
+      jtag_ir_q       <= IDCODE;  // IDCODE is the active instruction after TRST
+    end else begin
+      jtag_ir_shift_q <= jtag_ir_shift_d;
+      jtag_ir_q       <= jtag_ir_d;
+    end
+  end
+
+  // ----------------
+  // TAP DR Regs
+  // ----------------
+  // - Bypass
+  // - IDCODE
+  // - DTM CS
+  logic [31:0] idcode_d, idcode_q;  // IDCODE shift register
+  logic        idcode_select;       // active instruction is IDCODE
+  logic        bypass_select;       // active instruction is BYPASS0, BYPASS1 or not decoded
+  dtmcs_t      dtmcs_d, dtmcs_q;    // DTMCS shift register
+  logic        bypass_d, bypass_q;  // this is a 1-bit register
+
+  assign dmi_reset_o = dtmcs_q.dmireset;  // NOTE(review): read from the shift register itself - confirm the consumer qualifies it
+
+  always_comb begin  // next-state logic of the data registers; a later `if` overrides an earlier one
+    idcode_d = idcode_q;
+    bypass_d = bypass_q;
+    dtmcs_d  = dtmcs_q;
+
+    if (capture_dr_o) begin  // parallel-load the selected data register
+      if (idcode_select) idcode_d = IdcodeValue;
+      if (bypass_select) bypass_d = 1'b0;
+      if (dtmcs_select_o) begin
+        dtmcs_d  = '{
+                      zero1        : '0,
+                      dmihardreset : 1'b0,
+                      dmireset     : 1'b0,
+                      zero0        : '0,
+                      idle         : 3'd1, // 1: Enter Run-Test/Idle and leave it immediately
+                      dmistat      : dmi_error_i, // 0: No error, 1: Op failed, 2: too fast
+                      abits        : 6'd7, // The size of address in dmi
+                      version      : 4'd1  // Version described in spec version 0.13 (and later?)
+                    };
+      end
+    end
+
+    if (shift_dr_o) begin  // shift the selected register towards bit 0; td_i enters at the MSB
+      if (idcode_select)  idcode_d = {td_i, 31'(idcode_q >> 1)};
+      if (bypass_select)  bypass_d = td_i;
+      if (dtmcs_select_o) dtmcs_d  = {td_i, 31'(dtmcs_q >> 1)};
+    end
+
+    if (test_logic_reset_o) begin  // only idcode and bypass are reloaded here; dtmcs_d is left as is
+      idcode_d = IdcodeValue;
+      bypass_d = 1'b0;
+    end
+  end
+
+  // ----------------
+  // Data reg select
+  // ----------------
+  always_comb begin : p_data_reg_sel  // one-hot decode of the active instruction
+    dmi_access_o   = 1'b0;
+    dtmcs_select_o = 1'b0;
+    idcode_select  = 1'b0;
+    bypass_select  = 1'b0;
+    unique case (jtag_ir_q)
+      BYPASS0:   bypass_select  = 1'b1;
+      IDCODE:    idcode_select  = 1'b1;
+      DTMCSR:    dtmcs_select_o = 1'b1;
+      DMIACCESS: dmi_access_o   = 1'b1;
+      BYPASS1:   bypass_select  = 1'b1;
+      default:   bypass_select  = 1'b1;  // any other IR value selects the bypass register
+    endcase
+  end
+
+  // ----------------
+  // Output select
+  // ----------------
+  logic tdo_mux;  // bit that is registered into td_o
+
+  always_comb begin : p_out_sel
+    // we are shifting out the IR register
+    if (shift_ir) begin
+      tdo_mux = jtag_ir_shift_q[0];
+    // here we are shifting the DR register
+    end else begin
+      unique case (jtag_ir_q)
+        IDCODE:         tdo_mux = idcode_q[0];     // Reading ID code
+        DTMCSR:         tdo_mux = dtmcs_q.version[0];  // bit 0 of the DTMCS shift register
+        DMIACCESS:      tdo_mux = dmi_tdo_i;       // Read from DMI TDO
+        default:        tdo_mux = bypass_q;      // BYPASS instruction
+      endcase
+    end
+  end
+
+  // ----------------
+  // DFT
+  // ----------------
+  logic tck_n;  // clock of the TDO output registers, produced by i_tck_inv from tck_i
+
+  prim_generic_clock_inv #(
+    .HasScanMode(1'b1)
+  ) i_tck_inv (
+    .clk_i      ( tck_i      ),
+    .clk_no     ( tck_n      ),
+    .scanmode_i ( testmode_i )
+  );
+
+  // TDO changes state at negative edge of TCK
+  always_ff @(posedge tck_n, negedge trst_ni) begin : p_tdo_regs
+    if (!trst_ni) begin
+      td_o     <= 1'b0;
+      tdo_oe_o <= 1'b0;
+    end else begin
+      td_o     <= tdo_mux;
+      tdo_oe_o <= (shift_ir | shift_dr_o);  // output enable is asserted only while shifting IR or DR
+    end
+  end
+  // ----------------
+  // TAP FSM
+  // ----------------
+  // Determination of next state (purely combinatorial) and decode of the per-state strobes
+  always_comb begin : p_tap_fsm
+
+    test_logic_reset_o = 1'b0;  // all strobes default to 0 and are set in exactly one state each
+
+    capture_dr_o       = 1'b0;
+    shift_dr_o         = 1'b0;
+    update_dr_o        = 1'b0;
+
+    capture_ir         = 1'b0;
+    shift_ir           = 1'b0;
+    // pause_ir           = 1'b0; unused
+    update_ir          = 1'b0;
+
+    unique case (tap_state_q)  // tap_state_d has no default; every one of the 16 states assigns it
+      TestLogicReset: begin
+        tap_state_d = (tms_i) ? TestLogicReset : RunTestIdle;
+        test_logic_reset_o = 1'b1;
+      end
+      RunTestIdle: begin
+        tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+      end
+      // DR Path
+      SelectDrScan: begin
+        tap_state_d = (tms_i) ? SelectIrScan : CaptureDr;
+      end
+      CaptureDr: begin
+        capture_dr_o = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
+      end
+      ShiftDr: begin
+        shift_dr_o = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
+      end
+      Exit1Dr: begin
+        tap_state_d = (tms_i) ? UpdateDr : PauseDr;
+      end
+      PauseDr: begin
+        tap_state_d = (tms_i) ? Exit2Dr : PauseDr;
+      end
+      Exit2Dr: begin
+        tap_state_d = (tms_i) ? UpdateDr : ShiftDr;
+      end
+      UpdateDr: begin
+        update_dr_o = 1'b1;
+        tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+      end
+      // IR Path
+      SelectIrScan: begin
+        tap_state_d = (tms_i) ? TestLogicReset : CaptureIr;
+      end
+      // In this controller state, the shift register bank in the
+      // Instruction Register parallel loads a pattern of fixed values on
+      // the rising edge of TCK. The two least significant bits must always
+      // be "01".
+      CaptureIr: begin
+        capture_ir = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
+      end
+      // In this controller state, the instruction register gets connected
+      // between TDI and TDO, and the captured pattern gets shifted on
+      // each rising edge of TCK. The instruction available on the TDI
+      // pin is also shifted in to the instruction register.
+      ShiftIr: begin
+        shift_ir = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
+      end
+      Exit1Ir: begin
+        tap_state_d = (tms_i) ? UpdateIr : PauseIr;
+      end
+      PauseIr: begin
+        // pause_ir = 1'b1; // unused
+        tap_state_d = (tms_i) ? Exit2Ir : PauseIr;
+      end
+      Exit2Ir: begin
+        tap_state_d = (tms_i) ? UpdateIr : ShiftIr;
+      end
+      // In this controller state, the instruction in the instruction
+      // shift register is transferred to the Instruction Register and
+      // becomes the current instruction. NOTE(review): the text used to say
+      // "falling edge of TCK", but jtag_ir_q is clocked on posedge tck_i here.
+      UpdateIr: begin
+        update_ir = 1'b1;
+        tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+      end
+      //default: ; // can't actually happen since case is full
+    endcase
+  end
+
+  always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs
+    if (!trst_ni) begin
+      tap_state_q <= RunTestIdle;  // NOTE(review): TRST puts the FSM in RunTestIdle, not TestLogicReset - confirm intended
+      idcode_q    <= IdcodeValue;
+      bypass_q    <= 1'b0;
+      dtmcs_q     <= '0;
+    end else begin
+      tap_state_q <= tap_state_d;
+      idcode_q    <= idcode_d;
+      bypass_q    <= bypass_d;
+      dtmcs_q     <= dtmcs_d;
+    end
+  end
+
+endmodule : dmi_jtag_tap
diff --git a/verilog/rtl/down_clocking_even.v b/verilog/rtl/down_clocking_even.v
new file mode 100644
index 0000000..49a06ca
--- /dev/null
+++ b/verilog/rtl/down_clocking_even.v
@@ -0,0 +1,37 @@
+/*Down clocking module

+Output clock frequency is the original frequency divided by an even number

+*/

+module	down_clocking_even(	// clock divider: o_clk toggles each time the counter reaches the minus_one result

+input	clk_i,	// source clock

+input	rst_ni,	// active-low reset, sampled on posedge clk_i (not in the sensitivity list)

+input	[15:0]i_divisor,	// division setting; 0 bypasses the divider (o_clk = clk_i)

+output	o_clk	// divided clock, or clk_i while go is low

+);

+

+wire	[15:0]divisor;	// o_result of minus_one_0; presumably i_divisor-1 (minus_one is defined elsewhere - confirm)

+wire	borrow;	// o_borrow of minus_one_0; not used in this module

+

+minus_one	minus_one_0(

+.i_operand(i_divisor),

+.o_result(divisor),

+.o_borrow(borrow)

+);

+

+wire	go;	// divider enable

+assign	go=((i_divisor!=0)&&rst_ni);	// run only with a non-zero divisor and reset released

+reg	[15:0]ct;	// counts clk_i cycles since the last toggle

+reg	clk;	// divided clock register

+always@(posedge clk_i )

+	if(!rst_ni)begin

+		ct<=0;

+		clk<=0;

+	end

+	else if(go)begin

+		if(ct>=divisor)begin	// terminal count reached: restart the counter and toggle the output

+			ct<=0;

+			clk<=~clk;

+		end

+		else ct<=ct+1;

+	end

+assign	o_clk=go?clk:clk_i;	// while go is low (divisor 0 or reset active) clk_i is passed through

+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/down_clocking_odd.v b/verilog/rtl/down_clocking_odd.v
new file mode 100644
index 0000000..3395908
--- /dev/null
+++ b/verilog/rtl/down_clocking_odd.v
@@ -0,0 +1,69 @@
+/*Author: Zhuxu

+	m99a1@yahoo.cn

+Down clocking module

+Output clock frequency is the original frequency divided by an odd number

+*/

+module down_clocking_odd(	// clock divider built from a posedge and a negedge counter; o_clk toggles on each rise of c

+input	clk_i,	// source clock; both edges are used

+input	rst_ni,	// active-low reset, sampled on the clock edges (not in any sensitivity list)

+input	[15:0]i_divisor,	// division setting; 0 bypasses the divider (o_clk = clk_i)

+output	o_clk	// divided clock, or clk_i while go is low

+);

+

+reg	a,b;	// a is updated on posedge clk_i, b on negedge clk_i

+wire	c;	// high while both a and b are low

+

+assign	c=(~a)&(~b);

+wire	[15:0]divisor;	// o_result of minus_one_0; presumably i_divisor-1 (minus_one is defined elsewhere - confirm)

+wire	borrow;	// o_borrow of minus_one_0; not used in this module

+minus_one	minus_one_0(

+.i_operand(i_divisor),

+.o_result(divisor),

+.o_borrow(borrow)

+);

+

+wire	go;	// divider enable

+assign	go=((i_divisor!=0)&&rst_ni);	// run only with a non-zero divisor and reset released

+reg	[15:0]ct_0;	// counter of the posedge half, advances only while a is set

+always@(posedge clk_i )

+	if(!rst_ni)begin

+		a<=0;

+		ct_0<=0;	

+	end

+	else if(go)begin

+		if(a)begin

+			if(ct_0>=divisor)begin	// terminal count reached: clear a and restart the counter

+				ct_0<=0;

+				a<=0;

+			end

+			else ct_0<=ct_0+1;

+		end

+		else if(c)a<=c;	// a is set when both a and b are low

+	end

+

+

+reg	[15:0]ct_1;	// counter of the negedge half, advances only while b is set

+always@(negedge clk_i )

+	if(!rst_ni)begin

+		b<=0;

+		ct_1<=0;	

+	end

+	else if(go)begin

+		if(b)begin

+			if(ct_1>=divisor)begin	// terminal count reached: clear b and restart the counter

+				ct_1<=0;

+				b<=0;

+			end

+			else ct_1<=ct_1+1;

+		end

+		else if(c)b<=c;	// b is set when both a and b are low

+	end

+

+reg	clk;	// divided clock register, clocked by c instead of clk_i

+always@(posedge c)

+	if(!rst_ni)clk<=0;	// NOTE(review): only evaluated on a rising edge of c - confirm clk is reliably reset

+	else	clk<=~clk;

+

+assign	o_clk=go?clk:clk_i;	// while go is low (divisor 0 or reset active) clk_i is passed through

+

+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/fifo_async.sv b/verilog/rtl/fifo_async.sv
new file mode 100644
index 0000000..0add43c
--- /dev/null
+++ b/verilog/rtl/fifo_async.sv
@@ -0,0 +1,203 @@
+
+// Generic asynchronous fifo for use in a variety of devices.
+
+
+module fifo_async #(  // dual-clock FIFO; read/write pointers cross domains as gray-coded values
+  parameter  int unsigned Width  = 16,  // width of one data word in bits
+  parameter  int unsigned Depth  = 3,   // number of storage entries
+  localparam int unsigned DepthW = $clog2(Depth+1) // derived parameter representing [0..Depth]
+) (
+  // write port
+  input                  clk_wr_i,
+  input                  rst_wr_ni,
+  input                  wvalid_i,
+  output                 wready_o,   // low while the FIFO is full as seen from the write side
+  input [Width-1:0]      wdata_i,
+  output [DepthW-1:0]    wdepth_o,   // fill level computed in the write clock domain
+
+  // read port
+  input                  clk_rd_i,
+  input                  rst_rd_ni,
+  output                 rvalid_o,   // low while the FIFO is empty as seen from the read side
+  input                  rready_i,
+  output [Width-1:0]     rdata_o,    // combinational read of the entry at the read pointer
+  output [DepthW-1:0]    rdepth_o    // fill level computed in the read clock domain
+);
+
+  // NOTE(review): $clog2(1) is 0, so Depth = 1 would give zero-width vectors below - confirm the minimum Depth
+  localparam int unsigned PTRV_W = $clog2(Depth);  // bits needed to index the storage
+  localparam logic [PTRV_W-1:0] DepthMinus1 = PTRV_W'(Depth - 1);  // last valid index
+  localparam int unsigned PTR_WIDTH = PTRV_W+1;  // index bits plus one wrap bit (MSB)
+
+  logic [PTR_WIDTH-1:0]    fifo_wptr, fifo_rptr;
+  logic [PTR_WIDTH-1:0]    fifo_wptr_sync_combi,   fifo_rptr_sync;
+  logic [PTR_WIDTH-1:0]    fifo_wptr_gray_sync,    fifo_rptr_gray_sync;
+  logic [PTR_WIDTH-1:0]    fifo_wptr_gray,         fifo_rptr_gray;
+  logic                    fifo_incr_wptr, fifo_incr_rptr, empty;
+
+  logic full_wclk, full_rclk;  // full condition evaluated in the write / read clock domain
+
+  assign wready_o = !full_wclk;
+  assign rvalid_o = !empty;
+
+  // create the write and read pointers
+
+  assign fifo_incr_wptr = wvalid_i & wready_o;  // a write is accepted
+  assign fifo_incr_rptr = rvalid_o & rready_i;  // a read is accepted
+
+  ///////////////////
+  // write pointer //
+  ///////////////////
+
+  always_ff @(posedge clk_wr_i or negedge rst_wr_ni)  // binary pointer: index 0..Depth-1 plus wrap bit
+    if (!rst_wr_ni) begin
+      fifo_wptr <= {(PTR_WIDTH){1'b0}};
+    end else if (fifo_incr_wptr) begin
+      if (fifo_wptr[PTR_WIDTH-2:0] == DepthMinus1) begin  // last entry: index back to 0, wrap bit toggles
+        fifo_wptr <= {~fifo_wptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+      end else begin
+        fifo_wptr <= fifo_wptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+    end
+  end
+
+  // gray-coded version
+  always_ff @(posedge clk_wr_i or negedge rst_wr_ni)  // registers dec2gray() of the same next-pointer value
+    if (!rst_wr_ni) begin
+      fifo_wptr_gray <= {(PTR_WIDTH){1'b0}};
+    end else if (fifo_incr_wptr) begin
+      if (fifo_wptr[PTR_WIDTH-2:0] == DepthMinus1) begin
+        fifo_wptr_gray <= dec2gray({~fifo_wptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}});
+      end else begin
+        fifo_wptr_gray <= dec2gray(fifo_wptr + {{(PTR_WIDTH-1){1'b0}},1'b1});
+      end
+    end
+
+  prim_generic_flop_2sync #(.Width(PTR_WIDTH)) sync_wptr (  // gray write pointer into the read clock domain
+    .clk_i    (clk_rd_i),
+    .rst_ni   (rst_rd_ni),
+    .d_i      (fifo_wptr_gray),
+    .q_o      (fifo_wptr_gray_sync));
+
+  assign fifo_wptr_sync_combi = gray2dec(fifo_wptr_gray_sync);  // decoded combinationally (not registered)
+
+  //////////////////
+  // read pointer //
+  //////////////////
+
+  always_ff @(posedge clk_rd_i or negedge rst_rd_ni)  // binary pointer: index 0..Depth-1 plus wrap bit
+    if (!rst_rd_ni) begin
+      fifo_rptr <= {(PTR_WIDTH){1'b0}};
+    end else if (fifo_incr_rptr) begin
+      if (fifo_rptr[PTR_WIDTH-2:0] == DepthMinus1) begin  // last entry: index back to 0, wrap bit toggles
+        fifo_rptr <= {~fifo_rptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+      end else begin
+        fifo_rptr <= fifo_rptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+    end
+  end
+
+  // gray-coded version
+  always_ff @(posedge clk_rd_i or negedge rst_rd_ni)  // registers dec2gray() of the same next-pointer value
+    if (!rst_rd_ni) begin
+      fifo_rptr_gray <= {(PTR_WIDTH){1'b0}};
+    end else if (fifo_incr_rptr) begin
+      if (fifo_rptr[PTR_WIDTH-2:0] == DepthMinus1) begin
+        fifo_rptr_gray <= dec2gray({~fifo_rptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}});
+      end else begin
+        fifo_rptr_gray <= dec2gray(fifo_rptr + {{(PTR_WIDTH-1){1'b0}},1'b1});
+      end
+    end
+
+  prim_generic_flop_2sync #(.Width(PTR_WIDTH)) sync_rptr (  // gray read pointer into the write clock domain
+    .clk_i    (clk_wr_i),
+    .rst_ni   (rst_wr_ni),
+    .d_i      (fifo_rptr_gray),
+    .q_o      (fifo_rptr_gray_sync));
+
+  always_ff @(posedge clk_wr_i or negedge rst_wr_ni)  // unlike the write pointer, the decoded value is registered
+    if (!rst_wr_ni) begin
+      fifo_rptr_sync <= {PTR_WIDTH{1'b0}};
+    end else begin
+      fifo_rptr_sync <= gray2dec(fifo_rptr_gray_sync);
+    end
+
+  //////////////////
+  // empty / full //
+  //////////////////
+  // full: both pointers have the same index but opposite wrap bits
+  assign  full_wclk = (fifo_wptr == (fifo_rptr_sync ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));
+  assign  full_rclk = (fifo_wptr_sync_combi == (fifo_rptr ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));
+
+  // Current depth in the write clock side
+  logic  wptr_msb;
+  logic  rptr_sync_msb;
+  logic  [PTRV_W-1:0] wptr_value;
+  logic  [PTRV_W-1:0] rptr_sync_value;
+  assign wptr_msb = fifo_wptr[PTR_WIDTH-1];
+  assign rptr_sync_msb = fifo_rptr_sync[PTR_WIDTH-1];
+  assign wptr_value = fifo_wptr[0+:PTRV_W];
+  assign rptr_sync_value = fifo_rptr_sync[0+:PTRV_W];
+  assign wdepth_o = (full_wclk) ? DepthW'(Depth) :  // equal wrap bits: plain difference, else add Depth
+                    (wptr_msb == rptr_sync_msb) ? DepthW'(wptr_value) - DepthW'(rptr_sync_value) :
+                    (DepthW'(Depth) - DepthW'(rptr_sync_value) + DepthW'(wptr_value)) ;
+
+  // Same again in the read clock side
+  assign empty = (fifo_wptr_sync_combi ==  fifo_rptr);  // empty: pointers match including the wrap bit
+  logic  rptr_msb;
+  logic  wptr_sync_msb;
+  logic  [PTRV_W-1:0] rptr_value;
+  logic  [PTRV_W-1:0] wptr_sync_value;
+  assign wptr_sync_msb = fifo_wptr_sync_combi[PTR_WIDTH-1];
+  assign rptr_msb = fifo_rptr[PTR_WIDTH-1];
+  assign wptr_sync_value = fifo_wptr_sync_combi[0+:PTRV_W];
+  assign rptr_value = fifo_rptr[0+:PTRV_W];
+  assign rdepth_o = (full_rclk) ? DepthW'(Depth) :  // equal wrap bits: plain difference, else add Depth
+                    (wptr_sync_msb == rptr_msb) ? DepthW'(wptr_sync_value) - DepthW'(rptr_value) :
+                    (DepthW'(Depth) - DepthW'(rptr_value) + DepthW'(wptr_sync_value)) ;
+
+  /////////////
+  // storage //
+  /////////////
+
+  logic [Width-1:0] storage [Depth];  // written on clk_wr_i, read combinationally; has no reset
+
+  always_ff @(posedge clk_wr_i)
+    if (fifo_incr_wptr) begin
+      storage[fifo_wptr[PTR_WIDTH-2:0]] <= wdata_i;
+    end
+
+  assign rdata_o = storage[fifo_rptr[PTR_WIDTH-2:0]];
+
+  // gray code conversion functions.  algorithm walks up from 0..N-1
+  // then flips the upper bit and walks down from N-1 to 0.
+
+  function automatic [PTR_WIDTH-1:0] dec2gray(input logic [PTR_WIDTH-1:0] decval);
+    logic [PTR_WIDTH-1:0] decval_sub;
+    logic [PTR_WIDTH-2:0] decval_in;
+    logic                 unused_decval_msb;
+    // mirrored index (Depth-1 - index), used when the wrap bit is set
+    decval_sub = (PTR_WIDTH)'(Depth) - {1'b0, decval[PTR_WIDTH-2:0]} - 1'b1;
+
+    {unused_decval_msb, decval_in} = decval[PTR_WIDTH-1] ? decval_sub : decval;
+    // Was done in two assigns for low bits and top bit
+    // but that generates a (bogus) verilator warning, so do in one assign
+    dec2gray = {decval[PTR_WIDTH-1],
+                {1'b0,decval_in[PTR_WIDTH-2:1]} ^ decval_in[PTR_WIDTH-2:0]};
+  endfunction
+
+  function automatic [PTR_WIDTH-1:0] gray2dec(input logic [PTR_WIDTH-1:0] grayval);
+    logic [PTR_WIDTH-2:0] dec_tmp, dec_tmp_sub;
+    logic                 unused_decsub_msb;
+    // decode the index bits from the top down: each bit is the bit above XOR the gray bit
+    dec_tmp[PTR_WIDTH-2] = grayval[PTR_WIDTH-2];
+    for (int i = PTR_WIDTH-3; i >= 0; i--)
+      dec_tmp[i] = dec_tmp[i+1]^grayval[i];
+    {unused_decsub_msb, dec_tmp_sub} = (PTR_WIDTH-1)'(Depth) - {1'b0, dec_tmp} - 1'b1;
+    if (grayval[PTR_WIDTH-1])  // wrap bit set: undo the mirroring applied by dec2gray
+      gray2dec = {1'b1,dec_tmp_sub};
+    else
+      gray2dec = {1'b0,dec_tmp};
+  endfunction
+
+
+
+endmodule
diff --git a/verilog/rtl/fifo_sync.sv b/verilog/rtl/fifo_sync.sv
new file mode 100644
index 0000000..b0ab38a
--- /dev/null
+++ b/verilog/rtl/fifo_sync.sv
@@ -0,0 +1,147 @@
+
+// Generic synchronous fifo for use in a variety of devices.
+
+
+module fifo_sync #(  // single-clock FIFO with optional pass-through and zeroed output when empty
+  parameter int unsigned Width       = 16,  // width of one data word in bits
+  parameter bit Pass                 = 1'b1, // if == 1 allow requests to pass through empty FIFO
+  parameter int unsigned Depth       = 4,    // number of storage entries; 0 selects pure pass-through
+  parameter bit OutputZeroIfEmpty    = 1'b1, // if == 1 always output 0 when FIFO is empty
+  // derived parameter
+  localparam int          DepthW     = tlul_pkg::vbits(Depth+1)
+) (
+  input                   clk_i,
+  input                   rst_ni,
+  // synchronous clear / flush port
+  input                   clr_i,
+  // write port
+  input                   wvalid_i,
+  output                  wready_o,
+  input   [Width-1:0]     wdata_i,
+  // read port
+  output                  rvalid_o,
+  input                   rready_i,
+  output  [Width-1:0]     rdata_o,
+  // occupancy
+  output  [DepthW-1:0]    depth_o
+);
+
+  // FIFO is in complete passthrough mode
+  if (Depth == 0) begin : gen_passthru_fifo
+
+    assign depth_o = 1'b0; //output is meaningless
+
+    // device facing
+    assign rvalid_o = wvalid_i;
+    assign rdata_o = wdata_i;
+
+    // host facing
+    assign wready_o = rready_i;
+
+    // this avoids lint warnings
+    logic unused_clr;
+    assign unused_clr = clr_i;
+
+  // Normal FIFO construction
+  end else begin : gen_normal_fifo
+
+    localparam int unsigned PTRV_W    = tlul_pkg::vbits(Depth);  // width of the storage index
+    localparam int unsigned PTR_WIDTH = PTRV_W+1;                // index bits plus one wrap bit (MSB)
+
+    logic [PTR_WIDTH-1:0] fifo_wptr, fifo_rptr;
+    logic                 fifo_incr_wptr, fifo_incr_rptr, fifo_empty;
+
+    // create the write and read pointers
+    logic  full, empty;  // empty is the externally visible flag; fifo_empty is the raw pointer compare
+    logic  wptr_msb;
+    logic  rptr_msb;
+    logic  [PTRV_W-1:0] wptr_value;
+    logic  [PTRV_W-1:0] rptr_value;
+
+    assign wptr_msb = fifo_wptr[PTR_WIDTH-1];
+    assign rptr_msb = fifo_rptr[PTR_WIDTH-1];
+    assign wptr_value = fifo_wptr[0+:PTRV_W];
+    assign rptr_value = fifo_rptr[0+:PTRV_W];
+    assign depth_o = (full)                 ? DepthW'(Depth) :  // equal wrap bits: plain difference, else add Depth
+                     (wptr_msb == rptr_msb) ? DepthW'(wptr_value) - DepthW'(rptr_value) :
+                     (DepthW'(Depth) - DepthW'(rptr_value) + DepthW'(wptr_value)) ;
+
+    assign fifo_incr_wptr = wvalid_i & wready_o;  // a write is accepted
+    assign fifo_incr_rptr = rvalid_o & rready_i;  // a read is accepted
+
+    assign wready_o = ~full;
+    assign rvalid_o = ~empty;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin  // write pointer; clr_i has priority over a write
+      if (!rst_ni) begin
+        fifo_wptr <= {(PTR_WIDTH){1'b0}};
+      end else if (clr_i) begin
+        fifo_wptr <= {(PTR_WIDTH){1'b0}};
+      end else if (fifo_incr_wptr) begin
+        if (fifo_wptr[PTR_WIDTH-2:0] == (PTR_WIDTH-1)'(Depth-1)) begin  // last entry: index to 0, wrap bit toggles
+          fifo_wptr <= {~fifo_wptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+        end else begin
+          fifo_wptr <= fifo_wptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+        end
+      end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin  // read pointer; clr_i has priority over a read
+      if (!rst_ni) begin
+        fifo_rptr <= {(PTR_WIDTH){1'b0}};
+      end else if (clr_i) begin
+        fifo_rptr <= {(PTR_WIDTH){1'b0}};
+      end else if (fifo_incr_rptr) begin
+        if (fifo_rptr[PTR_WIDTH-2:0] == (PTR_WIDTH-1)'(Depth-1)) begin  // last entry: index to 0, wrap bit toggles
+          fifo_rptr <= {~fifo_rptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+        end else begin
+          fifo_rptr <= fifo_rptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+        end
+      end
+    end
+
+    assign  full       = (fifo_wptr == (fifo_rptr ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));  // same index, opposite wrap bits
+    assign  fifo_empty = (fifo_wptr ==  fifo_rptr);  // pointers match including the wrap bit
+
+
+    // the generate blocks below are needed to avoid lint errors due to array indexing
+    // in the case where the fifo only has one storage element
+    logic [Depth-1:0][Width-1:0] storage;  // storage has no reset
+    logic [Width-1:0] storage_rdata;
+    if (Depth == 1) begin : gen_depth_eq1
+      assign storage_rdata = storage[0];
+
+      always_ff @(posedge clk_i)
+        if (fifo_incr_wptr) begin
+          storage[0] <= wdata_i;
+        end
+    // fifo with more than one storage element
+    end else begin : gen_depth_gt1
+      assign storage_rdata = storage[fifo_rptr[PTR_WIDTH-2:0]];
+
+      always_ff @(posedge clk_i)
+        if (fifo_incr_wptr) begin
+          storage[fifo_wptr[PTR_WIDTH-2:0]] <= wdata_i;
+        end
+    end
+
+    logic [Width-1:0] rdata_int;
+    if (Pass == 1'b1) begin : gen_pass  // an incoming word is visible on the read port in the same cycle
+      assign rdata_int = (fifo_empty && wvalid_i) ? wdata_i : storage_rdata;
+      assign empty = fifo_empty & ~wvalid_i;
+    end else begin : gen_nopass
+      assign rdata_int = storage_rdata;
+      assign empty = fifo_empty;
+    end
+
+    if (OutputZeroIfEmpty == 1'b1) begin : gen_output_zero
+      assign rdata_o = empty ? 'b0 : rdata_int;
+    end else begin : gen_no_output_zero
+      assign rdata_o = rdata_int;
+    end
+
+  end // block: gen_normal_fifo
+
+
+
+endmodule
diff --git a/verilog/rtl/fpnew_cast_multi.sv b/verilog/rtl/fpnew_cast_multi.sv
new file mode 100644
index 0000000..98ed07b
--- /dev/null
+++ b/verilog/rtl/fpnew_cast_multi.sv
@@ -0,0 +1,758 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_cast_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig  = '1,
+  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig = '1,
+  // FPU configuration
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig),
+                                                     fpnew_pkg::max_int_width(IntFmtConfig)),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                   clk_i,
+  input  logic                   rst_ni,
+  // Input signals
+  input  logic [WIDTH-1:0]       operands_i, // 1 operand
+  input  logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand
+  input  fpnew_pkg::roundmode_e  rnd_mode_i,
+  input  fpnew_pkg::operation_e  op_i,
+  input  logic                   op_mod_i,
+  input  fpnew_pkg::fp_format_e  src_fmt_i,
+  input  fpnew_pkg::fp_format_e  dst_fmt_i,
+  input  fpnew_pkg::int_format_e int_fmt_i,
+  input  TagType                 tag_i,
+  input  AuxType                 aux_i,
+  // Input Handshake
+  input  logic                   in_valid_i,
+  output logic                   in_ready_o,
+  input  logic                   flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]       result_o,
+  output fpnew_pkg::status_t     status_o,
+  output logic                   extension_bit_o,
+  output TagType                 tag_o,
+  output AuxType                 aux_o,
+  // Output handshake
+  output logic                   out_valid_o,
+  input  logic                   out_ready_i,
+  // Indication of valid data in flight
+  output logic                   busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  localparam int unsigned MAX_INT_WIDTH   = fpnew_pkg::max_int_width(IntFmtConfig);
+  // Encoding returned by super_format(); presumably covers every enabled FP format - TODO confirm
+  localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+  // Field widths of that encoding and the bias computed from its exponent width
+  localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+  localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+  localparam int unsigned SUPER_BIAS     = 2**(SUPER_EXP_BITS - 1) - 1;
+
+  // The internal mantissa holds either a mantissa with its normal bit or an entire integer
+  localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH);
+  // Width of the leading-zero count used for renormalization of the internal mantissa
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH);
+  // The internal exponent must be able to represent the smallest denormal input value as signed
+  // or the number of bits in an integer; one extra bit is added on top of the largest requirement
+  localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH),
+      fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1;
+  // Pipelines: NumPipeRegs is split over the input / middle / output register banks (explicit types)
+  localparam int unsigned NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam int unsigned NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam int unsigned NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Outputs of the last input-pipeline stage as plain (non-array) signals
+  logic [WIDTH-1:0]       operands_q;
+  logic [NUM_FORMATS-1:0] is_boxed_q;
+  logic                   op_mod_q;
+  fpnew_pkg::fp_format_e  src_fmt_q;
+  fpnew_pkg::fp_format_e  dst_fmt_q;
+  fpnew_pkg::int_format_e int_fmt_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                   [0:NUM_INP_REGS][WIDTH-1:0]       inp_pipe_operands_q;
+  logic                   [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e  [0:NUM_INP_REGS]                  inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e  [0:NUM_INP_REGS]                  inp_pipe_op_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_op_mod_q;
+  fpnew_pkg::fp_format_e  [0:NUM_INP_REGS]                  inp_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e  [0:NUM_INP_REGS]                  inp_pipe_dst_fmt_q;
+  fpnew_pkg::int_format_e [0:NUM_INP_REGS]                  inp_pipe_int_fmt_q;
+  TagType                 [0:NUM_INP_REGS]                  inp_pipe_tag_q;
+  AuxType                 [0:NUM_INP_REGS]                  inp_pipe_aux_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: element 0 of every pipeline array is driven directly by the module inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_src_fmt_q[0]  = src_fmt_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_int_fmt_q[0]  = int_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Register stages of the input pipeline, one generate iteration per stage
+  for (genvar stg = 0; stg < NUM_INP_REGS; stg++) begin : gen_input_pipeline
+    // Load enable for the payload registers of this stage
+    logic reg_ena;
+    // This stage can take new data when the following stage
+    // 1. currently holds a bubble (not valid) that may be overwritten, or
+    // 2. is itself ready to pass its data on
+    assign inp_pipe_ready[stg] = ~inp_pipe_valid_q[stg+1] | inp_pipe_ready[stg+1];
+    // Payload is captured only if a valid item is offered while the stage is ready
+    assign reg_ena = inp_pipe_valid_q[stg] & inp_pipe_ready[stg];
+    // Valid bit: loaded whenever the stage is ready, cleared synchronously by flush_i
+    `FFLARNC(inp_pipe_valid_q[stg+1], inp_pipe_valid_q[stg], inp_pipe_ready[stg], flush_i, 1'b0, clk_i, rst_ni)
+    // Payload registers of the stage, all sharing reg_ena as load enable
+    `FFL(inp_pipe_operands_q[stg+1], inp_pipe_operands_q[stg], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[stg+1], inp_pipe_is_boxed_q[stg], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[stg+1], inp_pipe_rnd_mode_q[stg], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[stg+1],       inp_pipe_op_q[stg],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[stg+1],   inp_pipe_op_mod_q[stg],   reg_ena, '0)
+    `FFL(inp_pipe_src_fmt_q[stg+1],  inp_pipe_src_fmt_q[stg],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_dst_fmt_q[stg+1],  inp_pipe_dst_fmt_q[stg],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_int_fmt_q[stg+1],  inp_pipe_int_fmt_q[stg],  reg_ena, fpnew_pkg::int_format_e'(0))
+    `FFL(inp_pipe_tag_q[stg+1],      inp_pipe_tag_q[stg],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[stg+1],      inp_pipe_aux_q[stg],      reg_ena, AuxType'('0))
+  end
+  // Output stage: tap the last element of the input pipeline arrays for use by the logic below
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS];
+  assign op_mod_q   = inp_pipe_op_mod_q[NUM_INP_REGS];
+  assign src_fmt_q  = inp_pipe_src_fmt_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign int_fmt_q  = inp_pipe_int_fmt_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic src_is_int, dst_is_int; // if 0, it's a float
+  // The source is an integer for I2F operations, the destination is an integer for F2I operations
+  assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F);
+  assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I);
+
+  logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
+  // Per-FP-format views of the operand; the entry indexed by the source format is used later
+  logic        [NUM_FORMATS-1:0]                    fmt_sign;
+  logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
+  logic        [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa;
+  logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC
+
+  fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info; // classifier output per FP format
+  // Per-integer-format extended operand, plus sign / value / magnitude of the selected one
+  logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val;
+  logic                                          int_sign;
+  logic [INT_MAN_WIDTH-1:0]                      int_value, int_mantissa;
+
+  // FP input initialization: per FP format, classify the operand and unpack its fields
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      // Classify input (the low FP_WIDTH bits of the operand, with this format's boxing flag)
+      fpnew_classifier #(
+        .FpFormat    ( fpnew_pkg::fp_format_e'(fmt) ),
+        .NumOperands ( 1                            )
+      ) i_fpnew_classifier (
+        .operands_i ( operands_q[FP_WIDTH-1:0] ),
+        .is_boxed_i ( is_boxed_q[fmt]          ),
+        .info_o     ( info[fmt]                )
+      );
+      // Unpack the operand: sign is the top bit, the exponent sits above the MAN_BITS mantissa bits
+      assign fmt_sign[fmt]     = operands_q[FP_WIDTH-1];
+      assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]});
+      assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad
+      // Compensation for the difference in mantissa widths used for leading-zero count
+      assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS);
+    end else begin : inactive_format
+      assign info[fmt]                   = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_sign[fmt]               = fpnew_pkg::DONT_CARE;             // format disabled
+      assign fmt_exponent[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_mantissa[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Widen the integer operand to the internal mantissa width, once per integer format
+  for (genvar int_idx = 0; int_idx < int'(NUM_INT_FORMATS); int_idx++) begin : gen_sign_extend_int
+    // Bit width of the integer format handled by this iteration
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(int_idx));
+
+    if (!IntFmtConfig[int_idx]) begin : inactive_format
+      assign ifmt_input_val[int_idx] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end else begin : active_format // only active formats
+      always_comb begin : sign_ext_input
+        // fill with the operand MSB (zero when unsigned), then overwrite the low INT_WIDTH bits
+        ifmt_input_val[int_idx]                = '{default: ~op_mod_q & operands_q[INT_WIDTH-1]};
+        ifmt_input_val[int_idx][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0];
+      end
+    end
+  end
+
+  // Construct input mantissa from integer: pick the extended value of the requested int format
+  assign int_value    = ifmt_input_val[int_fmt_q];
+  assign int_sign     = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative
+  assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative
+
+  // select mantissa by source kind: integer magnitude, or the mantissa of the source FP format
+  assign encoded_mant = src_is_int ? int_mantissa : fmt_mantissa[src_fmt_q];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic signed [INT_EXP_WIDTH-1:0] src_bias;      // src format bias
+  logic signed [INT_EXP_WIDTH-1:0] src_exp;       // src format exponent (biased)
+  logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal
+  logic signed [INT_EXP_WIDTH-1:0] src_offset;    // src offset within mantissa
+  // All four quantities are selected by the source FP format
+  assign src_bias      = signed'(fpnew_pkg::bias(src_fmt_q));
+  assign src_exp       = fmt_exponent[src_fmt_q];
+  assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal});
+  assign src_offset    = fmt_shift_compensation[src_fmt_q];
+
+  logic                            input_sign;   // input sign
+  logic signed [INT_EXP_WIDTH-1:0] input_exp;    // unbiased true exponent
+  logic        [INT_MAN_WIDTH-1:0] input_mant;   // normalized input mantissa
+  logic                            mant_is_zero; // for integer zeroes
+
+  logic signed [INT_EXP_WIDTH-1:0] fp_input_exp;  // exponent candidate for FP sources
+  logic signed [INT_EXP_WIDTH-1:0] int_input_exp; // exponent candidate for integer sources
+
+  // Input mantissa needs to be normalized
+  logic [LZC_RESULT_WIDTH-1:0] renorm_shamt;     // renormalization shift amount
+  logic [LZC_RESULT_WIDTH:0]   renorm_shamt_sgn; // shift amount zero-extended by one bit
+
+  // Leading-zero counter is needed for renormalization
+  lzc #(
+    .WIDTH ( INT_MAN_WIDTH ),
+    .MODE  ( 1             ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( encoded_mant ),
+    .cnt_o   ( renorm_shamt ),
+    .empty_o ( mant_is_zero )
+  );
+  assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt});
+
+  // Get the sign from the proper source
+  assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q];
+  // Left-align the mantissa by the leading-zero count (zeroes are shifted in from the right)
+  assign input_mant = encoded_mant << renorm_shamt;
+  // Unbias exponent and compensate for shift
+  assign fp_input_exp  = signed'(src_exp + src_subnormal - src_bias -
+                                 renorm_shamt_sgn + src_offset); // compensate for shift
+  assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
+  // Use the exponent candidate that matches the kind of source operand
+  assign input_exp     = src_is_int ? int_input_exp : fp_input_exp;
+
+  logic signed [INT_EXP_WIDTH-1:0] destination_exp;  // re-biased exponent for destination
+
+  // Rebias the exponent with the bias of the destination FP format
+  assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q));
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Outputs of the last internal-pipeline stage as plain (non-array) signals
+  logic                            input_sign_q;
+  logic signed [INT_EXP_WIDTH-1:0] input_exp_q;
+  logic [INT_MAN_WIDTH-1:0]        input_mant_q;
+  logic signed [INT_EXP_WIDTH-1:0] destination_exp_q;
+  logic                            src_is_int_q;
+  logic                            dst_is_int_q;
+  fpnew_pkg::fp_info_t             info_q;
+  logic                            mant_is_zero_q;
+  logic                            op_mod_q2;
+  fpnew_pkg::roundmode_e           rnd_mode_q;
+  fpnew_pkg::fp_format_e           src_fmt_q2;
+  fpnew_pkg::fp_format_e           dst_fmt_q2;
+  fpnew_pkg::int_format_e          int_fmt_q2;
+  // Internal pipeline signals, index i holds signal after i register stages
+
+
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_input_sign_q;
+  logic signed            [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q;
+  logic                   [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q;
+  logic signed            [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_src_is_int_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_dst_is_int_q;
+  fpnew_pkg::fp_info_t    [0:NUM_MID_REGS]                    mid_pipe_info_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_mant_zero_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_op_mod_q;
+  fpnew_pkg::roundmode_e  [0:NUM_MID_REGS]                    mid_pipe_rnd_mode_q;
+  fpnew_pkg::fp_format_e  [0:NUM_MID_REGS]                    mid_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e  [0:NUM_MID_REGS]                    mid_pipe_dst_fmt_q;
+  fpnew_pkg::int_format_e [0:NUM_MID_REGS]                    mid_pipe_int_fmt_q;
+  TagType                 [0:NUM_MID_REGS]                    mid_pipe_tag_q;
+  AuxType                 [0:NUM_MID_REGS]                    mid_pipe_aux_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: element 0 is fed by the normalization logic and the input pipeline outputs
+  assign mid_pipe_input_sign_q[0] = input_sign;
+  assign mid_pipe_input_exp_q[0]  = input_exp;
+  assign mid_pipe_input_mant_q[0] = input_mant;
+  assign mid_pipe_dest_exp_q[0]   = destination_exp;
+  assign mid_pipe_src_is_int_q[0] = src_is_int;
+  assign mid_pipe_dst_is_int_q[0] = dst_is_int;
+  assign mid_pipe_info_q[0]       = info[src_fmt_q];
+  assign mid_pipe_mant_zero_q[0]  = mant_is_zero;
+  assign mid_pipe_op_mod_q[0]     = op_mod_q;
+  assign mid_pipe_rnd_mode_q[0]   = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_src_fmt_q[0]    = src_fmt_q;
+  assign mid_pipe_dst_fmt_q[0]    = dst_fmt_q;
+  assign mid_pipe_int_fmt_q[0]    = int_fmt_q;
+  assign mid_pipe_tag_q[0]        = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]        = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]      = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Register stages of the internal pipeline, one generate iteration per stage
+  for (genvar stg = 0; stg < NUM_MID_REGS; stg++) begin : gen_inside_pipeline
+    // Load enable for the payload registers of this stage
+    logic reg_ena;
+    // This stage can take new data when the following stage
+    // 1. currently holds a bubble (not valid) that may be overwritten, or
+    // 2. is itself ready to pass its data on
+    assign mid_pipe_ready[stg] = ~mid_pipe_valid_q[stg+1] | mid_pipe_ready[stg+1];
+    // Payload is captured only if a valid item is offered while the stage is ready
+    assign reg_ena = mid_pipe_valid_q[stg] & mid_pipe_ready[stg];
+    // Valid bit: loaded whenever the stage is ready, cleared synchronously by flush_i
+    `FFLARNC(mid_pipe_valid_q[stg+1], mid_pipe_valid_q[stg], mid_pipe_ready[stg], flush_i, 1'b0, clk_i, rst_ni)
+    // Payload registers of the stage, all sharing reg_ena as load enable
+    `FFL(mid_pipe_input_sign_q[stg+1], mid_pipe_input_sign_q[stg], reg_ena, '0)
+    `FFL(mid_pipe_input_exp_q[stg+1],  mid_pipe_input_exp_q[stg],  reg_ena, '0)
+    `FFL(mid_pipe_input_mant_q[stg+1], mid_pipe_input_mant_q[stg], reg_ena, '0)
+    `FFL(mid_pipe_dest_exp_q[stg+1],   mid_pipe_dest_exp_q[stg],   reg_ena, '0)
+    `FFL(mid_pipe_src_is_int_q[stg+1], mid_pipe_src_is_int_q[stg], reg_ena, '0)
+    `FFL(mid_pipe_dst_is_int_q[stg+1], mid_pipe_dst_is_int_q[stg], reg_ena, '0)
+    `FFL(mid_pipe_info_q[stg+1],       mid_pipe_info_q[stg],       reg_ena, '0)
+    `FFL(mid_pipe_mant_zero_q[stg+1],  mid_pipe_mant_zero_q[stg],  reg_ena, '0)
+    `FFL(mid_pipe_op_mod_q[stg+1],     mid_pipe_op_mod_q[stg],     reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[stg+1],   mid_pipe_rnd_mode_q[stg],   reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_src_fmt_q[stg+1],    mid_pipe_src_fmt_q[stg],    reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_dst_fmt_q[stg+1],    mid_pipe_dst_fmt_q[stg],    reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_int_fmt_q[stg+1],    mid_pipe_int_fmt_q[stg],    reg_ena, fpnew_pkg::int_format_e'(0))
+    `FFL(mid_pipe_tag_q[stg+1],        mid_pipe_tag_q[stg],        reg_ena, TagType'('0))
+    `FFL(mid_pipe_aux_q[stg+1],        mid_pipe_aux_q[stg],        reg_ena, AuxType'('0))
+  end
+  // Output stage: tap the last element of the internal pipeline arrays for the casting logic
+  assign input_sign_q      = mid_pipe_input_sign_q[NUM_MID_REGS];
+  assign input_exp_q       = mid_pipe_input_exp_q[NUM_MID_REGS];
+  assign input_mant_q      = mid_pipe_input_mant_q[NUM_MID_REGS];
+  assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS];
+  assign src_is_int_q      = mid_pipe_src_is_int_q[NUM_MID_REGS];
+  assign dst_is_int_q      = mid_pipe_dst_is_int_q[NUM_MID_REGS];
+  assign info_q            = mid_pipe_info_q[NUM_MID_REGS];
+  assign mant_is_zero_q    = mid_pipe_mant_zero_q[NUM_MID_REGS];
+  assign op_mod_q2         = mid_pipe_op_mod_q[NUM_MID_REGS];
+  assign rnd_mode_q        = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign src_fmt_q2        = mid_pipe_src_fmt_q[NUM_MID_REGS];
+  assign dst_fmt_q2        = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+  assign int_fmt_q2        = mid_pipe_int_fmt_q[NUM_MID_REGS];
+
+  // --------
+  // Casting
+  // --------
+  logic [INT_EXP_WIDTH-1:0] final_exp;        // after eventual adjustments
+
+  logic [2*INT_MAN_WIDTH:0]  preshift_mant;    // mantissa before final shift
+  logic [2*INT_MAN_WIDTH:0]  destination_mant; // mantissa from shifter, with rnd bit
+  logic [SUPER_MAN_BITS-1:0] final_mant;       // mantissa after adjustments
+  logic [MAX_INT_WIDTH-1:0]  final_int;        // integer shifted in position
+
+  logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization
+  // Round/sticky pairs: bit [1] is the round bit, bit [0] the OR of all lower (sticky) bits
+  logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
+  logic       of_before_round, uf_before_round; // over-/underflow detected before rounding
+
+
+  // Perform adjustments to mantissa and exponent
+  always_comb begin : cast_value
+    // Default assignment (later blocking assignments in this block override these values)
+    final_exp       = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits
+    preshift_mant   = '0;  // initialize mantissa container with zeroes
+    denorm_shamt    = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa
+    of_before_round = 1'b0;
+    uf_before_round = 1'b0;
+
+    // Place mantissa to the left of the shifter
+    preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1);
+    // Handle INT casts. NOTE(review): the overflow test below never looks at input_sign_q, so a
+    // signed cast of exactly -2**(int_width-1) also takes the overflow branch - confirm intended
+    if (dst_is_int_q) begin
+      // By default right shift mantissa to be an integer
+      denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q);
+      // overflow: when converting to unsigned the range is larger by one
+      if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin
+        denorm_shamt    = '0; // prevent shifting
+        of_before_round = 1'b1;
+      // underflow
+      end else if (input_exp_q < -1) begin
+        denorm_shamt    = MAX_INT_WIDTH + 1; // all bits go to the sticky
+        uf_before_round = 1'b1;
+      end
+    // Handle FP over-/underflows
+    end else begin
+      // Overflow or infinities (for proper rounding)
+      if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) ||
+          (~src_is_int_q && info_q.is_inf)) begin
+        final_exp       = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value
+        preshift_mant   = '1;                           // largest normal value and RS bits set
+        of_before_round = 1'b1;
+      // Denormalize underflowing values
+      end else if (destination_exp_q < 1 &&
+                   destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+        final_exp       = '0; // denormal result
+        denorm_shamt    = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting
+        uf_before_round = 1'b1;
+      // Limit the shift to retain sticky bits
+      end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+        final_exp       = '0; // denormal result
+        denorm_shamt    = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky
+        uf_before_round = 1'b1;
+      end
+    end
+  end
+
+  localparam int unsigned NUM_FP_STICKY  = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // w/o man, 1., R
+  localparam int unsigned NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R
+
+  // Mantissa adjustment shift
+  assign destination_mant = preshift_mant >> denorm_shamt;
+  // Extract final mantissa and round bit, discard the normal bit (for FP)
+  assign {final_mant, fp_round_sticky_bits[1]} =
+      destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1];
+  assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1];
+  // Collapse sticky bits: OR together everything below the respective round bit
+  assign fp_round_sticky_bits[0]  = (| {destination_mant[NUM_FP_STICKY-1:0]});
+  assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]});
+
+  // select RS bits for destination operation
+  assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic [WIDTH-1:0] pre_round_abs;  // absolute value of result before rnd
+  logic             of_after_round; // overflow
+  logic             uf_after_round; // underflow
+  // Per-FP-format candidates; the entry of the destination format is selected further below
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format
+  logic [NUM_FORMATS-1:0]            fmt_of_after_round;
+  logic [NUM_FORMATS-1:0]            fmt_uf_after_round;
+
+  logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format
+  // Outputs of the rounding unit
+  logic             rounded_sign;
+  logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding
+  logic             result_true_zero;
+
+  logic [WIDTH-1:0] rounded_int_res; // after possible inversion
+  logic             rounded_int_res_zero; // after rounding
+
+
+  // Build the {exponent, mantissa} word handed to the rounding unit, once per FP format
+  for (genvar fp_idx = 0; fp_idx < int'(NUM_FORMATS); fp_idx++) begin : gen_res_assemble
+    // Field widths of the format handled by this iteration
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fp_idx));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fp_idx));
+
+    if (!FpFmtConfig[fp_idx]) begin : inactive_format
+      assign fmt_pre_round_abs[fp_idx] = '{default: fpnew_pkg::DONT_CARE};
+    end else begin : active_format
+      always_comb begin : assemble_result
+        fmt_pre_round_abs[fp_idx] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend
+      end
+    end
+  end
+
+  // Extend the shifted integer to the full result width, once per integer format
+  for (genvar int_idx = 0; int_idx < int'(NUM_INT_FORMATS); int_idx++) begin : gen_int_res_sign_ext
+    // Bit width of the integer format handled by this iteration
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(int_idx));
+
+    if (!IntFmtConfig[int_idx]) begin : inactive_format
+      assign ifmt_pre_round_abs[int_idx] = '{default: fpnew_pkg::DONT_CARE};
+    end else begin : active_format
+      always_comb begin : assemble_result
+        // sign-extend result: replicate bit INT_WIDTH-1 everywhere, then place the low bits
+        ifmt_pre_round_abs[int_idx]                = '{default: final_int[INT_WIDTH-1]};
+        ifmt_pre_round_abs[int_idx][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0];
+      end
+    end
+  end
+
+  // Select the pre-rounding value: integer candidate for F2I, else the destination FP format's
+  assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2];
+
+  fpnew_rounding #(
+    .AbsWidth ( WIDTH )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs     ),
+    .sign_i                  ( input_sign_q      ), // source format
+    .round_sticky_bits_i     ( round_sticky_bits ),
+    .rnd_mode_i              ( rnd_mode_q        ),
+    .effective_subtraction_i ( 1'b0              ), // no operation happened
+    .abs_rounded_o           ( rounded_abs       ),
+    .sign_o                  ( rounded_sign      ),
+    .exact_zero_o            ( result_true_zero  )
+  );
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; // assembled regular FP result per format
+
+  // Per-format post-processing: exponent classification and assembly of the signed result
+  for (genvar fp_idx = 0; fp_idx < int'(NUM_FORMATS); fp_idx++) begin : gen_sign_inject
+    // Field widths of the format handled by this iteration
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fp_idx));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fp_idx));
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fp_idx));
+
+    if (!FpFmtConfig[fp_idx]) begin : inactive_format
+      assign fmt_result[fp_idx]         = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_of_after_round[fp_idx] = fpnew_pkg::DONT_CARE;
+      assign fmt_uf_after_round[fp_idx] = fpnew_pkg::DONT_CARE;
+    end else begin : active_format
+      always_comb begin : post_process
+        // classify the rounded exponent field: all ones -> overflow, all zeroes -> underflow
+        fmt_of_after_round[fp_idx] = rounded_abs[MAN_BITS+:EXP_BITS] == '1; // inf exp.
+        fmt_uf_after_round[fp_idx] = rounded_abs[MAN_BITS+:EXP_BITS] == '0; // denormal
+
+        // Start from all ones (NaN-boxes short formats); an integer zero source gives all zeroes
+        fmt_result[fp_idx]               = '1;
+        fmt_result[fp_idx][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q
+                                           ? '0
+                                           : {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+      end
+    end
+  end
+
+  // Classification after rounding, selected by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+
+  // Negative integer result needs to be brought into two's complement
+  assign rounded_int_res      = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs;
+  assign rounded_int_res_zero = (rounded_int_res == '0); // integer result is zero after rounding
+
+  // -------------------------
+  // FP Special case handling
+  // -------------------------
+  logic [WIDTH-1:0]   fp_special_result;
+  fpnew_pkg::status_t fp_special_status;
+  logic               fp_result_is_special;
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result; // special result per FP format
+
+  // Special result construction: a signed zero or a quiet NaN in each enabled FP format
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+    // Quiet NaN fields: all-ones exponent, only the top mantissa bit set
+    localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
+    localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : special_results
+        logic [FP_WIDTH-1:0] special_res;
+        special_res = info_q.is_zero
+                      ? input_sign_q << FP_WIDTH-1 // signed zero
+                      : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+
+        // Initialize special result with ones (NaN-box), then place the value in the low bits
+        fmt_special_result[fmt]               = '1;
+        fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Detect special case from source format (zero, NaN or not properly boxed);
+  // I2F casts don't produce a special result
+  assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero |
+                                                 info_q.is_nan |
+                                                 ~info_q.is_boxed);
+  // Signalling input NaNs raise invalid flag, otherwise no flags set
+  assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0};
+
+  // Pick the special result that was built for the destination FP format
+  assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format
+
+  // --------------------------
+  // INT Special case handling
+  // --------------------------
+  logic [WIDTH-1:0]   int_special_result;
+  fpnew_pkg::status_t int_special_status;
+  logic               int_result_is_special;
+
+  logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result;
+
+  // Special result construction
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int
+    // Set up some constants
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format
+      always_comb begin : special_results
+        automatic logic [INT_WIDTH-1:0] special_res;
+
+        // Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1
+        special_res[INT_WIDTH-2:0] = '1;       // alone yields 2**(INT_WIDTH-1)-1
+        special_res[INT_WIDTH-1]   = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1
+
+        // Negative special case (except for nans) tie to -max or 0
+        if (input_sign_q && !info_q.is_nan)
+          special_res = ~special_res;
+
+        // Initialize special result with sign-extension
+        ifmt_special_result[ifmt]                = '{default: special_res[INT_WIDTH-1]};
+        ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
+  assign int_result_is_special = info_q.is_nan | info_q.is_inf |
+                                 of_before_round | ~info_q.is_boxed |
+                                 (input_sign_q & op_mod_q2 & ~rounded_int_res_zero);
+
+  // All integer special cases are invalid
+  assign int_special_status = '{NV: 1'b1, default: 1'b0};
+
+  // Assemble result according to destination format
+  assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format
+
+  // -----------------
+  // Result selection
+  // -----------------
+  fpnew_pkg::status_t int_regular_status, fp_regular_status;
+
+  logic [WIDTH-1:0]   fp_result, int_result;
+  fpnew_pkg::status_t fp_status, int_status;
+
+  assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts
+  assign fp_regular_status.DZ = 1'b0; // no divisions
+  assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF
+  assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX;
+  assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f
+            : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round));
+  assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0};
+
+  assign fp_result  = fp_result_is_special  ? fp_special_result  : fmt_result[dst_fmt_q2];
+  assign fp_status  = fp_result_is_special  ? fp_special_status  : fp_regular_status;
+  assign int_result = int_result_is_special ? int_special_result : rounded_int_res;
+  assign int_status = int_result_is_special ? int_special_status : int_regular_status;
+
+  // Final results for output pipeline
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  logic               extension_bit;
+
+  // Select output depending on special case detection
+  assign result_d = dst_is_int_q ? int_result : fp_result;
+  assign status_d = dst_is_int_q ? int_status : fp_status;
+
+  // MSB of int result decides extension, otherwise NaN box
+  assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_ext_bit_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0]  = result_d;
+  assign out_pipe_status_q[0]  = status_d;
+  assign out_pipe_ext_bit_q[0] = extension_bit;
+  assign out_pipe_tag_q[0]     = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]     = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]   = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipleine ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1],  out_pipe_result_q[i],  reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1],  out_pipe_status_q[i],  reg_ena, '0)
+    `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],     out_pipe_tag_q[i],     reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],     out_pipe_aux_q[i],     reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS];
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/verilog/rtl/fpnew_classifier.sv b/verilog/rtl/fpnew_classifier.sv
new file mode 100644
index 0000000..5e4fab9
--- /dev/null
+++ b/verilog/rtl/fpnew_classifier.sv
@@ -0,0 +1,72 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_classifier #(
+  parameter fpnew_pkg::fp_format_e   FpFormat = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumOperands = 1,
+  // Do not change
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat)
+) (
+  input  logic                [NumOperands-1:0][WIDTH-1:0] operands_i,
+  input  logic                [NumOperands-1:0]            is_boxed_i,
+  output fpnew_pkg::fp_info_t [NumOperands-1:0]            info_o
+);
+
+  // Field widths of the selected format
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+
+  // Packed view of one operand: sign in the MSB, then exponent, then mantissa
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // One purely combinational classifier per operand
+  for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values
+
+    fp_t  value;                // operand split into sign/exponent/mantissa
+    logic is_boxed;             // boxing indication of this operand
+    logic exp_zero, exp_ones;   // exponent field is all zeroes / all ones
+    logic man_zero;             // mantissa field is all zeroes
+    logic is_normal, is_subnormal, is_zero, is_inf;
+    logic is_nan, is_signalling, is_quiet;
+
+    // Decode the raw fields once
+    assign value    = operands_i[op];
+    assign is_boxed = is_boxed_i[op];
+    assign exp_zero = (value.exponent == '0);
+    assign exp_ones = (value.exponent == '1);
+    assign man_zero = (value.mantissa == '0);
+
+    // Classes: an operand that is not boxed is a quiet NaN and nothing else
+    assign is_normal     = is_boxed && !exp_zero && !exp_ones;
+    assign is_zero       = is_boxed && exp_zero && man_zero;
+    assign is_subnormal  = is_boxed && exp_zero && !is_zero;
+    assign is_inf        = is_boxed && (exp_ones && man_zero);
+    assign is_nan        = !is_boxed || (exp_ones && !man_zero);
+    assign is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0);
+    assign is_quiet      = is_nan && !is_signalling;
+
+    // Publish the classification of this operand
+    assign info_o[op].is_boxed      = is_boxed;
+    assign info_o[op].is_normal     = is_normal;
+    assign info_o[op].is_subnormal  = is_subnormal;
+    assign info_o[op].is_zero       = is_zero;
+    assign info_o[op].is_inf        = is_inf;
+    assign info_o[op].is_nan        = is_nan;
+    assign info_o[op].is_quiet      = is_quiet;
+    assign info_o[op].is_signalling = is_signalling;
+  end
+endmodule
diff --git a/verilog/rtl/fpnew_divsqrt_multi.sv b/verilog/rtl/fpnew_divsqrt_multi.sv
new file mode 100644
index 0000000..937d345
--- /dev/null
+++ b/verilog/rtl/fpnew_divsqrt_multi.sv
@@ -0,0 +1,339 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+
+module fpnew_divsqrt_multi #( // Multi-format FP divide/square-root wrapper around div_sqrt_top_mvp
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig  = '1, // format mask: sets WIDTH and gates the FP8 path
+  // FPU configuration: number and placement of the optional pipeline registers, sideband types
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::AFTER,
+  parameter type                     TagType     = logic, // type of tag_i / tag_o
+  parameter type                     AuxType     = logic, // type of aux_i / aux_o
+  // Do not change
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [1:0][WIDTH-1:0]       operands_i, // 2 operands: [0] feeds Operand_a_DI, [1] feeds Operand_b_DI
+  input  logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands per format; not referenced inside this module
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,       // DIV starts a division, any other value starts a sqrt
+  input  fpnew_pkg::fp_format_e       dst_fmt_i,  // translated into the unit's 2-bit format select
+  input  TagType                      tag_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  input  logic                        flush_i,    // kills the unit operation and forces the FSM to IDLE
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o, // tied to 1'b1 in this module
+  output TagType                      tag_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  // Pipeline register split: NUM_INP_REGS stages before and NUM_OUT_REGS stages after the unit
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs // INSIDE is handled like AFTER: all regs at the output
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Signals at the end of the input pipeline (index NUM_INP_REGS), as non-arrays
+  logic [1:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::roundmode_e rnd_mode_q;
+  fpnew_pkg::operation_e op_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+  logic                  in_valid_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0]       inp_pipe_operands_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                       inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                       inp_pipe_op_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                       inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                       inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign op_q       = inp_pipe_op_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic [1:0]       divsqrt_fmt;      // drives Format_sel_SI of the unit
+  logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit
+  logic             input_is_fp8;     // current operation is FP8 and FP8 is enabled in FpFmtConfig
+
+  // Translate fpnew formats into divsqrt formats
+  always_comb begin : translate_fmt
+    unique case (dst_fmt_q)
+      fpnew_pkg::FP32:    divsqrt_fmt = 2'b00;
+      fpnew_pkg::FP64:    divsqrt_fmt = 2'b01;
+      fpnew_pkg::FP16:    divsqrt_fmt = 2'b10;
+      fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11;
+      default:            divsqrt_fmt = 2'b10; // maps also FP8 to FP16
+    endcase
+
+    // Only if FP8 is enabled
+    input_is_fp8 = FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8);
+
+    // If FP8 is supported, map it to an FP16 value by shifting it up 8 bits (undone on the result)
+    divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0];
+    divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1];
+  end
+
+  // ------------
+  // Control FSM
+  // ------------
+  logic in_ready;               // input handshake with upstream
+  logic div_valid, sqrt_valid;  // input signalling with unit
+  logic unit_ready, unit_done;  // status signals from unit instance
+  logic op_starting;            // high in the cycle a new operation starts
+  logic out_valid, out_ready;   // output handshake with downstream
+  logic hold_result;            // whether to put result into hold register
+  logic data_is_held;           // data in hold register is valid
+  logic unit_busy;              // valid data in flight
+  // FSM states: IDLE = waiting for work, BUSY = operation in progress, HOLD = result waits for downstream
+  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
+  fsm_state_e state_q, state_d;
+
+  // Upstream ready comes from sanitization FSM
+  assign inp_pipe_ready[NUM_INP_REGS] = in_ready;
+
+  // Start pulses are gated by the FSM ready and by flush. Any op other than DIV runs a sqrt to not lose illegal instr.
+  assign div_valid   = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign sqrt_valid  = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign op_starting = div_valid | sqrt_valid;
+
+  // FSM to safely apply and receive data from DIVSQRT unit
+  always_comb begin : flag_fsm
+    // Default assignments (every output is overridden per state below; flush overrides last)
+    in_ready     = 1'b0;
+    out_valid    = 1'b0;
+    hold_result  = 1'b0;
+    data_is_held = 1'b0;
+    unit_busy    = 1'b0;
+    state_d      = state_q;
+
+    unique case (state_q)
+      // Waiting for work
+      IDLE: begin
+        in_ready = 1'b1; // we're ready (asserted regardless of unit_ready)
+        if (in_valid_q && unit_ready) begin // New work arrives
+          state_d = BUSY; // go into processing state
+        end
+      end
+      // Operation in progress
+      BUSY: begin
+        unit_busy = 1'b1; // data in flight
+        // If the unit is done with processing
+        if (unit_done) begin
+          out_valid = 1'b1; // try to commit result downstream
+          // If downstream accepts our result
+          if (out_ready) begin
+            state_d = IDLE; // we anticipate going back to idling..
+            if (in_valid_q && unit_ready) begin // ..unless new work comes in
+              in_ready = 1'b1; // we acknowledge the instruction
+              state_d  = BUSY; // and stay busy with it
+            end
+          // Otherwise if downstream is not ready for the result
+          end else begin
+            hold_result = 1'b1; // activate the hold register
+            state_d     = HOLD; // wait for the pipeline to take the data
+          end
+        end
+      end
+      // Waiting with valid result for downstream
+      HOLD: begin
+        unit_busy    = 1'b1; // data in flight
+        data_is_held = 1'b1; // data in hold register is valid
+        out_valid    = 1'b1; // try to commit result downstream
+        // If the result is accepted by downstream
+        if (out_ready) begin
+          state_d = IDLE; // go back to idle..
+          if (in_valid_q && unit_ready) begin // ..unless new work comes in
+            in_ready = 1'b1; // acknowledge the new transaction
+            state_d  = BUSY; // will be busy with the next instruction
+          end
+        end
+      end
+      // fall into idle state otherwise (covers the unused fourth 2-bit encoding)
+      default: state_d = IDLE;
+    endcase
+
+    // Flushing overrides the other actions (in_ready, hold_result and data_is_held are left as set above)
+    if (flush_i) begin
+      unit_busy = 1'b0; // data is invalidated
+      out_valid = 1'b0; // cancel any valid data
+      state_d   = IDLE; // go to default state
+    end
+  end
+
+  // FSM status register (asynch active low rst_ni)
+  `FF(state_q, state_d, IDLE)
+
+  // Hold additional information while the operation is in progress
+  logic result_is_fp8_q;
+  TagType result_tag_q;
+  AuxType result_aux_q;
+
+  // Fill the registers every time a valid operation arrives (load FF, active low asynch rst)
+  `FFL(result_is_fp8_q, input_is_fp8,                 op_starting, '0)
+  `FFL(result_tag_q,    inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
+  `FFL(result_aux_q,    inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
+
+  // -----------------
+  // DIVSQRT instance
+  // -----------------
+  logic [63:0]        unit_result;
+  logic [WIDTH-1:0]   adjusted_result, held_result_q;
+  fpnew_pkg::status_t unit_status, held_status_q;
+
+  div_sqrt_top_mvp i_divsqrt_lei (
+   .Clk_CI           ( clk_i               ),
+   .Rst_RBI          ( rst_ni              ),
+   .Div_start_SI     ( div_valid           ),
+   .Sqrt_start_SI    ( sqrt_valid          ),
+   .Operand_a_DI     ( divsqrt_operands[0] ),
+   .Operand_b_DI     ( divsqrt_operands[1] ),
+   .RM_SI            ( rnd_mode_q          ),
+   .Precision_ctl_SI ( '0                  ),
+   .Format_sel_SI    ( divsqrt_fmt         ),
+   .Kill_SI          ( flush_i             ),
+   .Result_DO        ( unit_result         ),
+   .Fflags_SO        ( unit_status         ),
+   .Ready_SO         ( unit_ready          ),
+   .Done_SO          ( unit_done           )
+  );
+
+  // Adjust result width (64 bit -> WIDTH) and shift FP8 results back down by 8 bits
+  assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
+
+  // The Hold register (load, no rst_ni): captures result and flags when downstream stalls
+  `FFLNR(held_result_q, adjusted_result, hold_result, clk_i)
+  `FFLNR(held_status_q, unit_status,     hold_result, clk_i)
+
+  // --------------
+  // Output Select
+  // --------------
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  // Prioritize hold register data (selected while the FSM is in HOLD)
+  assign result_d = data_is_held ? held_result_q : adjusted_result;
+  assign status_d = data_is_held ? held_status_q : unit_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from the unit/hold register and the FSM
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = result_tag_q;
+  assign out_pipe_aux_q[0]    = result_aux_q;
+  assign out_pipe_valid_q[0]  = out_valid;
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign out_ready = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); // any pipeline valid (incl. stage 0) or FSM busy
+endmodule
diff --git a/verilog/rtl/fpnew_fma.sv b/verilog/rtl/fpnew_fma.sv
new file mode 100644
index 0000000..420e793
--- /dev/null
+++ b/verilog/rtl/fpnew_fma.sv
@@ -0,0 +1,671 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_fma #(
+  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic                      clk_i,
+  input logic                      rst_ni,
+  // Input signals
+  input logic [2:0][WIDTH-1:0]     operands_i, // 3 operands
+  input logic [2:0]                is_boxed_i, // 3 operands
+  input fpnew_pkg::roundmode_e     rnd_mode_i,
+  input fpnew_pkg::operation_e     op_i,
+  input logic                      op_mod_i,
+  input TagType                    tag_i,
+  input AuxType                    aux_i,
+  // Input Handshake
+  input  logic                     in_valid_i,
+  output logic                     in_ready_o,
+  input  logic                     flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]         result_o,
+  output fpnew_pkg::status_t       status_o,
+  output logic                     extension_bit_o,
+  output TagType                   tag_o,
+  output AuxType                   aux_o,
+  // Output handshake
+  output logic                     out_valid_o,
+  input  logic                     out_ready_i,
+  // Indication of valid data in flight
+  output logic                     busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  localparam int unsigned BIAS     = fpnew_pkg::bias(FpFormat);
+  // Precision bits 'p' include the implicit bit
+  localparam int unsigned PRECISION_BITS = MAN_BITS + 1;
+  // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+  localparam int unsigned LOWER_SUM_WIDTH  = 2 * PRECISION_BITS + 3;
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+  // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+  // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+  // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+  localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH));
+  // Shift amount width: maximum internal mantissa size is 3p+3 bits
+  localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3);
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][2:0]            inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages: stage i registers index i of each pipe signal into index i+1
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage (data registers only, see reg_ena below)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable the data registers only if the pipeline is ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers gated by reg_ena
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+
+  // -----------------
+  // Input processing
+  // -----------------
+  fpnew_pkg::fp_info_t [2:0] info_q;
+
+  // Classify input
+  fpnew_classifier #(
+    .FpFormat    ( FpFormat ),
+    .NumOperands ( 3        )
+    ) i_class_inputs (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o     ( info_q                            )
+  );
+
+  fp_t                 operand_a, operand_b, operand_c;
+  fpnew_pkg::fp_info_t info_a,    info_b,    info_c;
+
+  // Operation selection and operand adjustment (operands/info taken from the last input pipe stage)
+  // | \c op_q  | \c op_mod_q | Operation Adjustment
+  // |:--------:|:-----------:|---------------------
+  // | FMADD    | \c 0        | FMADD: none
+  // | FMADD    | \c 1        | FMSUB: Invert sign of operand C
+  // | FNMSUB   | \c 0        | FNMSUB: Invert sign of operand A
+  // | FNMSUB   | \c 1        | FNMADD: Invert sign of operands A and C
+  // | ADD      | \c 0        | ADD: Set operand A to +1.0
+  // | ADD      | \c 1        | SUB: Set operand A to +1.0, invert sign of operand C
+  // | MUL      | \c 0        | MUL: Set operand C to -0.0 (+0.0 if the rounding mode is RDN)
+  // | *others* | \c -        | *invalid*
+  // \note \c op_mod_q always inverts the sign of the addend (MUL replaces the addend entirely).
+  always_comb begin : op_select
+
+    // Default assignments - packing-order-agnostic
+    operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+    operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+    operand_c = inp_pipe_operands_q[NUM_INP_REGS][2];
+    info_a    = info_q[0];
+    info_b    = info_q[1];
+    info_c    = info_q[2];
+
+    // op_mod_q inverts sign of operand C
+    operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::FMADD:  ; // do nothing
+      fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+      fpnew_pkg::ADD: begin // Set multiplicand to +1
+        operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0};
+        info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
+      end
+      fpnew_pkg::MUL: begin // Addend is -0, or +0 for RDN, so that an exact zero product keeps its sign
+        operand_c = '{sign: (inp_pipe_rnd_mode_q[NUM_INP_REGS] != fpnew_pkg::RDN), exponent: '0, mantissa: '0};
+        info_c    = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+      end
+      default: begin // propagate don't cares
+        operand_a  = '{default: fpnew_pkg::DONT_CARE};
+        operand_b  = '{default: fpnew_pkg::DONT_CARE};
+        operand_c  = '{default: fpnew_pkg::DONT_CARE};
+        info_a     = '{default: fpnew_pkg::DONT_CARE};
+        info_b     = '{default: fpnew_pkg::DONT_CARE};
+        info_c     = '{default: fpnew_pkg::DONT_CARE};
+      end
+    endcase
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+  logic effective_subtraction;
+  logic tentative_sign;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf,        info_c.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan,        info_c.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+  // Effective subtraction in FMA occurs when product and addend signs differ
+  assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+  // The tentative sign of the FMA shall be the sign of the product
+  assign tentative_sign = operand_a.sign ^ operand_b.sign;
+
+  // ----------------------
+  // Special case handling
+  // ----------------------
+  fp_t                special_result;
+  fpnew_pkg::status_t special_status;
+  logic               result_is_special;
+
+  always_comb begin : special_cases
+    // Default assignments: no special case, no flags, special result preset to the canonical qNaN
+    special_result    = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+    special_status    = '0;
+    result_is_special = 1'b0;
+
+    // Handle potentially mixed NaN & infinity input => important for the case where infinity and
+    // zero are multiplied and added to a qNaN.
+    // RISC-V mandates raising the NV exception in these cases:
+    // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+    if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+      result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN
+      special_status.NV = 1'b1; // invalid operation
+    // NaN inputs cause canonical quiet NaN at the output and maybe invalid OP
+    end else if (any_operand_nan) begin
+      result_is_special = 1'b1;           // bypass FMA, output is the canonical qNaN
+      special_status.NV = signalling_nan; // raise the invalid operation flag if signalling
+    // Special cases involving infinity
+    end else if (any_operand_inf) begin
+      result_is_special = 1'b1; // bypass FMA
+      // Infinite product and infinite addend of opposite sign (inf - inf) is invalid -> qNaN default
+      if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+        special_status.NV = 1'b1; // invalid operation
+      // Handle cases where output will be inf because of inf product input
+      else if (info_a.is_inf || info_b.is_inf) begin
+        // Result is infinity with the sign of the product
+        special_result    = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0};
+      // Handle cases where the addend is inf
+      end else if (info_c.is_inf) begin
+        // Result is infinity with sign of the addend (= operand_c)
+        special_result    = '{sign: operand_c.sign, exponent: '1, mantissa: '0};
+      end
+    end
+  end
+
+  // ---------------------------
+  // Initial exponent data path
+  // ---------------------------
+  logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+  logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent;
+
+  // Zero-extend exponents into signed container - implicit width extension
+  assign exponent_a = signed'({1'b0, operand_a.exponent});
+  assign exponent_b = signed'({1'b0, operand_b.exponent});
+  assign exponent_c = signed'({1'b0, operand_c.exponent});
+
+  // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+  // with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased.
+  assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+  // Biased product exponent is the sum of encoded exponents minus the bias.
+  assign exponent_product = (info_a.is_zero || info_b.is_zero)
+                            ? 2 - signed'(BIAS) // in case the product is zero, set minimum exp.
+                            : signed'(exponent_a + info_a.is_subnormal
+                                      + exponent_b + info_b.is_subnormal
+                                      - signed'(BIAS));
+  // Exponent difference is the addend exponent minus the product exponent
+  assign exponent_difference = exponent_addend - exponent_product;
+  // The tentative exponent will be the larger of the product or addend exponent
+  assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+
+  // Right-shift amount aligning the addend to the product, from exponent_difference (unsigned)
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+
+  always_comb begin : addend_shift_amount
+    // Product-anchored case, saturated shift (addend is only in the sticky bit)
+    if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+      addend_shamt = 3 * PRECISION_BITS + 4; // addend fully shifted out of the 3p+4 bit window
+    // Addend and product will have mutual bits to add
+    else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+      addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference); // p + 3 - diff
+    // Addend-anchored case, saturated shift (product is only in the sticky bit)
+    else
+      addend_shamt = 0; // addend stays left-aligned in the window
+  end
+
+  // ------------------
+  // Product data path
+  // ------------------
+  logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c;
+  logic [2*PRECISION_BITS-1:0] product;             // the p*p product is 2p bits wide
+  logic [3*PRECISION_BITS+3:0] product_shifted;     // addends are 3p+4 bit wide (including G/R)
+
+  // Add implicit bits to mantissae
+  assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+  assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+  assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+
+  // Mantissa multiplier (a*b)
+  assign product = mantissa_a * mantissa_b;
+
+  // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+  // | 000...000 | product | RS |
+  //  <-  p+2  -> <-  2p -> < 2>
+  assign product_shifted = product << 2; // constant shift
+
+  // -----------------
+  // Addend data path
+  // -----------------
+  logic [3*PRECISION_BITS+3:0] addend_after_shift;  // upper 3p+4 bits are needed to go on
+  logic [PRECISION_BITS-1:0]   addend_sticky_bits;  // up to p bit of shifted addend are sticky
+  logic                        sticky_before_add;   // they are compressed into a single sticky bit
+  logic [3*PRECISION_BITS+3:0] addend_shifted;      // addends are 3p+4 bit wide (including G/R)
+  logic                        inject_carry_in;     // inject carry for subtractions if needed
+
+  // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits
+  // are shifted out and compressed into a sticky bit.
+  // BEFORE THE SHIFT:
+  // | mantissa_c | 000..000 |
+  //  <-    p   -> <- 3p+4 ->
+  // AFTER THE SHIFT:
+  // | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
+  //  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
+  assign {addend_after_shift, addend_sticky_bits} =
+      (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+
+  assign sticky_before_add     = (| addend_sticky_bits);
+  // assign addend_after_shift[0] = sticky_before_add;
+
+  // In case of a subtraction, the addend is inverted
+  assign addend_shifted  = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+  assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+
+  // ------
+  // Adder
+  // ------
+  logic [3*PRECISION_BITS+4:0] sum_raw;   // added one bit for the carry
+  logic                        sum_carry; // observe carry bit from sum for sign fixing
+  logic [3*PRECISION_BITS+3:0] sum;       // discard carry as sum won't overflow
+  logic                        final_sign;
+
+  //Mantissa adder (ab+c). In normal addition, it cannot overflow.
+  assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+  assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+
+  // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+  assign sum        = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+
+  // In case of a mispredicted subtraction result, do a sign flip
+  assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+                      ? 1'b1
+                      : (effective_subtraction ? 1'b0 : tentative_sign);
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic                          effective_subtraction_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_product_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
+  logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+  logic                          sticky_before_add_q;
+  logic [3*PRECISION_BITS+3:0]   sum_q;
+  logic                          final_sign_q;
+  fpnew_pkg::roundmode_e         rnd_mode_q;
+  logic                          result_is_special_q;
+  fp_t                           special_result_q;
+  fpnew_pkg::status_t            special_status_q;
+  // Internal pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
+  logic                  [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
+  logic                  [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
+  fpnew_pkg::roundmode_e [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
+  fp_t                   [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
+  fpnew_pkg::status_t    [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
+  TagType                [0:NUM_MID_REGS]                         mid_pipe_tag_q;
+  AuxType                [0:NUM_MID_REGS]                         mid_pipe_aux_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
+  assign mid_pipe_exp_prod_q[0]    = exponent_product;
+  assign mid_pipe_exp_diff_q[0]    = exponent_difference;
+  assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
+  assign mid_pipe_add_shamt_q[0]   = addend_shamt;
+  assign mid_pipe_sticky_q[0]      = sticky_before_add;
+  assign mid_pipe_sum_q[0]         = sum;
+  assign mid_pipe_final_sign_q[0]  = final_sign;
+  assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_res_is_spec_q[0] = result_is_special;
+  assign mid_pipe_spec_res_q[0]    = special_result;
+  assign mid_pipe_spec_stat_q[0]   = special_status;
+  assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]       = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the register stages: stage i registers index i of each pipe signal into index i+1
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Internal register enable for this stage (data registers only, see reg_ena below)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable the data registers only if the pipeline is ready and a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers gated by reg_ena
+    `FFL(mid_pipe_eff_sub_q[i+1],     mid_pipe_eff_sub_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_exp_prod_q[i+1],    mid_pipe_exp_prod_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_exp_diff_q[i+1],    mid_pipe_exp_diff_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_tent_exp_q[i+1],    mid_pipe_tent_exp_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_add_shamt_q[i+1],   mid_pipe_add_shamt_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_sticky_q[i+1],      mid_pipe_sticky_q[i],      reg_ena, '0)
+    `FFL(mid_pipe_sum_q[i+1],         mid_pipe_sum_q[i],         reg_ena, '0)
+    `FFL(mid_pipe_final_sign_q[i+1],  mid_pipe_final_sign_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],    mid_pipe_rnd_mode_q[i],    reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_res_q[i+1],    mid_pipe_spec_res_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_spec_stat_q[i+1],   mid_pipe_spec_stat_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_tag_q[i+1],         mid_pipe_tag_q[i],         reg_ena, TagType'('0))
+    `FFL(mid_pipe_aux_q[i+1],         mid_pipe_aux_q[i],         reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+  assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
+  assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
+  assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
+  assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
+  assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
+  assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
+  assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
+  assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+  assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
+  assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower 2p+3 bits of sum are searched
+  logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // the number of leading zeroes
+  logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // signed leading-zero count
+  logic                               lzc_zeroes;             // in case only zeroes found
+
+  logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+  logic signed [EXP_WIDTH-1:0]          normalized_exponent;
+
+  logic [3*PRECISION_BITS+4:0] sum_shifted;       // result after first normalization shift
+  logic [PRECISION_BITS:0]     final_mantissa;    // final mantissa before rounding with round bit
+  logic [2*PRECISION_BITS+2:0] sum_sticky_bits;   // remaining 2p+3 sticky bits after normalization
+  logic                        sticky_after_norm; // sticky bit after normalization
+
+  logic signed [EXP_WIDTH-1:0] final_exponent;
+
+  assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+
+  // Leading zero counter for cancellations
+  lzc #(
+    .WIDTH ( LOWER_SUM_WIDTH ),
+    .MODE  ( 1               ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( sum_lower          ),
+    .cnt_o   ( leading_zero_count ),
+    .empty_o ( lzc_zeroes         )
+  );
+
+  assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
+
+  // Normalization shift amount and exponent based on exponents and LZC (unsigned, left shifts only)
+  always_comb begin : norm_shift_amount
+    // Product-anchored case or cancellations require LZC
+    if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+      // Exponent stays non-negative after removing the leading zeroes and the lower sum is not zero
+      if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+        // Undo initial product shift, remove the counted zeroes
+        norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
+        normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+      // Otherwise: subnormal result (or lower sum all zeroes)
+      end else begin
+        // Cap the shift distance to align mantissa with minimum exponent
+        norm_shamt          = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q);
+        normalized_exponent = 0; // subnormals encoded as 0
+      end
+    // Addend-anchored case
+    end else begin
+      norm_shamt          = addend_shamt_q; // Undo the initial shift
+      normalized_exponent = tentative_exponent_q;
+    end
+  end
+
+  // Do the large normalization shift
+  assign sum_shifted       = sum_q << norm_shamt;
+
+  // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+  // or right of the (non-carry) MSB of the sum. Also produces the final exponent.
+  always_comb begin : small_norm
+    // Default assignment, discarding carry bit (target is one bit narrower than sum_shifted)
+    {final_mantissa, sum_sticky_bits} = sum_shifted;
+    final_exponent                    = normalized_exponent;
+
+    // The normalized sum has overflowed, align right and fix exponent
+    if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+      {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+      final_exponent                    = normalized_exponent + 1;
+    // The normalized sum is normal, nothing to do
+    end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+      // do nothing
+    // The normalized sum is still denormal, align left - unless the result is already subnormal
+    end else if (normalized_exponent > 1) begin
+      {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+      final_exponent                    = normalized_exponent - 1;
+    // Otherwise we're denormal: keep the mantissa as is and encode the exponent as 0
+    end else begin
+      final_exponent = '0;
+    end
+  end
+
+  // Update the sticky bit with the shifted-out bits
+  assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic                         pre_round_sign;
+  logic [EXP_BITS-1:0]          pre_round_exponent;
+  logic [MAN_BITS-1:0]          pre_round_mantissa;
+  logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+  logic [1:0]                   round_sticky_bits;
+
+  logic of_before_round, of_after_round; // overflow
+  logic uf_before_round, uf_after_round; // underflow
+  logic result_zero;
+
+  logic                         rounded_sign;
+  logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+
+  // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+  assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones
+  assign uf_before_round = final_exponent == 0;               // exponent for subnormals capped to 0
+
+  // Assemble result before rounding. In case of overflow, the largest normal value is set.
+  assign pre_round_sign     = final_sign_q;
+  assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]);
+  assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit
+  assign pre_round_abs      = {pre_round_exponent, pre_round_mantissa};
+
+  // In case of overflow, the round and sticky bits are set for proper rounding
+  assign round_sticky_bits  = (of_before_round) ? 2'b11 : {final_mantissa[0], sticky_after_norm};
+
+  // Perform the rounding
+  fpnew_rounding #(
+    .AbsWidth ( EXP_BITS + MAN_BITS )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs           ),
+    .sign_i                  ( pre_round_sign          ),
+    .round_sticky_bits_i     ( round_sticky_bits       ),
+    .rnd_mode_i              ( rnd_mode_q              ),
+    .effective_subtraction_i ( effective_subtraction_q ),
+    .abs_rounded_o           ( rounded_abs             ),
+    .sign_o                  ( rounded_sign            ),
+    .exact_zero_o            ( result_zero             )
+  );
+
+  // Classification after rounding
+  assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0
+  assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones
+
+  // -----------------
+  // Result selection
+  // -----------------
+  logic [WIDTH-1:0]     regular_result;
+  fpnew_pkg::status_t   regular_status;
+
+  // Assemble regular result
+  assign regular_result    = {rounded_sign, rounded_abs};
+  assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+  assign regular_status.DZ = 1'b0; // no divisions
+  assign regular_status.OF = of_before_round | of_after_round;   // rounding can introduce overflow
+  assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+  assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+
+  // Final results for output pipeline
+  fp_t                result_d;
+  fpnew_pkg::status_t status_d;
+
+  // Select output depending on special case detection
+  assign result_d = result_is_special_q ? special_result_q : regular_result;
+  assign status_d = result_is_special_q ? special_status_q : regular_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  fp_t                [0:NUM_OUT_REGS] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS] out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]    = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]  = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages: stage i registers index i of each pipe signal into index i+1
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage (data registers only, see reg_ena below)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable the data registers only if the pipeline is ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers gated by reg_ena
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/verilog/rtl/fpnew_fma_multi.sv b/verilog/rtl/fpnew_fma_multi.sv
new file mode 100644
index 0000000..840b889
--- /dev/null
+++ b/verilog/rtl/fpnew_fma_multi.sv
@@ -0,0 +1,820 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_fma_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig = '1,
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [2:0][WIDTH-1:0]       operands_i, // 3 operands
+  input  logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,
+  input  logic                        op_mod_i,
+  input  fpnew_pkg::fp_format_e       src_fmt_i, // format of the multiplicands
+  input  fpnew_pkg::fp_format_e       dst_fmt_i, // format of the addend and result
+  input  TagType                      tag_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  input  logic                        flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o,
+  output TagType                      tag_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  // The super-format that can hold all formats
+  localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+
+  localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+  localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+
+  // Precision bits 'p' include the implicit bit
+  localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1;
+  // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+  localparam int unsigned LOWER_SUM_WIDTH  = 2 * PRECISION_BITS + 3;
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+  // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+  // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+  // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+  localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH);
+  // Shift amount width: maximum internal mantissa size is 3p+3 bits
+  localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3);
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                      sign;
+    logic [SUPER_EXP_BITS-1:0] exponent;
+    logic [SUPER_MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [2:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::fp_format_e src_fmt_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][2:0][WIDTH-1:0]       inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                       inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                       inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_op_mod_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                       inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                       inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_src_fmt_q[0]  = src_fmt_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Input pipeline: generate NUM_INP_REGS register stages, stage i drives index i+1
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Enable for the data registers of this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable the data registers only if this stage is ready and holds a valid data item
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Pipeline data registers of this stage, using enable-registers gated by reg_ena
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_src_fmt_q[i+1],  inp_pipe_src_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign src_fmt_q  = inp_pipe_src_fmt_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic        [NUM_FORMATS-1:0][2:0]                     fmt_sign;
+  logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent;
+  logic        [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa;
+
+  fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q;
+
+  // FP input initialization: per format, split the operands into sign/exponent/mantissa and classify
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+    // Total width, exponent bits and mantissa bits of format `fmt`
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      logic [2:0][FP_WIDTH-1:0] trimmed_ops;
+
+      // Classify the three trimmed operands of this format, class info is returned in info_q[fmt]
+      fpnew_classifier #(
+        .FpFormat    ( fpnew_pkg::fp_format_e'(fmt) ),
+        .NumOperands ( 3                            )
+      ) i_fpnew_classifier (
+        .operands_i ( trimmed_ops                            ),
+        .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ),
+        .info_o     ( info_q[fmt]                            )
+      );
+      for (genvar op = 0; op < 3; op++) begin : gen_operands
+        assign trimmed_ops[op]       = operands_q[op][FP_WIDTH-1:0]; // lower FP_WIDTH bits only
+        assign fmt_sign[fmt][op]     = operands_q[op][FP_WIDTH-1]; // MSB of the format
+        assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]}); // zero-extended
+        assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} <<
+                                       (SUPER_MAN_BITS - MAN_BITS); // left-align in super-format mantissa
+      end
+    end else begin : inactive_format
+      assign info_q[fmt]                 = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_sign[fmt]               = fpnew_pkg::DONT_CARE;             // format disabled
+      assign fmt_exponent[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_mantissa[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  fp_t                 operand_a, operand_b, operand_c;
+  fpnew_pkg::fp_info_t info_a,    info_b,    info_c;
+
+  // Operation selection and operand adjustment (A/B taken from src format, C from dst format)
+  // | \c op_q  | \c op_mod_q | Operation Adjustment
+  // |:--------:|:-----------:|---------------------
+  // | FMADD    | \c 0        | FMADD: none
+  // | FMADD    | \c 1        | FMSUB: Invert sign of operand C
+  // | FNMSUB   | \c 0        | FNMSUB: Invert sign of operand A
+  // | FNMSUB   | \c 1        | FNMADD: Invert sign of operands A and C
+  // | ADD      | \c 0        | ADD: Set operand A to +1.0
+  // | ADD      | \c 1        | SUB: Set operand A to +1.0, invert sign of operand C
+  // | MUL      | \c 0        | MUL: Set operand C to -0.0
+  // | *others* | \c -        | *invalid*
+  // \note \c op_mod_q inverts the sign of the addend, except for MUL where operand C is overwritten.
+  always_comb begin : op_select
+
+    // Default assignments - packing-order-agnostic; A and B use src_fmt_q, C uses dst_fmt_q
+    operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]};
+    operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]};
+    operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]};
+    info_a    = info_q[src_fmt_q][0];
+    info_b    = info_q[src_fmt_q][1];
+    info_c    = info_q[dst_fmt_q][2];
+
+    // op_mod_q inverts sign of operand C (applied before the per-operation overrides below)
+    operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::FMADD:  ; // do nothing
+      fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+      fpnew_pkg::ADD: begin // Set multiplicand A to +1 (exponent = bias of src format, mantissa 0)
+        operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0};
+        info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
+      end
+      fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN)
+        operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+        info_c    = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+      end
+      default: begin // unsupported operation: propagate don't cares
+        operand_a  = '{default: fpnew_pkg::DONT_CARE};
+        operand_b  = '{default: fpnew_pkg::DONT_CARE};
+        operand_c  = '{default: fpnew_pkg::DONT_CARE};
+        info_a     = '{default: fpnew_pkg::DONT_CARE};
+        info_b     = '{default: fpnew_pkg::DONT_CARE};
+        info_c     = '{default: fpnew_pkg::DONT_CARE};
+      end
+    endcase
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+  logic effective_subtraction;
+  logic tentative_sign;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf,        info_c.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan,        info_c.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+  // Effective subtraction in FMA occurs when product and addend signs differ
+  assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+  // The tentative sign of the FMA shall be the sign of the product
+  assign tentative_sign = operand_a.sign ^ operand_b.sign;
+
+  // ----------------------
+  // Special case handling
+  // ----------------------
+  logic [WIDTH-1:0]   special_result;
+  fpnew_pkg::status_t special_status;
+  logic               result_is_special;
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0]    fmt_special_result;
+  fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status;
+  logic [NUM_FORMATS-1:0]               fmt_result_is_special;
+
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+    // Per-format special-case (NaN / infinity) result generation; first set up format constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; // all-ones exponent (also used for inf)
+    localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); // only the mantissa MSB set
+    localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0;
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : special_results
+        logic [FP_WIDTH-1:0] special_res;
+
+        // Default assignment: positive qNaN, no flags, result not special
+        special_res                = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+        fmt_special_status[fmt]    = '0;
+        fmt_result_is_special[fmt] = 1'b0;
+
+        // Handle potentially mixed nan & infinity input => important for the case where infinity and
+        // zero are multiplied and added to a qnan.
+        // RISC-V mandates raising the NV exception in these cases:
+        // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+        if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = 1'b1; // invalid operation
+        // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
+        end else if (any_operand_nan) begin
+          fmt_result_is_special[fmt] = 1'b1;           // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling
+        // Special cases involving infinity
+        end else if (any_operand_inf) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA
+          // Effective addition of opposite infinities (±inf - ±inf) is invalid!
+          if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+            fmt_special_status[fmt].NV = 1'b1; // invalid operation, result stays the default qNaN
+          // Handle cases where output will be inf because of inf product input
+          else if (info_a.is_inf || info_b.is_inf) begin
+            // Result is infinity with the sign of the product
+            special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          // Handle cases where the addend is inf
+          end else if (info_c.is_inf) begin
+            // Result is infinity with sign of the addend (= operand_c)
+            special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          end
+        end
+        // Initialize special result with ones (NaN-box), then place the value in the low bits
+        fmt_special_result[fmt]               = '1;
+        fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_special_status[fmt] = '0;
+      assign fmt_result_is_special[fmt] = 1'b0;
+    end
+  end
+
+  // Detect special case: select the flag computed for the destination format
+  assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same
+  // Special-case status: only the invalid (NV) flag can be set here, otherwise no flags set
+  assign special_status = fmt_special_status[dst_fmt_q];
+  // Assemble result according to destination format
+  assign special_result = fmt_special_result[dst_fmt_q]; // destination format
+
+  // ---------------------------
+  // Initial exponent data path
+  // ---------------------------
+  logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+  logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent;
+
+  // Zero-extend exponents into signed container - implicit width extension
+  assign exponent_a = signed'({1'b0, operand_a.exponent});
+  assign exponent_b = signed'({1'b0, operand_b.exponent});
+  assign exponent_c = signed'({1'b0, operand_c.exponent});
+
+  // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+  // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt.
+  assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+  // Biased product exponent is the sum of encoded exponents minus the bias.
+  assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp.
+                            ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q))
+                            : signed'(exponent_a + info_a.is_subnormal
+                                      + exponent_b + info_b.is_subnormal
+                                      - 2*signed'(fpnew_pkg::bias(src_fmt_q))
+                                      + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt
+  // Exponent difference is the addend exponent minus the product exponent
+  assign exponent_difference = exponent_addend - exponent_product;
+  // The tentative exponent will be the larger of the product or addend exponent
+  assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+
+  // Shift amount for addend based on exponents (unsigned as only right shifts)
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+
+  always_comb begin : addend_shift_amount
+    // Product-anchored case, saturated shift (addend is only in the sticky bit)
+    if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+      addend_shamt = 3 * PRECISION_BITS + 4; // NOTE(review): must fit in SHIFT_AMOUNT_WIDTH bits - confirm for custom formats
+    // Addend and product will have mutual bits to add
+    else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+      addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+    // Addend-anchored case, no shift of the addend (product is only in the sticky bit)
+    else
+      addend_shamt = 0;
+  end
+
+  // ------------------
+  // Product data path
+  // ------------------
+  logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c;
+  logic [2*PRECISION_BITS-1:0] product;             // the p*p product is 2p bits wide
+  logic [3*PRECISION_BITS+3:0] product_shifted;     // addends are 3p+4 bit wide (including G/R)
+
+  // Add implicit bits to mantissae
+  assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+  assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+  assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+
+  // Mantissa multiplier (a*b)
+  assign product = mantissa_a * mantissa_b;
+
+  // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+  // | 000...000 | product | RS |
+  //  <-  p+2  -> <-  2p -> < 2>
+  assign product_shifted = product << 2; // constant shift
+
+  // -----------------
+  // Addend data path
+  // -----------------
+  logic [3*PRECISION_BITS+3:0] addend_after_shift;  // upper 3p+4 bits are needed to go on
+  logic [PRECISION_BITS-1:0]   addend_sticky_bits;  // up to p bit of shifted addend are sticky
+  logic                        sticky_before_add;   // they are compressed into a single sticky bit
+  logic [3*PRECISION_BITS+3:0] addend_shifted;      // addends are 3p+4 bit wide (including G/R)
+  logic                        inject_carry_in;     // inject carry for subtractions if needed
+
+  // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are
+  // shifted out and compressed into a sticky bit.
+  // BEFORE THE SHIFT:
+  // | mantissa_c | 000..000 |
+  //  <-    p   -> <- 3p+4 ->
+  // AFTER THE SHIFT:
+  // | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
+  //  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
+  assign {addend_after_shift, addend_sticky_bits} =
+      (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+
+  assign sticky_before_add     = (| addend_sticky_bits);
+
+  // In case of a subtraction, the addend is inverted
+  assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+  assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+
+  // ------
+  // Adder
+  // ------
+  logic [3*PRECISION_BITS+4:0] sum_raw;   // added one bit for the carry
+  logic                        sum_carry; // observe carry bit from sum for sign fixing
+  logic [3*PRECISION_BITS+3:0] sum;       // discard carry as sum won't overflow
+  logic                        final_sign;
+
+  //Mantissa adder (ab+c). In normal addition, it cannot overflow.
+  assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+  assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+
+  // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+  assign sum        = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+
+  // In case of a mispredicted subtraction result, do a sign flip
+  assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+                      ? 1'b1
+                      : (effective_subtraction ? 1'b0 : tentative_sign);
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic                          effective_subtraction_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_product_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
+  logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+  logic                          sticky_before_add_q;
+  logic [3*PRECISION_BITS+3:0]   sum_q;
+  logic                          final_sign_q;
+  fpnew_pkg::fp_format_e         dst_fmt_q2;
+  fpnew_pkg::roundmode_e         rnd_mode_q;
+  logic                          result_is_special_q;
+  fp_t                           special_result_q;
+  fpnew_pkg::status_t            special_status_q;
+  // Internal pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
+  logic                  [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
+  logic                  [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
+  fpnew_pkg::roundmode_e [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
+  fpnew_pkg::fp_format_e [0:NUM_MID_REGS]                         mid_pipe_dst_fmt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
+  fp_t                   [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
+  fpnew_pkg::status_t    [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
+  TagType                [0:NUM_MID_REGS]                         mid_pipe_tag_q;
+  AuxType                [0:NUM_MID_REGS]                         mid_pipe_aux_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
+  assign mid_pipe_exp_prod_q[0]    = exponent_product;
+  assign mid_pipe_exp_diff_q[0]    = exponent_difference;
+  assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
+  assign mid_pipe_add_shamt_q[0]   = addend_shamt;
+  assign mid_pipe_sticky_q[0]      = sticky_before_add;
+  assign mid_pipe_sum_q[0]         = sum;
+  assign mid_pipe_final_sign_q[0]  = final_sign;
+  assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_dst_fmt_q[0]     = dst_fmt_q;
+  assign mid_pipe_res_is_spec_q[0] = result_is_special;
+  assign mid_pipe_spec_res_q[0]    = special_result;
+  assign mid_pipe_spec_stat_q[0]   = special_status;
+  assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]       = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Internal pipeline: generate NUM_MID_REGS register stages, stage i drives index i+1
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Enable for the data registers of this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable the data registers only if this stage is ready and holds a valid data item
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Pipeline data registers of this stage, using enable-registers gated by reg_ena
+    `FFL(mid_pipe_eff_sub_q[i+1],     mid_pipe_eff_sub_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_exp_prod_q[i+1],    mid_pipe_exp_prod_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_exp_diff_q[i+1],    mid_pipe_exp_diff_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_tent_exp_q[i+1],    mid_pipe_tent_exp_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_add_shamt_q[i+1],   mid_pipe_add_shamt_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_sticky_q[i+1],      mid_pipe_sticky_q[i],      reg_ena, '0)
+    `FFL(mid_pipe_sum_q[i+1],         mid_pipe_sum_q[i],         reg_ena, '0)
+    `FFL(mid_pipe_final_sign_q[i+1],  mid_pipe_final_sign_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],    mid_pipe_rnd_mode_q[i],    reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_dst_fmt_q[i+1],     mid_pipe_dst_fmt_q[i],     reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_res_q[i+1],    mid_pipe_spec_res_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_spec_stat_q[i+1],   mid_pipe_spec_stat_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_tag_q[i+1],         mid_pipe_tag_q[i],         reg_ena, TagType'('0))
+    `FFL(mid_pipe_aux_q[i+1],         mid_pipe_aux_q[i],         reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+  assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
+  assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
+  assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
+  assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
+  assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
+  assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
+  assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
+  assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign dst_fmt_q2              = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+  assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+  assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
+  assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower 2p+3 bits of sum are searched
+  logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // the number of leading zeroes
+  logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // signed leading-zero count
+  logic                               lzc_zeroes;             // in case only zeroes found
+
+  logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+  logic signed [EXP_WIDTH-1:0]          normalized_exponent;
+
+  logic [3*PRECISION_BITS+4:0] sum_shifted;       // result after first normalization shift
+  logic [PRECISION_BITS:0]     final_mantissa;    // final mantissa before rounding with round bit
+  logic [2*PRECISION_BITS+2:0] sum_sticky_bits;   // remaining 2p+3 sticky bits after normalization
+  logic                        sticky_after_norm; // sticky bit after normalization
+
+  logic signed [EXP_WIDTH-1:0] final_exponent;
+
+  assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+
+  // Leading zero counter for cancellations
+  lzc #(
+    .WIDTH ( LOWER_SUM_WIDTH ),
+    .MODE  ( 1               ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( sum_lower          ),
+    .cnt_o   ( leading_zero_count ),
+    .empty_o ( lzc_zeroes         )
+  );
+
+  assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
+
+  // Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
+  always_comb begin : norm_shift_amount
+    // Product-anchored case or cancellations require LZC
+    if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+      // Normal result (biased exponent > 0 and not a zero)
+      if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+        // Undo initial product shift, remove the counted zeroes
+        norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
+        normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+      // Subnormal result
+      end else begin
+        // Cap the shift distance to align mantissa with minimum exponent
+        norm_shamt          = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
+        normalized_exponent = 0; // subnormals encoded as 0
+      end
+    // Addend-anchored case
+    end else begin
+      norm_shamt          = addend_shamt_q; // Undo the initial shift
+      normalized_exponent = tentative_exponent_q;
+    end
+  end
+
+  // Do the large normalization shift
+  assign sum_shifted       = sum_q << norm_shamt;
+
+  // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+  // or right of the (non-carry) MSB of the sum.
+  always_comb begin : small_norm
+    // Default assignment, discarding carry bit
+    {final_mantissa, sum_sticky_bits} = sum_shifted;
+    final_exponent                    = normalized_exponent;
+
+    // The normalized sum has overflown, align right and fix exponent
+    if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+      {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+      final_exponent                    = normalized_exponent + 1;
+    // The normalized sum is normal, nothing to do
+    end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+      // do nothing
+    // The normalized sum is still denormal, align left - unless the result is not already subnormal
+    end else if (normalized_exponent > 1) begin
+      {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+      final_exponent                    = normalized_exponent - 1;
+    // Otherwise we're denormal
+    end else begin
+      final_exponent = '0;
+    end
+  end
+
+  // Update the sticky bit with the shifted-out bits
+  assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic                                     pre_round_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+  logic [1:0]                               round_sticky_bits;
+
+  logic of_before_round, of_after_round; // overflow
+  logic uf_before_round, uf_after_round; // underflow
+
+  logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format
+  logic [NUM_FORMATS-1:0][1:0]                               fmt_round_sticky_bits;
+
+  logic [NUM_FORMATS-1:0]                                    fmt_of_after_round;
+  logic [NUM_FORMATS-1:0]                                    fmt_uf_after_round;
+
+  logic                                     rounded_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+  logic                                     result_zero;
+
+  // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+  assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones
+  assign uf_before_round = final_exponent == 0;               // exponent for subnormals capped to 0
+
+  // Pack exponent and mantissa into proper rounding form
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+    // Set up some constants
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    logic [EXP_BITS-1:0] pre_round_exponent;
+    logic [MAN_BITS-1:0] pre_round_mantissa;
+
+    if (FpFmtConfig[fmt]) begin : active_format
+
+      assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0];
+      assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS];
+      // Assemble result before rounding. In case of overflow, the largest normal value is set.
+      assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend
+
+      // Round bit is after mantissa (1 in case of overflow for rounding)
+      assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] |
+                                             of_before_round;
+
+      // remaining bits in mantissa to sticky (1 in case of overflow for rounding)
+      if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky
+        assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) |
+                                               sticky_after_norm | of_before_round;
+      end else begin : normal_sticky
+        assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round;
+      end
+    end else begin : inactive_format
+      assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Assemble result before rounding. In case of overflow, the largest normal value is set.
+  assign pre_round_sign     = final_sign_q;
+  assign pre_round_abs      = fmt_pre_round_abs[dst_fmt_q2];
+
+  // In case of overflow, the round and sticky bits are set for proper rounding
+  assign round_sticky_bits  = fmt_round_sticky_bits[dst_fmt_q2];
+
+  // Perform the rounding
+  fpnew_rounding #(
+    .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs           ),
+    .sign_i                  ( pre_round_sign          ),
+    .round_sticky_bits_i     ( round_sticky_bits       ),
+    .rnd_mode_i              ( rnd_mode_q              ),
+    .effective_subtraction_i ( effective_subtraction_q ),
+    .abs_rounded_o           ( rounded_abs             ),
+    .sign_o                  ( rounded_sign            ),
+    .exact_zero_o            ( result_zero             )
+  );
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : post_process
+        // detect of / uf
+        fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+        fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+
+        // Assemble regular result, nan box short ones.
+        fmt_result[fmt]               = '1;
+        fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+      end
+    end else begin : inactive_format
+      assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_result[fmt]         = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Classification after rounding select by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+
+
+  // -----------------
+  // Result selection
+  // -----------------
+  logic [WIDTH-1:0]     regular_result;
+  fpnew_pkg::status_t   regular_status;
+
+  // Assemble regular result
+  assign regular_result = fmt_result[dst_fmt_q2];
+  assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+  assign regular_status.DZ = 1'b0; // no divisions
+  assign regular_status.OF = of_before_round | of_after_round;   // rounding can introduce overflow
+  assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+  assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+
+  // Final results for output pipeline
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+
+  // Select output depending on special case detection
+  assign result_d = result_is_special_q ? special_result_q : regular_result;
+  assign status_d = result_is_special_q ? special_status_q : regular_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]    = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]  = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/verilog/rtl/fpnew_noncomp.sv b/verilog/rtl/fpnew_noncomp.sv
new file mode 100644
index 0000000..acddd48
--- /dev/null
+++ b/verilog/rtl/fpnew_noncomp.sv
@@ -0,0 +1,403 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+
+module fpnew_noncomp #( // non-computational FP operations: SGNJ, MINMAX, CMP and CLASSIFY
+  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0), // format of operands/result
+  parameter int unsigned             NumPipeRegs = 0,                  // total number of pipeline stages
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE, // placement of the pipeline stages
+  parameter type                     TagType     = logic, // type of tag_i/tag_o (only piped through)
+  parameter type                     AuxType     = logic, // type of aux_i/aux_o (only piped through)
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic                  clk_i,
+  input logic                  rst_ni,
+  // Input signals
+  input logic [1:0][WIDTH-1:0]     operands_i, // 2 operands: [0] = operand a, [1] = operand b
+  input logic [1:0]                is_boxed_i, // per-operand NaN-boxing flag, forwarded to the classifier
+  input fpnew_pkg::roundmode_e     rnd_mode_i, // selects the variant of op_i (see the sections below)
+  input fpnew_pkg::operation_e     op_i,       // SGNJ, MINMAX, CMP or CLASSIFY; others give don't-care
+  input logic                      op_mod_i,   // SGNJ: extension bit = result sign; CMP: invert result
+  input TagType                    tag_i,
+  input AuxType                    aux_i,
+  // Input Handshake
+  input  logic                     in_valid_i,
+  output logic                     in_ready_o,
+  input  logic                     flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]         result_o,
+  output fpnew_pkg::status_t       status_o,
+  output logic                     extension_bit_o, // per-operation extension bit (see *_extension_bit)
+  output fpnew_pkg::classmask_e    class_mask_o,    // classification of operand a, computed for every op
+  output logic                     is_class_o,      // high when the operation is CLASSIFY
+  output TagType                   tag_o,
+  output AuxType                   aux_o,
+  // Output handshake
+  output logic                     out_valid_o,
+  input  logic                     out_ready_i,
+  // Indication of valid data in flight
+  output logic                     busy_o
+);
+  // All four operations are evaluated in parallel; op_i picks which result is forwarded.
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  // Pipelines: NumPipeRegs is split between the input and output side according to PipeConfig.
+  // BEFORE and INSIDE put all stages at the input, AFTER puts all at the output, and
+  // DISTRIBUTED gives ceil(NumPipeRegs/2) to the input and floor(NumPipeRegs/2) to the output.
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed { // field view of one operand: sign (MSB), exponent, mantissa (LSBs)
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][1:0]            inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages (no stages are generated when NUM_INP_REGS is 0)
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  fpnew_pkg::fp_info_t [1:0] info_q;
+
+  // Classify both operands as they leave the input pipeline
+  fpnew_classifier #(
+    .FpFormat    ( FpFormat ),
+    .NumOperands ( 2        )
+    ) i_class_a (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o     ( info_q                            )
+  );
+
+  fp_t                 operand_a, operand_b;
+  fpnew_pkg::fp_info_t info_a,    info_b;
+
+  // Packing-order-agnostic assignments
+  assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+  assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+  assign info_a    = info_q[0];
+  assign info_b    = info_q[1];
+
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling});
+
+  logic operands_equal, operand_a_smaller;
+
+  // Equal if bit-identical, or if both operands are flagged as zero by the classifier
+  assign operands_equal    = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
+  // '<' compares the packed bits as unsigned; the outcome is inverted if either sign bit is set
+  assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);
+
+  // ---------------
+  // Sign Injection
+  // ---------------
+  fp_t                sgnj_result;
+  fpnew_pkg::status_t sgnj_status;
+  logic               sgnj_extension_bit;
+
+  // Sign Injection - operation is encoded in rnd_mode_q:
+  // RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check)
+  always_comb begin : sign_injections
+    logic sign_a, sign_b; // internal signs
+    // Default assignment
+    sgnj_result = operand_a; // result based on operand a
+
+    // A non-NaN-boxed operand a is replaced by +qNaN (exponent all ones, mantissa MSB set)
+    if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};
+
+    // Internal signs are treated as positive in case of non-NaN-boxed values
+    sign_a = operand_a.sign & info_a.is_boxed;
+    sign_b = operand_b.sign & info_b.is_boxed;
+
+    // Do the sign injection based on rm field
+    unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+      fpnew_pkg::RNE: sgnj_result.sign = sign_b;          // SGNJ
+      fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b;         // SGNJN
+      fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX
+      fpnew_pkg::RUP: sgnj_result      = operand_a;       // passthrough, undoes the qNaN replacement
+      default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+    endcase
+  end
+
+  assign sgnj_status = '0;        // sign injections never raise exceptions
+
+  // With op_mod_q set the extension bit is the result sign, otherwise it is 1
+  assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1;
+
+  // ------------------
+  // Minimum / Maximum
+  // ------------------
+  fp_t                minmax_result;
+  fpnew_pkg::status_t minmax_status;
+  logic               minmax_extension_bit;
+
+  // Minimum/Maximum - operation is encoded in rnd_mode_q:
+  // RNE = MIN, RTZ = MAX
+  always_comb begin : min_max
+    // Default assignment
+    minmax_status = '0;
+
+    // Min/Max use quiet comparisons - only sNaN are invalid
+    minmax_status.NV = signalling_nan;
+
+    // Both NaN inputs cause a NaN output
+    if (info_a.is_nan && info_b.is_nan)
+      minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+    // If one operand is NaN, the non-NaN operand is returned
+    else if (info_a.is_nan) minmax_result = operand_b;
+    else if (info_b.is_nan) minmax_result = operand_a;
+    // Otherwise decide according to the operation
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN
+        fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? operand_b : operand_a; // MAX
+        default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value
+
+  // ------------
+  // Comparisons
+  // ------------
+  fp_t                cmp_result;
+  fpnew_pkg::status_t cmp_status;
+  logic               cmp_extension_bit;
+
+  // Comparisons - operation is encoded in rnd_mode_q:
+  // RNE = LE, RTZ = LT, RDN = EQ
+  // op_mod_q inverts the boolean output, except where NV is raised (result stays 0 there)
+  always_comb begin : comparisons
+    // Default assignment
+    cmp_result = '0; // false
+    cmp_status = '0; // no flags
+
+    // Signalling NaNs always compare as false and are illegal
+    if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation
+    // Otherwise do comparisons
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: begin // Less than or equal
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RTZ: begin // Less than
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RDN: begin // Equal
+          if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal
+          else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers
+
+  // ---------------
+  // Classification
+  // ---------------
+  fpnew_pkg::status_t    class_status;
+  logic                  class_extension_bit;
+  fpnew_pkg::classmask_e class_mask_d; // the result is actually here
+
+  // Classification of operand a - always return the classification mask on the dedicated port
+  always_comb begin : classify
+    if (info_a.is_normal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGNORM    : fpnew_pkg::POSNORM;
+    end else if (info_a.is_subnormal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM;
+    end else if (info_a.is_zero) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGZERO    : fpnew_pkg::POSZERO;
+    end else if (info_a.is_inf) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGINF     : fpnew_pkg::POSINF;
+    end else if (info_a.is_nan) begin
+      class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN       : fpnew_pkg::QNAN;
+    end else begin
+      class_mask_d = fpnew_pkg::QNAN; // default value
+    end
+  end
+
+  assign class_status        = '0;   // classification does not set flags
+  assign class_extension_bit = 1'b0; // classification always produces results in integer registers
+
+  // -----------------
+  // Result selection
+  // -----------------
+  fp_t                   result_d;
+  fpnew_pkg::status_t    status_d;
+  logic                  extension_bit_d;
+  logic                  is_class_d;
+
+  // Select result, status and extension bit of the requested operation
+  always_comb begin : select_result
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::SGNJ: begin
+        result_d        = sgnj_result;
+        status_d        = sgnj_status;
+        extension_bit_d = sgnj_extension_bit;
+      end
+      fpnew_pkg::MINMAX: begin
+        result_d        = minmax_result;
+        status_d        = minmax_status;
+        extension_bit_d = minmax_extension_bit;
+      end
+      fpnew_pkg::CMP: begin
+        result_d        = cmp_result;
+        status_d        = cmp_status;
+        extension_bit_d = cmp_extension_bit;
+      end
+      fpnew_pkg::CLASSIFY: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // unused, the mask goes out on class_mask_o
+        status_d        = class_status;
+        extension_bit_d = class_extension_bit;
+      end
+      default: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        status_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        extension_bit_d = fpnew_pkg::DONT_CARE;             // dont care
+      end
+    endcase
+  end
+
+  assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY);
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  fp_t                   [0:NUM_OUT_REGS] out_pipe_result_q;
+  fpnew_pkg::status_t    [0:NUM_OUT_REGS] out_pipe_status_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_extension_bit_q;
+  fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_is_class_q;
+  TagType                [0:NUM_OUT_REGS] out_pipe_tag_q;
+  AuxType                [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from the combinational results above
+  assign out_pipe_result_q[0]        = result_d;
+  assign out_pipe_status_q[0]        = status_d;
+  assign out_pipe_extension_bit_q[0] = extension_bit_d;
+  assign out_pipe_class_mask_q[0]    = class_mask_d;
+  assign out_pipe_is_class_q[0]      = is_class_d;
+  assign out_pipe_tag_q[0]           = inp_pipe_tag_q[NUM_INP_REGS];
+  assign out_pipe_aux_q[0]           = inp_pipe_aux_q[NUM_INP_REGS];
+  assign out_pipe_valid_q[0]         = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0];
+  // Generate the register stages (no stages are generated when NUM_OUT_REGS is 0)
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1],        out_pipe_result_q[i],        reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1],        out_pipe_status_q[i],        reg_ena, '0)
+    `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0)
+    `FFL(out_pipe_class_mask_q[i+1],    out_pipe_class_mask_q[i],    reg_ena, fpnew_pkg::QNAN)
+    `FFL(out_pipe_is_class_q[i+1],      out_pipe_is_class_q[i],      reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],           out_pipe_tag_q[i],           reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],           out_pipe_aux_q[i],           reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS];
+  assign class_mask_o    = out_pipe_class_mask_q[NUM_OUT_REGS];
+  assign is_class_o      = out_pipe_is_class_q[NUM_OUT_REGS];
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, out_pipe_valid_q}); // index 0 is in_valid_i, so it counts too
+endmodule
diff --git a/verilog/rtl/fpnew_opgroup_block.sv b/verilog/rtl/fpnew_opgroup_block.sv
new file mode 100644
index 0000000..e3be31d
--- /dev/null
+++ b/verilog/rtl/fpnew_opgroup_block.sv
@@ -0,0 +1,230 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_opgroup_block #(
+  parameter fpnew_pkg::opgroup_e        OpGroup       = fpnew_pkg::ADDMUL,
+  // FPU configuration (FpFmtMask, FmtPipeRegs and FmtUnitTypes are indexed per FP format)
+  parameter int unsigned                Width         = 32,
+  parameter logic                       EnableVectors = 1'b1,
+  parameter fpnew_pkg::fmt_logic_t      FpFmtMask     = '1,
+  parameter fpnew_pkg::ifmt_logic_t     IntFmtMask    = '1,
+  parameter fpnew_pkg::fmt_unsigned_t   FmtPipeRegs   = '{default: 0},
+  parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes  = '{default: fpnew_pkg::PARALLEL},
+  parameter fpnew_pkg::pipe_config_t    PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                        TagType       = logic,
+  // Do not change: derived from fpnew_pkg and used to size the ports below
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS,
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic                                     clk_i,
+  input logic                                     rst_ni,
+  // Input signals (broadcast to the slices; dst_fmt_i selects which slice sees in_valid_i)
+  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e                    rnd_mode_i,
+  input fpnew_pkg::operation_e                    op_i,
+  input logic                                     op_mod_i,
+  input fpnew_pkg::fp_format_e                    src_fmt_i,
+  input fpnew_pkg::fp_format_e                    dst_fmt_i,
+  input fpnew_pkg::int_format_e                   int_fmt_i,
+  input logic                                     vectorial_op_i,
+  input TagType                                   tag_i,
+  // Input Handshake (flush_i is forwarded to every slice and to the output arbiter)
+  input  logic                                    in_valid_i,
+  output logic                                    in_ready_o,
+  input  logic                                    flush_i,
+  // Output signals (unpacked from the data output of the arbiter)
+  output logic [Width-1:0]                        result_o,
+  output fpnew_pkg::status_t                      status_o,
+  output logic                                    extension_bit_o,
+  output TagType                                  tag_o,
+  // Output handshake
+  output logic                                    out_valid_o,
+  input  logic                                    out_ready_i,
+  // Indication of valid data in flight (OR of the busy flags of all slices)
+  output logic                                    busy_o
+);
+  // Feeds each operation to the slice selected by dst_fmt_i and arbitrates the slice results.
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed { // result bundle of one slice, used as the arbiter's DataType
+    logic [Width-1:0]   result;
+    fpnew_pkg::status_t status;
+    logic               ext_bit;
+    TagType             tag;
+  } output_t;
+
+  // Per-format handshake/busy signals and result bundles of the slices
+  logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
+  output_t [NUM_FORMATS-1:0] fmt_outputs;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i]; // Ready of selected format, gated by valid
+
+  // -------------------------
+  // Generate Parallel Slices
+  // -------------------------
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
+    // Some constants for this format, computed by fpnew_pkg helpers at elaboration time
+    localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam logic IS_FIRST_MERGED =
+        fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);
+
+    // Generate a dedicated slice only if the format is enabled and configured as PARALLEL
+    if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format
+
+      logic in_valid;
+
+      assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format
+
+      fpnew_opgroup_fmt_slice #(
+        .OpGroup       ( OpGroup                      ),
+        .FpFormat      ( fpnew_pkg::fp_format_e'(fmt) ),
+        .Width         ( Width                        ),
+        .EnableVectors ( EnableVectors                ),
+        .NumPipeRegs   ( FmtPipeRegs[fmt]             ),
+        .PipeConfig    ( PipeConfig                   ),
+        .TagType       ( TagType                      )
+      ) i_fmt_slice (
+        .clk_i,
+        .rst_ni,
+        .operands_i     ( operands_i               ),
+        .is_boxed_i     ( is_boxed_i[fmt]          ),
+        .rnd_mode_i,
+        .op_i,
+        .op_mod_i,
+        .vectorial_op_i,
+        .tag_i,
+        .in_valid_i     ( in_valid                 ),
+        .in_ready_o     ( fmt_in_ready[fmt]        ),
+        .flush_i,
+        .result_o       ( fmt_outputs[fmt].result  ),
+        .status_o       ( fmt_outputs[fmt].status  ),
+        .extension_bit_o( fmt_outputs[fmt].ext_bit ),
+        .tag_o          ( fmt_outputs[fmt].tag     ),
+        .out_valid_o    ( fmt_out_valid[fmt]       ),
+        .out_ready_i    ( fmt_out_ready[fmt]       ),
+        .busy_o         ( fmt_busy[fmt]            )
+      );
+    // If the format wants to use merged ops, tie off the dangling ones not used here
+    end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused
+
+      localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+      // Mirror the ready of format FMT, whose signals are driven by the merged slice below
+      assign fmt_in_ready[fmt]  = fmt_in_ready[int'(FMT)];
+
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+
+    // Tie off disabled formats
+    end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
+      assign fmt_in_ready[fmt]  = 1'b0; // don't accept operations
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+    end
+  end
+
+  // ----------------------
+  // Generate Merged Slice
+  // ----------------------
+  if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice
+    // FMT: format index whose fmt_* signals this slice drives; REG: its NumPipeRegs setting
+    localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask);
+
+    logic in_valid;
+    // The operation is routed here whenever its destination format is configured as MERGED
+    assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED);
+
+    fpnew_opgroup_multifmt_slice #(
+      .OpGroup       ( OpGroup          ),
+      .Width         ( Width            ),
+      .FpFmtConfig   ( FpFmtMask        ),
+      .IntFmtConfig  ( IntFmtMask       ),
+      .EnableVectors ( EnableVectors    ),
+      .NumPipeRegs   ( REG              ),
+      .PipeConfig    ( PipeConfig       ),
+      .TagType       ( TagType          )
+    ) i_multifmt_slice (
+      .clk_i,
+      .rst_ni,
+      .operands_i,
+      .is_boxed_i,
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .in_valid_i      ( in_valid                 ),
+      .in_ready_o      ( fmt_in_ready[FMT]        ),
+      .flush_i,
+      .result_o        ( fmt_outputs[FMT].result  ),
+      .status_o        ( fmt_outputs[FMT].status  ),
+      .extension_bit_o ( fmt_outputs[FMT].ext_bit ),
+      .tag_o           ( fmt_outputs[FMT].tag     ),
+      .out_valid_o     ( fmt_out_valid[FMT]       ),
+      .out_ready_i     ( fmt_out_ready[FMT]       ),
+      .busy_o          ( fmt_busy[FMT]            )
+    );
+
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which of the per-format slice results to use
+  rr_arb_tree #(
+    .NumIn     ( NUM_FORMATS ),
+    .DataType  ( output_t    ),
+    .AxiVldRdy ( 1'b1        )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i   ( '0             ),
+    .req_i  ( fmt_out_valid  ),
+    .gnt_o  ( fmt_out_ready  ),
+    .data_i ( fmt_outputs    ),
+    .gnt_i  ( out_ready_i    ),
+    .req_o  ( out_valid_o    ),
+    .data_o ( arbiter_output ),
+    .idx_o  ( /* unused */   )
+  );
+
+  // Unpack the arbitrated result bundle onto the module outputs
+  assign result_o        = arbiter_output.result;
+  assign status_o        = arbiter_output.status;
+  assign extension_bit_o = arbiter_output.ext_bit;
+  assign tag_o           = arbiter_output.tag;
+
+  assign busy_o = (| fmt_busy); // busy as soon as any slice reports busy
+
+endmodule
diff --git a/verilog/rtl/fpnew_opgroup_fmt_slice.sv b/verilog/rtl/fpnew_opgroup_fmt_slice.sv
new file mode 100644
index 0000000..fda2a57
--- /dev/null
+++ b/verilog/rtl/fpnew_opgroup_fmt_slice.sv
@@ -0,0 +1,276 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+// Single-format slice of one operation group.
+// The Width-bit operands are cut into NUM_LANES lanes of FP_WIDTH bits, one unit of the selected
+// OpGroup is generated per lane and the lane results are concatenated again. Lane 0 is always
+// generated and supplies the slice handshake, tag and extension bit; upper lanes only take part
+// in vectorial operations. The status flags of all lanes are OR-ed together. DIVSQRT has no
+// single-format unit in this file: such lanes are tied off and never accept an operation.
+module fpnew_opgroup_fmt_slice #(
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::ADDMUL,
+  parameter fpnew_pkg::fp_format_e   FpFormat      = fpnew_pkg::fp_format_e'(0),
+  // FPU configuration
+  parameter int unsigned             Width         = 32,
+  parameter logic                    EnableVectors = 1'b1,
+  parameter int unsigned             NumPipeRegs   = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic                               clk_i,
+  input logic                               rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_OPERANDS-1:0]            is_boxed_i,
+  input fpnew_pkg::roundmode_e              rnd_mode_i,
+  input fpnew_pkg::operation_e              op_i,
+  input logic                               op_mod_i,
+  input logic                               vectorial_op_i,
+  input TagType                             tag_i,
+  // Input Handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Output signals
+  output logic [Width-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output logic                              extension_bit_o,
+  output TagType                            tag_o,
+  // Output handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // Indication of valid data in flight
+  output logic                              busy_o
+);
+
+  localparam int unsigned FP_WIDTH  = fpnew_pkg::fp_width(FpFormat);
+  localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors);
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic                 vectorial_op;
+
+  logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
+  logic [Width-1:0]              slice_regular_result, slice_class_result, slice_vec_class_result;
+
+  fpnew_pkg::status_t    [NUM_LANES-1:0] lane_status;
+  logic                  [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
+  TagType                [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic                  [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // ditto
+
+  logic result_is_vector, result_is_class;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    logic [FP_WIDTH-1:0] local_result; // lane-local results
+    logic                local_sign;
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
+      logic [FP_WIDTH-1:0]                   op_result;      // lane-local results
+      fpnew_pkg::status_t                    op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+      // Slice out the operands for this lane
+      always_comb begin : prepare_input
+        for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
+          local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma #(
+          .FpFormat    ( FpFormat    ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig  ( PipeConfig  ),
+          .TagType     ( TagType     ),
+          .AuxType     ( logic       )
+        ) i_fma (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i           ( vectorial_op         ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid             ),
+          .in_ready_o      ( lane_in_ready[lane]  ),
+          .flush_i,
+          .result_o        ( op_result            ),
+          .status_o        ( op_status            ),
+          .extension_bit_o ( lane_ext_bit[lane]   ),
+          .tag_o           ( lane_tags[lane]      ),
+          .aux_o           ( lane_vectorial[lane] ),
+          .out_valid_o     ( out_valid            ),
+          .out_ready_i     ( out_ready            ),
+          .busy_o          ( lane_busy[lane]      )
+        );
+        assign lane_is_class[lane]   = 1'b0;
+        assign lane_class_mask[lane] = fpnew_pkg::NEGINF;
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
+        // No single-format divide/square-root unit is instantiated in this slice. Only the
+        // parameterisation of the disabled instance is kept for reference; its port hookup
+        // would mirror the one of i_fma above:
+        // fpnew_divsqrt #(
+        //   .FpFormat   (FpFormat),
+        //   .NumPipeRegs(NumPipeRegs),
+        //   .PipeConfig (PipeConfig),
+        //   .TagType    (TagType),
+        //   .AuxType    (logic)
+        // ) i_divsqrt ( ... );
+        // Without an instance nothing drives the handshake and result signals of this lane,
+        // so tie the lane off as an inert unit that never accepts or emits an operation.
+        assign lane_in_ready[lane]   = 1'b0; // don't accept operations
+        assign out_valid             = 1'b0; // don't emit values
+        assign lane_busy[lane]       = 1'b0; // never busy
+        // Results never count as vectorial or classification results
+        assign lane_vectorial[lane]  = 1'b0;
+        assign lane_is_class[lane]   = 1'b0;
+        assign lane_class_mask[lane] = fpnew_pkg::NEGINF; // same placeholder as ADDMUL
+        // Data outputs are don't care as out_valid is never asserted
+        assign op_result             = '{default: fpnew_pkg::DONT_CARE};
+        assign op_status             = '{default: fpnew_pkg::DONT_CARE};
+        assign lane_ext_bit[lane]    = fpnew_pkg::DONT_CARE;
+        assign lane_tags[lane]       = TagType'(fpnew_pkg::DONT_CARE);
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        fpnew_noncomp #(
+          .FpFormat   (FpFormat),
+          .NumPipeRegs(NumPipeRegs),
+          .PipeConfig (PipeConfig),
+          .TagType    (TagType),
+          .AuxType    (logic)
+        ) i_noncomp (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i           ( vectorial_op          ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid              ),
+          .in_ready_o      ( lane_in_ready[lane]   ),
+          .flush_i,
+          .result_o        ( op_result             ),
+          .status_o        ( op_status             ),
+          .extension_bit_o ( lane_ext_bit[lane]    ),
+          .class_mask_o    ( lane_class_mask[lane] ),
+          .is_class_o      ( lane_is_class[lane]   ),
+          .tag_o           ( lane_tags[lane]       ),
+          .aux_o           ( lane_vectorial[lane]  ),
+          .out_valid_o     ( out_valid             ),
+          .out_ready_i     ( out_ready             ),
+          .busy_o          ( lane_busy[lane]       )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid   & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane]  = 1'b0; // unused lane
+      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane]    = '0;
+      assign lane_busy[lane]      = 1'b0;
+      assign lane_is_class[lane]  = 1'b0;
+      assign lane_class_mask[lane] = fpnew_pkg::NEGINF; // read by the class block below
+    end
+
+    // Insert lane result into slice result
+    assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;
+
+    // Create Classification results
+    if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
+      assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGZERO);
+      // Write the current block segment
+      assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
+        local_sign,  // BIT 7
+        ~local_sign, // BIT 6
+        lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
+        lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
+        lane_class_mask[lane] == fpnew_pkg::POSZERO
+            || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
+        lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
+        lane_class_mask[lane] == fpnew_pkg::POSNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
+        lane_class_mask[lane] == fpnew_pkg::POSINF
+            || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
+      };
+    end
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign result_is_vector = lane_vectorial[0];
+  assign result_is_class  = lane_is_class[0];
+
+  assign slice_regular_result = $signed({extension_bit_o, slice_result});
+
+  localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8;
+
+  // Pad out unused vec_class bits
+  if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
+    assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
+  end
+
+  // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;
+
+  assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0];
+
+  // Select the proper result
+  assign result_o = result_is_class ? slice_class_result : slice_regular_result;
+
+  assign extension_bit_o                              = lane_ext_bit[0]; // upper lanes unused
+  assign tag_o                                        = lane_tags[0];    // upper lanes unused
+  assign busy_o                                       = (| lane_busy);
+  assign out_valid_o                                  = lane_out_valid[0]; // upper lanes unused
+
+  // Collapse the lane status
+  always_comb begin : output_processing
+    // Collapse the status
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/verilog/rtl/fpnew_opgroup_multifmt_slice.sv b/verilog/rtl/fpnew_opgroup_multifmt_slice.sv
new file mode 100644
index 0000000..4f139e9
--- /dev/null
+++ b/verilog/rtl/fpnew_opgroup_multifmt_slice.sv
@@ -0,0 +1,424 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_opgroup_multifmt_slice #(
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::CONV,
+  parameter int unsigned             Width         = 64,
+  // FPU configuration
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig   = '1,
+  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig  = '1,
+  parameter logic                    EnableVectors = 1'b1,
+  parameter int unsigned             NumPipeRegs   = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input logic                                     clk_i,
+  input logic                                     rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e                    rnd_mode_i,
+  input fpnew_pkg::operation_e                    op_i,
+  input logic                                     op_mod_i,
+  input fpnew_pkg::fp_format_e                    src_fmt_i,
+  input fpnew_pkg::fp_format_e                    dst_fmt_i,
+  input fpnew_pkg::int_format_e                   int_fmt_i,
+  input logic                                     vectorial_op_i,
+  input TagType                                   tag_i,
+  // Input Handshake
+  input  logic                                    in_valid_i,
+  output logic                                    in_ready_o,
+  input  logic                                    flush_i,
+  // Output signals
+  output logic [Width-1:0]                        result_o,
+  output fpnew_pkg::status_t                      status_o,
+  output logic                                    extension_bit_o,
+  output TagType                                  tag_o,
+  // Output handshake
+  output logic                                    out_valid_o,
+  input  logic                                    out_ready_i,
+  // Indication of valid data in flight
+  output logic                                    busy_o
+);
+
+  localparam int unsigned MAX_FP_WIDTH   = fpnew_pkg::max_fp_width(FpFmtConfig);
+  localparam int unsigned MAX_INT_WIDTH  = fpnew_pkg::max_int_width(IntFmtConfig);
+  localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  // We will send the format information along with the data
+  localparam int unsigned FMT_BITS =
+      fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
+  localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic                 vectorial_op;
+  logic [FMT_BITS-1:0]  dst_fmt; // destination format to pass along with operation
+  logic [AUX_BITS-1:0]  aux_data;
+
+  // additional flags for CONV
+  logic       dst_fmt_is_int, dst_is_cpk;
+  logic [1:0] dst_vec_op; // info for vectorial results (for packing)
+  logic [2:0] target_aux_d, target_aux_q;
+  logic       is_up_cast, is_down_cast;
+
+  logic [NUM_FORMATS-1:0][Width-1:0]     fmt_slice_result;
+  logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
+  logic [Width-1:0]                      conv_slice_result;
+
+
+  logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register
+
+  fpnew_pkg::status_t [NUM_LANES-1:0]   lane_status;
+  logic   [NUM_LANES-1:0]               lane_ext_bit; // only the first one is actually used
+  TagType [NUM_LANES-1:0]               lane_tags; // only the first one is actually used
+  logic   [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
+  logic   [NUM_LANES-1:0]               lane_busy; // dito
+
+  logic                result_is_vector;
+  logic [FMT_BITS-1:0] result_fmt;
+  logic                result_fmt_is_int, result_is_cpk;
+  logic [1:0]          result_vec_op; // info for vectorial results (for packing)
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // Cast-and-Pack ops are encoded in operation and modifier
+  assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
+  assign dst_is_cpk     = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
+                                                          op_i == fpnew_pkg::CPKCD);
+  assign dst_vec_op     = (OpGroup == fpnew_pkg::CONV) & {(op_i == fpnew_pkg::CPKCD), op_mod_i};
+
+  assign is_up_cast   = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
+  assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));
+
+  // The destination format is the int format for F2I casts
+  assign dst_fmt    = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;
+
+  // The data sent along consists of the vectorial flag and format bits
+  assign aux_data      = {dst_fmt_is_int, vectorial_op, dst_fmt};
+  assign target_aux_d  = {dst_vec_op, dst_is_cpk};
+
+  // CONV passes one operand for assembly after the unit: opC for cpk, opB for others
+  if (OpGroup == fpnew_pkg::CONV) begin : conv_target
+    assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
+  end
+
+  // For 2-operand units, prepare boxing info
+  logic [NUM_FORMATS-1:0]      is_boxed_1op;
+  logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;
+
+  always_comb begin : boxed_2op
+    for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin
+      is_boxed_1op[fmt] = is_boxed_i[fmt][0];
+      is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
+    end
+  end
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
+    // Get a mask of active formats for this lane
+    localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
+        fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
+        fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);
+
+    // Cast-specific parameters
+    localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
+        fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
+        fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);
+
+    // Lane parameters from Opgroup
+    localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
+                                                     ? CONV_FORMATS : ACTIVE_FORMATS;
+    localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;
+
+    logic [LANE_WIDTH-1:0] local_result; // lane-local results
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands;  // lane-local oprands
+      logic [LANE_WIDTH-1:0]                   op_result;       // lane-local results
+      fpnew_pkg::status_t                      op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+
+      // Slice out the operands for this lane, upper bits are ignored in the unit
+      always_comb begin : prepare_input
+        for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
+          local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
+        end
+
+        // override operand 0 for some conversions
+        if (OpGroup == fpnew_pkg::CONV) begin
+          // Source is an integer
+          if (op_i == fpnew_pkg::I2F) begin
+            local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
+          // vectorial F2F up casts
+          end else if (op_i == fpnew_pkg::F2F) begin
+            if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
+              local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) +
+                                                   MAX_FP_WIDTH/2;
+            end
+          // CPK
+          end else if (dst_is_cpk) begin
+            if (lane == 1) begin
+              local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
+            end
+          end
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma_multi #(
+          .FpFmtConfig ( LANE_FORMATS         ),
+          .NumPipeRegs ( NumPipeRegs          ),
+          .PipeConfig  ( PipeConfig           ),
+          .TagType     ( TagType              ),
+          .AuxType     ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_fma_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands  ),
+          .is_boxed_i,
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
+        fpnew_divsqrt_multi #(
+          .FpFmtConfig ( LANE_FORMATS         ),
+          .NumPipeRegs ( NumPipeRegs          ),
+          .PipeConfig  ( PipeConfig           ),
+          .TagType     ( TagType              ),
+          .AuxType     ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_divsqrt_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands[1:0] ), // 2 operands
+          .is_boxed_i      ( is_boxed_2op        ), // 2 operands
+          .rnd_mode_i,
+          .op_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+
+      end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
+        fpnew_cast_multi #(
+          .FpFmtConfig  ( LANE_FORMATS         ),
+          .IntFmtConfig ( CONV_INT_FORMATS     ),
+          .NumPipeRegs  ( NumPipeRegs          ),
+          .PipeConfig   ( PipeConfig           ),
+          .TagType      ( TagType              ),
+          .AuxType      ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_cast_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands[0]   ),
+          .is_boxed_i      ( is_boxed_1op        ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .int_fmt_i,
+          .tag_i,
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin : inactive_lane
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane]  = 1'b0; // unused lane
+      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane]    = '0;
+      assign lane_busy[lane]      = 1'b0;
+    end
+
+    // Generate result packing depending on float format
+    for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
+      // Set up some constants
+      localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+      // only for active formats within the lane
+      if (ACTIVE_FORMATS[fmt]) begin
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            local_result[FP_WIDTH-1:0];
+      end else if ((LANE+1)*FP_WIDTH <= Width) begin
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            '{default: lane_ext_bit[LANE]};
+      end else if (LANE*FP_WIDTH < Width) begin
+        assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] =
+            '{default: lane_ext_bit[LANE]};
+      end
+    end
+
+    // Generate result packing depending on integer format
+    if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
+      for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
+        // Set up some constants
+        localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+        if (ACTIVE_INT_FORMATS[ifmt]) begin
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
+            local_result[INT_WIDTH-1:0];
+        end else if ((LANE+1)*INT_WIDTH <= Width) begin
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0;
+        end else if (LANE*INT_WIDTH < Width) begin
+          assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0;
+        end
+      end
+    end
+  end
+
+  // Extend slice result if needed
+  for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    if (NUM_LANES*FP_WIDTH < Width)
+      assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
+  end
+
+  // Mute int results if unused
+  for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
+    if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
+      assign ifmt_slice_result[ifmt] = '0;
+    end
+  end
+
+  // Bypass lanes with target operand for vectorial casts
+  if (OpGroup == fpnew_pkg::CONV) begin : target_regs
+    // Bypass pipeline signals, index i holds signal after i register stages
+    logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
+    logic [0:NumPipeRegs][2:0]       byp_pipe_aux_q;
+    logic [0:NumPipeRegs]            byp_pipe_valid_q;
+    // Ready signal is combinatorial for all stages
+    logic [0:NumPipeRegs] byp_pipe_ready;
+
+    // Input stage: First element of pipeline is taken from inputs
+    assign byp_pipe_target_q[0]  = conv_target_d;
+    assign byp_pipe_aux_q[0]     = target_aux_d;
+    assign byp_pipe_valid_q[0]   = in_valid_i & vectorial_op;
+    // Generate the register stages
+    for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
+      // Internal register enable for this stage
+      logic reg_ena;
+      // Determine the ready signal of the current stage - advance the pipeline:
+      // 1. if the next stage is ready for our data
+      // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+      assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
+      // Valid: enabled by ready signal, synchronous clear with the flush signal
+      `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+      // Enable register if pipeline ready and a valid data item is present
+      assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i];
+      // Generate the pipeline registers within the stages, use enable-registers
+      `FFL(byp_pipe_target_q[i+1],  byp_pipe_target_q[i],  reg_ena, '0)
+      `FFL(byp_pipe_aux_q[i+1],     byp_pipe_aux_q[i],     reg_ena, '0)
+    end
+    // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+    assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
+    // Output stage: assign module outputs
+    assign conv_target_q = byp_pipe_target_q[NumPipeRegs];
+
+    // decode the aux data
+    assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
+  end else begin : no_conv
+    assign {result_vec_op, result_is_cpk} = '0;
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];
+
+  assign result_o = result_fmt_is_int
+                    ? ifmt_slice_result[result_fmt]
+                    : fmt_slice_result[result_fmt];
+
+  assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
+  assign tag_o           = lane_tags[0];    // don't care about upper ones
+  assign busy_o          = (| lane_busy);
+
+  assign out_valid_o     = lane_out_valid[0]; // don't care about upper ones
+
+  // Collapse the status
+  always_comb begin : output_processing
+    // Collapse the status
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/verilog/rtl/fpnew_pkg.sv b/verilog/rtl/fpnew_pkg.sv
new file mode 100644
index 0000000..2d258cf
--- /dev/null
+++ b/verilog/rtl/fpnew_pkg.sv
@@ -0,0 +1,491 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+package fpnew_pkg;
+
+  // ---------
+  // FP TYPES
+  // ---------
+  // | Enumerator | Format           | Width  | EXP_BITS | MAN_BITS
+  // |:----------:|------------------|-------:|:--------:|:--------:
+  // | FP32       | IEEE binary32    | 32 bit | 8        | 23
+  // | FP64       | IEEE binary64    | 64 bit | 11       | 52
+  // | FP16       | IEEE binary16    | 16 bit | 5        | 10
+  // | FP8        | binary8          |  8 bit | 5        | 2
+  // | FP16ALT    | binary16alt      | 16 bit | 8        | 7
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  // Encoding for a format
+  typedef struct packed {
+    int unsigned exp_bits;
+    int unsigned man_bits;
+  } fp_encoding_t;
+
+  localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
+  localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS);
+
+  // FP formats
+  typedef enum logic [FP_FORMAT_BITS-1:0] {
+    FP32    = 'd0,
+    FP64    = 'd1,
+    FP16    = 'd2,
+    FP8     = 'd3,
+    FP16ALT = 'd4
+    // add new formats here
+  } fp_format_e;
+
+  // Encodings for supported FP formats
+  localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS  = '{
+    '{8,  23}, // IEEE binary32 (single)
+    '{11, 52}, // IEEE binary64 (double)
+    '{5,  10}, // IEEE binary16 (half)
+    '{5,  2},  // custom binary8
+    '{8,  7}   // custom binary16alt
+    // add new formats here
+  };
+
+  typedef logic [0:NUM_FP_FORMATS-1]       fmt_logic_t;    // Logic indexed by FP format (for masks)
+  typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format
+  // Index 0 is the leftmost bit of these masks: 5'b11000 sets index 0 (FP32) and 1 (FP64)
+  localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only
+
+  // ---------
+  // INT TYPES
+  // ---------
+  // | Enumerator | Width  |
+  // |:----------:|-------:|
+  // | INT8       |  8 bit |
+  // | INT16      | 16 bit |
+  // | INT32      | 32 bit |
+  // | INT64      | 64 bit |
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats
+  localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS);
+
+  // Int formats
+  typedef enum logic [INT_FORMAT_BITS-1:0] {
+    INT8,
+    INT16,
+    INT32,
+    INT64
+    // add new formats here
+  } int_format_e;
+
+  // Returns the width in bits of the given INT format
+  function automatic int unsigned int_width(int_format_e ifmt);
+    unique case (ifmt)
+      INT8:  return 8;
+      INT16: return 16;
+      INT32: return 32;
+      INT64: return 64;
+      // No default branch: the four items above use up every encoding of the 2-bit
+      // (INT_FORMAT_BITS wide) int_format_e type.
+      // The default that used to follow is disabled. It held two things:
+      //   1. a $fatal(1, "Invalid INT format supplied"), excluded from synthesis by
+      //      translate-off/on directives
+      //   2. a dummy `return INT8;`, there only to avoid latches
+      // NOTE(review): when an INT format is added, extend this case or restore a
+      // default, otherwise the new format ends the function without a return.
+    endcase
+  endfunction
+
+  typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks)
+
+  // --------------
+  // FP OPERATIONS
+  // --------------
+  localparam int unsigned NUM_OPGROUPS = 4;
+
+  // Each FP operation belongs to an operation group
+  typedef enum logic [1:0] {
+    ADDMUL, DIVSQRT, NONCOMP, CONV
+  } opgroup_e;
+
+  localparam int unsigned OP_BITS = 4;
+
+  typedef enum logic [OP_BITS-1:0] {
+    FMADD, FNMSUB, ADD, MUL,     // ADDMUL operation group
+    DIV, SQRT,                   // DIVSQRT operation group
+    SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group
+    F2F, F2I, I2F, CPKAB, CPKCD  // CONV operation group
+  } operation_e;
+
+  // -------------------
+  // RISC-V FP-SPECIFIC
+  // -------------------
+  // Rounding modes
+  typedef enum logic [2:0] {
+    RNE = 3'b000,
+    RTZ = 3'b001,
+    RDN = 3'b010,
+    RUP = 3'b011,
+    RMM = 3'b100,
+    DYN = 3'b111
+  } roundmode_e;
+
+  // Status flags
+  typedef struct packed {
+    logic NV; // Invalid
+    logic DZ; // Divide by zero
+    logic OF; // Overflow
+    logic UF; // Underflow
+    logic NX; // Inexact
+  } status_t;
+
+  // Information about a floating point value
+  typedef struct packed {
+    logic is_normal;     // is the value normal
+    logic is_subnormal;  // is the value subnormal
+    logic is_zero;       // is the value zero
+    logic is_inf;        // is the value infinity
+    logic is_nan;        // is the value NaN
+    logic is_signalling; // is the value a signalling NaN
+    logic is_quiet;      // is the value a quiet NaN
+    logic is_boxed;      // is the value properly NaN-boxed (RISC-V specific)
+  } fp_info_t;
+
+  // Classification mask
+  typedef enum logic [9:0] {
+    NEGINF     = 10'b00_0000_0001,
+    NEGNORM    = 10'b00_0000_0010,
+    NEGSUBNORM = 10'b00_0000_0100,
+    NEGZERO    = 10'b00_0000_1000,
+    POSZERO    = 10'b00_0001_0000,
+    POSSUBNORM = 10'b00_0010_0000,
+    POSNORM    = 10'b00_0100_0000,
+    POSINF     = 10'b00_1000_0000,
+    SNAN       = 10'b01_0000_0000,
+    QNAN       = 10'b10_0000_0000
+  } classmask_e;
+
+  // ------------------
+  // FPU configuration
+  // ------------------
+  // Pipelining registers can be inserted (at elaboration time) into operational units
+  typedef enum logic [1:0] {
+    BEFORE,     // registers are inserted at the inputs of the unit
+    AFTER,      // registers are inserted at the outputs of the unit
+    INSIDE,     // registers are inserted at predetermined (suboptimal) locations in the unit
+    DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE
+  } pipe_config_t;
+
+  // Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all.
+  typedef enum logic [1:0] {
+    DISABLED, // arithmetic units are not generated
+    PARALLEL, // arithmetic units are generated in parallel slices, one for each format
+    MERGED    // arithmetic units are contained within a merged unit holding multiple formats
+  } unit_type_t;
+
+  // Array of unit types indexed by format
+  typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t;
+
+  // Array of format-specific unit types by opgroup
+  typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t;
+  // same with unsigned
+  typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t;
+
+  // FPU configuration: features
+  typedef struct packed {
+    int unsigned Width;
+    logic        EnableVectors;
+    logic        EnableNanBox;
+    fmt_logic_t  FpFmtMask;
+    ifmt_logic_t IntFmtMask;
+  } fpu_features_t;
+
+  localparam fpu_features_t RV64D = '{
+    Width:         64,
+    EnableVectors: 1'b0,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11000,
+    IntFmtMask:    4'b0011
+  };
+
+  localparam fpu_features_t RV32D = '{
+    Width:         64,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11000,
+    IntFmtMask:    4'b0010
+  };
+
+  localparam fpu_features_t RV32F = '{
+    Width:         32,
+    EnableVectors: 1'b0,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10000,
+    IntFmtMask:    4'b0010
+  };
+
+  localparam fpu_features_t RV64D_Xsflt = '{
+    Width:         64,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11111,
+    IntFmtMask:    4'b1111
+  };
+
+  localparam fpu_features_t RV32F_Xsflt = '{
+    Width:         32,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10111,
+    IntFmtMask:    4'b1110
+  };
+
+  localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{
+    Width:         32,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10001,
+    IntFmtMask:    4'b0110
+  };
+
+  // FPU configuration: implementation
+  typedef struct packed {
+    opgrp_fmt_unsigned_t   PipeRegs;
+    opgrp_fmt_unit_types_t UnitTypes;
+    pipe_config_t          PipeConfig;
+  } fpu_implementation_t;
+
+  localparam fpu_implementation_t DEFAULT_NOREGS = '{
+    PipeRegs:   '{default: 0},
+    UnitTypes:  '{'{default: PARALLEL}, // ADDMUL
+                  '{default: MERGED},   // DIVSQRT
+                  '{default: PARALLEL}, // NONCOMP
+                  '{default: MERGED}},  // CONV
+    PipeConfig: BEFORE
+  };
+
+  localparam fpu_implementation_t DEFAULT_SNITCH = '{
+    PipeRegs:   '{default: 1},
+    UnitTypes:  '{'{default: PARALLEL}, // ADDMUL
+                  '{default: DISABLED}, // DIVSQRT
+                  '{default: PARALLEL}, // NONCOMP
+                  '{default: MERGED}},  // CONV
+    PipeConfig: BEFORE
+  };
+
+  // -----------------------
+  // Synthesis optimization
+  // -----------------------
+  localparam logic DONT_CARE = 1'b1; // the value to assign as don't care
+
+  // -------------------------
+  // General helper functions
+  // -------------------------
+  function automatic int minimum(int a, int b);
+    if (b > a) return a; else return b; // smaller of two signed ints (b when they are equal)
+  endfunction
+
+  function automatic int maximum(int a, int b);
+    if (b < a) return a; else return b; // larger of two signed ints (b when they are equal)
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for FP formats and values
+  // -------------------------------------------
+  // Returns the width of a FP format: 1 (sign bit) + mantissa bits + exponent bits
+  function automatic int unsigned fp_width(fp_format_e fmt);
+    return 1 + FP_ENCODINGS[fmt].man_bits + FP_ENCODINGS[fmt].exp_bits;
+  endfunction
+
+  // Returns the width of the widest FP format enabled in cfg (0 if cfg enables none)
+  function automatic int unsigned max_fp_width(fmt_logic_t cfg);
+    automatic int unsigned widest = 0;
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) begin
+      if (cfg[fmt]) widest = unsigned'(maximum(widest, fp_width(fp_format_e'(fmt))));
+    end
+    return widest;
+  endfunction
+
+  // Returns the width of the narrowest FP format enabled in cfg (0 if cfg enables none)
+  function automatic int unsigned min_fp_width(fmt_logic_t cfg);
+    automatic int unsigned narrowest = max_fp_width(cfg); // start from the upper bound
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) begin
+      if (cfg[fmt]) narrowest = unsigned'(minimum(narrowest, fp_width(fp_format_e'(fmt))));
+    end
+    return narrowest;
+  endfunction
+
+  // Returns the number of exponent bits for a format (exp_bits entry of FP_ENCODINGS)
+  function automatic int unsigned exp_bits(fp_format_e fmt);
+    return FP_ENCODINGS[fmt].exp_bits;
+  endfunction
+
+  // Returns the number of mantissa bits for a format (man_bits entry of FP_ENCODINGS)
+  function automatic int unsigned man_bits(fp_format_e fmt);
+    return FP_ENCODINGS[fmt].man_bits;
+  endfunction
+
+  // Returns the bias value for a given format (as per IEEE 754-2008): 2^(exp_bits-1) - 1
+  function automatic int unsigned bias(fp_format_e fmt);
+    return unsigned'((1 << (FP_ENCODINGS[fmt].exp_bits - 1)) - 1); // symmetrical bias
+  endfunction
+
+  // Returns the field-wise maximum of exp_bits and man_bits over all active formats (0 if none)
+  function automatic fp_encoding_t super_format(fmt_logic_t cfg);
+    automatic fp_encoding_t sup = '0;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++)
+      if (cfg[i]) begin // inactive formats do not contribute
+        sup.exp_bits = unsigned'(maximum(sup.exp_bits, exp_bits(fp_format_e'(i))));
+        sup.man_bits = unsigned'(maximum(sup.man_bits, man_bits(fp_format_e'(i))));
+      end
+    return sup;
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for INT formats and values
+  // -------------------------------------------
+  // Returns the width of the widest INT format enabled in cfg (0 if cfg enables none)
+  function automatic int unsigned max_int_width(ifmt_logic_t cfg);
+    automatic int unsigned res = 0; // maximum() is signed, hence the explicit cast below
+    for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin // index typed like its bound
+      if (cfg[ifmt]) res = unsigned'(maximum(res, int_width(int_format_e'(ifmt))));
+    end
+    return res;
+  endfunction
+
+  // --------------------------------------------------
+  // Helper functions for operations and FPU structure
+  // --------------------------------------------------
+  // Returns the operation group of the given operation
+  function automatic opgroup_e get_opgroup(operation_e op);
+    unique case (op)
+      FMADD, FNMSUB, ADD, MUL:     return ADDMUL;
+      DIV, SQRT:                   return DIVSQRT;
+      SGNJ, MINMAX, CMP, CLASSIFY: return NONCOMP;
+      F2F, F2I, I2F, CPKAB, CPKCD: return CONV;
+      default:                     return NONCOMP; // encodings outside the 15 enumerators
+    endcase
+  endfunction
+
+  // Returns the number of operands by operation group
+  function automatic int unsigned num_operands(opgroup_e grp);
+    unique case (grp)
+      ADDMUL:  return 3;
+      DIVSQRT: return 2;
+      NONCOMP: return 2;
+      CONV:    return 3; // vectorial casts use 3 operands
+      default: return 0; // all four opgroup_e values are listed, so only X/Z selects this
+    endcase
+  endfunction
+
+  // Returns the number of lanes for a format: how often it fits into width, rounded down
+  function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec);
+    return vec ? width / fp_width(fmt) : 1; // if no vectors, only one lane
+  endfunction
+
+  // Returns the max number of lanes in the FPU. NOTE(review): divides by 0 if vec && cfg == '0
+  function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec);
+    return vec ? width / min_fp_width(cfg) : 1; // if no vectors, only one lane
+  endfunction
+
+  // Returns a mask of the active FP formats that occupy lane lane_no of a multiformat slice
+  function automatic fmt_logic_t get_lane_formats(int unsigned width,
+                                                  fmt_logic_t cfg,
+                                                  int unsigned lane_no);
+    automatic fmt_logic_t lane_mask;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++)
+      // A format occupies lane_no only if more than lane_no of its elements fit into width
+      lane_mask[i] = (lane_no < width / fp_width(fp_format_e'(i))) & cfg[i];
+    return lane_mask;
+  endfunction
+
+  // Returns a mask of active INT formats that are present in lane lane_no of a multiformat slice
+  function automatic ifmt_logic_t get_lane_int_formats(int unsigned width,
+                                                       fmt_logic_t cfg,
+                                                       ifmt_logic_t icfg,
+                                                       int unsigned lane_no);
+    automatic ifmt_logic_t int_mask;
+    automatic fmt_logic_t  fp_mask;
+    int_mask = '0;
+    fp_mask  = get_lane_formats(width, cfg, lane_no); // FP formats available in this lane
+
+    for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++)
+      for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++)
+        // An enabled int format is kept if a lane FP format of exactly the same width exists
+        int_mask[ifmt] |= icfg[ifmt] && fp_mask[fmt] &&
+                          (fp_width(fp_format_e'(fmt)) == int_width(int_format_e'(ifmt)));
+    return int_mask;
+  endfunction
+
+  // Returns a mask of the active FP formats that occupy lane lane_no of a CONV slice
+  function automatic fmt_logic_t get_conv_lane_formats(int unsigned width,
+                                                       fmt_logic_t cfg,
+                                                       int unsigned lane_no);
+    automatic fmt_logic_t conv_mask;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++)
+      // Active formats keep the lanes they fit into; active CPK formats keep at least lanes 0, 1
+      conv_mask[i] = cfg[i] && ((CPK_FORMATS[i] && (lane_no < 2)) ||
+                                (lane_no < width / fp_width(fp_format_e'(i))));
+    return conv_mask;
+  endfunction
+
+  // Returns a mask of the active INT formats that occupy lane lane_no of a CONV slice
+  function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width,
+                                                            fmt_logic_t cfg,
+                                                            ifmt_logic_t icfg,
+                                                            int unsigned lane_no);
+    automatic ifmt_logic_t int_mask;
+    automatic fmt_logic_t  conv_mask;
+    int_mask  = '0;
+    conv_mask = get_conv_lane_formats(width, cfg, lane_no); // FP formats of this CONV lane
+
+    for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++)
+      for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++)
+        // An enabled int format is kept if a lane FP format of exactly the same width exists
+        int_mask[ifmt] |= (int_width(int_format_e'(ifmt)) == fp_width(fp_format_e'(fmt))) &&
+                          conv_mask[fmt] && icfg[ifmt];
+    return int_mask;
+  endfunction
+
+  // Return whether any active format is set as MERGED
+  function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) return 1'b1;
+    end
+    return 1'b0; // reached only after the whole loop: no active format is MERGED
+  endfunction
+
+  // Return whether fmt is the lowest-indexed active format that is set as MERGED
+  function automatic logic is_first_enabled_multi(fp_format_e fmt,
+                                                  fmt_unit_types_t types,
+                                                  fmt_logic_t cfg);
+    for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+      if ((types[idx] == MERGED) && cfg[idx]) return (fmt == fp_format_e'(idx));
+    end
+    return 1'b0; // no active format is MERGED at all
+  endfunction
+
+  // Returns the first format that is active and is set as MERGED (format index 0 if none)
+  function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) return fp_format_e'(i);
+    end
+    return fp_format_e'(0); // reached only after the whole loop: fallback value
+  endfunction
+
+  // Returns the largest number of regs among the active formats that are set as MERGED
+  function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs,
+                                                     fmt_unit_types_t types,
+                                                     fmt_logic_t cfg);
+    automatic int unsigned res = 0; // stays 0 if no active format is MERGED
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) res = unsigned'(maximum(res, regs[i])); // signed helper
+    end
+    return res;
+  endfunction
+
+endpackage
diff --git a/verilog/rtl/fpnew_rounding.sv b/verilog/rtl/fpnew_rounding.sv
new file mode 100644
index 0000000..4e4b7c7
--- /dev/null
+++ b/verilog/rtl/fpnew_rounding.sv
@@ -0,0 +1,72 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_rounding #(
+  parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit
+) (
+  // Input value
+  input logic [AbsWidth-1:0]   abs_value_i,             // absolute value without sign
+  input logic                  sign_i,
+  // Rounding information
+  input logic [1:0]            round_sticky_bits_i,     // round and sticky bits {RS}
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input logic                  effective_subtraction_i, // sign of inputs affects rounding of zeroes
+  // Output value
+  output logic [AbsWidth-1:0]  abs_rounded_o,           // absolute value without sign
+  output logic                 sign_o,
+  // Output classification
+  output logic                 exact_zero_o             // output is an exact zero
+);
+
+  logic round_up; // Set when the absolute value has to be incremented by one
+
+  // The increment decision follows the rounding modes of the RISC-V spec
+  // RoundMode | Mnemonic | Meaning
+  // :--------:|:--------:|:-------
+  //    000    |   RNE    | Round to Nearest, ties to Even
+  //    001    |   RTZ    | Round towards Zero
+  //    010    |   RDN    | Round Down (towards -\infty)
+  //    011    |   RUP    | Round Up (towards \infty)
+  //    100    |   RMM    | Round to Nearest, ties to Max Magnitude
+  //  others   |          | *invalid*
+  always_comb begin : rounding_decision
+    unique case (rnd_mode_i)
+      fpnew_pkg::RNE: // Decide according to both the round and the sticky bit
+        unique case (round_sticky_bits_i)
+          2'b11: round_up = 1'b1;           // > ulp/2 away, round up
+          2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result
+          2'b01,
+          2'b00: round_up = 1'b0;           // < ulp/2 away, round down
+          // no default: the four items above cover every round/sticky combination
+        endcase
+      fpnew_pkg::RTZ: round_up = 1'b0; // truncate, never increment
+      fpnew_pkg::RDN: round_up = sign_i  & (| round_sticky_bits_i); // inexact: to 0 if +, away if -
+      fpnew_pkg::RUP: round_up = ~sign_i & (| round_sticky_bits_i); // inexact: to 0 if -, away if +
+      fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
+      default: round_up = fpnew_pkg::DONT_CARE; // invalid rounding mode: don't-care value
+    endcase
+  end
+
+  // Apply the increment, exponent change and overflow to inf happen automatically
+  assign abs_rounded_o = abs_value_i + round_up;
+
+  // True zero result is a zero result without dirty round/sticky bits
+  assign exact_zero_o = (round_sticky_bits_i == '0) && (abs_value_i == '0);
+
+  // In case of effective subtraction (thus signs of addition operands must have differed) and a
+  // true zero result, the result sign is '-' in case of RDN and '+' for other modes.
+  assign sign_o = (effective_subtraction_i && exact_zero_o)
+                  ? (rnd_mode_i == fpnew_pkg::RDN)
+                  : sign_i;
+
+endmodule
diff --git a/verilog/rtl/fpnew_top.sv b/verilog/rtl/fpnew_top.sv
new file mode 100644
index 0000000..5b37edd
--- /dev/null
+++ b/verilog/rtl/fpnew_top.sv
@@ -0,0 +1,172 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_top #(
+  // Feature set and implementation options of this FPU instance
+  parameter fpnew_pkg::fpu_features_t       Features       = fpnew_pkg::RV64D_Xsflt,
+  parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS,
+  parameter type                            TagType        = logic,
+  // Derived constants (localparams, cannot be overridden)
+  localparam int unsigned WIDTH        = Features.Width,
+  localparam int unsigned NUM_OPERANDS = 3
+) (
+  input logic                               clk_i,
+  input logic                               rst_ni,
+  // Request: operands and operation control
+  input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
+  input fpnew_pkg::roundmode_e              rnd_mode_i,
+  input fpnew_pkg::operation_e              op_i,
+  input logic                               op_mod_i,
+  input fpnew_pkg::fp_format_e              src_fmt_i,
+  input fpnew_pkg::fp_format_e              dst_fmt_i,
+  input fpnew_pkg::int_format_e             int_fmt_i,
+  input logic                               vectorial_op_i,
+  input TagType                             tag_i,
+  // Request handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Response: result, status and the tag delivered with them
+  output logic [WIDTH-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output TagType                            tag_o,
+  // Response handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // OR of the busy flags of all operation groups
+  output logic                              busy_o
+);
+
+  localparam int unsigned N_OPGRP = fpnew_pkg::NUM_OPGROUPS;
+  localparam int unsigned N_FMT   = fpnew_pkg::NUM_FP_FORMATS;
+
+  // Bundle that each operation group hands to the output arbiter
+  typedef struct packed {
+    logic [WIDTH-1:0]   result;
+    fpnew_pkg::status_t status;
+    TagType             tag;
+  } grp_result_t;
+
+  // Per-group handshake/status wires, one bit per operation group
+  logic [N_OPGRP-1:0] grp_in_ready;
+  logic [N_OPGRP-1:0] grp_out_valid;
+  logic [N_OPGRP-1:0] grp_out_ready;
+  logic [N_OPGRP-1:0] grp_ext;   // driven by extension_bit_o, not read in this module
+  logic [N_OPGRP-1:0] grp_busy;
+  grp_result_t [N_OPGRP-1:0] grp_result;
+
+  // Boxing flag for every (format, operand) pair
+  logic [N_FMT-1:0][NUM_OPERANDS-1:0] boxed;
+
+  // in_ready_o is only raised while a request is presented; it mirrors the
+  // ready of the operation group selected by op_i.
+  assign in_ready_o = grp_in_ready[fpnew_pkg::get_opgroup(op_i)] & in_valid_i;
+
+  // NaN-boxing check, elaborated once per FP format
+  for (genvar fmt = 0; fmt < int'(N_FMT); fmt++) begin : gen_nanbox_check
+    localparam int unsigned FMT_BITS = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    if (!Features.EnableNanBox || (FMT_BITS >= WIDTH)) begin : no_check
+      // Check disabled or format as wide as the datapath: always flagged boxed
+      assign boxed[fmt] = '1;
+    end else begin : check
+      for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands
+        // Vectorial ops bypass the check; otherwise the bits above the format must be all ones
+        assign boxed[fmt][op] = vectorial_op_i
+                                ? 1'b1
+                                : (operands_i[op][WIDTH-1:FMT_BITS] == '1);
+      end
+    end
+  end
+
+  // One fpnew_opgroup_block instance per operation group
+  for (genvar opgrp = 0; opgrp < int'(N_OPGRP); opgrp++) begin : gen_operation_groups
+    localparam int unsigned N_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp));
+
+    logic                        grp_in_valid;
+    logic [N_FMT-1:0][N_OPS-1:0] grp_boxed;
+
+    // Only the group matching op_i sees the incoming valid
+    assign grp_in_valid = (fpnew_pkg::opgroup_e'(opgrp) == fpnew_pkg::get_opgroup(op_i)) & in_valid_i;
+
+    // Keep the boxing flags of the low N_OPS operands only
+    always_comb begin : slice_inputs
+      for (int unsigned f = 0; f < N_FMT; f++) begin
+        grp_boxed[f] = boxed[f][N_OPS-1:0];
+      end
+    end
+
+    fpnew_opgroup_block #(
+      .OpGroup       ( fpnew_pkg::opgroup_e'(opgrp)    ),
+      .Width         ( WIDTH                           ),
+      .EnableVectors ( Features.EnableVectors          ),
+      .FpFmtMask     ( Features.FpFmtMask              ),
+      .IntFmtMask    ( Features.IntFmtMask             ),
+      .FmtPipeRegs   ( Implementation.PipeRegs[opgrp]  ),
+      .FmtUnitTypes  ( Implementation.UnitTypes[opgrp] ),
+      .PipeConfig    ( Implementation.PipeConfig       ),
+      .TagType       ( TagType                         )
+    ) i_opgroup_block (
+      .clk_i           ( clk_i                    ),
+      .rst_ni          ( rst_ni                   ),
+      .flush_i         ( flush_i                  ),
+      .operands_i      ( operands_i[N_OPS-1:0]    ),
+      .is_boxed_i      ( grp_boxed                ),
+      .rnd_mode_i      ( rnd_mode_i               ),
+      .op_i            ( op_i                     ),
+      .op_mod_i        ( op_mod_i                 ),
+      .src_fmt_i       ( src_fmt_i                ),
+      .dst_fmt_i       ( dst_fmt_i                ),
+      .int_fmt_i       ( int_fmt_i                ),
+      .vectorial_op_i  ( vectorial_op_i           ),
+      .tag_i           ( tag_i                    ),
+      .in_valid_i      ( grp_in_valid             ),
+      .in_ready_o      ( grp_in_ready[opgrp]      ),
+      .result_o        ( grp_result[opgrp].result ),
+      .status_o        ( grp_result[opgrp].status ),
+      .tag_o           ( grp_result[opgrp].tag    ),
+      .extension_bit_o ( grp_ext[opgrp]           ),
+      .out_valid_o     ( grp_out_valid[opgrp]     ),
+      .out_ready_i     ( grp_out_ready[opgrp]     ),
+      .busy_o          ( grp_busy[opgrp]          )
+    );
+  end
+
+  // Output arbitration: rr_arb_tree picks one group's bundle per grant
+  grp_result_t arb_result;
+
+  rr_arb_tree #(
+    .NumIn     ( N_OPGRP      ),
+    .DataType  ( grp_result_t ),
+    .AxiVldRdy ( 1'b1         )
+  ) i_arbiter (
+    .clk_i   ( clk_i         ),
+    .rst_ni  ( rst_ni        ),
+    .flush_i ( flush_i       ),
+    .rr_i    ( '0            ),
+    .req_i   ( grp_out_valid ),
+    .gnt_o   ( grp_out_ready ),
+    .data_i  ( grp_result    ),
+    .gnt_i   ( out_ready_i   ),
+    .req_o   ( out_valid_o   ),
+    .data_o  ( arb_result    ),
+    .idx_o   ( /* unused */  )
+  );
+
+  // Split the selected bundle onto the output ports
+  assign tag_o    = arb_result.tag;
+  assign status_o = arb_result.status;
+  assign result_o = arb_result.result;
+
+  assign busy_o = |grp_busy;
+
+endmodule
diff --git a/verilog/rtl/gpio.sv b/verilog/rtl/gpio.sv
new file mode 100644
index 0000000..762553c
--- /dev/null
+++ b/verilog/rtl/gpio.sv
@@ -0,0 +1,144 @@
+
+// General Purpose Input/Output module
+
+
+module gpio (
+  input clk_i,
+  input rst_ni,
+
+  // Bus interface of the register block (tlul_pkg request/response types)
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+
+  input        [31:0] cio_gpio_i,
+  output logic [31:0] cio_gpio_o,
+  output logic [31:0] cio_gpio_en_o,
+
+  output logic [31:0] intr_gpio_o
+);
+
+  import gpio_reg_pkg::* ;
+
+  gpio_reg2hw_t reg2hw;
+  gpio_hw2reg_t hw2reg;
+
+  // ---- Input path ----
+  logic [31:0] pin_in;    // per-pin output of the input filters
+  logic [31:0] pin_in_q;  // pin_in registered once, used for edge detection
+
+  // One filter per pin; its enable comes from the matching CTRL_EN_INPUT_FILTER bit
+  for (genvar i = 0 ; i < 32 ; i++) begin : gen_filter
+    prim_filter_ctr #(.Cycles(16)) filter (
+      .clk_i    (clk_i),
+      .rst_ni   (rst_ni),
+      .enable_i (reg2hw.ctrl_en_input_filter.q[i]),
+      .filter_i (cio_gpio_i[i]),
+      .filter_o (pin_in[i])
+    );
+  end
+
+  always_ff @(posedge clk_i) begin  // note: this register has no reset term
+    pin_in_q <= pin_in;
+  end
+
+  // DATA_IN is updated from the filtered pins on every cycle (de tied high)
+  assign hw2reg.data_in.d  = pin_in;
+  assign hw2reg.data_in.de = 1'b1;
+
+  // ---- Output value ----
+  // Write priority: DIRECT_OUT, then MASKED_OUT_UPPER, then MASKED_OUT_LOWER.
+  // A masked write replaces only the bits whose mask bit is 1.
+  logic [31:0] out_q, out_d;
+
+  always_comb begin
+    out_d = out_q;
+    if (reg2hw.direct_out.qe) begin
+      out_d = reg2hw.direct_out.q;
+    end else if (reg2hw.masked_out_upper.data.qe) begin
+      out_d[31:16] = (out_q[31:16] & ~reg2hw.masked_out_upper.mask.q) |
+                     (reg2hw.masked_out_upper.data.q & reg2hw.masked_out_upper.mask.q);
+    end else if (reg2hw.masked_out_lower.data.qe) begin
+      out_d[15:0] = (out_q[15:0] & ~reg2hw.masked_out_lower.mask.q) |
+                    (reg2hw.masked_out_lower.data.q & reg2hw.masked_out_lower.mask.q);
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) out_q <= '0;
+    else         out_q <= out_d;
+  end
+
+  assign cio_gpio_o = out_q;
+
+  // Values returned to the register block: data mirrors out_q, mask is driven 0
+  assign hw2reg.direct_out.d            = out_q;
+  assign hw2reg.masked_out_lower.data.d = out_q[15:0];
+  assign hw2reg.masked_out_lower.mask.d = 16'h 0;
+  assign hw2reg.masked_out_upper.data.d = out_q[31:16];
+  assign hw2reg.masked_out_upper.mask.d = 16'h 0;
+
+  // ---- Output enable ----
+  // Same scheme as the output value, driven by DIRECT_OE / MASKED_OE_UPPER / MASKED_OE_LOWER
+  logic [31:0] oe_q, oe_d;
+
+  always_comb begin
+    oe_d = oe_q;
+    if (reg2hw.direct_oe.qe) begin
+      oe_d = reg2hw.direct_oe.q;
+    end else if (reg2hw.masked_oe_upper.data.qe) begin
+      oe_d[31:16] = (oe_q[31:16] & ~reg2hw.masked_oe_upper.mask.q) |
+                    (reg2hw.masked_oe_upper.data.q & reg2hw.masked_oe_upper.mask.q);
+    end else if (reg2hw.masked_oe_lower.data.qe) begin
+      oe_d[15:0] = (oe_q[15:0] & ~reg2hw.masked_oe_lower.mask.q) |
+                   (reg2hw.masked_oe_lower.data.q & reg2hw.masked_oe_lower.mask.q);
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) oe_q <= '0;
+    else         oe_q <= oe_d;
+  end
+
+  assign cio_gpio_en_o = oe_q;
+
+  assign hw2reg.direct_oe.d            = oe_q;
+  assign hw2reg.masked_oe_lower.data.d = oe_q[15:0];
+  assign hw2reg.masked_oe_lower.mask.d = 16'h 0;
+  assign hw2reg.masked_oe_upper.data.d = oe_q[31:16];
+  assign hw2reg.masked_oe_upper.mask.d = 16'h 0;
+
+  // ---- Interrupts ----
+  // Four event types per pin, each gated by its own enable register
+  logic [31:0] ev_rise, ev_fall, ev_high, ev_low, ev_any;
+
+  assign ev_rise = reg2hw.intr_ctrl_en_rising.q  & ( pin_in & ~pin_in_q);
+  assign ev_fall = reg2hw.intr_ctrl_en_falling.q & (~pin_in &  pin_in_q);
+  assign ev_high = reg2hw.intr_ctrl_en_lvlhigh.q &   pin_in;
+  assign ev_low  = reg2hw.intr_ctrl_en_lvllow.q  &  ~pin_in;
+
+  assign ev_any = ev_rise | ev_fall | ev_high | ev_low;
+
+  prim_intr_hw #(.Width(32)) intr_hw (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .event_intr_i           (ev_any),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.d),
+    .intr_o                 (intr_gpio_o)
+  );
+
+  // Register block; devmode_i is tied high
+  gpio_reg_top u_reg (
+    .clk_i     (clk_i),
+    .rst_ni    (rst_ni),
+    .tl_i      (tl_i),
+    .tl_o      (tl_o),
+    .reg2hw    (reg2hw),
+    .hw2reg    (hw2reg),
+    .devmode_i (1'b1)
+  );
+endmodule
diff --git a/verilog/rtl/gpio_reg_pkg.sv b/verilog/rtl/gpio_reg_pkg.sv
new file mode 100644
index 0000000..b85347a
--- /dev/null
+++ b/verilog/rtl/gpio_reg_pkg.sv
@@ -0,0 +1,248 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Package auto-generated by `reggen` containing data structure
+
+package gpio_reg_pkg;  // Types, offsets and write-permit table for the GPIO register block
+
+  // Address width within the block (the register offsets below are BlockAw bits wide)
+  parameter int BlockAw = 6;
+
+  ////////////////////////////
+  // Typedefs for registers //
+  ////////////////////////////
+  typedef struct packed {  // INTR_STATE: q is passed to prim_intr_hw in gpio.sv
+    logic [31:0] q;
+  } gpio_reg2hw_intr_state_reg_t;
+
+  typedef struct packed {  // INTR_ENABLE: q is passed to prim_intr_hw in gpio.sv
+    logic [31:0] q;
+  } gpio_reg2hw_intr_enable_reg_t;
+
+  typedef struct packed {  // INTR_TEST: q and qe are both passed to prim_intr_hw in gpio.sv
+    logic [31:0] q;
+    logic        qe;
+  } gpio_reg2hw_intr_test_reg_t;
+
+  typedef struct packed {  // DIRECT_OUT: gpio.sv loads q into its output register while qe is set
+    logic [31:0] q;
+    logic        qe;  // presumably a write strobe from prim_subreg_ext -- TODO confirm
+  } gpio_reg2hw_direct_out_reg_t;
+
+  typedef struct packed {  // MASKED_OUT_LOWER: masked update of output bits [15:0]
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } data;  // data.q = new bit values; data.qe triggers the update in gpio.sv
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } mask;  // mask.q bit = 1 selects the data bit; mask.qe is not used by gpio.sv
+  } gpio_reg2hw_masked_out_lower_reg_t;
+
+  typedef struct packed {  // MASKED_OUT_UPPER: masked update of output bits [31:16]
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } data;  // data.q = new bit values; data.qe triggers the update in gpio.sv
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } mask;  // mask.q bit = 1 selects the data bit; mask.qe is not used by gpio.sv
+  } gpio_reg2hw_masked_out_upper_reg_t;
+
+  typedef struct packed {  // DIRECT_OE: gpio.sv loads q into its output-enable register while qe is set
+    logic [31:0] q;
+    logic        qe;
+  } gpio_reg2hw_direct_oe_reg_t;
+
+  typedef struct packed {  // MASKED_OE_LOWER: masked update of output-enable bits [15:0]
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } data;  // data.q = new bit values; data.qe triggers the update in gpio.sv
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } mask;  // mask.q bit = 1 selects the data bit; mask.qe is not used by gpio.sv
+  } gpio_reg2hw_masked_oe_lower_reg_t;
+
+  typedef struct packed {  // MASKED_OE_UPPER: masked update of output-enable bits [31:16]
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } data;  // data.q = new bit values; data.qe triggers the update in gpio.sv
+    struct packed {
+      logic [15:0] q;
+      logic        qe;
+    } mask;  // mask.q bit = 1 selects the data bit; mask.qe is not used by gpio.sv
+  } gpio_reg2hw_masked_oe_upper_reg_t;
+
+  typedef struct packed {  // INTR_CTRL_EN_RISING: per-pin gate of the rising-edge event in gpio.sv
+    logic [31:0] q;
+  } gpio_reg2hw_intr_ctrl_en_rising_reg_t;
+
+  typedef struct packed {  // INTR_CTRL_EN_FALLING: per-pin gate of the falling-edge event in gpio.sv
+    logic [31:0] q;
+  } gpio_reg2hw_intr_ctrl_en_falling_reg_t;
+
+  typedef struct packed {  // INTR_CTRL_EN_LVLHIGH: per-pin gate of the level-high event in gpio.sv
+    logic [31:0] q;
+  } gpio_reg2hw_intr_ctrl_en_lvlhigh_reg_t;
+
+  typedef struct packed {  // INTR_CTRL_EN_LVLLOW: per-pin gate of the level-low event in gpio.sv
+    logic [31:0] q;
+  } gpio_reg2hw_intr_ctrl_en_lvllow_reg_t;
+
+  typedef struct packed {  // CTRL_EN_INPUT_FILTER: per-pin enable of the input filter in gpio.sv
+    logic [31:0] q;
+  } gpio_reg2hw_ctrl_en_input_filter_reg_t;
+
+
+  typedef struct packed {  // INTR_STATE update: d/de are driven by prim_intr_hw in gpio.sv
+    logic [31:0] d;
+    logic        de;
+  } gpio_hw2reg_intr_state_reg_t;
+
+  typedef struct packed {  // DATA_IN update: gpio.sv drives d with the filtered pins and ties de to 1
+    logic [31:0] d;
+    logic        de;
+  } gpio_hw2reg_data_in_reg_t;
+
+  typedef struct packed {  // DIRECT_OUT value supplied by hardware: the output register in gpio.sv
+    logic [31:0] d;
+  } gpio_hw2reg_direct_out_reg_t;
+
+  typedef struct packed {  // MASKED_OUT_LOWER values supplied by hardware
+    struct packed {
+      logic [15:0] d;
+    } data;  // gpio.sv drives this with output register bits [15:0]
+    struct packed {
+      logic [15:0] d;
+    } mask;  // gpio.sv drives this with constant 0
+  } gpio_hw2reg_masked_out_lower_reg_t;
+
+  typedef struct packed {  // MASKED_OUT_UPPER values supplied by hardware
+    struct packed {
+      logic [15:0] d;
+    } data;  // gpio.sv drives this with output register bits [31:16]
+    struct packed {
+      logic [15:0] d;
+    } mask;  // gpio.sv drives this with constant 0
+  } gpio_hw2reg_masked_out_upper_reg_t;
+
+  typedef struct packed {  // DIRECT_OE value supplied by hardware: the output-enable register in gpio.sv
+    logic [31:0] d;
+  } gpio_hw2reg_direct_oe_reg_t;
+
+  typedef struct packed {  // MASKED_OE_LOWER values supplied by hardware
+    struct packed {
+      logic [15:0] d;
+    } data;  // gpio.sv drives this with output-enable register bits [15:0]
+    struct packed {
+      logic [15:0] d;
+    } mask;  // gpio.sv drives this with constant 0
+  } gpio_hw2reg_masked_oe_lower_reg_t;
+
+  typedef struct packed {  // MASKED_OE_UPPER values supplied by hardware
+    struct packed {
+      logic [15:0] d;
+    } data;  // gpio.sv drives this with output-enable register bits [31:16]
+    struct packed {
+      logic [15:0] d;
+    } mask;  // gpio.sv drives this with constant 0
+  } gpio_hw2reg_masked_oe_upper_reg_t;
+
+
+  ///////////////////////////////////////
+  // Register to internal design logic //
+  ///////////////////////////////////////
+  typedef struct packed {  // 459 bits in total; each trailing comment is that field's bit range
+    gpio_reg2hw_intr_state_reg_t intr_state; // [458:427]
+    gpio_reg2hw_intr_enable_reg_t intr_enable; // [426:395]
+    gpio_reg2hw_intr_test_reg_t intr_test; // [394:362]
+    gpio_reg2hw_direct_out_reg_t direct_out; // [361:329]
+    gpio_reg2hw_masked_out_lower_reg_t masked_out_lower; // [328:295]
+    gpio_reg2hw_masked_out_upper_reg_t masked_out_upper; // [294:261]
+    gpio_reg2hw_direct_oe_reg_t direct_oe; // [260:228]
+    gpio_reg2hw_masked_oe_lower_reg_t masked_oe_lower; // [227:194]
+    gpio_reg2hw_masked_oe_upper_reg_t masked_oe_upper; // [193:160]
+    gpio_reg2hw_intr_ctrl_en_rising_reg_t intr_ctrl_en_rising; // [159:128]
+    gpio_reg2hw_intr_ctrl_en_falling_reg_t intr_ctrl_en_falling; // [127:96]
+    gpio_reg2hw_intr_ctrl_en_lvlhigh_reg_t intr_ctrl_en_lvlhigh; // [95:64]
+    gpio_reg2hw_intr_ctrl_en_lvllow_reg_t intr_ctrl_en_lvllow; // [63:32]
+    gpio_reg2hw_ctrl_en_input_filter_reg_t ctrl_en_input_filter; // [31:0]
+  } gpio_reg2hw_t;
+
+  ///////////////////////////////////////
+  // Internal design logic to register //
+  ///////////////////////////////////////
+  typedef struct packed {  // 258 bits in total; each trailing comment is that field's bit range
+    gpio_hw2reg_intr_state_reg_t intr_state; // [257:225]
+    gpio_hw2reg_data_in_reg_t data_in; // [224:192]
+    gpio_hw2reg_direct_out_reg_t direct_out; // [191:160]
+    gpio_hw2reg_masked_out_lower_reg_t masked_out_lower; // [159:128]
+    gpio_hw2reg_masked_out_upper_reg_t masked_out_upper; // [127:96]
+    gpio_hw2reg_direct_oe_reg_t direct_oe; // [95:64]
+    gpio_hw2reg_masked_oe_lower_reg_t masked_oe_lower; // [63:32]
+    gpio_hw2reg_masked_oe_upper_reg_t masked_oe_upper; // [31:0]
+  } gpio_hw2reg_t;
+
+  // Register Address (offset of each register inside the block; consecutive registers are 4 apart)
+  parameter logic [BlockAw-1:0] GPIO_INTR_STATE_OFFSET = 6'h 0;
+  parameter logic [BlockAw-1:0] GPIO_INTR_ENABLE_OFFSET = 6'h 4;
+  parameter logic [BlockAw-1:0] GPIO_INTR_TEST_OFFSET = 6'h 8;
+  parameter logic [BlockAw-1:0] GPIO_DATA_IN_OFFSET = 6'h c;
+  parameter logic [BlockAw-1:0] GPIO_DIRECT_OUT_OFFSET = 6'h 10;
+  parameter logic [BlockAw-1:0] GPIO_MASKED_OUT_LOWER_OFFSET = 6'h 14;
+  parameter logic [BlockAw-1:0] GPIO_MASKED_OUT_UPPER_OFFSET = 6'h 18;
+  parameter logic [BlockAw-1:0] GPIO_DIRECT_OE_OFFSET = 6'h 1c;
+  parameter logic [BlockAw-1:0] GPIO_MASKED_OE_LOWER_OFFSET = 6'h 20;
+  parameter logic [BlockAw-1:0] GPIO_MASKED_OE_UPPER_OFFSET = 6'h 24;
+  parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_RISING_OFFSET = 6'h 28;
+  parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_FALLING_OFFSET = 6'h 2c;
+  parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_LVLHIGH_OFFSET = 6'h 30;
+  parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_LVLLOW_OFFSET = 6'h 34;
+  parameter logic [BlockAw-1:0] GPIO_CTRL_EN_INPUT_FILTER_OFFSET = 6'h 38;
+
+
+  // Register Index (enumerators are listed in the same order as the offsets above and GPIO_PERMIT below)
+  typedef enum int {
+    GPIO_INTR_STATE,
+    GPIO_INTR_ENABLE,
+    GPIO_INTR_TEST,
+    GPIO_DATA_IN,
+    GPIO_DIRECT_OUT,
+    GPIO_MASKED_OUT_LOWER,
+    GPIO_MASKED_OUT_UPPER,
+    GPIO_DIRECT_OE,
+    GPIO_MASKED_OE_LOWER,
+    GPIO_MASKED_OE_UPPER,
+    GPIO_INTR_CTRL_EN_RISING,
+    GPIO_INTR_CTRL_EN_FALLING,
+    GPIO_INTR_CTRL_EN_LVLHIGH,
+    GPIO_INTR_CTRL_EN_LVLLOW,
+    GPIO_CTRL_EN_INPUT_FILTER
+  } gpio_id_e;
+
+  // Register width information to check illegal writes: gpio_reg_top raises wr_err when a write's byte enables do not cover every bit set in the register's entry
+  parameter logic [3:0] GPIO_PERMIT [15] = '{
+    4'b 1111, // index[ 0] GPIO_INTR_STATE
+    4'b 1111, // index[ 1] GPIO_INTR_ENABLE
+    4'b 1111, // index[ 2] GPIO_INTR_TEST
+    4'b 1111, // index[ 3] GPIO_DATA_IN
+    4'b 1111, // index[ 4] GPIO_DIRECT_OUT
+    4'b 1111, // index[ 5] GPIO_MASKED_OUT_LOWER
+    4'b 1111, // index[ 6] GPIO_MASKED_OUT_UPPER
+    4'b 1111, // index[ 7] GPIO_DIRECT_OE
+    4'b 1111, // index[ 8] GPIO_MASKED_OE_LOWER
+    4'b 1111, // index[ 9] GPIO_MASKED_OE_UPPER
+    4'b 1111, // index[10] GPIO_INTR_CTRL_EN_RISING
+    4'b 1111, // index[11] GPIO_INTR_CTRL_EN_FALLING
+    4'b 1111, // index[12] GPIO_INTR_CTRL_EN_LVLHIGH
+    4'b 1111, // index[13] GPIO_INTR_CTRL_EN_LVLLOW
+    4'b 1111  // index[14] GPIO_CTRL_EN_INPUT_FILTER
+  };
+endpackage
+
diff --git a/verilog/rtl/gpio_reg_top.sv b/verilog/rtl/gpio_reg_top.sv
new file mode 100644
index 0000000..1c2ba13
--- /dev/null
+++ b/verilog/rtl/gpio_reg_top.sv
@@ -0,0 +1,706 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Top module auto-generated by `reggen`
+
+
+module gpio_reg_top (
+  input clk_i,
+  input rst_ni,
+
+  // Below Regster interface can be changed
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  // To HW
+  output gpio_reg_pkg::gpio_reg2hw_t reg2hw, // Write
+  input  gpio_reg_pkg::gpio_hw2reg_t hw2reg, // Read
+
+  // Config
+  input devmode_i // If 1, explicit error return for unmapped register access
+);
+
+  import gpio_reg_pkg::* ;
+
+  localparam int AW = 6;
+  localparam int DW = 32;
+  localparam int DBW = DW/8;                    // Byte Width
+
+  // register signals
+  logic           reg_we;
+  logic           reg_re;
+  logic [AW-1:0]  reg_addr;
+  logic [DW-1:0]  reg_wdata;
+  logic [DBW-1:0] reg_be;
+  logic [DW-1:0]  reg_rdata;
+  logic           reg_error;
+
+  logic          addrmiss, wr_err;
+
+  logic [DW-1:0] reg_rdata_next;
+
+  tlul_pkg::tl_h2d_t tl_reg_h2d;
+  tlul_pkg::tl_d2h_t tl_reg_d2h;
+
+  assign tl_reg_h2d = tl_i;
+  assign tl_o       = tl_reg_d2h;
+
+  tlul_adapter_reg #(
+    .RegAw(AW),
+    .RegDw(DW)
+  ) u_reg_if (
+    .clk_i,
+    .rst_ni,
+
+    .tl_i (tl_reg_h2d),
+    .tl_o (tl_reg_d2h),
+
+    .we_o    (reg_we),
+    .re_o    (reg_re),
+    .addr_o  (reg_addr),
+    .wdata_o (reg_wdata),
+    .be_o    (reg_be),
+    .rdata_i (reg_rdata),
+    .error_i (reg_error)
+  );
+
+  assign reg_rdata = reg_rdata_next ;
+  assign reg_error = (devmode_i & addrmiss) | wr_err ;
+
+  // Define SW related signals
+  // Format: <reg>_<field>_{wd|we|qs}
+  //        or <reg>_{wd|we|qs} if field == 1 or 0
+  logic [31:0] intr_state_qs;
+  logic [31:0] intr_state_wd;
+  logic intr_state_we;
+  logic [31:0] intr_enable_qs;
+  logic [31:0] intr_enable_wd;
+  logic intr_enable_we;
+  logic [31:0] intr_test_wd;
+  logic intr_test_we;
+  logic [31:0] data_in_qs;
+  logic [31:0] direct_out_qs;
+  logic [31:0] direct_out_wd;
+  logic direct_out_we;
+  logic direct_out_re;
+  logic [15:0] masked_out_lower_data_qs;
+  logic [15:0] masked_out_lower_data_wd;
+  logic masked_out_lower_data_we;
+  logic masked_out_lower_data_re;
+  logic [15:0] masked_out_lower_mask_wd;
+  logic masked_out_lower_mask_we;
+  logic [15:0] masked_out_upper_data_qs;
+  logic [15:0] masked_out_upper_data_wd;
+  logic masked_out_upper_data_we;
+  logic masked_out_upper_data_re;
+  logic [15:0] masked_out_upper_mask_wd;
+  logic masked_out_upper_mask_we;
+  logic [31:0] direct_oe_qs;
+  logic [31:0] direct_oe_wd;
+  logic direct_oe_we;
+  logic direct_oe_re;
+  logic [15:0] masked_oe_lower_data_qs;
+  logic [15:0] masked_oe_lower_data_wd;
+  logic masked_oe_lower_data_we;
+  logic masked_oe_lower_data_re;
+  logic [15:0] masked_oe_lower_mask_qs;
+  logic [15:0] masked_oe_lower_mask_wd;
+  logic masked_oe_lower_mask_we;
+  logic masked_oe_lower_mask_re;
+  logic [15:0] masked_oe_upper_data_qs;
+  logic [15:0] masked_oe_upper_data_wd;
+  logic masked_oe_upper_data_we;
+  logic masked_oe_upper_data_re;
+  logic [15:0] masked_oe_upper_mask_qs;
+  logic [15:0] masked_oe_upper_mask_wd;
+  logic masked_oe_upper_mask_we;
+  logic masked_oe_upper_mask_re;
+  logic [31:0] intr_ctrl_en_rising_qs;
+  logic [31:0] intr_ctrl_en_rising_wd;
+  logic intr_ctrl_en_rising_we;
+  logic [31:0] intr_ctrl_en_falling_qs;
+  logic [31:0] intr_ctrl_en_falling_wd;
+  logic intr_ctrl_en_falling_we;
+  logic [31:0] intr_ctrl_en_lvlhigh_qs;
+  logic [31:0] intr_ctrl_en_lvlhigh_wd;
+  logic intr_ctrl_en_lvlhigh_we;
+  logic [31:0] intr_ctrl_en_lvllow_qs;
+  logic [31:0] intr_ctrl_en_lvllow_wd;
+  logic intr_ctrl_en_lvllow_we;
+  logic [31:0] ctrl_en_input_filter_qs;
+  logic [31:0] ctrl_en_input_filter_wd;
+  logic ctrl_en_input_filter_we;
+
+  // Register instances
+  // R[intr_state]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("W1C"),
+    .RESVAL  (32'h0)
+  ) u_intr_state (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_state_we),
+    .wd     (intr_state_wd),
+
+    // from internal hardware
+    .de     (hw2reg.intr_state.de),
+    .d      (hw2reg.intr_state.d ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_state.q ),
+
+    // to register interface (read)
+    .qs     (intr_state_qs)
+  );
+
+
+  // R[intr_enable]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_intr_enable (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_enable_we),
+    .wd     (intr_enable_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_enable.q ),
+
+    // to register interface (read)
+    .qs     (intr_enable_qs)
+  );
+
+
+  // R[intr_test]: V(True)
+
+  prim_subreg_ext #(
+    .DW    (32)
+  ) u_intr_test (
+    .re     (1'b0),
+    .we     (intr_test_we),
+    .wd     (intr_test_wd),
+    .d      ('0),
+    .qre    (),
+    .qe     (reg2hw.intr_test.qe),
+    .q      (reg2hw.intr_test.q ),
+    .qs     ()
+  );
+
+
+  // R[data_in]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RO"),
+    .RESVAL  (32'h0)
+  ) u_data_in (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    .we     (1'b0),
+    .wd     ('0  ),
+
+    // from internal hardware
+    .de     (hw2reg.data_in.de),
+    .d      (hw2reg.data_in.d ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (),
+
+    // to register interface (read)
+    .qs     (data_in_qs)
+  );
+
+
+  // R[direct_out]: V(True)
+
+  prim_subreg_ext #(
+    .DW    (32)
+  ) u_direct_out (
+    .re     (direct_out_re),
+    .we     (direct_out_we),
+    .wd     (direct_out_wd),
+    .d      (hw2reg.direct_out.d),
+    .qre    (),
+    .qe     (reg2hw.direct_out.qe),
+    .q      (reg2hw.direct_out.q ),
+    .qs     (direct_out_qs)
+  );
+
+
+  // R[masked_out_lower]: V(True)
+
+  //   F[data]: 15:0
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_out_lower_data (
+    .re     (masked_out_lower_data_re),
+    .we     (masked_out_lower_data_we),
+    .wd     (masked_out_lower_data_wd),
+    .d      (hw2reg.masked_out_lower.data.d),
+    .qre    (),
+    .qe     (reg2hw.masked_out_lower.data.qe),
+    .q      (reg2hw.masked_out_lower.data.q ),
+    .qs     (masked_out_lower_data_qs)
+  );
+
+
+  //   F[mask]: 31:16
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_out_lower_mask (
+    .re     (1'b0),
+    .we     (masked_out_lower_mask_we),
+    .wd     (masked_out_lower_mask_wd),
+    .d      (hw2reg.masked_out_lower.mask.d),
+    .qre    (),
+    .qe     (reg2hw.masked_out_lower.mask.qe),
+    .q      (reg2hw.masked_out_lower.mask.q ),
+    .qs     ()
+  );
+
+
+  // R[masked_out_upper]: V(True)
+
+  //   F[data]: 15:0
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_out_upper_data (
+    .re     (masked_out_upper_data_re),
+    .we     (masked_out_upper_data_we),
+    .wd     (masked_out_upper_data_wd),
+    .d      (hw2reg.masked_out_upper.data.d),
+    .qre    (),
+    .qe     (reg2hw.masked_out_upper.data.qe),
+    .q      (reg2hw.masked_out_upper.data.q ),
+    .qs     (masked_out_upper_data_qs)
+  );
+
+
+  //   F[mask]: 31:16
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_out_upper_mask (
+    .re     (1'b0),
+    .we     (masked_out_upper_mask_we),
+    .wd     (masked_out_upper_mask_wd),
+    .d      (hw2reg.masked_out_upper.mask.d),
+    .qre    (),
+    .qe     (reg2hw.masked_out_upper.mask.qe),
+    .q      (reg2hw.masked_out_upper.mask.q ),
+    .qs     ()
+  );
+
+
+  // R[direct_oe]: V(True)
+
+  prim_subreg_ext #(
+    .DW    (32)
+  ) u_direct_oe (
+    .re     (direct_oe_re),
+    .we     (direct_oe_we),
+    .wd     (direct_oe_wd),
+    .d      (hw2reg.direct_oe.d),
+    .qre    (),
+    .qe     (reg2hw.direct_oe.qe),
+    .q      (reg2hw.direct_oe.q ),
+    .qs     (direct_oe_qs)
+  );
+
+
+  // R[masked_oe_lower]: V(True)
+
+  //   F[data]: 15:0
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_oe_lower_data (
+    .re     (masked_oe_lower_data_re),
+    .we     (masked_oe_lower_data_we),
+    .wd     (masked_oe_lower_data_wd),
+    .d      (hw2reg.masked_oe_lower.data.d),
+    .qre    (),
+    .qe     (reg2hw.masked_oe_lower.data.qe),
+    .q      (reg2hw.masked_oe_lower.data.q ),
+    .qs     (masked_oe_lower_data_qs)
+  );
+
+
+  //   F[mask]: 31:16
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_oe_lower_mask (
+    .re     (masked_oe_lower_mask_re),
+    .we     (masked_oe_lower_mask_we),
+    .wd     (masked_oe_lower_mask_wd),
+    .d      (hw2reg.masked_oe_lower.mask.d),
+    .qre    (),
+    .qe     (reg2hw.masked_oe_lower.mask.qe),
+    .q      (reg2hw.masked_oe_lower.mask.q ),
+    .qs     (masked_oe_lower_mask_qs)
+  );
+
+
+  // R[masked_oe_upper]: V(True)
+
+  //   F[data]: 15:0
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_oe_upper_data (
+    .re     (masked_oe_upper_data_re),
+    .we     (masked_oe_upper_data_we),
+    .wd     (masked_oe_upper_data_wd),
+    .d      (hw2reg.masked_oe_upper.data.d),
+    .qre    (),
+    .qe     (reg2hw.masked_oe_upper.data.qe),
+    .q      (reg2hw.masked_oe_upper.data.q ),
+    .qs     (masked_oe_upper_data_qs)
+  );
+
+
+  //   F[mask]: 31:16
+  prim_subreg_ext #(
+    .DW    (16)
+  ) u_masked_oe_upper_mask (
+    .re     (masked_oe_upper_mask_re),
+    .we     (masked_oe_upper_mask_we),
+    .wd     (masked_oe_upper_mask_wd),
+    .d      (hw2reg.masked_oe_upper.mask.d),
+    .qre    (),
+    .qe     (reg2hw.masked_oe_upper.mask.qe),
+    .q      (reg2hw.masked_oe_upper.mask.q ),
+    .qs     (masked_oe_upper_mask_qs)
+  );
+
+
+  // R[intr_ctrl_en_rising]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_intr_ctrl_en_rising (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_ctrl_en_rising_we),
+    .wd     (intr_ctrl_en_rising_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_ctrl_en_rising.q ),
+
+    // to register interface (read)
+    .qs     (intr_ctrl_en_rising_qs)
+  );
+
+
+  // R[intr_ctrl_en_falling]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_intr_ctrl_en_falling (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_ctrl_en_falling_we),
+    .wd     (intr_ctrl_en_falling_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_ctrl_en_falling.q ),
+
+    // to register interface (read)
+    .qs     (intr_ctrl_en_falling_qs)
+  );
+
+
+  // R[intr_ctrl_en_lvlhigh]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_intr_ctrl_en_lvlhigh (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_ctrl_en_lvlhigh_we),
+    .wd     (intr_ctrl_en_lvlhigh_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_ctrl_en_lvlhigh.q ),
+
+    // to register interface (read)
+    .qs     (intr_ctrl_en_lvlhigh_qs)
+  );
+
+
+  // R[intr_ctrl_en_lvllow]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_intr_ctrl_en_lvllow (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_ctrl_en_lvllow_we),
+    .wd     (intr_ctrl_en_lvllow_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_ctrl_en_lvllow.q ),
+
+    // to register interface (read)
+    .qs     (intr_ctrl_en_lvllow_qs)
+  );
+
+
+  // R[ctrl_en_input_filter]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_ctrl_en_input_filter (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (ctrl_en_input_filter_we),
+    .wd     (ctrl_en_input_filter_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.ctrl_en_input_filter.q ),
+
+    // to register interface (read)
+    .qs     (ctrl_en_input_filter_qs)
+  );
+
+
+
+
+  logic [14:0] addr_hit;
+  always_comb begin
+    addr_hit = '0;
+    addr_hit[ 0] = (reg_addr == GPIO_INTR_STATE_OFFSET);
+    addr_hit[ 1] = (reg_addr == GPIO_INTR_ENABLE_OFFSET);
+    addr_hit[ 2] = (reg_addr == GPIO_INTR_TEST_OFFSET);
+    addr_hit[ 3] = (reg_addr == GPIO_DATA_IN_OFFSET);
+    addr_hit[ 4] = (reg_addr == GPIO_DIRECT_OUT_OFFSET);
+    addr_hit[ 5] = (reg_addr == GPIO_MASKED_OUT_LOWER_OFFSET);
+    addr_hit[ 6] = (reg_addr == GPIO_MASKED_OUT_UPPER_OFFSET);
+    addr_hit[ 7] = (reg_addr == GPIO_DIRECT_OE_OFFSET);
+    addr_hit[ 8] = (reg_addr == GPIO_MASKED_OE_LOWER_OFFSET);
+    addr_hit[ 9] = (reg_addr == GPIO_MASKED_OE_UPPER_OFFSET);
+    addr_hit[10] = (reg_addr == GPIO_INTR_CTRL_EN_RISING_OFFSET);
+    addr_hit[11] = (reg_addr == GPIO_INTR_CTRL_EN_FALLING_OFFSET);
+    addr_hit[12] = (reg_addr == GPIO_INTR_CTRL_EN_LVLHIGH_OFFSET);
+    addr_hit[13] = (reg_addr == GPIO_INTR_CTRL_EN_LVLLOW_OFFSET);
+    addr_hit[14] = (reg_addr == GPIO_CTRL_EN_INPUT_FILTER_OFFSET);
+  end
+
+  assign addrmiss = (reg_re || reg_we) ? ~|addr_hit : 1'b0 ;
+
+  // Check sub-word write is permitted
+  always_comb begin
+    wr_err = 1'b0;
+    if (addr_hit[ 0] && reg_we && (GPIO_PERMIT[ 0] != (GPIO_PERMIT[ 0] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 1] && reg_we && (GPIO_PERMIT[ 1] != (GPIO_PERMIT[ 1] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 2] && reg_we && (GPIO_PERMIT[ 2] != (GPIO_PERMIT[ 2] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 3] && reg_we && (GPIO_PERMIT[ 3] != (GPIO_PERMIT[ 3] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 4] && reg_we && (GPIO_PERMIT[ 4] != (GPIO_PERMIT[ 4] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 5] && reg_we && (GPIO_PERMIT[ 5] != (GPIO_PERMIT[ 5] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 6] && reg_we && (GPIO_PERMIT[ 6] != (GPIO_PERMIT[ 6] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 7] && reg_we && (GPIO_PERMIT[ 7] != (GPIO_PERMIT[ 7] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 8] && reg_we && (GPIO_PERMIT[ 8] != (GPIO_PERMIT[ 8] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[ 9] && reg_we && (GPIO_PERMIT[ 9] != (GPIO_PERMIT[ 9] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[10] && reg_we && (GPIO_PERMIT[10] != (GPIO_PERMIT[10] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[11] && reg_we && (GPIO_PERMIT[11] != (GPIO_PERMIT[11] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[12] && reg_we && (GPIO_PERMIT[12] != (GPIO_PERMIT[12] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[13] && reg_we && (GPIO_PERMIT[13] != (GPIO_PERMIT[13] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[14] && reg_we && (GPIO_PERMIT[14] != (GPIO_PERMIT[14] & reg_be))) wr_err = 1'b1 ;
+  end
+
+  assign intr_state_we = addr_hit[0] & reg_we & ~wr_err;
+  assign intr_state_wd = reg_wdata[31:0];
+
+  assign intr_enable_we = addr_hit[1] & reg_we & ~wr_err;
+  assign intr_enable_wd = reg_wdata[31:0];
+
+  assign intr_test_we = addr_hit[2] & reg_we & ~wr_err;
+  assign intr_test_wd = reg_wdata[31:0];
+
+
+  assign direct_out_we = addr_hit[4] & reg_we & ~wr_err;
+  assign direct_out_wd = reg_wdata[31:0];
+  assign direct_out_re = addr_hit[4] && reg_re;
+
+  assign masked_out_lower_data_we = addr_hit[5] & reg_we & ~wr_err;
+  assign masked_out_lower_data_wd = reg_wdata[15:0];
+  assign masked_out_lower_data_re = addr_hit[5] && reg_re;
+
+  assign masked_out_lower_mask_we = addr_hit[5] & reg_we & ~wr_err;
+  assign masked_out_lower_mask_wd = reg_wdata[31:16];
+
+  assign masked_out_upper_data_we = addr_hit[6] & reg_we & ~wr_err;
+  assign masked_out_upper_data_wd = reg_wdata[15:0];
+  assign masked_out_upper_data_re = addr_hit[6] && reg_re;
+
+  assign masked_out_upper_mask_we = addr_hit[6] & reg_we & ~wr_err;
+  assign masked_out_upper_mask_wd = reg_wdata[31:16];
+
+  assign direct_oe_we = addr_hit[7] & reg_we & ~wr_err;
+  assign direct_oe_wd = reg_wdata[31:0];
+  assign direct_oe_re = addr_hit[7] && reg_re;
+
+  assign masked_oe_lower_data_we = addr_hit[8] & reg_we & ~wr_err;
+  assign masked_oe_lower_data_wd = reg_wdata[15:0];
+  assign masked_oe_lower_data_re = addr_hit[8] && reg_re;
+
+  assign masked_oe_lower_mask_we = addr_hit[8] & reg_we & ~wr_err;
+  assign masked_oe_lower_mask_wd = reg_wdata[31:16];
+  assign masked_oe_lower_mask_re = addr_hit[8] && reg_re;
+
+  assign masked_oe_upper_data_we = addr_hit[9] & reg_we & ~wr_err;
+  assign masked_oe_upper_data_wd = reg_wdata[15:0];
+  assign masked_oe_upper_data_re = addr_hit[9] && reg_re;
+
+  assign masked_oe_upper_mask_we = addr_hit[9] & reg_we & ~wr_err;
+  assign masked_oe_upper_mask_wd = reg_wdata[31:16];
+  assign masked_oe_upper_mask_re = addr_hit[9] && reg_re;
+
+  assign intr_ctrl_en_rising_we = addr_hit[10] & reg_we & ~wr_err;
+  assign intr_ctrl_en_rising_wd = reg_wdata[31:0];
+
+  assign intr_ctrl_en_falling_we = addr_hit[11] & reg_we & ~wr_err;
+  assign intr_ctrl_en_falling_wd = reg_wdata[31:0];
+
+  assign intr_ctrl_en_lvlhigh_we = addr_hit[12] & reg_we & ~wr_err;
+  assign intr_ctrl_en_lvlhigh_wd = reg_wdata[31:0];
+
+  assign intr_ctrl_en_lvllow_we = addr_hit[13] & reg_we & ~wr_err;
+  assign intr_ctrl_en_lvllow_wd = reg_wdata[31:0];
+
+  assign ctrl_en_input_filter_we = addr_hit[14] & reg_we & ~wr_err;
+  assign ctrl_en_input_filter_wd = reg_wdata[31:0];
+
+  // Read data return
+  always_comb begin
+    reg_rdata_next = '0;
+    unique case (1'b1)
+      addr_hit[0]: begin
+        reg_rdata_next[31:0] = intr_state_qs;
+      end
+
+      addr_hit[1]: begin
+        reg_rdata_next[31:0] = intr_enable_qs;
+      end
+
+      addr_hit[2]: begin
+        reg_rdata_next[31:0] = '0;
+      end
+
+      addr_hit[3]: begin
+        reg_rdata_next[31:0] = data_in_qs;
+      end
+
+      addr_hit[4]: begin
+        reg_rdata_next[31:0] = direct_out_qs;
+      end
+
+      addr_hit[5]: begin
+        reg_rdata_next[15:0] = masked_out_lower_data_qs;
+        reg_rdata_next[31:16] = '0;
+      end
+
+      addr_hit[6]: begin
+        reg_rdata_next[15:0] = masked_out_upper_data_qs;
+        reg_rdata_next[31:16] = '0;
+      end
+
+      addr_hit[7]: begin
+        reg_rdata_next[31:0] = direct_oe_qs;
+      end
+
+      addr_hit[8]: begin
+        reg_rdata_next[15:0] = masked_oe_lower_data_qs;
+        reg_rdata_next[31:16] = masked_oe_lower_mask_qs;
+      end
+
+      addr_hit[9]: begin
+        reg_rdata_next[15:0] = masked_oe_upper_data_qs;
+        reg_rdata_next[31:16] = masked_oe_upper_mask_qs;
+      end
+
+      addr_hit[10]: begin
+        reg_rdata_next[31:0] = intr_ctrl_en_rising_qs;
+      end
+
+      addr_hit[11]: begin
+        reg_rdata_next[31:0] = intr_ctrl_en_falling_qs;
+      end
+
+      addr_hit[12]: begin
+        reg_rdata_next[31:0] = intr_ctrl_en_lvlhigh_qs;
+      end
+
+      addr_hit[13]: begin
+        reg_rdata_next[31:0] = intr_ctrl_en_lvllow_qs;
+      end
+
+      addr_hit[14]: begin
+        reg_rdata_next[31:0] = ctrl_en_input_filter_qs;
+      end
+
+      default: begin
+        reg_rdata_next = '1;
+      end
+    endcase
+  end
+endmodule
diff --git a/verilog/rtl/iccm_controller.v b/verilog/rtl/iccm_controller.v
new file mode 100644
index 0000000..99a58ef
--- /dev/null
+++ b/verilog/rtl/iccm_controller.v
@@ -0,0 +1,135 @@
+
+
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Programming controller for the ICCM. Bytes delivered on rx_byte_i are packed
+// into 32-bit words, first byte in bits [31:24], and each completed word is
+// written to the next word address. The word 32'h00000fff marks the end of the
+// download: it is not written, and reset_o is raised once it has been received.
+module iccm_controller (
+	clk_i,
+	rst_ni,
+	prog_i,
+	rx_dv_i,
+	rx_byte_i,
+	we_o,
+	addr_o,
+	wdata_o,
+	reset_o
+);
+	input wire clk_i;
+	input wire rst_ni;             // asynchronous, active low
+	input wire prog_i;             // synchronous restart: clears all state, drops reset_o
+	input wire rx_dv_i;            // a byte is available on rx_byte_i
+	input wire [7:0] rx_byte_i;
+	output wire we_o;              // high for one cycle (state PROG) per stored word
+	output wire [11:0] addr_o;     // word address, incremented after every write
+	output wire [31:0] wdata_o;    // {byte 0, byte 1, byte 2, byte 3} in arrival order
+	output wire reset_o;           // 1 after rst_ni and after the end marker, 0 after prog_i
+	reg [1:0] ctrl_fsm_cs, ctrl_fsm_ns;
+	wire [7:0] rx_byte_d;
+	reg [7:0] rx_byte_q0, rx_byte_q1, rx_byte_q2, rx_byte_q3;
+	reg we_q, we_d;
+	reg [11:0] addr_q, addr_d;
+	reg reset_q, reset_d;
+	reg [1:0] byte_count;          // byte lane that the next received byte is stored in
+	localparam [1:0] RESET = 0;    // entered via prog_i; waits for the first byte
+	localparam [1:0] LOAD = 1;     // one cycle: capture rx_byte_i into the current lane
+	localparam [1:0] PROG = 2;     // one cycle: we_o is high, then the address advances
+	localparam [1:0] DONE = 3;     // idle between bytes; also the state after rst_ni
+	localparam [31:0] END_WORD = 32'h00000fff;
+	always @(*) begin
+		we_d    = we_q;
+		addr_d  = addr_q;
+		reset_d = reset_q;
+		ctrl_fsm_ns = ctrl_fsm_cs;
+		case (ctrl_fsm_cs)
+			RESET: begin
+				we_d = 1'b0;
+				reset_d = 1'b0;
+				if (rx_dv_i)
+					ctrl_fsm_ns = LOAD;
+				else
+					ctrl_fsm_ns = RESET;
+			end
+			// Write once the fourth byte arrives, unless the complete word is the end
+			// marker. rx_byte_d stands in for byte 3, which is only latched this cycle.
+			LOAD:
+				if ((byte_count == 2'b11) && ({rx_byte_q0, rx_byte_q1, rx_byte_q2, rx_byte_d} != END_WORD)) begin
+					we_d = 1'b1;
+					ctrl_fsm_ns = PROG;
+				end
+				else
+					ctrl_fsm_ns = DONE;
+			PROG: begin
+				we_d = 1'b0;
+				ctrl_fsm_ns = DONE;
+			end
+			DONE:
+				if (wdata_o == END_WORD || (!rst_ni)) begin
+					ctrl_fsm_ns = DONE;
+					reset_d = 1'b1;
+				end
+				else if (rx_dv_i)
+					ctrl_fsm_ns = LOAD;
+				else
+					ctrl_fsm_ns = DONE;
+			// no default branch: all four encodings of the 2-bit state are listed above
+		endcase
+	end
+	assign rx_byte_d = rx_byte_i;
+	assign we_o = we_q;
+	assign addr_o = addr_q;
+	assign wdata_o = {rx_byte_q0, rx_byte_q1, rx_byte_q2, rx_byte_q3};
+	assign reset_o = reset_q;
+	always @(posedge clk_i or negedge rst_ni)
+		if (!rst_ni) begin
+			we_q <= 1'b0;
+			addr_q <= 12'b000000000000;
+			rx_byte_q0 <= 8'b00000000;
+			rx_byte_q1 <= 8'b00000000;
+			rx_byte_q2 <= 8'b00000000;
+			rx_byte_q3 <= 8'b00000000;
+			reset_q <= 1'b1;
+			byte_count <= 2'b00;
+			ctrl_fsm_cs <= DONE;
+		end
+		else if (prog_i) begin
+			we_q <= 1'b0;
+			addr_q <= 12'b000000000000;
+			rx_byte_q0 <= 8'b00000000;
+			rx_byte_q1 <= 8'b00000000;
+			rx_byte_q2 <= 8'b00000000;
+			rx_byte_q3 <= 8'b00000000;
+			reset_q <= 1'b0;
+			byte_count <= 2'b00;
+			ctrl_fsm_cs <= RESET;
+		end
+		else begin
+			we_q <= we_d;
+			if (ctrl_fsm_cs == LOAD) begin   // store the byte in its lane and move to the next lane
+				if (byte_count == 2'b00) begin
+					rx_byte_q0 <= rx_byte_d;
+					byte_count <= 2'b01;
+				end
+				else if (byte_count == 2'b01) begin
+					rx_byte_q1 <= rx_byte_d;
+					byte_count <= 2'b10;
+				end
+				else if (byte_count == 2'b10) begin
+					rx_byte_q2 <= rx_byte_d;
+					byte_count <= 2'b11;
+				end
+				else begin
+					rx_byte_q3 <= rx_byte_d;
+					byte_count <= 2'b00;
+				end
+				addr_q <= addr_d;
+			end
+			if (ctrl_fsm_cs == PROG)         // the word is being written now; point at the next one
+				addr_q <= addr_d + 1'b1;
+			reset_q <= reset_d;
+			ctrl_fsm_cs <= ctrl_fsm_ns;
+		end
+endmodule
diff --git a/verilog/rtl/instr_mem_top.sv b/verilog/rtl/instr_mem_top.sv
new file mode 100644
index 0000000..9619890
--- /dev/null
+++ b/verilog/rtl/instr_mem_top.sv
@@ -0,0 +1,82 @@
+// Instruction-memory wrapper: shares one SRAM port between the TL-UL bus (via
+// tlul_sram_adapter) and the ICCM programming controller. prog_rst_ni high
+// gives the TL-UL side control of address, data, mask and write enable.
+module instr_mem_top
+(
+  input clk_i,
+  input rst_ni,
+
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  // ICCM controller interface
+  input  [11:0] iccm_ctrl_addr,
+  input  [31:0] iccm_ctrl_wdata,
+  input         iccm_ctrl_we,
+  input         prog_rst_ni,
+
+  // SRAM interface (csb and we_o are inverted here, i.e. low when active)
+  output  logic        csb,
+  output  logic [11:0] addr_o,
+  output  logic [31:0] wdata_o,
+  output  logic [3:0]  wmask_o,
+  output  logic        we_o,
+  input   logic [31:0] rdata_i
+);
+
+logic rvalid;
+
+logic        tl_we;
+logic [31:0] tl_wmask;
+logic [31:0] tl_wdata;
+logic [11:0] tl_addr;
+logic        tl_req;
+logic [3:0]  mask_sel;
+
+// Collapse the adapter's 32-bit write mask into one enable per byte lane.
+assign mask_sel[0] = |tl_wmask[7:0];
+assign mask_sel[1] = |tl_wmask[15:8];
+assign mask_sel[2] = |tl_wmask[23:16];
+assign mask_sel[3] = |tl_wmask[31:24];
+
+// Select the SRAM for a TL-UL request or a programming write.
+assign csb     = ~(tl_req | iccm_ctrl_we);
+
+assign addr_o  = (prog_rst_ni) ? tl_addr  : iccm_ctrl_addr;
+assign wdata_o = (prog_rst_ni) ? tl_wdata : iccm_ctrl_wdata;
+assign we_o    = ~((prog_rst_ni) ? tl_we  : iccm_ctrl_we);
+assign wmask_o = (prog_rst_ni) ? mask_sel : 4'b1111;  // programming writes enable all four lanes
+
+tlul_sram_adapter #(
+  .SramAw       (12),
+  .SramDw       (32),
+  .Outstanding  (2),
+  .ByteAccess   (1),
+  .ErrOnWrite   (0),  // 1: Writes not allowed, automatically error
+  .ErrOnRead    (0)   // 1: Reads not allowed, automatically error
+) inst_mem (
+  .clk_i     (clk_i),
+  .rst_ni    (rst_ni),
+  .tl_i      (tl_i),
+  .tl_o      (tl_o),
+  .req_o     (tl_req),
+  .gnt_i     (1'b1),
+  .we_o      (tl_we),
+  .addr_o    (tl_addr),
+  .wdata_o   (tl_wdata),
+  .wmask_o   (tl_wmask),
+  .rdata_i   ((rst_ni) ? rdata_i: '0),
+  .rvalid_i  (rvalid),
+  .rerror_i  (2'b0)
+);
+
+// rvalid follows tl_req by one cycle, and is held low after any write cycle.
+always_ff @(posedge clk_i) begin
+  if (!rst_ni) begin
+    rvalid <= 1'b0;
+  end else if (iccm_ctrl_we | tl_we) begin
+    rvalid <= 1'b0;
+  end else begin
+    rvalid <= tl_req;
+  end
+end
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/iteration_div_sqrt_mvp.sv b/verilog/rtl/iteration_div_sqrt_mvp.sv
new file mode 100644
index 0000000..0c645e6
--- /dev/null
+++ b/verilog/rtl/iteration_div_sqrt_mvp.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li                  lile@iis.ee.ethz.ch                //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    12/01/2017                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    iteration_div_sqrt_mvp                                     //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    iteration unit for div and sqrt                            //
+//                                                                            //
+//                                                                            //
+// Revision:        03/14/2018                                                //
+//                  For div_sqrt_mvp                                          //
+////////////////////////////////////////////////////////////////////////////////
+
+module iteration_div_sqrt_mvp
+#(
+   parameter   WIDTH=25
+)
+  (
+   // Adder operands and mode controls
+   input logic [WIDTH-1:0]      A_DI,
+   input logic [WIDTH-1:0]      B_DI,
+   input logic                  Div_enable_SI,
+   input logic                  Div_start_dly_SI,   // not referenced inside this module
+   input logic                  Sqrt_enable_SI,
+   input logic [1:0]            D_DI,
+   // Results
+   output logic [1:0]           D_DO,
+   output logic [WIDTH-1:0]     Sum_DO,
+   output logic                 Carry_out_DO
+    );
+
+   logic                        any_d_set;   // at least one bit of D_DI is 1
+   logic                        sqrt_cin;    // carry-in requested in square-root mode
+   logic                        adder_cin;   // carry-in actually fed to the adder
+
+   // D_DO[0] is D_DI[0] inverted; D_DO[1] is the XNOR of both D_DI bits.
+   assign D_DO      = {D_DI[1] ~^ D_DI[0], ~D_DI[0]};
+   assign any_d_set = |D_DI;
+   assign sqrt_cin  = Sqrt_enable_SI & any_d_set;
+   assign adder_cin = ~Div_enable_SI & sqrt_cin;   // Div_enable_SI forces the carry-in to 0
+   assign {Carry_out_DO,Sum_DO} = A_DI + B_DI + adder_cin;
+
+endmodule
diff --git a/verilog/rtl/jtag_pkg.sv b/verilog/rtl/jtag_pkg.sv
new file mode 100644
index 0000000..2a67ee0
--- /dev/null
+++ b/verilog/rtl/jtag_pkg.sv
@@ -0,0 +1,24 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package jtag_pkg;
+
+  typedef struct packed {   // JTAG request bundle: 4 bits, tck is the most significant
+    logic tck;              // test clock
+    logic tms;              // test mode select
+    logic trst_n;           // test reset (presumably active low, going by the _n suffix)
+    logic tdi;              // test data in
+  } jtag_req_t;
+
+  parameter jtag_req_t JTAG_REQ_DEFAULT = '0;   // all request fields are 0
+
+  typedef struct packed {   // JTAG response bundle: 2 bits, tdo is the most significant
+    logic tdo;              // test data out
+    logic tdo_oe;           // presumably the output enable for tdo -- confirm at the point of use
+  } jtag_rsp_t;
+
+  parameter jtag_rsp_t JTAG_RSP_DEFAULT = '0;   // all response fields are 0
+
+endpackage : jtag_pkg
diff --git a/verilog/rtl/lzc.sv b/verilog/rtl/lzc.sv
new file mode 100644
index 0000000..424eb2e
--- /dev/null
+++ b/verilog/rtl/lzc.sv
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter  => cnt_o is the number of leading zeros  (from the MSB)
+/// If the input does not contain a one (all bits zero), `empty_o` is asserted. Additionally `cnt_o` contains
+/// the maximum number of zeros - 1. For example:
+///   in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+///   in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+///   in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only).
+/// This speeds up simulation significantly.
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  /// Mode selection: 0 -> trailing zero, 1 -> leading zero
+  parameter bit          MODE  = 1'b0,
+  /// Dependent parameter. Do **not** change!
+  ///
+  /// Width of the output signal with the zero count.
+  parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH)
+) (
+  /// Input vector to be counted.
+  input  logic [WIDTH-1:0]     in_i,
+  /// Count of the leading / trailing zeros.
+  output logic [CNT_WIDTH-1:0] cnt_o,
+  /// Counter is empty: Asserted if all bits in in_i are zero.
+  output logic                 empty_o
+);
+
+  if (WIDTH == 1) begin : gen_degenerate_lzc
+    // Single-bit input: the count and the empty flag are both the inverted bit.
+    assign cnt_o[0] = !in_i[0];
+    assign empty_o = !in_i[0];
+
+  end else begin : gen_lzc
+    // Binary tree that finds the lowest-indexed set bit of in_tmp.
+    localparam int unsigned NumLevels = $clog2(WIDTH);
+
+    // pragma translate_off
+    initial begin
+      assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+    end
+    // pragma translate_on
+    // Tree nodes are stored flat: node k of level l sits at index 2**l - 1 + k.
+    logic [WIDTH-1:0][NumLevels-1:0] index_lut;          // index_lut[j] is the constant j
+    logic [2**NumLevels-1:0] sel_nodes;                  // node covers at least one set bit
+    logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes; // index selected below this node
+
+    logic [WIDTH-1:0] in_tmp;
+
+    // reverse vector if required (MODE = 1), so the tree always searches from bit 0 upwards
+    always_comb begin : flip_vector
+      for (int unsigned i = 0; i < WIDTH; i++) begin
+        in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+      end
+    end
+    // Each entry holds its own position, cast to NumLevels bits.
+    for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+      assign index_lut[j] = (NumLevels)'(unsigned'(j));
+    end
+    // The last level pairs up input bits; every other level merges two child nodes.
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels
+      if (unsigned'(level) == NumLevels - 1) begin : g_last_level
+        for (genvar k = 0; k < 2 ** level; k++) begin : g_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1];
+            assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1)
+              ? index_lut[k * 2] :
+                index_lut[k * 2 + 1];
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2];
+            assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2];
+          end
+          // if index is out of range
+          if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range
+            assign sel_nodes[2 ** level - 1 + k] = 1'b0;
+            assign index_nodes[2 ** level - 1 + k] = '0;
+          end
+        end
+      end else begin : g_not_last_level
+        for (genvar l = 0; l < 2 ** level; l++) begin : g_level
+          assign sel_nodes[2 ** level - 1 + l] =
+              sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+          assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1)
+            ? index_nodes[2 ** (level + 1) - 1 + l * 2] :
+              index_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+        end
+      end
+    end
+    // Node 0 is the root; the second arm of each ternary is used only when NumLevels is 0.
+    assign cnt_o = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}};
+    assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i);
+
+  end : gen_lzc
+
+endmodule : lzc
diff --git a/verilog/rtl/minus_one.v b/verilog/rtl/minus_one.v
new file mode 100644
index 0000000..f3575c9
--- /dev/null
+++ b/verilog/rtl/minus_one.v
@@ -0,0 +1,122 @@
+/*Author: Zhuxu

+	m99a1@yahoo.cn

+Use parallel prefix tree structure to reduce a 16-bit number by one.

+

+stage 0:	number of generation=16;	number of logic operation=16;	G_0[xx]=~i_operand[xx];

+stage 1:	NOG=16;			NOO=8;				G_1[2n-1]=G_0[2n-1]&&G_0[2n-2];	n=8:1

+stage 2:	NOG=16;			NOO=7;				G_2[2n-1]=G_1[2n-1]&&G_1[2n-3];	n=8:2

+stage 3:	NOG=16;			NOO=6;				G_3[2n-1]=G_2[2n-1]&&G_2[2n-5];	n=8:3	

+stage 4:	NOG=16;			NOO=4;				G_4[2n-1]=G_3[2n-1]&&G_3[2n-9];	n=8:5	

+stage 5:	NOG=16;			NOO=7;				G_5[2n]=G_4[2n]&&G_4[2n-1];	n=7:1

+

+*/

+module minus_one(	// o_result = i_operand - 1, wrapping from 0 to 16'hffff

+input	[15:0]i_operand,	// value to decrement

+output	[15:0]o_result,	// decremented value

+output	o_borrow	// 1 when i_operand is all zeros (the subtraction wrapped)

+);

+//stage 0: G_0[i] is 1 when operand bit i is 0, i.e. a borrow would pass through bit i

+wire	[15:0]G_0;

+assign	G_0=~i_operand;

+

+//stage 1: each odd bit is ANDed with the even bit below it (span of 2 bits); even bits pass through

+wire	[15:0]G_1;

+assign	G_1[1]=G_0[1]&G_0[0];

+assign	G_1[3]=G_0[3]&G_0[2];

+assign	G_1[5]=G_0[5]&G_0[4];

+assign	G_1[7]=G_0[7]&G_0[6];

+assign	G_1[9]=G_0[9]&G_0[8];

+assign	G_1[11]=G_0[11]&G_0[10];

+assign	G_1[13]=G_0[13]&G_0[12];

+assign	G_1[15]=G_0[15]&G_0[14];

+assign	G_1[0]=G_0[0];

+assign	G_1[2]=G_0[2];

+assign	G_1[4]=G_0[4];

+assign	G_1[6]=G_0[6];

+assign	G_1[8]=G_0[8];

+assign	G_1[10]=G_0[10];

+assign	G_1[12]=G_0[12];

+assign	G_1[14]=G_0[14];

+

+//stage 2: odd bits 3..15 are ANDed with the odd bit two below (span of 4 bits: i down to i-3)

+wire	[15:0]G_2;

+assign	G_2[3]=G_1[3]&G_1[1];

+assign	G_2[5]=G_1[5]&G_1[3];

+assign	G_2[7]=G_1[7]&G_1[5];

+assign	G_2[9]=G_1[9]&G_1[7];

+assign	G_2[11]=G_1[11]&G_1[9];

+assign	G_2[13]=G_1[13]&G_1[11];

+assign	G_2[15]=G_1[15]&G_1[13];

+assign	G_2[0]=G_1[0];

+assign	G_2[2]=G_1[2];

+assign	G_2[1]=G_1[1];

+assign	G_2[4]=G_1[4];

+assign	G_2[6]=G_1[6];

+assign	G_2[8]=G_1[8];

+assign	G_2[10]=G_1[10];

+assign	G_2[12]=G_1[12];

+assign	G_2[14]=G_1[14];

+

+//stage 3: odd bits 5..15 are ANDed with the odd bit four below; bits 5 and 7 now reach bit 0, bits 9..15 span 8 bits

+wire	[15:0]G_3;

+assign	G_3[5]=G_2[5]&G_2[1];

+assign	G_3[7]=G_2[7]&G_2[3];

+assign	G_3[9]=G_2[9]&G_2[5];

+assign	G_3[11]=G_2[11]&G_2[7];

+assign	G_3[13]=G_2[13]&G_2[9];

+assign	G_3[15]=G_2[15]&G_2[11];

+assign	G_3[0]=G_2[0];

+assign	G_3[2]=G_2[2];

+assign	G_3[1]=G_2[1];

+assign	G_3[4]=G_2[4];

+assign	G_3[3]=G_2[3];

+assign	G_3[6]=G_2[6];

+assign	G_3[8]=G_2[8];

+assign	G_3[10]=G_2[10];

+assign	G_3[12]=G_2[12];

+assign	G_3[14]=G_2[14];

+

+//stage 4: odd bits 9..15 are ANDed with the odd bit eight below; every odd G_4[i] now means "bits i..0 are all zero"

+wire	[15:0]G_4;

+assign	G_4[9]=G_3[9]&G_3[1];

+assign	G_4[11]=G_3[11]&G_3[3];

+assign	G_4[13]=G_3[13]&G_3[5];

+assign	G_4[15]=G_3[15]&G_3[7];

+assign	G_4[0]=G_3[0];

+assign	G_4[2]=G_3[2];

+assign	G_4[1]=G_3[1];

+assign	G_4[4]=G_3[4];

+assign	G_4[3]=G_3[3];

+assign	G_4[6]=G_3[6];

+assign	G_4[5]=G_3[5];

+assign	G_4[8]=G_3[8];

+assign	G_4[7]=G_3[7];

+assign	G_4[10]=G_3[10];

+assign	G_4[12]=G_3[12];

+assign	G_4[14]=G_3[14];

+

+//stage 5: even bits 2..14 are ANDed with the odd prefix just below; afterwards every G_5[i] means "bits i..0 are all zero"

+wire	[15:0]G_5;

+assign	G_5[2]=G_4[2]&G_4[1];

+assign	G_5[4]=G_4[4]&G_4[3];

+assign	G_5[6]=G_4[6]&G_4[5];

+assign	G_5[8]=G_4[8]&G_4[7];

+assign	G_5[10]=G_4[10]&G_4[9];

+assign	G_5[12]=G_4[12]&G_4[11];

+assign	G_5[14]=G_4[14]&G_4[13];

+assign	G_5[1]=G_4[1];

+assign	G_5[3]=G_4[3];

+assign	G_5[5]=G_4[5];

+assign	G_5[7]=G_4[7];

+assign	G_5[9]=G_4[9];

+assign	G_5[11]=G_4[11];

+assign	G_5[13]=G_4[13];

+assign	G_5[15]=G_4[15];

+assign	G_5[0]=G_4[0];

+

+//stage 6: bit 0 always toggles; bit i+1 toggles when G_5[i] is set (XOR written in AND/OR form)

+assign	o_result[0]=~i_operand[0];

+assign	o_result[15:1]=(G_5[14:0]&(~i_operand[15:1]))|((~G_5[14:0])&i_operand[15:1]);

+assign	o_borrow=G_5[15];

+

+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/norm_div_sqrt_mvp.sv b/verilog/rtl/norm_div_sqrt_mvp.sv
new file mode 100644
index 0000000..590abe9
--- /dev/null
+++ b/verilog/rtl/norm_div_sqrt_mvp.sv
@@ -0,0 +1,470 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li    lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    09/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    norm_div_sqrt_mvp.sv                                       //
+// Project Name:                                                              //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Floating point Normalizer/Rounding unit                    //
+//                 Since this module is designed as combinational logic,      //
+//                 arbitrary register stages can be added in the wrapper      //
+//                 module to meet different target frequencies.               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan                     //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module norm_div_sqrt_mvp
+  (//Inputs
+   input logic [C_MANT_FP64+4:0]                Mant_in_DI,  // Include the needed 4-bit for rounding and hidden bit
+   input logic signed [C_EXP_FP64+1:0]          Exp_in_DI,   // signed exponent; bit C_EXP_FP64+1 is tested below as the "negative" flag
+   input logic                                  Sign_in_DI,  // sign forwarded to the result (NaN results force it to 0)
+   input logic                                  Div_enable_SI,  // qualifies the division-only special cases (Inf/Inf, x/Inf, 0/0, x/0)
+   input logic                                  Sqrt_enable_SI, // qualifies the sqrt-of-negative checks
+   input logic                                  Inf_a_SI,    // operand classification flags for a/b, supplied from outside this module
+   input logic                                  Inf_b_SI,
+   input logic                                  Zero_a_SI,
+   input logic                                  Zero_b_SI,
+   input logic                                  NaN_a_SI,
+   input logic                                  NaN_b_SI,
+   input logic                                  SNaN_SI,     // copied to the NV flag when either operand is NaN
+   input logic [C_RM-1:0]                       RM_SI,       // rounding mode, decoded against the C_RM_* constants
+   input logic                                  Full_precision_SI, // 1: use the rounded result; 0: use the unrounded result and flag NX
+   input logic                                  FP32_SI,     // format selects, checked in the order FP32, FP64, FP16, else FP16ALT
+   input logic                                  FP64_SI,
+   input logic                                  FP16_SI,
+   input logic                                  FP16ALT_SI,
+   //Outputs
+   output logic [C_EXP_FP64+C_MANT_FP64:0]      Result_DO,   // packed result; narrower formats occupy the low bits with all-ones padding above
+   output logic [4:0]                           Fflags_SO //{NV,DZ,OF,UF,NX}
+   );
+   // Purely combinational: resolves special cases, normalizes mantissa/exponent,
+   // rounds according to RM_SI, then packs the selected format into Result_DO and builds Fflags_SO.
+   logic                                        Sign_res_D;
+
+   logic                                        NV_OP_S;
+   logic                                        Exp_OF_S;
+   logic                                        Exp_UF_S;
+   logic                                        Div_Zero_S;
+   logic                                        In_Exact_S;
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Normalization                                                           //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [C_MANT_FP64:0]                        Mant_res_norm_D;
+   logic [C_EXP_FP64-1:0]                       Exp_res_norm_D;
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Right shift operations for negative exponents                           //
+   /////////////////////////////////////////////////////////////////////////////
+
+  logic  [C_EXP_FP64+1:0]                       Exp_Max_RS_FP64_D;
+  logic  [C_EXP_FP32+1:0]                       Exp_Max_RS_FP32_D;
+  logic  [C_EXP_FP16+1:0]                       Exp_Max_RS_FP16_D;
+  logic  [C_EXP_FP16ALT+1:0]                    Exp_Max_RS_FP16ALT_D;
+  // NOTE(review): the four Exp_Max_RS_* sums below are assigned but not read anywhere else in this module.
+  assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative
+  logic  [C_EXP_FP64+1:0]                       Num_RS_D;
+  assign Num_RS_D=~Exp_in_DI+1+1;            // = 1 - Exp_in_DI: right shifts (RS) needed to produce a denormal; the shifted values are consumed only in the negative-exponent branch
+  logic  [C_MANT_FP64:0]                        Mant_RS_D;
+  logic  [C_MANT_FP64+4:0]                      Mant_forsticky_D;
+  assign  {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D); // upper C_MANT_FP64+1 bits -> Mant_RS_D, remaining low bits -> Mant_forsticky_D
+//
+  logic [C_EXP_FP64+1:0]                        Exp_subOne_D;
+  assign Exp_subOne_D = Exp_in_DI -1;          // exponent used when the mantissa is taken one bit lower (0.1XX cases)
+
+   //normalization
+   logic [1:0]                                  Mant_lower_D;
+   logic                                        Mant_sticky_bit_D;
+   logic [C_MANT_FP64+4:0]                      Mant_forround_D;
+
+   always_comb                                  // priority chain: the first matching branch decides result fields and flags
+     begin
+
+       if(NaN_a_SI)  //  if a is NaN, return NaN
+         begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+           Exp_res_norm_D='1;
+           Mant_forround_D='0;
+           Sign_res_D=1'b0;
+           NV_OP_S = SNaN_SI;
+         end
+
+      else if(NaN_b_SI)   //if b is NaN, return NaN
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=1'b0;
+          NV_OP_S = SNaN_SI;
+        end
+
+      else if(Inf_a_SI)
+        begin
+          if(Div_enable_SI&&Inf_b_SI)                     //Inf/Inf, return NaN
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=1'b0;
+              NV_OP_S = 1'b1;
+            end
+          else if (Sqrt_enable_SI && Sign_in_DI) begin // catch sqrt(-inf)
+            Div_Zero_S=1'b0;
+            Exp_OF_S=1'b0;
+            Exp_UF_S=1'b0;
+            Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+            Exp_res_norm_D='1;
+            Mant_forround_D='0;
+            Sign_res_D=1'b0;
+            NV_OP_S = 1'b1;
+          end else begin                          // remaining Inf-operand cases: exponent all ones, mantissa zero, input sign
+            Div_Zero_S=1'b0;
+            Exp_OF_S=1'b1;                        // NOTE(review): OF is raised for an infinite operand - confirm this is intended
+            Exp_UF_S=1'b0;
+            Mant_res_norm_D= '0;
+            Exp_res_norm_D='1;
+            Mant_forround_D='0;
+            Sign_res_D=Sign_in_DI;
+            NV_OP_S = 1'b0;
+          end
+        end
+
+      else if(Div_enable_SI&&Inf_b_SI)            // x/Inf: zero exponent and mantissa with the input sign
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b1;                          // NOTE(review): OF is raised although the result is zero - confirm this is intended
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D= '0;
+          Exp_res_norm_D='0;
+          Mant_forround_D='0;
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+     else if(Zero_a_SI)
+       begin
+         if(Div_enable_SI&&Zero_b_SI)             // 0/0: NaN result; NOTE(review): both DZ and NV are set here - confirm
+           begin
+              Div_Zero_S=1'b1;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=1'b0;
+              NV_OP_S = 1'b1;
+           end
+         else                                     // zero operand a otherwise: zero result with the input sign
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b0;
+             Mant_res_norm_D='0;
+             Exp_res_norm_D='0;
+             Mant_forround_D='0;
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+       end
+
+     else  if(Div_enable_SI&&(Zero_b_SI))  //div Zero: exponent all ones, mantissa zero, DZ set
+       begin
+         Div_Zero_S=1'b1;
+         Exp_OF_S=1'b0;
+         Exp_UF_S=1'b0;
+         Mant_res_norm_D='0;
+         Exp_res_norm_D='1;
+         Mant_forround_D='0;
+         Sign_res_D=Sign_in_DI;
+         NV_OP_S = 1'b0;
+       end
+
+      else if(Sign_in_DI&&Sqrt_enable_SI)   //sqrt(-a)
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=1'b0;
+          NV_OP_S = 1'b1;
+        end
+
+     else if((Exp_in_DI[C_EXP_FP64:0]=='0))      // low C_EXP_FP64+1 exponent bits are all zero
+       begin
+         if(Mant_in_DI!='0)       //Exp=0, Mant!=0, it is denormal
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b1;
+             Mant_res_norm_D={1'b0,Mant_in_DI[C_MANT_FP64+4:5]};
+             Exp_res_norm_D='0;
+             Mant_forround_D={Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} };
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+         else                 // Zero
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b0;
+             Mant_res_norm_D='0;
+             Exp_res_norm_D='0;
+             Mant_forround_D='0;
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+        end
+
+      else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4]))  //denormal
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b1;
+          Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+4:4];
+          Exp_res_norm_D='0;
+          Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if(Exp_in_DI[C_EXP_FP64+1])    //minus              //consider format
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b1;
+          Mant_res_norm_D={Mant_RS_D[C_MANT_FP64:0]};          // mantissa after the Num_RS_D right shift
+          Exp_res_norm_D='0;
+          Mant_forround_D={Mant_forsticky_D[C_MANT_FP64+4:0]};   //??
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) )            //OF
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b1;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D='0;
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) |  ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )//255 (original note): exponent field of the selected format is all ones
+        begin
+          if(~Mant_in_DI[C_MANT_FP64+4]) // MSB=0
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
+              Exp_res_norm_D=Exp_subOne_D;
+              Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+          else if(Mant_in_DI!='0)         //NaN (original label); always taken when MSB=1, outputs exponent all ones with zero mantissa and OF set
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b1;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D= '0;
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+          else                         //infinity; NOTE(review): unreachable, since MSB=1 here implies Mant_in_DI!='0
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b1;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D= '0;
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+         end
+
+      else if(Mant_in_DI[C_MANT_FP64+4])  //normal numbers with 1.XXX
+        begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D= Mant_in_DI[C_MANT_FP64+4:4];
+           Exp_res_norm_D=Exp_in_DI[C_EXP_FP64-1:0];
+           Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+           Sign_res_D=Sign_in_DI;
+           NV_OP_S = 1'b0;
+        end
+
+      else                                   //normal numbers with 0.1XX
+         begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
+           Exp_res_norm_D=Exp_subOne_D;
+           Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+           Sign_res_D=Sign_in_DI;
+           NV_OP_S = 1'b0;
+         end
+
+     end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Rounding enable only for full precision (Full_precision_SI==1'b1)       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic [C_MANT_FP64:0]                   Mant_upper_D;
+   logic [C_MANT_FP64+1:0]                 Mant_upperRounded_D;
+   logic                                   Mant_roundUp_S;
+   logic                                   Mant_rounded_S;
+
+  always_comb //determine which bits for Mant_lower_D and Mant_sticky_bit_D
+    begin
+      if(FP32_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0];
+        end
+      else if(FP64_SI)                           // FP64 keeps every Mant_res_norm_D bit; rounding bits come from Mant_forround_D
+        begin
+          Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0];
+          Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3];
+          Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0];
+        end
+      else if(FP16_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30]; // NOTE(review): sticky OR stops at bit 30; bits 29:0 are ignored - confirm
+        end
+      else  //FP16ALT
+      begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30]; // NOTE(review): sticky OR stops at bit 30; bits 29:0 are ignored - confirm
+      end
+    end
+
+   assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D; // 1 when any bit below the kept mantissa is set; feeds the NX flag
+
+
+
+
+   always_comb //determine whether to round up or not
+     begin
+        Mant_roundUp_S = 1'b0;
+        case (RM_SI)
+          C_RM_NEAREST : // round up when Mant_lower_D[1] is set and (Mant_lower_D[0], sticky, or the kept LSB of the selected format) is set
+            Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | (FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) );
+          C_RM_TRUNC   :
+            Mant_roundUp_S = 0;
+          C_RM_PLUSINF : // increment the magnitude only for inexact positive values
+            Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI;
+          C_RM_MINUSINF: // increment the magnitude only for inexact negative values
+            Mant_roundUp_S = Mant_rounded_S & Sign_in_DI;
+          default          :
+            Mant_roundUp_S = 0;
+        endcase // case (RM_SI)
+     end // always_comb begin
+
+  logic                                 Mant_renorm_S;
+  logic  [C_MANT_FP64:0]                Mant_roundUp_Vector_S; // for all the formats
+  // Single-bit increment placed per selected format; the positions are hard-coded literal widths (presumably matching C_MANT_* - confirm).
+  assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)};
+
+
+  assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S;
+  assign Mant_renorm_S       = Mant_upperRounded_D[C_MANT_FP64+1]; // carry out of the rounding addition
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Renormalization for Rounding                                           //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]               Mant_res_round_D;
+  logic [C_EXP_FP64-1:0]                Exp_res_round_D;
+
+
+  assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit
+  assign Exp_res_round_D  = Exp_res_norm_D+Mant_renorm_S; // exponent is incremented when the rounding addition carried out
+
+  /////////////////////////////////////////////////////////////////////////////
+  //  Output Assignments                                                     //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]               Mant_before_format_ctl_D;
+  logic [C_EXP_FP64-1:0]                Exp_before_format_ctl_D;
+  assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D; // the unrounded path is one bit wider; assignment implicitly drops its top bit
+  assign Exp_before_format_ctl_D = Full_precision_SI ? Exp_res_round_D : Exp_res_norm_D;
+
+  always_comb    //NaN Boxing
+    begin  //
+      if(FP32_SI)
+          begin
+            Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]};
+          end
+       else if(FP64_SI)
+          begin
+            Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]};
+          end
+      else if(FP16_SI)
+          begin
+            Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]};
+          end
+      else
+          begin
+            Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]};
+          end
+    end
+
+assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S; // NX is always set when rounding is bypassed
+assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX}
+
+endmodule // norm_div_sqrt_mvp
diff --git a/verilog/rtl/nrbd_nrsc_mvp.sv b/verilog/rtl/nrbd_nrsc_mvp.sv
new file mode 100644
index 0000000..62bd147
--- /dev/null
+++ b/verilog/rtl/nrbd_nrsc_mvp.sv
@@ -0,0 +1,104 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li      lile@iis.ee.ethz.ch                            //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    10/04/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    nrbd_nrsc_mvp.sv                                           //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:   non-restoring binary divider / square root                  //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module nrbd_nrsc_mvp
+  // Wrapper module: every port is forwarded to a single control_mvp instance.
+  (
+   input  logic                    Clk_CI,
+   input  logic                    Rst_RBI,
+   input  logic                    Div_start_SI,
+   input  logic                    Sqrt_start_SI,
+   input  logic                    Start_SI,
+   input  logic                    Kill_SI,
+   input  logic                    Special_case_SBI,
+   input  logic                    Special_case_dly_SBI,
+   input  logic [C_PC-1:0]         Precision_ctl_SI,
+   input  logic [1:0]              Format_sel_SI,
+   input  logic [C_MANT_FP64:0]    Mant_a_DI,   // wired to control_mvp.Numerator_DI
+   input  logic [C_MANT_FP64:0]    Mant_b_DI,   // wired to control_mvp.Denominator_DI
+   input  logic [C_EXP_FP64:0]     Exp_a_DI,    // wired to control_mvp.Exp_num_DI
+   input  logic [C_EXP_FP64:0]     Exp_b_DI,    // wired to control_mvp.Exp_den_DI
+   // Outputs, all driven directly by the control_mvp instance
+   output logic                    Div_enable_SO,
+   output logic                    Sqrt_enable_SO,
+
+   output logic                    Full_precision_SO,
+   output logic                    FP32_SO,
+   output logic                    FP64_SO,
+   output logic                    FP16_SO,
+   output logic                    FP16ALT_SO,
+   output logic                    Ready_SO,
+   output logic                    Done_SO,
+   output logic [C_MANT_FP64+4:0]  Mant_z_DO,   // driven by control_mvp.Mant_result_prenorm_DO
+   output logic [C_EXP_FP64+1:0]   Exp_z_DO     // driven by control_mvp.Exp_result_prenorm_DO
+  );
+
+  // Nets tied to control_mvp's *_start_dly_SO outputs; nothing else in this module reads them.
+  logic Div_start_dly_S;
+  logic Sqrt_start_dly_S;
+
+  // Sole child instance; named port connections, same order as the original.
+  control_mvp control_U0 (
+    .Clk_CI                 (Clk_CI),
+    .Rst_RBI                (Rst_RBI),
+    .Div_start_SI           (Div_start_SI),
+    .Sqrt_start_SI          (Sqrt_start_SI),
+    .Start_SI               (Start_SI),
+    .Kill_SI                (Kill_SI),
+    .Special_case_SBI       (Special_case_SBI),
+    .Special_case_dly_SBI   (Special_case_dly_SBI),
+    .Precision_ctl_SI       (Precision_ctl_SI),
+    .Format_sel_SI          (Format_sel_SI),
+    .Numerator_DI           (Mant_a_DI),
+    .Exp_num_DI             (Exp_a_DI),
+    .Denominator_DI         (Mant_b_DI),
+    .Exp_den_DI             (Exp_b_DI),
+    .Div_start_dly_SO       (Div_start_dly_S),
+    .Sqrt_start_dly_SO      (Sqrt_start_dly_S),
+    .Div_enable_SO          (Div_enable_SO),
+    .Sqrt_enable_SO         (Sqrt_enable_SO),
+    .Full_precision_SO      (Full_precision_SO),
+    .FP32_SO                (FP32_SO),
+    .FP64_SO                (FP64_SO),
+    .FP16_SO                (FP16_SO),
+    .FP16ALT_SO             (FP16ALT_SO),
+    .Ready_SO               (Ready_SO),
+    .Done_SO                (Done_SO),
+    .Mant_result_prenorm_DO (Mant_z_DO),
+    .Exp_result_prenorm_DO  (Exp_z_DO)
+  );
+
+
+endmodule
diff --git a/verilog/rtl/preprocess_mvp.sv b/verilog/rtl/preprocess_mvp.sv
new file mode 100644
index 0000000..9e0d25f
--- /dev/null
+++ b/verilog/rtl/preprocess_mvp.sv
@@ -0,0 +1,425 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:                Lei Li  //lile@iis.ee.ethz.ch                    //
+//		                                                                        //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    01/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    preprocess_mvp.sv                                          //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:           decode and data preparation                         //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module preprocess_mvp
+  (
+   input logic                   Clk_CI,         // Clock for every register in this module
+   input logic                   Rst_RBI,        // Asynchronous reset, active low
+   input logic                   Div_start_SI,   // Start request: divide
+   input logic                   Sqrt_start_SI,  // Start request: square root
+   input logic                   Ready_SI,       // A start is only captured while this is high
+   //Input Operands
+   input logic [C_OP_FP64-1:0]   Operand_a_DI,
+   input logic [C_OP_FP64-1:0]   Operand_b_DI,
+   input logic [C_RM-1:0]        RM_SI,    //Rounding Mode
+   input logic [C_FS-1:0]        Format_sel_SI,  // Format Selection: 00 FP32, 01 FP64, 10 FP16, 11 FP16ALT
+
+   // to control
+   output logic                  Start_SO,       // Div_start_SI | Sqrt_start_SI (not qualified by Ready_SI)
+   output logic [C_EXP_FP64:0]   Exp_a_DO_norm,  // registered exponent of a after normalization
+   output logic [C_EXP_FP64:0]   Exp_b_DO_norm,  // registered exponent of b after normalization
+   output logic [C_MANT_FP64:0]  Mant_a_DO_norm, // registered mantissa of a (hidden bit included), left-shifted
+   output logic [C_MANT_FP64:0]  Mant_b_DO_norm, // registered mantissa of b (hidden bit included), left-shifted
+
+   output logic [C_RM-1:0]       RM_dly_SO,      // RM_SI as captured at the last accepted start
+
+   output logic                  Sign_z_DO,      // registered result sign
+   output logic                  Inf_a_SO,       // the flags below are registered at the last accepted start
+   output logic                  Inf_b_SO,
+   output logic                  Zero_a_SO,
+   output logic                  Zero_b_SO,
+   output logic                  NaN_a_SO,
+   output logic                  NaN_b_SO,
+   output logic                  SNaN_SO,
+   output logic                  Special_case_SBO,     // combinational; see "Low power control" below
+   output logic                  Special_case_dly_SBO  // registered copy of Special_case_SBO
+   );
+
+   //Hidden Bits
+   logic                         Hb_a_D;
+   logic                         Hb_b_D;
+
+   logic [C_EXP_FP64-1:0]        Exp_a_D;
+   logic [C_EXP_FP64-1:0]        Exp_b_D;
+   logic [C_MANT_FP64-1:0]       Mant_a_NonH_D;
+   logic [C_MANT_FP64-1:0]       Mant_b_NonH_D;
+   logic [C_MANT_FP64:0]         Mant_a_D;
+   logic [C_MANT_FP64:0]         Mant_b_D;
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Disassemble operands
+   /////////////////////////////////////////////////////////////////////////////
+   logic                      Sign_a_D,Sign_b_D;
+   logic                      Start_S;
+   // Exponents are zero-extended on the left and mantissas zero-padded on the right up to the FP64 field widths.
+     always_comb
+       begin
+         case(Format_sel_SI)
+           2'b00: // FP32 field positions
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP32-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP32-1];
+               Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]};
+               Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0};
+             end
+           2'b01: // FP64 field positions (no extension or padding needed)
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP64-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP64-1];
+               Exp_a_D  = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64];
+               Exp_b_D  = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64];
+               Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0];
+               Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0];
+             end
+           2'b10: // FP16 field positions
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP16-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP16-1];
+               Exp_a_D  = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]};
+               Exp_b_D  = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0};
+             end
+           2'b11: // FP16ALT field positions
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1];
+               Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+               Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0};
+             end
+           endcase
+       end
+
+   // Full mantissa = hidden bit prepended to the stored fraction.
+   assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D};
+   assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D};
+
+   assign Hb_a_D = | Exp_a_D; // hidden bit: 1 when any exponent bit is set, 0 when the exponent field is all zeros
+   assign Hb_b_D = | Exp_b_D; // hidden bit: 1 when any exponent bit is set, 0 when the exponent field is all zeros
+
+   assign Start_S= Div_start_SI | Sqrt_start_SI;
+
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // preliminary checks for infinite/zero/NaN operands                       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic               Mant_a_prenorm_zero_S;
+   logic               Mant_b_prenorm_zero_S;
+   // Exponent-is-zero flags are simply the inverted hidden bits.
+   logic               Exp_a_prenorm_zero_S;
+   logic               Exp_b_prenorm_zero_S;
+   assign Exp_a_prenorm_zero_S = ~Hb_a_D;
+   assign Exp_b_prenorm_zero_S = ~Hb_b_D;
+
+   logic               Exp_a_prenorm_Inf_NaN_S;
+   logic               Exp_b_prenorm_Inf_NaN_S;
+
+   logic               Mant_a_prenorm_QNaN_S;
+   logic               Mant_a_prenorm_SNaN_S;
+   logic               Mant_b_prenorm_QNaN_S;
+   logic               Mant_b_prenorm_SNaN_S;
+   // QNaN: top fraction bit set, all lower bits clear. SNaN: top bit clear, some lower bit set. The QNaN flags are not read elsewhere in this module.
+   assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+   // Per-format checks on the raw operand fields against the package's C_MANT_ZERO_* and C_EXP_INF_* constants.
+     always_comb
+       begin
+         case(Format_sel_SI)
+           2'b00: // FP32
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+             end
+           2'b01: // FP64
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+             end
+           2'b10: // FP16
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+             end
+           2'b11: // FP16ALT
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+             end
+           endcase
+       end
+
+
+   // Naming: *_SN is the next-state value, *_SP the registered value.
+   // Each flag is recomputed when a start is accepted (Start_S && Ready_SI) and holds its value otherwise.
+   logic               Zero_a_SN,Zero_a_SP;
+   logic               Zero_b_SN,Zero_b_SP;
+   logic               Inf_a_SN,Inf_a_SP;
+   logic               Inf_b_SN,Inf_b_SP;
+   logic               NaN_a_SN,NaN_a_SP;
+   logic               NaN_b_SN,NaN_b_SP;
+   logic               SNaN_SN,SNaN_SP;
+   // Zero: exponent and fraction both zero. Inf: Inf/NaN exponent with zero fraction. NaN: Inf/NaN exponent with non-zero fraction.
+   assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP;
+   assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP;
+   assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP;
+   assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP;
+   assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP;
+   assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP;
+   assign SNaN_SN = (Start_S&&Ready_SI) ? ((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP;
+   // State registers for the classification flags; all clear to 0 on reset.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Zero_a_SP <='0;
+            Zero_b_SP <='0;
+            Inf_a_SP <='0;
+            Inf_b_SP <='0;
+            NaN_a_SP <='0;
+            NaN_b_SP <='0;
+            SNaN_SP <= '0;
+          end
+        else
+         begin
+           Inf_a_SP <=Inf_a_SN;
+           Inf_b_SP <=Inf_b_SN;
+           Zero_a_SP <=Zero_a_SN;
+           Zero_b_SP <=Zero_b_SN;
+           NaN_a_SP <=NaN_a_SN;
+           NaN_b_SP <=NaN_b_SN;
+           SNaN_SP <= SNaN_SN;
+         end
+      end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Low power control
+   /////////////////////////////////////////////////////////////////////////////
+   // Special_case_SBO is 1 only on an accepted start with no special operand. Divide checks zero/Inf/NaN on a and b; otherwise only a is checked, plus its sign bit.
+   assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN |  Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI);
+
+   // Registered copy: loaded on an accepted start; the two trailing branches together just hold the current value.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            Special_case_dly_SBO <= '0;
+          end
+       else if((Start_S&&Ready_SI))
+         begin
+            Special_case_dly_SBO <= Special_case_SBO;
+         end
+       else if(Special_case_dly_SBO)
+         begin
+         Special_case_dly_SBO <= 1'b1;
+         end
+      else
+         begin
+            Special_case_dly_SBO <= '0;
+         end
+    end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Delay sign for normalization and round                                  //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic                   Sign_z_DN;
+   logic                   Sign_z_DP;
+   // Result sign: a XOR b for an accepted divide, sign of a for an accepted square root, otherwise hold. Divide has priority.
+   always_comb
+     begin
+       if(Div_start_SI&&Ready_SI)
+           Sign_z_DN = Sign_a_D ^ Sign_b_D;
+       else if(Sqrt_start_SI&&Ready_SI)
+           Sign_z_DN = Sign_a_D;
+       else
+           Sign_z_DN = Sign_z_DP;
+    end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            Sign_z_DP <= '0;
+          end
+       else
+         begin
+            Sign_z_DP <= Sign_z_DN;
+         end
+    end
+   // Rounding mode: captured on an accepted start, held otherwise.
+   logic [C_RM-1:0]                  RM_DN;
+   logic [C_RM-1:0]                  RM_DP;
+
+   always_comb
+     begin
+       if(Start_S&&Ready_SI)
+           RM_DN = RM_SI;
+       else
+           RM_DN = RM_DP;
+    end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            RM_DP <= '0;
+          end
+       else
+         begin
+            RM_DP <= RM_DN;
+         end
+    end
+   assign RM_dly_SO = RM_DP;
+   // Shift amounts come from the lzc instances below. NOTE(review): presumably MODE=1 makes lzc count leading zeros - confirm against the lzc module.
+   logic [5:0]                  Mant_leadingOne_a, Mant_leadingOne_b;
+   logic                        Mant_zero_S_a,Mant_zero_S_b; // driven by lzc empty_o; not read elsewhere in this module
+
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1             )
+  ) LOD_Ua (
+    .in_i    ( Mant_a_D          ),
+    .cnt_o   ( Mant_leadingOne_a ),
+    .empty_o ( Mant_zero_S_a     )
+  );
+
+   logic [C_MANT_FP64:0]            Mant_a_norm_DN,Mant_a_norm_DP;
+   // On an accepted start, mantissa a is shifted left by the lzc count; otherwise the register holds.
+   assign  Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Mant_a_norm_DP <= '0;
+          end
+        else
+          begin
+            Mant_a_norm_DP<=Mant_a_norm_DN;
+          end
+     end
+   // Exponent a is reduced by the shift amount, plus 1 whenever the shift amount is non-zero.
+   logic [C_EXP_FP64:0]            Exp_a_norm_DN,Exp_a_norm_DP;
+   assign  Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP;  //Covering the process of denormal numbers
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Exp_a_norm_DP <= '0;
+          end
+        else
+          begin
+            Exp_a_norm_DP<=Exp_a_norm_DN;
+          end
+     end
+   // Operand b: same normalization structure as operand a.
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1             )
+  ) LOD_Ub (
+    .in_i    ( Mant_b_D          ),
+    .cnt_o   ( Mant_leadingOne_b ),
+    .empty_o ( Mant_zero_S_b     )
+  );
+
+
+   logic [C_MANT_FP64:0]            Mant_b_norm_DN,Mant_b_norm_DP;
+
+   assign  Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Mant_b_norm_DP <= '0;
+          end
+        else
+          begin
+            Mant_b_norm_DP<=Mant_b_norm_DN;
+          end
+     end
+
+   logic [C_EXP_FP64:0]            Exp_b_norm_DN,Exp_b_norm_DP;
+   assign  Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP; //Covering the process of denormal numbers
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Exp_b_norm_DP <= '0;
+          end
+        else
+          begin
+            Exp_b_norm_DP<=Exp_b_norm_DN;
+          end
+     end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Output assignments                                                      //
+   /////////////////////////////////////////////////////////////////////////////
+   // Start_SO is combinational; every other output here is driven straight from a register.
+   assign Start_SO=Start_S;
+   assign Exp_a_DO_norm=Exp_a_norm_DP;
+   assign Exp_b_DO_norm=Exp_b_norm_DP;
+   assign Mant_a_DO_norm=Mant_a_norm_DP;
+   assign Mant_b_DO_norm=Mant_b_norm_DP;
+   assign Sign_z_DO=Sign_z_DP;
+   assign Inf_a_SO=Inf_a_SP;
+   assign Inf_b_SO=Inf_b_SP;
+   assign Zero_a_SO=Zero_a_SP;
+   assign Zero_b_SO=Zero_b_SP;
+   assign NaN_a_SO=NaN_a_SP;
+   assign NaN_b_SO=NaN_b_SP;
+   assign SNaN_SO=SNaN_SP;
+
+endmodule
diff --git a/verilog/rtl/prim_arbiter_ppc.sv b/verilog/rtl/prim_arbiter_ppc.sv
new file mode 100644
index 0000000..e379f8a
--- /dev/null
+++ b/verilog/rtl/prim_arbiter_ppc.sv
@@ -0,0 +1,127 @@
+
+//
+// N:1 arbiter module
+//
+// Verilog parameter
+//   N:           Number of request ports
+//   DW:          Data width
+//   EnDataPort:  Set to 1 to enable the data port. Otherwise that port will be ignored.
+//   EnReqStabA:  Checks whether requests remain asserted until granted
+//
+// This is the original implementation of the arbiter which relies on parallel prefix computing
+// optimization to optimize the request / arbiter tree. Not all synthesis tools may support this.
+//
+// Note that the currently winning request is held if the data sink is not ready. This behavior is
+// required by some interconnect protocols (AXI, TL). The module contains an assertion that checks
+// this behavior.
+//
+// Also, this module contains a request stability assertion that checks that requests stay asserted
+// until they have been served. This assertion can be optionally disabled by setting EnReqStabA to
+// zero. This is a non-functional parameter and does not affect the design's behavior.
+//
+// See also: prim_arbiter_tree
+
+
+
+module prim_arbiter_ppc #(
+  parameter int unsigned N  = 8,   // number of requesters
+  parameter int unsigned DW = 32,  // width of each data_i entry and of data_o
+
+  // Configurations
+  // EnDataPort: {0, 1}, if 0, input data will be ignored
+  parameter bit EnDataPort = 1,
+
+  // Non-functional parameter to switch on the request stability assertion
+  parameter bit EnReqStabA = 1,    // not referenced anywhere in the body of this copy of the module
+
+  // Derived parameters
+  localparam int IdxW = $clog2(N)  // NOTE(review): evaluates to 0 for N == 1, which makes idx_o's range [-1:0]
+) (
+  input clk_i,
+  input rst_ni,
+
+  input        [ N-1:0]    req_i,
+  input        [DW-1:0]    data_i [N],
+  output logic [ N-1:0]    gnt_o,
+  output logic [IdxW-1:0]  idx_o,
+
+  output logic             valid_o,
+  output logic [DW-1:0]    data_o,
+  input                    ready_i
+);
+
+
+  // this case is basically just a bypass
+  if (N == 1) begin : gen_degenerate_case
+    // Single requester: request and data pass straight through, grant needs ready_i.
+    assign valid_o  = req_i[0];
+    assign data_o   = data_i[0];
+    assign gnt_o[0] = valid_o & ready_i;
+    assign idx_o    = '0;
+
+  end else begin : gen_normal_case
+
+    logic [N-1:0] masked_req;
+    logic [N-1:0] ppc_out;
+    logic [N-1:0] arb_req;
+    logic [N-1:0] mask, mask_next;
+    logic [N-1:0] winner;
+    // Requests allowed by the mask are arbitrated first; if none remain, all requests are considered.
+    assign masked_req = mask & req_i;
+    assign arb_req = (|masked_req) ? masked_req : req_i;
+
+    // PPC
+    //   Even below code looks O(n) but DC optimizes it to O(log(N))
+    //   Using Parallel Prefix Computation
+    always_comb begin
+      ppc_out[0] = arb_req[0];
+      for (int i = 1 ; i < N ; i++) begin
+        ppc_out[i] = ppc_out[i-1] | arb_req[i]; // prefix OR: set from the lowest requesting index upwards
+      end
+    end
+
+    // Grant Generation: Leading-One detector
+    assign winner = ppc_out ^ {ppc_out[N-2:0], 1'b0}; // one-hot at the lowest-index set bit of arb_req
+    assign gnt_o    = (ready_i) ? winner : '0;
+    // valid_o uses the unmasked requests, so it is high whenever any requester is active.
+    assign valid_o = |req_i;
+    // Mask Generation
+    assign mask_next = {ppc_out[N-2:0], 1'b0}; // only indices strictly above the current winner
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        mask <= '0;
+      end else if (valid_o && ready_i) begin
+        // Latch only when requests accepted
+        mask <= mask_next;
+      end else if (valid_o && !ready_i) begin
+        // Downstream isn't yet ready so, keep current request alive. (First come first serve)
+        mask <= ppc_out; // winner index and above stay eligible, so the same winner is picked again
+      end
+    end
+
+    if (EnDataPort == 1) begin: gen_datapath
+      always_comb begin
+        data_o = '0;
+        for (int i = 0 ; i < N ; i++) begin
+          if (winner[i]) begin
+            data_o = data_i[i]; // winner is one-hot, so at most one entry is selected
+          end
+        end
+      end
+    end else begin: gen_nodatapath
+      assign data_o = '1;
+      // TODO: waive data_i from NOT_READ error
+    end
+    // Binary index of the winner; stays 0 when there is no winner.
+    always_comb begin
+      idx_o = '0;
+      for (int unsigned i = 0 ; i < N ; i++) begin
+        if (winner[i]) begin
+          idx_o = i[IdxW-1:0];
+        end
+      end
+    end
+  end
+
+ 
+endmodule : prim_arbiter_ppc
diff --git a/verilog/rtl/prim_clock_gating.sv b/verilog/rtl/prim_clock_gating.sv
new file mode 100644
index 0000000..177d2c0
--- /dev/null
+++ b/verilog/rtl/prim_clock_gating.sv
@@ -0,0 +1,12 @@
+
+
+module prim_clock_gating (
+  input        clk_i,      // ungated clock, wired to the cell's CLK pin
+  input        en_i,       // enable, OR-ed with test_en_i to form GATE
+  input        test_en_i,  // second enable, OR-ed with en_i to form GATE
+  output logic clk_o       // gated clock, driven by the cell's GCLK pin
+);
+// Wraps one sky130_fd_sc_hd__dlclkp_1 cell. NOTE(review): the gating behaviour itself is defined by the sky130 library, not by this file.
+sky130_fd_sc_hd__dlclkp_1 CG( .CLK(clk_i), .GCLK(clk_o), .GATE(en_i | test_en_i));
+
+endmodule
diff --git a/verilog/rtl/prim_filter_ctr.sv b/verilog/rtl/prim_filter_ctr.sv
new file mode 100644
index 0000000..ec635b8
--- /dev/null
+++ b/verilog/rtl/prim_filter_ctr.sv
@@ -0,0 +1,63 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Primitive counter-based input filter, with enable.
+// Configurable number of cycles. Cheaper version of filter for
+// large values of #Cycles
+//
+// when in reset, stored value is zero
+// when enable is false, output is input
+// when enable is true, output is stored value,
+//   new input must be opposite value from stored value for
+//   #Cycles before switching to new value.
+
+module prim_filter_ctr #(parameter int unsigned Cycles = 4) (
+  input  clk_i,
+  input  rst_ni,    // asynchronous reset, active low
+  input  enable_i,  // 1: output the filtered value, 0: pass filter_i straight through
+  input  filter_i,  // raw input
+  output filter_o
+);
+  // Counter width is clamped to at least 1 bit so that Cycles == 1 elaborates ($clog2(1) is 0).
+  localparam int unsigned CTR_WIDTH = (Cycles > 1) ? $clog2(Cycles) : 1;
+  localparam logic [CTR_WIDTH-1:0] CYCLESM1 = (CTR_WIDTH)'(Cycles-1);
+  // With Cycles == 1, CYCLESM1 is 0 and the counter stays at 0, so the stored value is refreshed every cycle.
+  logic [CTR_WIDTH-1:0] diff_ctr_q, diff_ctr_d;
+  logic filter_q, stored_value_q, update_stored_value;
+  // Previous-cycle copy of the input, used to detect a change.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      filter_q <= 1'b0;
+    end else begin
+      filter_q <= filter_i;
+    end
+  end
+  // Filtered value: takes the input only once the stability counter reaches CYCLESM1.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      stored_value_q <= 1'b0;
+    end else if (update_stored_value) begin
+      stored_value_q <= filter_i;
+    end
+  end
+  // Stability counter register.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      diff_ctr_q <= {CTR_WIDTH{1'b0}};
+    end else begin
+      diff_ctr_q <= diff_ctr_d;
+    end
+  end
+
+  // always look for differences, even if not filter enabled
+  assign diff_ctr_d =
+             (filter_i != filter_q)           ? '0       : // restart
+                     (diff_ctr_q == CYCLESM1) ? CYCLESM1 : // saturate
+                         (diff_ctr_q + 1'b1);              // count up
+  assign update_stored_value = (diff_ctr_d == CYCLESM1);
+
+  assign filter_o = enable_i ? stored_value_q : filter_i;
+
+endmodule
+
diff --git a/verilog/rtl/prim_generic_clock_inv.sv b/verilog/rtl/prim_generic_clock_inv.sv
new file mode 100644
index 0000000..ae8f6b6
--- /dev/null
+++ b/verilog/rtl/prim_generic_clock_inv.sv
@@ -0,0 +1,29 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Clock inverter
+//   Varies on the process
+
+module prim_generic_clock_inv #(
+  parameter bit HasScanMode = 1'b1
+) (
+  input        clk_i,
+  input        scanmode_i,
+  output logic clk_no      // Inverted
+);
+  // Without scan support the output is a plain inversion; with it, a mux picks the inverted or the true clock.
+  if (!HasScanMode) begin : gen_noscan
+    logic unused_scanmode;
+    assign clk_no          = ~clk_i;
+    assign unused_scanmode = scanmode_i;
+  end else begin : gen_scan
+    prim_generic_clock_mux2 i_dft_tck_mux (
+      .sel_i  ( scanmode_i ),
+      .clk0_i ( ~clk_i     ), // selected when scanmode_i is low
+      .clk1_i ( clk_i      ), // bypass the inverted clock for testing
+      .clk_o  ( clk_no     )
+    );
+  end
+
+endmodule : prim_generic_clock_inv
diff --git a/verilog/rtl/prim_generic_clock_mux2.sv b/verilog/rtl/prim_generic_clock_mux2.sv
new file mode 100644
index 0000000..8f296e6
--- /dev/null
+++ b/verilog/rtl/prim_generic_clock_mux2.sv
@@ -0,0 +1,22 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+
+
+module prim_generic_clock_mux2 #(
+  parameter bit NoFpgaBufG = 1'b0 // this parameter serves no function in the generic model
+) (
+  input        clk0_i,  // output when sel_i is 0
+  input        clk1_i,  // output when sel_i is 1
+  input        sel_i,
+  output logic clk_o
+);
+  // Plain combinational 2:1 select; this model adds no synchronization of sel_i.
+  assign clk_o = (sel_i) ? clk1_i : clk0_i;
+
+  // NOTE(review): the comment below describes a check that sel_i is never X
+  // (including during reset, delayed by ##1 because of inverted clocks), but
+  // no such assertion is present in this file - restore it or drop this note.
+
+endmodule : prim_generic_clock_mux2
diff --git a/verilog/rtl/prim_generic_flop.sv b/verilog/rtl/prim_generic_flop.sv
new file mode 100644
index 0000000..8eacf01
--- /dev/null
+++ b/verilog/rtl/prim_generic_flop.sv
@@ -0,0 +1,26 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// `include "prim_assert.sv"
+
+module prim_generic_flop # (
+  parameter int Width      = 1,                    // number of flop bits
+  localparam int WidthSubOne = Width-1,            // helper used for the ResetValue range
+  parameter logic [WidthSubOne:0] ResetValue = 0   // value loaded into q_o while rst_ni is low
+) (
+  input clk_i,
+  input rst_ni,              // asynchronous reset, active low
+  input [Width-1:0] d_i,
+  output logic [Width-1:0] q_o
+);
+  // Width-bit register: q_o takes ResetValue on reset, otherwise d_i on each rising clock edge.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      q_o <= ResetValue;
+    end else begin
+      q_o <= d_i;
+    end
+  end
+
+endmodule // prim_generic_flop
diff --git a/verilog/rtl/prim_generic_flop_2sync.sv b/verilog/rtl/prim_generic_flop_2sync.sv
new file mode 100644
index 0000000..fdd1358
--- /dev/null
+++ b/verilog/rtl/prim_generic_flop_2sync.sv
@@ -0,0 +1,43 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Generic double-synchronizer flop
+// This may need to be moved to prim_generic if libraries have a specific cell
+// for synchronization
+
+module prim_generic_flop_2sync #(
+  parameter int Width       = 16,
+  localparam int WidthSubOne = Width-1, // temp work around #2679
+  parameter logic [WidthSubOne:0] ResetValue = '0
+) (
+  input                    clk_i,       // receive clock
+  input                    rst_ni,
+  input        [Width-1:0] d_i,
+  output logic [Width-1:0] q_o
+);
+  // Output of the first stage, input of the second.
+  logic [Width-1:0] intq;
+
+  // First stage: registers d_i on clk_i.
+  prim_generic_flop #(
+    .Width      ( Width      ),
+    .ResetValue ( ResetValue )
+  ) u_sync_1 (
+    .clk_i  ( clk_i  ),
+    .rst_ni ( rst_ni ),
+    .d_i    ( d_i    ),
+    .q_o    ( intq   )
+  );
+  // Second stage: registers the first-stage output again to produce q_o.
+  prim_generic_flop #(
+    .Width      ( Width      ),
+    .ResetValue ( ResetValue )
+  ) u_sync_2 (
+    .clk_i  ( clk_i  ),
+    .rst_ni ( rst_ni ),
+    .d_i    ( intq   ),
+    .q_o    ( q_o    )
+  );
+
+endmodule
diff --git a/verilog/rtl/prim_intr_hw.sv b/verilog/rtl/prim_intr_hw.sv
new file mode 100644
index 0000000..c84e389
--- /dev/null
+++ b/verilog/rtl/prim_intr_hw.sv
@@ -0,0 +1,51 @@
+
+
+module prim_intr_hw # (
+  parameter int unsigned Width = 1,
+  parameter bit FlopOutput = 1
+) (
+  // event
+  input  clk_i,
+  input  rst_ni,
+  input  [Width-1:0]  event_intr_i,
+
+  // register interface
+  input  [Width-1:0]  reg2hw_intr_enable_q_i,
+  input  [Width-1:0]  reg2hw_intr_test_q_i,
+  input               reg2hw_intr_test_qe_i,
+  input  [Width-1:0]  reg2hw_intr_state_q_i,
+  output              hw2reg_intr_state_de_o,
+  output [Width-1:0]  hw2reg_intr_state_d_o,
+
+  // outgoing interrupt
+  output logic [Width-1:0]  intr_o
+);
+
+  // Raised events: hardware events plus the software test bits, the latter
+  // only while the test write-qualifier is high.
+  logic [Width-1:0] test_mask, raised, pending;
+  assign test_mask = {Width{reg2hw_intr_test_qe_i}};
+  assign raised    = event_intr_i | (reg2hw_intr_test_q_i & test_mask);
+
+  // State update request: raised bits are OR-ed on top of the current state.
+  assign hw2reg_intr_state_d_o  = reg2hw_intr_state_q_i | raised;
+  assign hw2reg_intr_state_de_o = |raised;
+
+  // Interrupt level: state bits that are also enabled.
+  assign pending = reg2hw_intr_enable_q_i & reg2hw_intr_state_q_i;
+
+  if (FlopOutput == 1) begin : gen_flop_intr_output
+    // registered interrupt output
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) intr_o <= '0;
+      else         intr_o <= pending;
+    end
+  end else begin : gen_intr_passthrough_output
+    // combinational output; clock and reset are tied to dummy nets
+    logic unused_clk, unused_rst_n;
+    assign unused_clk   = clk_i;
+    assign unused_rst_n = rst_ni;
+    assign intr_o       = pending;
+  end
+
+endmodule
diff --git a/verilog/rtl/prim_pkg.sv b/verilog/rtl/prim_pkg.sv
new file mode 100644
index 0000000..ebe38d1
--- /dev/null
+++ b/verilog/rtl/prim_pkg.sv
@@ -0,0 +1,18 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Constants for use in primitives
+//
+// This file is a stop-gap until the DV file list is generated by FuseSoC.
+// Its contents are taken from the file which would be generated by FuseSoC.
+// https://github.com/lowRISC/ibex/issues/893
+
+package prim_pkg;
+  // Holds a single enumeration, impl_e, naming the available primitive implementation targets.
+  // Implementation target specialization
+  typedef enum integer {
+    ImplGeneric,  // first enumerator, value 0
+    ImplXilinx    // second enumerator, value 1
+  } impl_e;
+endpackage : prim_pkg
\ No newline at end of file
diff --git a/verilog/rtl/prim_subreg.sv b/verilog/rtl/prim_subreg.sv
new file mode 100644
index 0000000..33d4290
--- /dev/null
+++ b/verilog/rtl/prim_subreg.sv
@@ -0,0 +1,60 @@
+
+
+module prim_subreg #(
+  parameter int            DW       = 32  ,
+  parameter                SWACCESS = "RW",  // {RW, RO, WO, W1C, W1S, W0C, RC}
+  parameter logic [DW-1:0] RESVAL   = '0     // Reset value
+) (
+  input clk_i,
+  input rst_ni,
+
+  // From SW: valid for RW, WO, W1C, W1S, W0C, RC
+  // In case of RC, Top connects Read Pulse to we
+  input          we,
+  input [DW-1:0] wd,
+
+  // From HW: valid for HRW, HWO
+  input          de,
+  input [DW-1:0] d,
+
+  // output to HW and Reg Read
+  output logic          qe,
+  output logic [DW-1:0] q,
+  output logic [DW-1:0] qs
+);
+
+  // Register update strobe and value, both produced by the prim_subreg_arb
+  // instance from the software (we/wd) and hardware (de/d) ports.
+  logic          commit;
+  logic [DW-1:0] next_q;
+
+  prim_subreg_arb #(
+    .DW       ( DW       ),
+    .SWACCESS ( SWACCESS )
+  ) wr_en_data_arb (
+    .we      ( we     ),
+    .wd      ( wd     ),
+    .de      ( de     ),
+    .d       ( d      ),
+    .q       ( q      ),
+    .wr_en   ( commit ),
+    .wr_data ( next_q )
+  );
+
+  // Both flops share one clocked process.
+  //   q  : loads next_q whenever commit is high, otherwise holds
+  //   qe : the software write strobe delayed by one clock
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      q  <= RESVAL;
+      qe <= 1'b0;
+    end else begin
+      qe <= we;
+      if (commit) q <= next_q;
+    end
+  end
+
+  // Read-back value is the stored register content.
+  assign qs = q;
+
+endmodule
diff --git a/verilog/rtl/prim_subreg_arb.sv b/verilog/rtl/prim_subreg_arb.sv
new file mode 100644
index 0000000..adc144f
--- /dev/null
+++ b/verilog/rtl/prim_subreg_arb.sv
@@ -0,0 +1,75 @@
+
+
+module prim_subreg_arb #(  // Combinational SW/HW write arbiter; exactly one generate branch below is elaborated
+  parameter int DW       = 32  ,
+  parameter     SWACCESS = "RW"  // {RW, RO, WO, W1C, W1S, W0C, RC}
+) (
+  // From SW: valid for RW, WO, W1C, W1S, W0C, RC.
+  // In case of RC, top connects read pulse to we.
+  input          we,
+  input [DW-1:0] wd,
+
+  // From HW: valid for HRW, HWO.
+  input          de,
+  input [DW-1:0] d,
+
+  // From register: actual reg value.
+  input [DW-1:0] q,
+
+  // To register: actual write enable and write data.
+  output logic          wr_en,
+  output logic [DW-1:0] wr_data
+);
+
+  if ((SWACCESS == "RW") || (SWACCESS == "WO")) begin : gen_w
+    assign wr_en   = we | de;
+    assign wr_data = (we == 1'b1) ? wd : d; // SW higher priority
+    // Unused q - Prevent lint errors.
+    logic [DW-1:0] unused_q;
+    assign unused_q = q;
+  end else if (SWACCESS == "RO") begin : gen_ro  // SW cannot write: only HW updates the register
+    assign wr_en   = de;
+    assign wr_data = d;
+    // Unused we, wd, q - Prevent lint errors.
+    logic          unused_we;
+    logic [DW-1:0] unused_wd;
+    logic [DW-1:0] unused_q;
+    assign unused_we = we;
+    assign unused_wd = wd;
+    assign unused_q  = q;
+  end else if (SWACCESS == "W1S") begin : gen_w1s
+    // If SWACCESS is W1S, then assume hw tries to clear.
+    // So, give a chance HW to clear when SW tries to set.
+    // If both try to set/clr at the same bit pos, SW wins.
+    assign wr_en   = we | de;
+    assign wr_data = (de ? d : q) | (we ? wd : '0);  // base = HW data or held value; SW ORs in its 1s
+  end else if (SWACCESS == "W1C") begin : gen_w1c
+    // If SWACCESS is W1C, then assume hw tries to set.
+    // So, give a chance HW to set when SW tries to clear.
+    // If both try to set/clr at the same bit pos, SW wins.
+    assign wr_en   = we | de;
+    assign wr_data = (de ? d : q) & (we ? ~wd : '1);  // bits written as 1 by SW are forced to 0
+  end else if (SWACCESS == "W0C") begin : gen_w0c  // write-0-to-clear: same structure as W1C, mask not inverted
+    assign wr_en   = we | de;
+    assign wr_data = (de ? d : q) & (we ? wd : '1);  // bits written as 0 by SW are forced to 0
+  end else if (SWACCESS == "RC") begin : gen_rc
+    // This swtype is not recommended but exists for compatibility.
+    // WARN: we signal is actually read signal not write enable.
+    assign wr_en  = we | de;
+    assign wr_data = (de ? d : q) & (we ? '0 : '1);  // a read pulse on `we` clears every bit
+    // Unused wd - Prevent lint errors.
+    logic [DW-1:0] unused_wd;
+    assign unused_wd = wd;
+  end else begin : gen_hw  // any other SWACCESS string: HW-only update, same logic as the RO branch
+    assign wr_en   = de;
+    assign wr_data = d;
+    // Unused we, wd, q - Prevent lint errors.
+    logic          unused_we;
+    logic [DW-1:0] unused_wd;
+    logic [DW-1:0] unused_q;
+    assign unused_we = we;
+    assign unused_wd = wd;
+    assign unused_q  = q;
+  end
+
+endmodule
diff --git a/verilog/rtl/prim_subreg_ext.sv b/verilog/rtl/prim_subreg_ext.sv
new file mode 100644
index 0000000..00d7e9c
--- /dev/null
+++ b/verilog/rtl/prim_subreg_ext.sv
@@ -0,0 +1,24 @@
+
+
+module prim_subreg_ext #(  // Register field without storage: purely combinational pass-through of all signals
+  parameter int unsigned DW = 32
+) (
+  input          re,  // forwarded unchanged to qre
+  input          we,  // forwarded unchanged to qe
+  input [DW-1:0] wd,  // forwarded unchanged to q
+
+  input [DW-1:0] d,   // forwarded unchanged to qs
+
+  // output to HW and Reg Read
+  output logic          qe,
+  output logic          qre,
+  output logic [DW-1:0] q,
+  output logic [DW-1:0] qs
+);
+
+  assign qs = d;
+  assign q = wd;
+  assign qe = we;
+  assign qre = re;
+
+endmodule
diff --git a/verilog/rtl/prim_util_pkg.sv b/verilog/rtl/prim_util_pkg.sv
new file mode 100644
index 0000000..54d8a37
--- /dev/null
+++ b/verilog/rtl/prim_util_pkg.sv
@@ -0,0 +1,89 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+
+/**
+ * Utility functions
+ */
+package prim_util_pkg;
+  /**
+   * Math function: $clog2 as specified in Verilog-2005
+   *
+   * Do not use this function if $clog2() is available.
+   *
+   * clog2 =          0        for value == 0
+   *         ceil(log2(value)) for value >= 1
+   *
+   * This implementation is a synthesizable variant of the $clog2 function as
+   * specified in the Verilog-2005 standard (IEEE 1364-2005).
+   *
+   * To quote the standard:
+   *   The system function $clog2 shall return the ceiling of the log
+   *   base 2 of the argument (the log rounded up to an integer
+   *   value). The argument can be an integer or an arbitrary sized
+   *   vector value. The argument shall be treated as an unsigned
+   *   value, and an argument value of 0 shall produce a result of 0.
+   */
+  function automatic integer _clog2(integer value);
+    integer result;
+    // Use an intermediate value to avoid assigning to an input port, which produces a warning in
+    // Synopsys DC.
+    integer v = value;
+    v = v - 1;  // the result is the number of bits needed to represent (value - 1)
+    for (result = 0; v > 0; result++) begin  // count right-shifts until nothing is left
+      v = v >> 1;
+    end
+    return result;
+  endfunction
+
+
+  /**
+   * Math function: Number of bits needed to address |value| items.
+   *
+   *                  0        for value == 0
+   * vbits =          1        for value == 1
+   *         ceil(log2(value)) for value > 1
+   *
+   *
+   * The primary use case for this function is the definition of registers/arrays
+   * which are wide enough to contain |value| items.
+   *
+   * This function is identical to $clog2() for all input values except the value 1;
+   * it could be considered an "enhanced" $clog2() function.
+   *
+   *
+   * Example 1:
+   *   parameter Items = 1;
+   *   localparam ItemsWidth = vbits(Items); // 1
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [0:0]
+   *
+   * Example 2:
+   *   parameter Items = 64;
+   *   localparam ItemsWidth = vbits(Items); // 6
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [5:0]
+   *
+   * Note: If you want to store the number "value" inside a register, you need
+   * a register with size vbits(value + 1), since you also need to store
+   * the number 0.
+   *
+   * Example 3:
+   *   logic [vbits(64)-1:0]     store_64_logic_values; // width is [5:0]
+   *   logic [vbits(64 + 1)-1:0] store_number_64;       // width is [6:0]
+   */
+  function automatic integer vbits(integer value);
+`ifdef XCELIUM
+    // The use of system functions was not allowed here in Verilog-2001, but is
+    // valid since (System)Verilog-2005, which is also when $clog2() first
+    // appeared.
+    // Xcelium < 19.10 does not yet support the use of $clog2() here, fall back
+    // to an implementation without a system function. Remove this workaround
+    // if we require a newer Xcelium version.
+    // See #2579 and #2597.
+    return (value == 1) ? 1 : _clog2(value);
+`else
+    return (value == 1) ? 1 : $clog2(value);  // only value == 1 differs from plain $clog2
+`endif
+  endfunction
+
+endpackage
\ No newline at end of file
diff --git a/verilog/rtl/pwm_top.sv b/verilog/rtl/pwm_top.sv
new file mode 100644
index 0000000..b159a93
--- /dev/null
+++ b/verilog/rtl/pwm_top.sv
@@ -0,0 +1,75 @@
+
+module pwm_top (  // Wraps the PWM core behind a TL-UL port using tlul_adapter_reg
+
+  input clk_i,
+  input rst_ni,
+
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+
+
+  output        pwm_o,
+  output        pwm_o_2,
+  output        pwm1_oe,
+  output        pwm2_oe
+
+);
+
+
+localparam int AW = 8;  // passed to the adapter as RegAw
+localparam int DW = 32;  // passed to the adapter as RegDw
+localparam int DBW = DW/8;  // not referenced anywhere else in this module
+
+logic         re;
+logic         we;
+logic [7:0]   addr;
+logic [31:0]  wdata;
+logic [3:0]   be;
+logic [31:0]  rdata;
+logic         err;  // never driven or read here: both of its uses below are commented out
+
+//assign err = '0;
+
+PWM pwm_core(
+
+.clk_i      (clk_i),												
+.rst_ni     (rst_ni),												
+
+.re_i       (re),												
+.we_i       (we),												
+.addr_i     (addr),												
+.wdata_i    (wdata),												
+.be_i       (be),										    
+.rdata_o    (rdata),												
+//.error_o    (err),												
+
+.i_extclk   ('0),  // external clock input tied to zero
+.i_DC       ('0),  // external duty-cycle input tied to zero
+.i_valid_DC ('0),  // external duty-cycle valid tied to zero
+.o_pwm      (pwm_o),
+.o_pwm_2    (pwm_o_2),
+.oe_pwm1    (pwm1_oe),
+.oe_pwm2    (pwm2_oe)
+
+);
+
+tlul_adapter_reg #(
+  .RegAw(AW),
+  .RegDw(DW)
+) u_reg_if (
+  .clk_i,
+  .rst_ni,
+
+  .tl_i (tl_i),
+  .tl_o (tl_o),
+
+  .we_o    (we),
+  .re_o    (re),
+  .addr_o  (addr),
+  .wdata_o (wdata),
+  .be_o    (be),
+  .rdata_i (rdata),
+  .error_i (1'b0)  // the adapter's error input is held at zero
+);
+
+endmodule
diff --git a/verilog/rtl/rr_arb_tree.sv b/verilog/rtl/rr_arb_tree.sv
new file mode 100644
index 0000000..9013a64
--- /dev/null
+++ b/verilog/rtl/rr_arb_tree.sv
@@ -0,0 +1,343 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+//         Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>, ETH Zurich
+// Date: 02.04.2019
+// Description: logarithmic arbitration tree with round robin arbitration scheme.
+
+/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
+/// rotate each cycle.
+///
+/// ## Fair vs. unfair Arbitration
+///
+/// This refers to fair throughput distribution when not all inputs have active requests.
+/// This module has an internal state `rr_q` which defines the highest priority input. (When
+/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
+/// choose the input with the same index as currently defined by the state if it has an active
+/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
+/// to distinguish between two methods of calculating the next state.
+/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
+///           state being calculated without the context of the active request. Leading to an
+///           unfair throughput distribution if not all inputs have active requests.
+/// * `1'b1`: The next state jumps to the next unserved request with higher index.
+///           This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
+///           `req_i` signal with all indices which will have a higher priority in the next state.
+///           The trailing zero count defines the input index with the next highest priority after
+///           the current one is served. When the upper is empty the lower `lzc` provides the
+///           wrapped index if there are outstanding requests with lower or same priority.
+/// The implication of throughput fairness on the module timing are:
+/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
+///   that in this module the input to register path scales with Log(Log(`NumIn`)).
+/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
+///   timing path of this module also scales with Log(`NumIn`).
+/// This implies that in this module the input to output path is always longer than the input to
+/// register path. As the output data usually also terminates in a register the parameter `FairArb`
+/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
+/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
+/// However these are small in respect of the data multiplexers needed, as the width of the `req_i`
+/// signal is usually less than `DataWidth`.
+module rr_arb_tree #(
+  /// Number of inputs to be arbitrated.
+  parameter int unsigned NumIn      = 64,
+  /// Data width of the payload in bits. Not needed if `DataType` is overwritten.
+  parameter int unsigned DataWidth  = 32,
+  /// Data type of the payload, can be overwritten with custom type. This is the only use of
+  parameter type         DataType   = logic [DataWidth-1:0],  // `DataWidth` in this module.
+  /// The `ExtPrio` option allows to override the internal round robin counter via the
+  /// `rr_i` signal. This can be useful in case multiple arbiters need to have
+  /// rotating priorities that are operating in lock-step. If static priority arbitration
+  /// is needed, just connect `rr_i` to '0.
+  ///
+  /// Set to 1'b1 to enable.
+  parameter bit          ExtPrio    = 1'b0,
+  /// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
+  /// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
+  /// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
+  /// delay and area.
+  ///
+  /// Set to `1'b1` to treat req/gnt as vld/rdy.
+  parameter bit          AxiVldRdy  = 1'b0,
+  /// The `LockIn` option prevents the arbiter from changing the arbitration
+  /// decision when the arbiter is disabled. I.e., the index of the first request
+  /// that wins the arbitration will be locked in case the destination is not
+  /// able to grant the request in the same cycle.
+  ///
+  /// Set to `1'b1` to enable.
+  parameter bit          LockIn     = 1'b0,
+  /// When set, ensures that throughput gets distributed evenly between all inputs.
+  ///
+  /// Set to `1'b0` to disable.
+  parameter bit          FairArb    = 1'b1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Width of the arbitration priority signal and the arbitrated index.
+  parameter int unsigned IdxWidth   = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Type for defining the arbitration priority and arbitrated index signal.
+  parameter type         idx_t      = logic [IdxWidth-1:0]
+) (
+  /// clk_i, positive edge triggered.
+  input  logic                clk_i,
+  /// Asynchronous rst_ni, active low.
+  input  logic                rst_ni,
+  /// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
+  input  logic                flush_i,
+  /// External round-robin priority. Only used if `ExtPrio` is `1'b1.`
+  input  idx_t                rr_i,
+  /// Input requests arbitration.
+  input  logic    [NumIn-1:0] req_i,
+  /* verilator lint_off UNOPTFLAT */
+  /// Input request is granted.
+  output logic    [NumIn-1:0] gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  /// Input data for arbitration.
+  input  DataType [NumIn-1:0] data_i,
+  /// Output request is valid.
+  output logic                req_o,
+  /// Output request is granted.
+  input  logic                gnt_i,
+  /// Output data.
+  output DataType             data_o,
+  /// Index from which input the data came from.
+  output idx_t                idx_o
+);
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  // Default SVA rst_ni
+  default disable iff (!rst_ni || flush_i);
+  `endif
+  // pragma translate_on
+
+  // just pass through in this corner case
+  if (NumIn == unsigned'(1)) begin : gen_pass_through
+    assign req_o    = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o   = data_i[0];
+    assign idx_o    = '0;
+  // non-degenerate cases
+  end else begin : gen_arbiter
+    localparam int unsigned NumLevels = unsigned'($clog2(NumIn));  // depth of the binary tree
+
+    // The node arrays hold 2**NumLevels-1 entries each; entry 0 is the root of the tree.
+    /* verilator lint_off UNOPTFLAT */
+    idx_t    [2**NumLevels-2:0] index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0] data_nodes;  // used to propagate the data
+    logic    [2**NumLevels-2:0] gnt_nodes;   // used to propagate the grant to masters
+    logic    [2**NumLevels-2:0] req_nodes;   // used to propagate the requests to slave
+    /* lint_off */
+    idx_t                       rr_q;   // index of the input that currently has highest priority
+    logic [NumIn-1:0]           req_d;  // request vector that is actually arbitrated
+
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o        = req_nodes[0];
+    assign data_o       = data_nodes[0];
+    assign idx_o        = index_nodes[0];
+
+    if (ExtPrio) begin : gen_ext_rr  // priority comes from rr_i; no internal state is kept
+      assign rr_q       = rr_i;
+      assign req_d      = req_i;
+    end else begin : gen_int_rr
+      idx_t rr_d;  // next value of rr_q
+
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic  lock_d, lock_q;
+        logic [NumIn-1:0] req_q;
+
+        assign lock_d     = req_o & ~gnt_i;
+        assign req_d      = (lock_q) ? req_q : req_i;  // while locked, reuse the requests captured last cycle
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+
+        // pragma translate_off
+        `ifndef VERILATOR
+          lock: assert property(
+            @(posedge clk_i) LockIn |-> req_o && !gnt_i |=> idx_o == $past(idx_o)) else
+                $fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
+                            ready.");
+
+          logic [NumIn-1:0] req_tmp;
+          assign req_tmp = req_q & req_i;
+          lock_req: assume property(
+            @(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
+                $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
+                            enabled.");
+        `endif
+        // pragma translate_on
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q  <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q  <= '0;
+            end else begin
+              req_q  <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+
+      if (FairArb) begin : gen_fair_arb
+        logic [NumIn-1:0] upper_mask,  lower_mask;
+        idx_t             upper_idx,   lower_idx,   next_idx;
+        logic             upper_empty, lower_empty;
+
+        for (genvar i = 0; i < NumIn; i++) begin : gen_mask
+          assign upper_mask[i] = (i >  rr_q) ? req_d[i] : 1'b0;  // requests with index above rr_q
+          assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0;  // requests with index at or below rr_q
+        end
+
+        lzc #(  // lzc is defined elsewhere; MODE 1'b0 presumably selects trailing-zero counting - TODO confirm
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_upper (
+          .in_i    ( upper_mask  ),
+          .cnt_o   ( upper_idx   ),
+          .empty_o ( upper_empty )
+        );
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_lower (
+          .in_i    ( lower_mask  ),
+          .cnt_o   ( lower_idx   ),
+          .empty_o ( /*unused*/  )
+        );
+
+        assign next_idx = upper_empty      ? lower_idx : upper_idx;  // wrap to the lower part if nothing is pending above
+        assign rr_d     = (gnt_i && req_o) ? next_idx  : rr_q;       // advance only on a completed handshake
+
+      end else begin : gen_unfair_arb  // plain +1 with wrap-around at NumIn-1, only on a completed handshake
+        assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+      end
+
+      // this holds the highest priority
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q   <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q   <= '0;
+          end else begin
+            rr_q   <= rr_d;
+          end
+        end
+      end
+    end
+
+    assign gnt_nodes[0] = gnt_i;  // the downstream grant enters the tree at the root
+
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned Idx0 = 2**level-1+l;// current node
+        localparam int unsigned Idx1 = 2**(level+1)-1+l*2;  // first child node; the second child is Idx1+1
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
+            assign req_nodes[Idx0]   = req_d[l*2] | req_d[l*2+1];
+
+            // arbitration: round robin
+            // (`&` binds tighter than `|`: take the odd input if the even one is idle, or if the
+            // odd one requests and the priority bit for this level is set)
+            assign sel =  ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+
+            assign index_nodes[Idx0] = idx_t'(sel);
+            assign data_nodes[Idx0]  = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2])   & ~sel;
+            assign gnt_o[l*2+1]      = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
+            assign req_nodes[Idx0]   = req_d[l*2];
+            assign index_nodes[Idx0] = '0;// always zero in this case
+            assign data_nodes[Idx0]  = data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
+            assign req_nodes[Idx0]   = 1'b0;
+            assign index_nodes[Idx0] = idx_t'('0);
+            assign data_nodes[Idx0]  = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[Idx0]   = req_nodes[Idx1] | req_nodes[Idx1+1];
+
+          // arbitration: round robin
+          assign sel =  ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
+
+          assign index_nodes[Idx0] = (sel) ?  // prepend this level's select bit to the winning child's index
+            idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+            idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
+
+          assign data_nodes[Idx0]  = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
+          assign gnt_nodes[Idx1]   = gnt_nodes[Idx0] & ~sel;  // the grant goes to the selected child only
+          assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial begin : p_assert
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+
+    hot_one : assert property(
+      @(posedge clk_i) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+
+    gnt0 : assert property(
+      @(posedge clk_i) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+
+    gnt1 : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+
+    gnt_idx : assert property(
+      @(posedge clk_i) req_o |->  gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+
+    req0 : assert property(
+      @(posedge clk_i) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+
+    req1 : assert property(
+      @(posedge clk_i) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    // pragma translate_on
+  end
+
+endmodule : rr_arb_tree
diff --git a/verilog/rtl/rstmgr.sv b/verilog/rtl/rstmgr.sv
new file mode 100644
index 0000000..fca2e70
--- /dev/null
+++ b/verilog/rtl/rstmgr.sv
@@ -0,0 +1,42 @@
+
+// basic reset management logic for azadi
+
+module rstmgr(  // Merges three reset sources and passes the result through two clk_i flops
+
+    input clk_i, //system clock
+    input rst_ni, // system reset
+    input prog_rst_ni,  // additional active-low reset request, handled like rst_ni below
+  
+    input  logic  ndmreset, // non-debug module reset
+    output logic  sys_rst_ni // reset for system except debug module
+);
+
+  logic rst_d, rst_q;
+  logic rst_fd, rst_fq; // follower flip flop
+  
+  always_comb begin  // rst_d = 0 (reset requested) if rst_ni is low, ndmreset is high or prog_rst_ni is low
+    if(!rst_ni) begin
+      rst_d = 1'b0;
+    end else 
+    if(ndmreset) begin
+      rst_d = 1'b0;
+    end else 
+    if(!prog_rst_ni)begin
+      rst_d = 1'b0;
+    end else begin
+      rst_d = 1'b1;
+    end
+  end
+  
+  always_ff @(posedge clk_i ) begin  // first stage; the flop has no reset of its own
+    rst_q <= rst_d;
+  end
+
+  assign rst_fd = rst_q;
+  always_ff @(posedge clk_i ) begin  // second stage; also without a reset
+    rst_fq <= rst_fd;
+  end
+
+  assign sys_rst_ni = rst_fq;  // follows rst_d two clk_i edges later, for both assertion and release
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/rv_dm.sv b/verilog/rtl/rv_dm.sv
new file mode 100644
index 0000000..398652b
--- /dev/null
+++ b/verilog/rtl/rv_dm.sv
@@ -0,0 +1,319 @@
+
+
+module rv_dm #(
+  parameter int              NrHarts = 1,
+  parameter logic [31:0]     IdcodeValue = 32'h 0000_0001,
+  parameter  logic DirectDmiTap = 1'b1
+) (
+  input  logic               clk_i,       // clock
+  input  logic               rst_ni,      // asynchronous reset active low, connect PoR
+                                          // here, not the system reset
+  input  logic               testmode_i,
+  output logic               ndmreset_o,  // non-debug module reset
+  output logic               dmactive_o,  // debug module is active
+  output logic [NrHarts-1:0] debug_req_o, // async debug request
+  input  logic [NrHarts-1:0] unavailable_i, // communicate whether the hart is unavailable
+                                            // (e.g.: power down)
+
+  // bus device with debug memory, for an execution based technique
+  input  tlul_pkg::tl_h2d_t  tl_d_i,
+  output tlul_pkg::tl_d2h_t  tl_d_o,
+
+  // bus host, for system bus accesses
+  output tlul_pkg::tl_h2d_t  tl_h_o,
+  input  tlul_pkg::tl_d2h_t  tl_h_i,
+
+  input  jtag_pkg::jtag_req_t jtag_req_i,
+  output jtag_pkg::jtag_rsp_t jtag_rsp_o
+);
+
+
+  // Currently only 32 bit busses are supported by our TL-UL IP
+  localparam int BusWidth = 32;
+  // all harts have contiguous IDs
+  localparam logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}};
+
+  // Debug CSRs
+  dm::hartinfo_t [NrHarts-1:0]      hartinfo;
+  logic [NrHarts-1:0]               halted;
+  // logic [NrHarts-1:0]               running;
+  logic [NrHarts-1:0]               resumeack;
+  logic [NrHarts-1:0]               haltreq;
+  logic [NrHarts-1:0]               resumereq;
+  logic                             clear_resumeack;
+  logic                             cmd_valid;
+  dm::command_t                     cmd;
+
+  logic                             cmderror_valid;
+  dm::cmderr_e                      cmderror;
+  logic                             cmdbusy;
+  logic [dm::ProgBufSize-1:0][31:0] progbuf;
+  logic [dm::DataCount-1:0][31:0]   data_csrs_mem;
+  logic [dm::DataCount-1:0][31:0]   data_mem_csrs;
+  logic                             data_valid;
+  logic [19:0]                      hartsel;
+  // System Bus Access Module
+  logic [BusWidth-1:0]              sbaddress_csrs_sba;
+  logic [BusWidth-1:0]              sbaddress_sba_csrs;
+  logic                             sbaddress_write_valid;
+  logic                             sbreadonaddr;
+  logic                             sbautoincrement;
+  logic [2:0]                       sbaccess;
+  logic                             sbreadondata;
+  logic [BusWidth-1:0]              sbdata_write;
+  logic                             sbdata_read_valid;
+  logic                             sbdata_write_valid;
+  logic [BusWidth-1:0]              sbdata_read;
+  logic                             sbdata_valid;
+  logic                             sbbusy;
+  logic                             sberror_valid;
+  logic [2:0]                       sberror;
+
+  dm::dmi_req_t  dmi_req;
+  dm::dmi_resp_t dmi_rsp;
+  logic dmi_req_valid, dmi_req_ready;
+  logic dmi_rsp_valid, dmi_rsp_ready;
+  logic dmi_rst_n;
+
+  // static debug hartinfo
+  localparam dm::hartinfo_t DebugHartInfo = '{
+    zero1:      '0,
+    nscratch:   2, // Debug module needs at least two scratch regs
+    zero0:      0,
+    dataaccess: 1'b1, // data registers are memory mapped in the debugger
+    datasize:   dm::DataCount,
+    dataaddr:   dm::DataAddr
+  };
+  for (genvar i = 0; i < NrHarts; i++) begin : gen_dm_hart_ctrl
+    assign hartinfo[i] = DebugHartInfo;
+  end
+
+  dm_csrs #(
+    .NrHarts(NrHarts),
+    .BusWidth(BusWidth),
+    .SelectableHarts(SelectableHarts)
+  ) i_dm_csrs (
+    .clk_i                   ( clk_i                 ),
+    .rst_ni                  ( rst_ni                ),
+    .testmode_i              ( testmode_i            ),
+    .dmi_rst_ni              ( dmi_rst_n             ),
+    .dmi_req_valid_i         ( dmi_req_valid         ),
+    .dmi_req_ready_o         ( dmi_req_ready         ),
+    .dmi_req_i               ( dmi_req               ),
+    .dmi_resp_valid_o        ( dmi_rsp_valid         ),
+    .dmi_resp_ready_i        ( dmi_rsp_ready         ),
+    .dmi_resp_o              ( dmi_rsp               ),
+    .ndmreset_o              ( ndmreset_o            ),
+    .dmactive_o              ( dmactive_o            ),
+    .hartsel_o               ( hartsel               ),
+    .hartinfo_i              ( hartinfo              ),
+    .halted_i                ( halted                ),
+    .unavailable_i,
+    .resumeack_i             ( resumeack             ),
+    .haltreq_o               ( haltreq               ),
+    .resumereq_o             ( resumereq             ),
+    .clear_resumeack_o       ( clear_resumeack       ),
+    .cmd_valid_o             ( cmd_valid             ),
+    .cmd_o                   ( cmd                   ),
+    .cmderror_valid_i        ( cmderror_valid        ),
+    .cmderror_i              ( cmderror              ),
+    .cmdbusy_i               ( cmdbusy               ),
+    .progbuf_o               ( progbuf               ),
+    .data_i                  ( data_mem_csrs         ),
+    .data_valid_i            ( data_valid            ),
+    .data_o                  ( data_csrs_mem         ),
+    .sbaddress_o             ( sbaddress_csrs_sba    ),
+    .sbaddress_i             ( sbaddress_sba_csrs    ),
+    .sbaddress_write_valid_o ( sbaddress_write_valid ),
+    .sbreadonaddr_o          ( sbreadonaddr          ),
+    .sbautoincrement_o       ( sbautoincrement       ),
+    .sbaccess_o              ( sbaccess              ),
+    .sbreadondata_o          ( sbreadondata          ),
+    .sbdata_o                ( sbdata_write          ),
+    .sbdata_read_valid_o     ( sbdata_read_valid     ),
+    .sbdata_write_valid_o    ( sbdata_write_valid    ),
+    .sbdata_i                ( sbdata_read           ),
+    .sbdata_valid_i          ( sbdata_valid          ),
+    .sbbusy_i                ( sbbusy                ),
+    .sberror_valid_i         ( sberror_valid         ),
+    .sberror_i               ( sberror               )
+  );
+
+  logic                   host_req;
+  logic   [BusWidth-1:0]  host_add;
+  logic                   host_we;
+  logic   [BusWidth-1:0]  host_wdata;
+  logic [BusWidth/8-1:0]  host_be;
+  logic                   host_gnt;
+  logic                   host_r_valid;
+  logic   [BusWidth-1:0]  host_r_rdata;
+  logic                   host_r_err;
+
+  dm_sba #(
+    .BusWidth(BusWidth)
+  ) i_dm_sba (
+    .clk_i                   ( clk_i                 ),
+    .rst_ni                  ( rst_ni                ),
+    .master_req_o            ( host_req              ),
+    .master_add_o            ( host_add              ),
+    .master_we_o             ( host_we               ),
+    .master_wdata_o          ( host_wdata            ),
+    .master_be_o             ( host_be               ),
+    .master_gnt_i            ( host_gnt              ),
+    .master_r_valid_i        ( host_r_valid          ),
+    .master_r_rdata_i        ( host_r_rdata          ),
+    .dmactive_i              ( dmactive_o            ),
+    .sbaddress_i             ( sbaddress_csrs_sba    ),
+    .sbaddress_o             ( sbaddress_sba_csrs    ),
+    .sbaddress_write_valid_i ( sbaddress_write_valid ),
+    .sbreadonaddr_i          ( sbreadonaddr          ),
+    .sbautoincrement_i       ( sbautoincrement       ),
+    .sbaccess_i              ( sbaccess              ),
+    .sbreadondata_i          ( sbreadondata          ),
+    .sbdata_i                ( sbdata_write          ),
+    .sbdata_read_valid_i     ( sbdata_read_valid     ),
+    .sbdata_write_valid_i    ( sbdata_write_valid    ),
+    .sbdata_o                ( sbdata_read           ),
+    .sbdata_valid_o          ( sbdata_valid          ),
+    .sbbusy_o                ( sbbusy                ),
+    .sberror_valid_o         ( sberror_valid         ),
+    .sberror_o               ( sberror               )
+  );
+
+  tlul_host_adapter #(
+    .MAX_REQS(1)
+  ) tl_adapter_host_sba (
+    .clk_i(clk_i),
+    .rst_ni(rst_ni),
+    .req_i        (host_req),
+    .gnt_o        (host_gnt),
+    .addr_i       (host_add),
+    .we_i         (host_we),
+    .wdata_i      (host_wdata),
+    .be_i         (host_be),
+    .valid_o      (host_r_valid),
+    .rdata_o      (host_r_rdata),
+    .err_o        (host_r_err),
+    .tl_h_c_a         (tl_h_o),
+    .tl_h_c_d         (tl_h_i)
+  );
+
+  // DBG doesn't handle error responses so raise assertion if we see one
+  
+  localparam int unsigned AddressWidthWords = BusWidth - $clog2(BusWidth/8);
+
+  logic                         req;
+  logic                         we;
+  logic [BusWidth/8-1:0]        be;
+  logic   [BusWidth-1:0]        wdata;
+  logic   [BusWidth-1:0]        rdata;
+  logic                         rvalid;
+
+  logic [BusWidth-1:0]          addr_b;
+  logic [AddressWidthWords-1:0] addr_w;
+
+  // TODO: The tlul_adapter_sram give us a bitwise write mask currently,
+  // but dm_mem only supports byte write masks. Disable sub-word access in the
+  // adapter for now until we figure out a good strategy to deal with this.
+  assign be = {BusWidth/8{1'b1}};
+
+  assign addr_b = {addr_w, {$clog2(BusWidth/8){1'b0}}};
+
+  dm_mem #(
+    .NrHarts(NrHarts),
+    .BusWidth(BusWidth),
+    .SelectableHarts(SelectableHarts),
+    // The debug module provides a simplified ROM for systems that map the debug ROM to offset 0x0
+    // on the system bus. In that case, only one scratch register has to be implemented in the core.
+    // However, we require that the DM can be placed at arbitrary offsets in the system, which
+    // requires the generalized debug ROM implementation and two scratch registers. We hence set
+    // this parameter to a non-zero value (inside dm_mem, this just feeds into a comparison with 0).
+    .DmBaseAddress(1)
+  ) i_dm_mem (
+    .clk_i                   ( clk_i                 ),
+    .rst_ni                  ( rst_ni                ),
+    .debug_req_o             ( debug_req_o           ),
+    .hartsel_i               ( hartsel               ),
+    .haltreq_i               ( haltreq               ),
+    .resumereq_i             ( resumereq             ),
+    .clear_resumeack_i       ( clear_resumeack       ),
+    .halted_o                ( halted                ),
+    .resuming_o              ( resumeack             ),
+    .cmd_valid_i             ( cmd_valid             ),
+    .cmd_i                   ( cmd                   ),
+    .cmderror_valid_o        ( cmderror_valid        ),
+    .cmderror_o              ( cmderror              ),
+    .cmdbusy_o               ( cmdbusy               ),
+    .progbuf_i               ( progbuf               ),
+    .data_i                  ( data_csrs_mem         ),
+    .data_o                  ( data_mem_csrs         ),
+    .data_valid_o            ( data_valid            ),
+    .req_i                   ( req                   ),
+    .we_i                    ( we                    ),
+    .addr_i                  ( addr_b                ),
+    .wdata_i                 ( wdata                 ),
+    .be_i                    ( be                    ),
+    .rdata_o                 ( rdata                 )
+  );
+
+  // Bound-in DPI module replaces the TAP
+//if (DirectDmiTap) begin
+  // JTAG TAP
+  dmi_jtag #(
+    .IdcodeValue    (IdcodeValue)
+  ) dap (
+    .clk_i            (clk_i),
+    .rst_ni           (rst_ni),
+    .testmode_i       (testmode_i),
+
+    .dmi_rst_no       (dmi_rst_n),
+    .dmi_req_o        (dmi_req),
+    .dmi_req_valid_o  (dmi_req_valid),
+    .dmi_req_ready_i  (dmi_req_ready),
+
+    .dmi_resp_i       (dmi_rsp      ),
+    .dmi_resp_ready_o (dmi_rsp_ready),
+    .dmi_resp_valid_i (dmi_rsp_valid),
+
+    //JTAG
+    .tck_i            (jtag_req_i.tck),
+    .tms_i            (jtag_req_i.tms),
+    .trst_ni          (jtag_req_i.trst_n),
+    .td_i             (jtag_req_i.tdi),
+    .td_o             (jtag_rsp_o.tdo),
+    .tdo_oe_o         (jtag_rsp_o.tdo_oe)
+  );
+//end
+
+  tlul_sram_adapter #(
+    .SramAw(AddressWidthWords),
+    .SramDw(BusWidth),
+    .Outstanding(1),
+    .ByteAccess(0)
+  ) tl_adapter_device_mem (
+    .clk_i(clk_i),
+    .rst_ni(rst_ni),
+
+    .req_o    (req),
+    .gnt_i    (1'b1),
+    .we_o     (we),
+    .addr_o   (addr_w),
+    .wdata_o  (wdata),
+    .wmask_o  (),
+    .rdata_i  (rdata),
+    .rvalid_i (rvalid),
+    .rerror_i (2'b00),
+
+    .tl_o     (tl_d_o),
+    .tl_i     (tl_d_i)
+  );
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rvalid <= '0;
+    end else begin
+      rvalid <= req & ~we;
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/rv_plic.sv b/verilog/rtl/rv_plic.sv
new file mode 100644
index 0000000..5d77a3b
--- /dev/null
+++ b/verilog/rtl/rv_plic.sv
@@ -0,0 +1,241 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// RISC-V Platform-Level Interrupt Controller compliant INTC
+//
+//   Current version doesn't support MSI interrupt but it is easy to add
+//   the feature. Create one external register and connect qe signal to the
+//   gateway module (as edge-triggered)
+//
+//   Consider setting MAX_PRIO to as small a number as possible. It is the main
+//   factor of area increase if the edge-triggered counter isn't implemented.
+//
+// Local parameter (declared as a localparam inside the module, fixed to 3)
+//   MAX_PRIO: Maximum value of interrupt priority
+
+module rv_plic import rv_plic_reg_pkg::*; #(
+  // derived parameter: number of bits needed to index NumSrc sources
+  localparam int SRCW    = $clog2(NumSrc)
+) (
+  input     clk_i,
+  input     rst_ni,
+
+  // Bus Interface (device)
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+
+  // Interrupt Sources
+  input  [NumSrc-1:0] intr_src_i,
+
+  // Interrupt notification to targets
+  output [NumTarget-1:0] irq_o,
+
+  output logic [NumTarget-1:0] msip_o // Driven from the MSIP register
+);
+
+  rv_plic_reg2hw_t reg2hw;
+  rv_plic_hw2reg_t hw2reg;
+
+  localparam int MAX_PRIO    = 3;
+  localparam int PRIOW = $clog2(MAX_PRIO+1);
+
+  logic [SRCW:0]      irq_id_o [NumTarget];
+
+  logic [NumSrc-1:0] le; // 0:level 1:edge
+  logic [NumSrc-1:0] ip;
+
+  logic [NumSrc-1:0] ie [NumTarget];
+
+  logic [NumTarget-1:0] claim_re; // Target read indicator
+  logic [SRCW-1:0]      claim_id [NumTarget];
+  logic [NumSrc-1:0]    claim; // Converted from claim_re/claim_id
+
+  logic [NumTarget-1:0] complete_we; // Target write indicator
+  logic [SRCW-1:0]      complete_id [NumTarget];
+  logic [NumSrc-1:0]    complete; // Converted from complete_we/complete_id
+
+  logic [SRCW:0]      cc_id [NumTarget]; // irq_id_o, fed back to the CC register
+
+  logic [PRIOW-1:0] prio [NumSrc];
+
+  logic [PRIOW-1:0] threshold [NumTarget];
+
+  // Glue logic between rv_plic_reg_top and others
+  assign cc_id = irq_id_o;
+
+  // Decode claim_re/claim_id into a per-source claim vector. Clearing the
+  // vector first assigns every bit on every evaluation (no inferred latch)
+  // and keeps an idle target from wiping the bit set by another target.
+  always_comb begin
+    claim = '0;
+    for (int i = 0 ; i < NumTarget ; i++) begin
+      if (claim_re[i]) claim[claim_id[i]] = 1'b1;
+    end
+  end
+  // Decode complete_we/complete_id into a per-source complete vector using
+  // the same clear-then-set scheme, so bits from an earlier completion are
+  // never held over into a later cycle.
+  always_comb begin
+    complete = '0;
+    for (int i = 0 ; i < NumTarget ; i++) begin
+      if (complete_we[i]) complete[complete_id[i]] = 1'b1;
+    end
+  end
+
+  //`ASSERT_PULSE(claimPulse, claim_re[i])
+  //`ASSERT_PULSE(completePulse, complete_we[i])
+
+
+
+  //////////////
+  // Priority //
+  //////////////
+  assign prio[0] = reg2hw.prio0.q;
+  assign prio[1] = reg2hw.prio1.q;
+  assign prio[2] = reg2hw.prio2.q;
+  assign prio[3] = reg2hw.prio3.q;
+  assign prio[4] = reg2hw.prio4.q;
+  assign prio[5] = reg2hw.prio5.q;
+  assign prio[6] = reg2hw.prio6.q;
+  assign prio[7] = reg2hw.prio7.q;
+  assign prio[8] = reg2hw.prio8.q;
+  assign prio[9] = reg2hw.prio9.q;
+  assign prio[10] = reg2hw.prio10.q;
+  assign prio[11] = reg2hw.prio11.q;
+  assign prio[12] = reg2hw.prio12.q;
+  assign prio[13] = reg2hw.prio13.q;
+  assign prio[14] = reg2hw.prio14.q;
+  assign prio[15] = reg2hw.prio15.q;
+  assign prio[16] = reg2hw.prio16.q;
+  assign prio[17] = reg2hw.prio17.q;
+  assign prio[18] = reg2hw.prio18.q;
+  assign prio[19] = reg2hw.prio19.q;
+  assign prio[20] = reg2hw.prio20.q;
+  assign prio[21] = reg2hw.prio21.q;
+  assign prio[22] = reg2hw.prio22.q;
+  assign prio[23] = reg2hw.prio23.q;
+  assign prio[24] = reg2hw.prio24.q;
+  assign prio[25] = reg2hw.prio25.q;
+  assign prio[26] = reg2hw.prio26.q;
+  assign prio[27] = reg2hw.prio27.q;
+  assign prio[28] = reg2hw.prio28.q;
+  assign prio[29] = reg2hw.prio29.q;
+  assign prio[30] = reg2hw.prio30.q;
+  assign prio[31] = reg2hw.prio31.q;
+  assign prio[32] = reg2hw.prio32.q;
+  assign prio[33] = reg2hw.prio33.q;
+  assign prio[34] = reg2hw.prio34.q;
+  assign prio[35] = reg2hw.prio35.q;
+  assign prio[36] = reg2hw.prio36.q;
+  assign prio[37] = reg2hw.prio37.q;
+  assign prio[38] = reg2hw.prio38.q;
+  assign prio[39] = reg2hw.prio39.q;
+  assign prio[40] = reg2hw.prio40.q;
+  assign prio[41] = reg2hw.prio41.q;
+  assign prio[42] = reg2hw.prio42.q;
+  assign prio[43] = reg2hw.prio43.q;
+  //////////////////////
+  // Interrupt Enable //
+  //////////////////////
+  for (genvar s = 0; s < NumSrc; s++) begin : gen_ie0
+    assign ie[0][s] = reg2hw.ie0[s].q;
+  end
+
+  ////////////////////////
+  // THRESHOLD register //
+  ////////////////////////
+  assign threshold[0] = reg2hw.threshold0.q;
+
+  /////////////////
+  // CC register //
+  /////////////////
+  assign claim_re[0]    = reg2hw.cc0.re;
+  assign claim_id[0]    = irq_id_o[0]; // NOTE(review): SRCW+1 bits into SRCW bits, MSB dropped
+  assign complete_we[0] = reg2hw.cc0.qe;
+  assign complete_id[0] = reg2hw.cc0.q; // NOTE(review): 8-bit cc0.q truncated to SRCW bits
+  assign hw2reg.cc0.d   = cc_id[0];
+
+  ///////////////////
+  // MSIP register //
+  ///////////////////
+  assign msip_o[0] = reg2hw.msip0.q;
+
+  ////////
+  // IP //
+  ////////
+  for (genvar s = 0; s < NumSrc; s++) begin : gen_ip
+    assign hw2reg.ip[s].de = 1'b1; // Always write
+    assign hw2reg.ip[s].d  = ip[s];
+  end
+
+  ///////////////////////////////////
+  // Detection:: 0: Level, 1: Edge //
+  ///////////////////////////////////
+  for (genvar s = 0; s < NumSrc; s++) begin : gen_le
+    assign le[s] = reg2hw.le[s].q;
+  end
+
+  //////////////
+  // Gateways //
+  //////////////
+  rv_plic_gateway #(
+    .N_SOURCE   (NumSrc)
+  ) u_gateway (
+    .clk_i,
+    .rst_ni,
+
+    .src_i      (intr_src_i),
+    .le_i       (le),
+
+    .claim_i    (claim),
+    .complete_i (complete),
+
+    .ip_o       (ip)
+  );
+
+  ///////////////////////////////////
+  // Target interrupt notification //
+  ///////////////////////////////////
+  for (genvar i = 0 ; i < NumTarget ; i++) begin : gen_target
+    rv_plic_target #(
+      .N_SOURCE    (NumSrc),
+      .MAX_PRIO    (MAX_PRIO)
+    ) u_target (
+      .clk_i,
+      .rst_ni,
+
+      .ip_i        (ip),
+      .ie_i        (ie[i]),
+
+      .prio_i      (prio),
+      .threshold_i (threshold[i]),
+
+      .irq_o       (irq_o[i]),
+      .irq_id_o    (irq_id_o[i])
+
+    );
+  end
+
+  ////////////////////////
+  // Register interface //
+  ////////////////////////
+  //  Limitation of register tool prevents the module from having flexibility to parameters
+  //  So, signals are manually tied at the top.
+  rv_plic_reg_top u_reg (
+    .clk_i,
+    .rst_ni,
+
+    .tl_i,
+    .tl_o,
+
+    .reg2hw,
+    .hw2reg,
+
+    .devmode_i  (1'b1)
+  );
+
+
+
+endmodule
+
diff --git a/verilog/rtl/rv_plic_gateway.sv b/verilog/rtl/rv_plic_gateway.sv
new file mode 100644
index 0000000..c81810b
--- /dev/null
+++ b/verilog/rtl/rv_plic_gateway.sv
@@ -0,0 +1,62 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// RISC-V Platform-Level Interrupt Gateways module
+
+module rv_plic_gateway #(
+  parameter int N_SOURCE = 32
+) (
+  input clk_i,
+  input rst_ni,
+
+  input [N_SOURCE-1:0] src_i,
+  input [N_SOURCE-1:0] le_i,      // Level0 Edge1
+
+  input [N_SOURCE-1:0] claim_i, // $onehot0(claim_i)
+  input [N_SOURCE-1:0] complete_i, // $onehot0(complete_i)
+
+  output logic [N_SOURCE-1:0] ip_o
+);
+
+  // Per-source gateway: turns the raw source lines into pending bits (ip_o)
+  // and tracks which sources are active (triggered and not yet completed).
+
+  // State: previous sample of src_i and the per-source "interrupt active" flag.
+  logic [N_SOURCE-1:0] src_q;
+  logic [N_SOURCE-1:0] ia;
+
+  // Combinational helpers: trigger condition and next-state values.
+  logic [N_SOURCE-1:0] set;
+  logic [N_SOURCE-1:0] ip_d;
+  logic [N_SOURCE-1:0] ia_d;
+
+  // A source triggers on a rising edge of src_i when le_i is 1 (edge mode),
+  // and whenever src_i is high when le_i is 0 (level mode).
+  assign set = src_i & (~le_i | ~src_q);
+
+  // Pending: raised by `set` only while the source is neither active nor
+  // already pending; dropped when a pending source is claimed.
+  // The RISC-V PLIC spec allows a counter for edge-triggered sources; none is
+  // implemented here, so edge triggers arriving while active are not recorded.
+  assign ip_d = (ip_o | (set & ~ia & ~ip_o)) & (~(ip_o & claim_i));
+
+  // Active: raised together with a trigger and held until the target signals
+  // completion while the source is no longer pending. This blocks `set` from
+  // re-raising ip_o between claim and completion. Completion only clears a
+  // source that is already active; on an inactive source `set` always sets ia.
+  assign ia_d = (ia | (set & ~ia)) & (~(ia & complete_i & ~ip_o));
+
+  // All three registers share the same clock and asynchronous active-low reset.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      src_q <= '0;
+      ip_o  <= '0;
+      ia    <= '0;
+    end else begin
+      src_q <= src_i;
+      ip_o  <= ip_d;
+      ia    <= ia_d;
+    end
+  end
+endmodule
diff --git a/verilog/rtl/rv_plic_reg_pkg.sv b/verilog/rtl/rv_plic_reg_pkg.sv
new file mode 100644
index 0000000..ca7182f
--- /dev/null
+++ b/verilog/rtl/rv_plic_reg_pkg.sv
@@ -0,0 +1,462 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Package auto-generated by `reggen` containing data structure
+
+package rv_plic_reg_pkg;
+
+  // Param list (constants shared by rv_plic and rv_plic_reg_top)
+  parameter int NumSrc = 44; // Number of interrupt sources
+  parameter int NumTarget = 1; // Number of interrupt targets
+  parameter int PrioWidth = 2; // Bits per priority/threshold field
+
+  // Address width within the block
+  parameter int BlockAw = 10;
+
+  ////////////////////////////
+  // Typedefs for registers //
+  ////////////////////////////
+  typedef struct packed {
+    logic        q;
+  } rv_plic_reg2hw_le_mreg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio0_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio1_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio2_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio3_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio4_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio5_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio6_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio7_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio8_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio9_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio10_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio11_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio12_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio13_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio14_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio15_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio16_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio17_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio18_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio19_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio20_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio21_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio22_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio23_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio24_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio25_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio26_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio27_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio28_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio29_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio30_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio31_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio32_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio33_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio34_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio35_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio36_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio37_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio38_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio39_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio40_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio41_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio42_reg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_prio43_reg_t;
+
+  typedef struct packed {
+    logic        q;
+  } rv_plic_reg2hw_ie0_mreg_t;
+
+  typedef struct packed {
+    logic [1:0]  q;
+  } rv_plic_reg2hw_threshold0_reg_t;
+
+  typedef struct packed {
+    logic [7:0]  q; // Used by rv_plic as the completion ID
+    logic        qe; // Used by rv_plic as the completion write strobe
+    logic        re; // Used by rv_plic as the claim read strobe
+  } rv_plic_reg2hw_cc0_reg_t;
+
+  typedef struct packed {
+    logic        q;
+  } rv_plic_reg2hw_msip0_reg_t;
+
+
+  typedef struct packed {
+    logic        d;
+    logic        de;
+  } rv_plic_hw2reg_ip_mreg_t;
+
+  typedef struct packed {
+    logic [7:0]  d;
+  } rv_plic_hw2reg_cc0_reg_t;
+
+
+  ///////////////////////////////////////
+  // Register to internal design logic //
+  ///////////////////////////////////////
+  typedef struct packed {
+    rv_plic_reg2hw_le_mreg_t [43:0] le; // [188:145]
+    rv_plic_reg2hw_prio0_reg_t prio0; // [144:143]
+    rv_plic_reg2hw_prio1_reg_t prio1; // [142:141]
+    rv_plic_reg2hw_prio2_reg_t prio2; // [140:139]
+    rv_plic_reg2hw_prio3_reg_t prio3; // [138:137]
+    rv_plic_reg2hw_prio4_reg_t prio4; // [136:135]
+    rv_plic_reg2hw_prio5_reg_t prio5; // [134:133]
+    rv_plic_reg2hw_prio6_reg_t prio6; // [132:131]
+    rv_plic_reg2hw_prio7_reg_t prio7; // [130:129]
+    rv_plic_reg2hw_prio8_reg_t prio8; // [128:127]
+    rv_plic_reg2hw_prio9_reg_t prio9; // [126:125]
+    rv_plic_reg2hw_prio10_reg_t prio10; // [124:123]
+    rv_plic_reg2hw_prio11_reg_t prio11; // [122:121]
+    rv_plic_reg2hw_prio12_reg_t prio12; // [120:119]
+    rv_plic_reg2hw_prio13_reg_t prio13; // [118:117]
+    rv_plic_reg2hw_prio14_reg_t prio14; // [116:115]
+    rv_plic_reg2hw_prio15_reg_t prio15; // [114:113]
+    rv_plic_reg2hw_prio16_reg_t prio16; // [112:111]
+    rv_plic_reg2hw_prio17_reg_t prio17; // [110:109]
+    rv_plic_reg2hw_prio18_reg_t prio18; // [108:107]
+    rv_plic_reg2hw_prio19_reg_t prio19; // [106:105]
+    rv_plic_reg2hw_prio20_reg_t prio20; // [104:103]
+    rv_plic_reg2hw_prio21_reg_t prio21; // [102:101]
+    rv_plic_reg2hw_prio22_reg_t prio22; // [100:99]
+    rv_plic_reg2hw_prio23_reg_t prio23; // [98:97]
+    rv_plic_reg2hw_prio24_reg_t prio24; // [96:95]
+    rv_plic_reg2hw_prio25_reg_t prio25; // [94:93]
+    rv_plic_reg2hw_prio26_reg_t prio26; // [92:91]
+    rv_plic_reg2hw_prio27_reg_t prio27; // [90:89]
+    rv_plic_reg2hw_prio28_reg_t prio28; // [88:87]
+    rv_plic_reg2hw_prio29_reg_t prio29; // [86:85]
+    rv_plic_reg2hw_prio30_reg_t prio30; // [84:83]
+    rv_plic_reg2hw_prio31_reg_t prio31; // [82:81]
+    rv_plic_reg2hw_prio32_reg_t prio32; // [80:79]
+    rv_plic_reg2hw_prio33_reg_t prio33; // [78:77]
+    rv_plic_reg2hw_prio34_reg_t prio34; // [76:75]
+    rv_plic_reg2hw_prio35_reg_t prio35; // [74:73]
+    rv_plic_reg2hw_prio36_reg_t prio36; // [72:71]
+    rv_plic_reg2hw_prio37_reg_t prio37; // [70:69]
+    rv_plic_reg2hw_prio38_reg_t prio38; // [68:67]
+    rv_plic_reg2hw_prio39_reg_t prio39; // [66:65]
+    rv_plic_reg2hw_prio40_reg_t prio40; // [64:63]
+    rv_plic_reg2hw_prio41_reg_t prio41; // [62:61]
+    rv_plic_reg2hw_prio42_reg_t prio42; // [60:59]
+    rv_plic_reg2hw_prio43_reg_t prio43; // [58:57]
+    rv_plic_reg2hw_ie0_mreg_t [43:0] ie0; // [56:13]
+    rv_plic_reg2hw_threshold0_reg_t threshold0; // [12:11]
+    rv_plic_reg2hw_cc0_reg_t cc0; // [10:1]
+    rv_plic_reg2hw_msip0_reg_t msip0; // [0:0]
+  } rv_plic_reg2hw_t;
+
+  ///////////////////////////////////////
+  // Internal design logic to register //
+  ///////////////////////////////////////
+  typedef struct packed {
+    rv_plic_hw2reg_ip_mreg_t [43:0] ip; // [95:8]
+    rv_plic_hw2reg_cc0_reg_t cc0; // [7:0]
+  } rv_plic_hw2reg_t;
+
+  // Register Address
+  parameter logic [BlockAw-1:0] RV_PLIC_IP_0_OFFSET = 10'h 0;
+  parameter logic [BlockAw-1:0] RV_PLIC_IP_1_OFFSET = 10'h 4;
+  parameter logic [BlockAw-1:0] RV_PLIC_LE_0_OFFSET = 10'h 8;
+  parameter logic [BlockAw-1:0] RV_PLIC_LE_1_OFFSET = 10'h c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO0_OFFSET = 10'h 10;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO1_OFFSET = 10'h 14;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO2_OFFSET = 10'h 18;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO3_OFFSET = 10'h 1c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO4_OFFSET = 10'h 20;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO5_OFFSET = 10'h 24;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO6_OFFSET = 10'h 28;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO7_OFFSET = 10'h 2c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO8_OFFSET = 10'h 30;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO9_OFFSET = 10'h 34;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO10_OFFSET = 10'h 38;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO11_OFFSET = 10'h 3c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO12_OFFSET = 10'h 40;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO13_OFFSET = 10'h 44;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO14_OFFSET = 10'h 48;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO15_OFFSET = 10'h 4c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO16_OFFSET = 10'h 50;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO17_OFFSET = 10'h 54;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO18_OFFSET = 10'h 58;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO19_OFFSET = 10'h 5c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO20_OFFSET = 10'h 60;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO21_OFFSET = 10'h 64;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO22_OFFSET = 10'h 68;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO23_OFFSET = 10'h 6c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO24_OFFSET = 10'h 70;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO25_OFFSET = 10'h 74;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO26_OFFSET = 10'h 78;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO27_OFFSET = 10'h 7c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO28_OFFSET = 10'h 80;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO29_OFFSET = 10'h 84;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO30_OFFSET = 10'h 88;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO31_OFFSET = 10'h 8c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO32_OFFSET = 10'h 90;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO33_OFFSET = 10'h 94;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO34_OFFSET = 10'h 98;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO35_OFFSET = 10'h 9c;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO36_OFFSET = 10'h a0;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO37_OFFSET = 10'h a4;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO38_OFFSET = 10'h a8;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO39_OFFSET = 10'h ac;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO40_OFFSET = 10'h b0;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO41_OFFSET = 10'h b4;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO42_OFFSET = 10'h b8;
+  parameter logic [BlockAw-1:0] RV_PLIC_PRIO43_OFFSET = 10'h bc;
+  parameter logic [BlockAw-1:0] RV_PLIC_IE0_0_OFFSET  = 10'h c0;
+  parameter logic [BlockAw-1:0] RV_PLIC_IE0_1_OFFSET  = 10'h c4;
+  parameter logic [BlockAw-1:0] RV_PLIC_THRESHOLD0_OFFSET = 10'h c8;
+  parameter logic [BlockAw-1:0] RV_PLIC_CC0_OFFSET = 10'h cc;
+  parameter logic [BlockAw-1:0] RV_PLIC_MSIP0_OFFSET = 10'h d0;
+
+
+  // Register Index
+  typedef enum int {
+    RV_PLIC_IP_0,
+    RV_PLIC_IP_1,
+    RV_PLIC_LE_0,
+    RV_PLIC_LE_1,
+    RV_PLIC_PRIO0,
+    RV_PLIC_PRIO1,
+    RV_PLIC_PRIO2,
+    RV_PLIC_PRIO3,
+    RV_PLIC_PRIO4,
+    RV_PLIC_PRIO5,
+    RV_PLIC_PRIO6,
+    RV_PLIC_PRIO7,
+    RV_PLIC_PRIO8,
+    RV_PLIC_PRIO9,
+    RV_PLIC_PRIO10,
+    RV_PLIC_PRIO11,
+    RV_PLIC_PRIO12,
+    RV_PLIC_PRIO13,
+    RV_PLIC_PRIO14,
+    RV_PLIC_PRIO15,
+    RV_PLIC_PRIO16,
+    RV_PLIC_PRIO17,
+    RV_PLIC_PRIO18,
+    RV_PLIC_PRIO19,
+    RV_PLIC_PRIO20,
+    RV_PLIC_PRIO21,
+    RV_PLIC_PRIO22,
+    RV_PLIC_PRIO23,
+    RV_PLIC_PRIO24,
+    RV_PLIC_PRIO25,
+    RV_PLIC_PRIO26,
+    RV_PLIC_PRIO27,
+    RV_PLIC_PRIO28,
+    RV_PLIC_PRIO29,
+    RV_PLIC_PRIO30,
+    RV_PLIC_PRIO31,
+    RV_PLIC_PRIO32,
+    RV_PLIC_PRIO33,
+    RV_PLIC_PRIO34,
+    RV_PLIC_PRIO35,
+    RV_PLIC_PRIO36,
+    RV_PLIC_PRIO37,
+    RV_PLIC_PRIO38,
+    RV_PLIC_PRIO39,
+    RV_PLIC_PRIO40,
+    RV_PLIC_PRIO41,
+    RV_PLIC_PRIO42,
+    RV_PLIC_PRIO43,
+    RV_PLIC_IE0_0,
+    RV_PLIC_IE0_1,
+    RV_PLIC_THRESHOLD0,
+    RV_PLIC_CC0,
+    RV_PLIC_MSIP0
+  } rv_plic_id_e;
+
+  // Register width information to check illegal writes (indexed by rv_plic_id_e)
+  parameter logic [3:0] RV_PLIC_PERMIT [53] = '{
+    4'b 1111, // index[  0] RV_PLIC_IP_0
+    4'b 1111, // index[  1] RV_PLIC_IP_1
+    4'b 1111, // index[  2] RV_PLIC_LE_0
+    4'b 1111, // index[  3] RV_PLIC_LE_1
+    4'b 0001, // index[  4] RV_PLIC_PRIO0
+    4'b 0001, // index[  5] RV_PLIC_PRIO1
+    4'b 0001, // index[  6] RV_PLIC_PRIO2
+    4'b 0001, // index[  7] RV_PLIC_PRIO3
+    4'b 0001, // index[  8] RV_PLIC_PRIO4
+    4'b 0001, // index[  9] RV_PLIC_PRIO5
+    4'b 0001, // index[ 10] RV_PLIC_PRIO6
+    4'b 0001, // index[ 11] RV_PLIC_PRIO7
+    4'b 0001, // index[ 12] RV_PLIC_PRIO8
+    4'b 0001, // index[ 13] RV_PLIC_PRIO9
+    4'b 0001, // index[ 14] RV_PLIC_PRIO10
+    4'b 0001, // index[ 15] RV_PLIC_PRIO11
+    4'b 0001, // index[ 16] RV_PLIC_PRIO12
+    4'b 0001, // index[ 17] RV_PLIC_PRIO13
+    4'b 0001, // index[ 18] RV_PLIC_PRIO14
+    4'b 0001, // index[ 19] RV_PLIC_PRIO15
+    4'b 0001, // index[ 20] RV_PLIC_PRIO16
+    4'b 0001, // index[ 21] RV_PLIC_PRIO17
+    4'b 0001, // index[ 22] RV_PLIC_PRIO18
+    4'b 0001, // index[ 23] RV_PLIC_PRIO19
+    4'b 0001, // index[ 24] RV_PLIC_PRIO20
+    4'b 0001, // index[ 25] RV_PLIC_PRIO21
+    4'b 0001, // index[ 26] RV_PLIC_PRIO22
+    4'b 0001, // index[ 27] RV_PLIC_PRIO23
+    4'b 0001, // index[ 28] RV_PLIC_PRIO24
+    4'b 0001, // index[ 29] RV_PLIC_PRIO25
+    4'b 0001, // index[ 30] RV_PLIC_PRIO26
+    4'b 0001, // index[ 31] RV_PLIC_PRIO27
+    4'b 0001, // index[ 32] RV_PLIC_PRIO28
+    4'b 0001, // index[ 33] RV_PLIC_PRIO29
+    4'b 0001, // index[ 34] RV_PLIC_PRIO30
+    4'b 0001, // index[ 35] RV_PLIC_PRIO31
+    4'b 0001, // index[ 36] RV_PLIC_PRIO32
+    4'b 0001, // index[ 37] RV_PLIC_PRIO33
+    4'b 0001, // index[ 38] RV_PLIC_PRIO34
+    4'b 0001, // index[ 39] RV_PLIC_PRIO35
+    4'b 0001, // index[ 40] RV_PLIC_PRIO36
+    4'b 0001, // index[ 41] RV_PLIC_PRIO37
+    4'b 0001, // index[ 42] RV_PLIC_PRIO38
+    4'b 0001, // index[ 43] RV_PLIC_PRIO39
+    4'b 0001, // index[ 44] RV_PLIC_PRIO40
+    4'b 0001, // index[ 45] RV_PLIC_PRIO41
+    4'b 0001, // index[ 46] RV_PLIC_PRIO42
+    4'b 0001, // index[ 47] RV_PLIC_PRIO43
+    4'b 1111, // index[ 48] RV_PLIC_IE0_0
+    4'b 1111, // index[ 49] RV_PLIC_IE0_1
+    4'b 0001, // index[ 50] RV_PLIC_THRESHOLD0
+    4'b 0001, // index[ 51] RV_PLIC_CC0
+    4'b 0001  // index[ 52] RV_PLIC_MSIP0
+  };
+endpackage
+
diff --git a/verilog/rtl/rv_plic_reg_top.sv b/verilog/rtl/rv_plic_reg_top.sv
new file mode 100644
index 0000000..d7c1e35
--- /dev/null
+++ b/verilog/rtl/rv_plic_reg_top.sv
@@ -0,0 +1,6065 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Top module auto-generated by `reggen`
+
+
+module rv_plic_reg_top (
+  input clk_i,
+  input rst_ni,
+
+  // The register interface below can be changed
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  // To HW
+  output rv_plic_reg_pkg::rv_plic_reg2hw_t reg2hw, // Write
+  input  rv_plic_reg_pkg::rv_plic_hw2reg_t hw2reg, // Read
+
+  // Config
+  input devmode_i // If 1, explicit error return for unmapped register access
+);
+
+  import rv_plic_reg_pkg::* ;
+
+  // Register-interface geometry
+  localparam int AW  = 10;      // width of reg_addr (passed to the adapter as RegAw)
+  localparam int DW  = 32;      // width of the register data buses (RegDw)
+  localparam int DBW = DW / 8;  // byte width: one byte-enable bit per data byte
+
+  // Signals exchanged between the TL-UL adapter and the register logic
+  logic           reg_we, reg_re;
+  logic [AW-1:0]  reg_addr;
+  logic [DW-1:0]  reg_wdata, reg_rdata, reg_rdata_next;
+  logic [DBW-1:0] reg_be;
+  logic           reg_error, addrmiss, wr_err;
+
+  // Internal copies of the TL-UL request/response structs; the module ports
+  // are wired straight through to them.
+  tlul_pkg::tl_h2d_t tl_reg_h2d;
+  tlul_pkg::tl_d2h_t tl_reg_d2h;
+  assign tl_o       = tl_reg_d2h;
+  assign tl_reg_h2d = tl_i;
+
+  // TL-UL to register-interface adapter
+  tlul_adapter_reg #(
+    .RegAw(AW),
+    .RegDw(DW)
+  ) u_reg_if (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // TL-UL side
+    .tl_i   (tl_reg_h2d),
+    .tl_o   (tl_reg_d2h),
+    // register side: request outputs
+    .we_o   (reg_we),
+    .re_o   (reg_re),
+    .addr_o (reg_addr),
+    .wdata_o(reg_wdata),
+    .be_o   (reg_be),
+    // register side: response inputs
+    .rdata_i(reg_rdata),
+    .error_i(reg_error)
+  );
+
+  // An error is reported on any write error, and on an address miss only
+  // when devmode_i is set. reg_rdata simply forwards reg_rdata_next.
+  assign reg_error = wr_err | (addrmiss & devmode_i);
+  assign reg_rdata = reg_rdata_next;
+
+  // Software-facing register signals
+  // Naming: <reg>_<field>_{wd|we|qs},
+  //         or <reg>_{wd|we|qs} when the register has one field or none
+  // Read values (qs) of the single-bit fields p_0..p_43 of ip_0 / ip_1
+  logic ip_0_p_0_qs, ip_0_p_1_qs, ip_0_p_2_qs, ip_0_p_3_qs;
+  logic ip_0_p_4_qs, ip_0_p_5_qs, ip_0_p_6_qs, ip_0_p_7_qs;
+  logic ip_0_p_8_qs, ip_0_p_9_qs, ip_0_p_10_qs, ip_0_p_11_qs;
+  logic ip_0_p_12_qs, ip_0_p_13_qs, ip_0_p_14_qs, ip_0_p_15_qs;
+  logic ip_0_p_16_qs, ip_0_p_17_qs, ip_0_p_18_qs, ip_0_p_19_qs;
+  logic ip_0_p_20_qs, ip_0_p_21_qs, ip_0_p_22_qs, ip_0_p_23_qs;
+  logic ip_0_p_24_qs, ip_0_p_25_qs, ip_0_p_26_qs, ip_0_p_27_qs;
+  logic ip_0_p_28_qs, ip_0_p_29_qs, ip_0_p_30_qs, ip_0_p_31_qs;
+  logic ip_1_p_32_qs, ip_1_p_33_qs, ip_1_p_34_qs, ip_1_p_35_qs;
+  logic ip_1_p_36_qs, ip_1_p_37_qs, ip_1_p_38_qs, ip_1_p_39_qs;
+  logic ip_1_p_40_qs, ip_1_p_41_qs, ip_1_p_42_qs, ip_1_p_43_qs;
+  // le_0 / le_1 fields: one line per field (qs, wd, we)
+  logic le_0_le_0_qs, le_0_le_0_wd, le_0_le_0_we;
+  logic le_0_le_1_qs, le_0_le_1_wd, le_0_le_1_we;
+  logic le_0_le_2_qs, le_0_le_2_wd, le_0_le_2_we;
+  logic le_0_le_3_qs, le_0_le_3_wd, le_0_le_3_we;
+  logic le_0_le_4_qs, le_0_le_4_wd, le_0_le_4_we;
+  logic le_0_le_5_qs, le_0_le_5_wd, le_0_le_5_we;
+  logic le_0_le_6_qs, le_0_le_6_wd, le_0_le_6_we;
+  logic le_0_le_7_qs, le_0_le_7_wd, le_0_le_7_we;
+  logic le_0_le_8_qs, le_0_le_8_wd, le_0_le_8_we;
+  logic le_0_le_9_qs, le_0_le_9_wd, le_0_le_9_we;
+  logic le_0_le_10_qs, le_0_le_10_wd, le_0_le_10_we;
+  logic le_0_le_11_qs, le_0_le_11_wd, le_0_le_11_we;
+  logic le_0_le_12_qs, le_0_le_12_wd, le_0_le_12_we;
+  logic le_0_le_13_qs, le_0_le_13_wd, le_0_le_13_we;
+  logic le_0_le_14_qs, le_0_le_14_wd, le_0_le_14_we;
+  logic le_0_le_15_qs, le_0_le_15_wd, le_0_le_15_we;
+  logic le_0_le_16_qs, le_0_le_16_wd, le_0_le_16_we;
+  logic le_0_le_17_qs, le_0_le_17_wd, le_0_le_17_we;
+  logic le_0_le_18_qs, le_0_le_18_wd, le_0_le_18_we;
+  logic le_0_le_19_qs, le_0_le_19_wd, le_0_le_19_we;
+  logic le_0_le_20_qs, le_0_le_20_wd, le_0_le_20_we;
+  logic le_0_le_21_qs, le_0_le_21_wd, le_0_le_21_we;
+  logic le_0_le_22_qs, le_0_le_22_wd, le_0_le_22_we;
+  logic le_0_le_23_qs, le_0_le_23_wd, le_0_le_23_we;
+  logic le_0_le_24_qs, le_0_le_24_wd, le_0_le_24_we;
+  logic le_0_le_25_qs, le_0_le_25_wd, le_0_le_25_we;
+  logic le_0_le_26_qs, le_0_le_26_wd, le_0_le_26_we;
+  logic le_0_le_27_qs, le_0_le_27_wd, le_0_le_27_we;
+  logic le_0_le_28_qs, le_0_le_28_wd, le_0_le_28_we;
+  logic le_0_le_29_qs, le_0_le_29_wd, le_0_le_29_we;
+  logic le_0_le_30_qs, le_0_le_30_wd, le_0_le_30_we;
+  logic le_0_le_31_qs, le_0_le_31_wd, le_0_le_31_we;
+  logic le_1_le_32_qs, le_1_le_32_wd, le_1_le_32_we;
+  logic le_1_le_33_qs, le_1_le_33_wd, le_1_le_33_we;
+  logic le_1_le_34_qs, le_1_le_34_wd, le_1_le_34_we;
+  logic le_1_le_35_qs, le_1_le_35_wd, le_1_le_35_we;
+  logic le_1_le_36_qs, le_1_le_36_wd, le_1_le_36_we;
+  logic le_1_le_37_qs, le_1_le_37_wd, le_1_le_37_we;
+  logic le_1_le_38_qs, le_1_le_38_wd, le_1_le_38_we;
+  logic le_1_le_39_qs, le_1_le_39_wd, le_1_le_39_we;
+  logic le_1_le_40_qs, le_1_le_40_wd, le_1_le_40_we;
+  logic le_1_le_41_qs, le_1_le_41_wd, le_1_le_41_we;
+  logic le_1_le_42_qs, le_1_le_42_wd, le_1_le_42_we;
+  logic le_1_le_43_qs, le_1_le_43_wd, le_1_le_43_we;
+  // prio0..prio43: 2-bit qs/wd values plus a single-bit we per register
+  logic [1:0] prio0_qs, prio0_wd;
+  logic       prio0_we;
+  logic [1:0] prio1_qs, prio1_wd;
+  logic       prio1_we;
+  logic [1:0] prio2_qs, prio2_wd;
+  logic       prio2_we;
+  logic [1:0] prio3_qs, prio3_wd;
+  logic       prio3_we;
+  logic [1:0] prio4_qs, prio4_wd;
+  logic       prio4_we;
+  logic [1:0] prio5_qs, prio5_wd;
+  logic       prio5_we;
+  logic [1:0] prio6_qs, prio6_wd;
+  logic       prio6_we;
+  logic [1:0] prio7_qs, prio7_wd;
+  logic       prio7_we;
+  logic [1:0] prio8_qs, prio8_wd;
+  logic       prio8_we;
+  logic [1:0] prio9_qs, prio9_wd;
+  logic       prio9_we;
+  logic [1:0] prio10_qs, prio10_wd;
+  logic       prio10_we;
+  logic [1:0] prio11_qs, prio11_wd;
+  logic       prio11_we;
+  logic [1:0] prio12_qs, prio12_wd;
+  logic       prio12_we;
+  logic [1:0] prio13_qs, prio13_wd;
+  logic       prio13_we;
+  logic [1:0] prio14_qs, prio14_wd;
+  logic       prio14_we;
+  logic [1:0] prio15_qs, prio15_wd;
+  logic       prio15_we;
+  logic [1:0] prio16_qs, prio16_wd;
+  logic       prio16_we;
+  logic [1:0] prio17_qs, prio17_wd;
+  logic       prio17_we;
+  logic [1:0] prio18_qs, prio18_wd;
+  logic       prio18_we;
+  logic [1:0] prio19_qs, prio19_wd;
+  logic       prio19_we;
+  logic [1:0] prio20_qs, prio20_wd;
+  logic       prio20_we;
+  logic [1:0] prio21_qs, prio21_wd;
+  logic       prio21_we;
+  logic [1:0] prio22_qs, prio22_wd;
+  logic       prio22_we;
+  logic [1:0] prio23_qs, prio23_wd;
+  logic       prio23_we;
+  logic [1:0] prio24_qs, prio24_wd;
+  logic       prio24_we;
+  logic [1:0] prio25_qs, prio25_wd;
+  logic       prio25_we;
+  logic [1:0] prio26_qs, prio26_wd;
+  logic       prio26_we;
+  logic [1:0] prio27_qs, prio27_wd;
+  logic       prio27_we;
+  logic [1:0] prio28_qs, prio28_wd;
+  logic       prio28_we;
+  logic [1:0] prio29_qs, prio29_wd;
+  logic       prio29_we;
+  logic [1:0] prio30_qs, prio30_wd;
+  logic       prio30_we;
+  logic [1:0] prio31_qs, prio31_wd;
+  logic       prio31_we;
+  logic [1:0] prio32_qs, prio32_wd;
+  logic       prio32_we;
+  logic [1:0] prio33_qs, prio33_wd;
+  logic       prio33_we;
+  logic [1:0] prio34_qs, prio34_wd;
+  logic       prio34_we;
+  logic [1:0] prio35_qs, prio35_wd;
+  logic       prio35_we;
+  logic [1:0] prio36_qs, prio36_wd;
+  logic       prio36_we;
+  logic [1:0] prio37_qs, prio37_wd;
+  logic       prio37_we;
+  logic [1:0] prio38_qs, prio38_wd;
+  logic       prio38_we;
+  logic [1:0] prio39_qs, prio39_wd;
+  logic       prio39_we;
+  logic [1:0] prio40_qs, prio40_wd;
+  logic       prio40_we;
+  logic [1:0] prio41_qs, prio41_wd;
+  logic       prio41_we;
+  logic [1:0] prio42_qs, prio42_wd;
+  logic       prio42_we;
+  logic [1:0] prio43_qs, prio43_wd;
+  logic       prio43_we;
+  // ie0_0 / ie0_1 fields: one line per field (qs, wd, we)
+  logic ie0_0_e_0_qs, ie0_0_e_0_wd, ie0_0_e_0_we;
+  logic ie0_0_e_1_qs, ie0_0_e_1_wd, ie0_0_e_1_we;
+  logic ie0_0_e_2_qs, ie0_0_e_2_wd, ie0_0_e_2_we;
+  logic ie0_0_e_3_qs, ie0_0_e_3_wd, ie0_0_e_3_we;
+  logic ie0_0_e_4_qs, ie0_0_e_4_wd, ie0_0_e_4_we;
+  logic ie0_0_e_5_qs, ie0_0_e_5_wd, ie0_0_e_5_we;
+  logic ie0_0_e_6_qs, ie0_0_e_6_wd, ie0_0_e_6_we;
+  logic ie0_0_e_7_qs, ie0_0_e_7_wd, ie0_0_e_7_we;
+  logic ie0_0_e_8_qs, ie0_0_e_8_wd, ie0_0_e_8_we;
+  logic ie0_0_e_9_qs, ie0_0_e_9_wd, ie0_0_e_9_we;
+  logic ie0_0_e_10_qs, ie0_0_e_10_wd, ie0_0_e_10_we;
+  logic ie0_0_e_11_qs, ie0_0_e_11_wd, ie0_0_e_11_we;
+  logic ie0_0_e_12_qs, ie0_0_e_12_wd, ie0_0_e_12_we;
+  logic ie0_0_e_13_qs, ie0_0_e_13_wd, ie0_0_e_13_we;
+  logic ie0_0_e_14_qs, ie0_0_e_14_wd, ie0_0_e_14_we;
+  logic ie0_0_e_15_qs, ie0_0_e_15_wd, ie0_0_e_15_we;
+  logic ie0_0_e_16_qs, ie0_0_e_16_wd, ie0_0_e_16_we;
+  logic ie0_0_e_17_qs, ie0_0_e_17_wd, ie0_0_e_17_we;
+  logic ie0_0_e_18_qs, ie0_0_e_18_wd, ie0_0_e_18_we;
+  logic ie0_0_e_19_qs, ie0_0_e_19_wd, ie0_0_e_19_we;
+  logic ie0_0_e_20_qs, ie0_0_e_20_wd, ie0_0_e_20_we;
+  logic ie0_0_e_21_qs, ie0_0_e_21_wd, ie0_0_e_21_we;
+  logic ie0_0_e_22_qs, ie0_0_e_22_wd, ie0_0_e_22_we;
+  logic ie0_0_e_23_qs, ie0_0_e_23_wd, ie0_0_e_23_we;
+  logic ie0_0_e_24_qs, ie0_0_e_24_wd, ie0_0_e_24_we;
+  logic ie0_0_e_25_qs, ie0_0_e_25_wd, ie0_0_e_25_we;
+  logic ie0_0_e_26_qs, ie0_0_e_26_wd, ie0_0_e_26_we;
+  logic ie0_0_e_27_qs, ie0_0_e_27_wd, ie0_0_e_27_we;
+  logic ie0_0_e_28_qs, ie0_0_e_28_wd, ie0_0_e_28_we;
+  logic ie0_0_e_29_qs, ie0_0_e_29_wd, ie0_0_e_29_we;
+  logic ie0_0_e_30_qs, ie0_0_e_30_wd, ie0_0_e_30_we;
+  logic ie0_0_e_31_qs, ie0_0_e_31_wd, ie0_0_e_31_we;
+  logic ie0_1_e_32_qs, ie0_1_e_32_wd, ie0_1_e_32_we;
+  logic ie0_1_e_33_qs, ie0_1_e_33_wd, ie0_1_e_33_we;
+  logic ie0_1_e_34_qs, ie0_1_e_34_wd, ie0_1_e_34_we;
+  logic ie0_1_e_35_qs, ie0_1_e_35_wd, ie0_1_e_35_we;
+  logic ie0_1_e_36_qs, ie0_1_e_36_wd, ie0_1_e_36_we;
+  logic ie0_1_e_37_qs, ie0_1_e_37_wd, ie0_1_e_37_we;
+  logic ie0_1_e_38_qs, ie0_1_e_38_wd, ie0_1_e_38_we;
+  logic ie0_1_e_39_qs, ie0_1_e_39_wd, ie0_1_e_39_we;
+  logic ie0_1_e_40_qs, ie0_1_e_40_wd, ie0_1_e_40_we;
+  logic ie0_1_e_41_qs, ie0_1_e_41_wd, ie0_1_e_41_we;
+  logic ie0_1_e_42_qs, ie0_1_e_42_wd, ie0_1_e_42_we;
+  logic ie0_1_e_43_qs, ie0_1_e_43_wd, ie0_1_e_43_we;
+  // threshold0 (2-bit), cc0 (8-bit, the only register here with an re signal)
+  // and msip0 (1-bit)
+  logic [1:0] threshold0_qs, threshold0_wd;
+  logic       threshold0_we;
+  logic [7:0] cc0_qs, cc0_wd;
+  logic       cc0_we, cc0_re;
+  logic       msip0_qs, msip0_wd, msip0_we;
+
+  // Register instances
+
+  // Subregister 0 of Multireg ip
+  // R[ip_0]: V(False)
+
+  // F[p_0]: 0:0 -- read-only field, hardware-updated via hw2reg.ip[0]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_0 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[0].de),
+    .d     (hw2reg.ip[0].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_0_qs)
+  );
+
+
+  // F[p_1]: 1:1 -- read-only field, hardware-updated via hw2reg.ip[1]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_1 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[1].de),
+    .d     (hw2reg.ip[1].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_1_qs)
+  );
+
+
+  // F[p_2]: 2:2 -- read-only field, hardware-updated via hw2reg.ip[2]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_2 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[2].de),
+    .d     (hw2reg.ip[2].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_2_qs)
+  );
+
+
+  // F[p_3]: 3:3 -- read-only field, hardware-updated via hw2reg.ip[3]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_3 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[3].de),
+    .d     (hw2reg.ip[3].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_3_qs)
+  );
+
+
+  // F[p_4]: 4:4 -- read-only field, hardware-updated via hw2reg.ip[4]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_4 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[4].de),
+    .d     (hw2reg.ip[4].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_4_qs)
+  );
+
+
+  // F[p_5]: 5:5 -- read-only field, hardware-updated via hw2reg.ip[5]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_5 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[5].de),
+    .d     (hw2reg.ip[5].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_5_qs)
+  );
+
+
+  // F[p_6]: 6:6 -- read-only field, hardware-updated via hw2reg.ip[6]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_6 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[6].de),
+    .d     (hw2reg.ip[6].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_6_qs)
+  );
+
+
+  // F[p_7]: 7:7 -- read-only field, hardware-updated via hw2reg.ip[7]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_7 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[7].de),
+    .d     (hw2reg.ip[7].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_7_qs)
+  );
+
+
+  // F[p_8]: 8:8 -- read-only field, hardware-updated via hw2reg.ip[8]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_8 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[8].de),
+    .d     (hw2reg.ip[8].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_8_qs)
+  );
+
+
+  // F[p_9]: 9:9 -- read-only field, hardware-updated via hw2reg.ip[9]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_9 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[9].de),
+    .d     (hw2reg.ip[9].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_9_qs)
+  );
+
+
+  // F[p_10]: 10:10 -- read-only field, hardware-updated via hw2reg.ip[10]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_10 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[10].de),
+    .d     (hw2reg.ip[10].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_10_qs)
+  );
+
+
+  // F[p_11]: 11:11 -- read-only field, hardware-updated via hw2reg.ip[11]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_11 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[11].de),
+    .d     (hw2reg.ip[11].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_11_qs)
+  );
+
+
+  // F[p_12]: 12:12 -- read-only field, hardware-updated via hw2reg.ip[12]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_12 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[12].de),
+    .d     (hw2reg.ip[12].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_12_qs)
+  );
+
+
+  // F[p_13]: 13:13 -- read-only field, hardware-updated via hw2reg.ip[13]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_13 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[13].de),
+    .d     (hw2reg.ip[13].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_13_qs)
+  );
+
+
+  // F[p_14]: 14:14 -- read-only field, hardware-updated via hw2reg.ip[14]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_14 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[14].de),
+    .d     (hw2reg.ip[14].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_14_qs)
+  );
+
+
+  // F[p_15]: 15:15 -- read-only field, hardware-updated via hw2reg.ip[15]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_15 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[15].de),
+    .d     (hw2reg.ip[15].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_15_qs)
+  );
+
+
+  // F[p_16]: 16:16 -- read-only field, hardware-updated via hw2reg.ip[16]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_16 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[16].de),
+    .d     (hw2reg.ip[16].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_16_qs)
+  );
+
+
+  // F[p_17]: 17:17 -- read-only field, hardware-updated via hw2reg.ip[17]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_17 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[17].de),
+    .d     (hw2reg.ip[17].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_17_qs)
+  );
+
+
+  // F[p_18]: 18:18 -- read-only field, hardware-updated via hw2reg.ip[18]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_18 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[18].de),
+    .d     (hw2reg.ip[18].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_18_qs)
+  );
+
+
+  // F[p_19]: 19:19 -- read-only field, hardware-updated via hw2reg.ip[19]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_19 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[19].de),
+    .d     (hw2reg.ip[19].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_19_qs)
+  );
+
+
+  // F[p_20]: 20:20 -- read-only field, hardware-updated via hw2reg.ip[20]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_20 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[20].de),
+    .d     (hw2reg.ip[20].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_20_qs)
+  );
+
+
+  // F[p_21]: 21:21 -- read-only field, hardware-updated via hw2reg.ip[21]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_21 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[21].de),
+    .d     (hw2reg.ip[21].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_21_qs)
+  );
+
+
+  // F[p_22]: 22:22 -- read-only field, hardware-updated via hw2reg.ip[22]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_22 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[22].de),
+    .d     (hw2reg.ip[22].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_22_qs)
+  );
+
+
+  // F[p_23]: 23:23 -- read-only field, hardware-updated via hw2reg.ip[23]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_23 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[23].de),
+    .d     (hw2reg.ip[23].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_23_qs)
+  );
+
+
+  // F[p_24]: 24:24 -- read-only field, hardware-updated via hw2reg.ip[24]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_24 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[24].de),
+    .d     (hw2reg.ip[24].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_24_qs)
+  );
+
+
+  // F[p_25]: 25:25 -- read-only field, hardware-updated via hw2reg.ip[25]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_25 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[25].de),
+    .d     (hw2reg.ip[25].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_25_qs)
+  );
+
+
+  // F[p_26]: 26:26 -- read-only field, hardware-updated via hw2reg.ip[26]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_26 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[26].de),
+    .d     (hw2reg.ip[26].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_26_qs)
+  );
+
+
+  // F[p_27]: 27:27 -- read-only field, hardware-updated via hw2reg.ip[27]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_27 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[27].de),
+    .d     (hw2reg.ip[27].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_27_qs)
+  );
+
+
+  // F[p_28]: 28:28 -- read-only field, hardware-updated via hw2reg.ip[28]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_28 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[28].de),
+    .d     (hw2reg.ip[28].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_28_qs)
+  );
+
+
+  // F[p_29]: 29:29 -- read-only field, hardware-updated via hw2reg.ip[29]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_29 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[29].de),
+    .d     (hw2reg.ip[29].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_29_qs)
+  );
+
+
+  // F[p_30]: 30:30 -- read-only field, hardware-updated via hw2reg.ip[30]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_30 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[30].de),
+    .d     (hw2reg.ip[30].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_30_qs)
+  );
+
+
+  // F[p_31]: 31:31 -- read-only field, hardware-updated via hw2reg.ip[31]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_0_p_31 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[31].de),
+    .d     (hw2reg.ip[31].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_0_p_31_qs)
+  );
+
+
+  // Subregister 32 of Multireg ip
+  // R[ip_1]: V(False)
+
+  // F[p_32]: 0:0 -- read-only field, hardware-updated via hw2reg.ip[32]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_32 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[32].de),
+    .d     (hw2reg.ip[32].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_32_qs)
+  );
+
+
+  // F[p_33]: 1:1 -- read-only field, hardware-updated via hw2reg.ip[33]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_33 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[33].de),
+    .d     (hw2reg.ip[33].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_33_qs)
+  );
+
+
+  // F[p_34]: 2:2 -- read-only field, hardware-updated via hw2reg.ip[34]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_34 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[34].de),
+    .d     (hw2reg.ip[34].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_34_qs)
+  );
+
+
+  // F[p_35]: 3:3 -- read-only field, hardware-updated via hw2reg.ip[35]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_35 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[35].de),
+    .d     (hw2reg.ip[35].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_35_qs)
+  );
+
+
+  // F[p_36]: 4:4 -- read-only field, hardware-updated via hw2reg.ip[36]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_36 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[36].de),
+    .d     (hw2reg.ip[36].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_36_qs)
+  );
+
+
+  // F[p_37]: 5:5 -- read-only field, hardware-updated via hw2reg.ip[37]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_37 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[37].de),
+    .d     (hw2reg.ip[37].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_37_qs)
+  );
+
+
+  // F[p_38]: 6:6 -- read-only field, hardware-updated via hw2reg.ip[38]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_38 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[38].de),
+    .d     (hw2reg.ip[38].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_38_qs)
+  );
+
+
+  // F[p_39]: 7:7 -- read-only field, hardware-updated via hw2reg.ip[39]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_39 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[39].de),
+    .d     (hw2reg.ip[39].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_39_qs)
+  );
+
+
+  // F[p_40]: 8:8 -- read-only field, hardware-updated via hw2reg.ip[40]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_40 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[40].de),
+    .d     (hw2reg.ip[40].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_40_qs)
+  );
+
+
+  // F[p_41]: 9:9 -- read-only field, hardware-updated via hw2reg.ip[41]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_41 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[41].de),
+    .d     (hw2reg.ip[41].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_41_qs)
+  );
+
+
+  // F[p_42]: 10:10 -- read-only field, hardware-updated via hw2reg.ip[42]
+  prim_subreg #(
+    .DW(1),
+    .SWACCESS("RO"),
+    .RESVAL(1'h0)
+  ) u_ip_1_p_42 (
+    .clk_i (clk_i),
+    .rst_ni(rst_ni),
+    // software write port is tied off
+    .we    (1'b0),
+    .wd    ('0),
+    // hardware update inputs
+    .de    (hw2reg.ip[42].de),
+    .d     (hw2reg.ip[42].d),
+    // hardware-facing outputs are left unconnected
+    .qe    (),
+    .q     (),
+    // value returned on software reads
+    .qs    (ip_1_p_42_qs)
+  );
+
+
+  // F[p_43]: 11:11 -- register-interface write port is tied off below
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RO"),
+    .DW      (1)
+  ) u_ip_1_p_43 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    .wd     ('0),
+    .we     (1'b0),
+
+    // driven by internal hardware
+    .d      (hw2reg.ip[43].d),
+    .de     (hw2reg.ip[43].de),
+
+    // outputs toward internal hardware are left open
+    .q      (),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (ip_1_p_43_qs)
+  );
+
+  // Subregister 0 of Multireg le
+  // R[le_0]: V(False)
+
+  // F[le_0]: 0:0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_0 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_0_wd),
+    .we     (le_0_le_0_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[0].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_0_qs)
+  );
+
+
+  // F[le_1]: 1:1
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_1 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_1_wd),
+    .we     (le_0_le_1_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[1].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_1_qs)
+  );
+
+
+  // F[le_2]: 2:2
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_2 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_2_wd),
+    .we     (le_0_le_2_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[2].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_2_qs)
+  );
+
+
+  // F[le_3]: 3:3
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_3 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_3_wd),
+    .we     (le_0_le_3_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[3].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_3_qs)
+  );
+
+
+  // F[le_4]: 4:4
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_4 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_4_wd),
+    .we     (le_0_le_4_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[4].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_4_qs)
+  );
+
+
+  // F[le_5]: 5:5
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_5 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_5_wd),
+    .we     (le_0_le_5_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[5].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_5_qs)
+  );
+
+
+  // F[le_6]: 6:6
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_6 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_6_wd),
+    .we     (le_0_le_6_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[6].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_6_qs)
+  );
+
+
+  // F[le_7]: 7:7
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_7 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_7_wd),
+    .we     (le_0_le_7_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[7].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_7_qs)
+  );
+
+
+  // F[le_8]: 8:8
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_8 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_8_wd),
+    .we     (le_0_le_8_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[8].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_8_qs)
+  );
+
+
+  // F[le_9]: 9:9
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_9 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_9_wd),
+    .we     (le_0_le_9_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[9].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_9_qs)
+  );
+
+
+  // F[le_10]: 10:10
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_10 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_10_wd),
+    .we     (le_0_le_10_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[10].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_10_qs)
+  );
+
+
+  // F[le_11]: 11:11
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_11 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_11_wd),
+    .we     (le_0_le_11_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[11].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_11_qs)
+  );
+
+
+  // F[le_12]: 12:12
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_12 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_12_wd),
+    .we     (le_0_le_12_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[12].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_12_qs)
+  );
+
+
+  // F[le_13]: 13:13
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_13 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_13_wd),
+    .we     (le_0_le_13_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[13].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_13_qs)
+  );
+
+
+  // F[le_14]: 14:14
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_14 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_14_wd),
+    .we     (le_0_le_14_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[14].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_14_qs)
+  );
+
+
+  // F[le_15]: 15:15
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_15 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_15_wd),
+    .we     (le_0_le_15_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[15].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_15_qs)
+  );
+
+
+  // F[le_16]: 16:16
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_16 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_16_wd),
+    .we     (le_0_le_16_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[16].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_16_qs)
+  );
+
+
+  // F[le_17]: 17:17
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_17 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_17_wd),
+    .we     (le_0_le_17_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[17].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_17_qs)
+  );
+
+
+  // F[le_18]: 18:18
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_18 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_18_wd),
+    .we     (le_0_le_18_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[18].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_18_qs)
+  );
+
+
+  // F[le_19]: 19:19
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_19 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_19_wd),
+    .we     (le_0_le_19_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[19].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_19_qs)
+  );
+
+
+  // F[le_20]: 20:20
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_20 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_20_wd),
+    .we     (le_0_le_20_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[20].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_20_qs)
+  );
+
+
+  // F[le_21]: 21:21
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_21 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_21_wd),
+    .we     (le_0_le_21_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[21].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_21_qs)
+  );
+
+
+  // F[le_22]: 22:22
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_22 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_22_wd),
+    .we     (le_0_le_22_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[22].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_22_qs)
+  );
+
+
+  // F[le_23]: 23:23
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_23 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_23_wd),
+    .we     (le_0_le_23_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[23].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_23_qs)
+  );
+
+
+  // F[le_24]: 24:24
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_24 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_24_wd),
+    .we     (le_0_le_24_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[24].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_24_qs)
+  );
+
+
+  // F[le_25]: 25:25
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_25 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_25_wd),
+    .we     (le_0_le_25_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[25].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_25_qs)
+  );
+
+
+  // F[le_26]: 26:26
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_26 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_26_wd),
+    .we     (le_0_le_26_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[26].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_26_qs)
+  );
+
+
+  // F[le_27]: 27:27
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_27 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_27_wd),
+    .we     (le_0_le_27_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[27].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_27_qs)
+  );
+
+
+  // F[le_28]: 28:28
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_28 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_28_wd),
+    .we     (le_0_le_28_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[28].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_28_qs)
+  );
+
+
+  // F[le_29]: 29:29
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_29 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_29_wd),
+    .we     (le_0_le_29_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[29].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_29_qs)
+  );
+
+
+  // F[le_30]: 30:30
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_30 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_30_wd),
+    .we     (le_0_le_30_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[30].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_30_qs)
+  );
+
+
+  // F[le_31]: 31:31
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_0_le_31 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_0_le_31_wd),
+    .we     (le_0_le_31_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[31].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_0_le_31_qs)
+  );
+
+
+  // Subregister 32 of Multireg le
+  // R[le_1]: V(False)
+
+  // F[le_32]: 0:0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_32 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_32_wd),
+    .we     (le_1_le_32_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[32].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_32_qs)
+  );
+
+
+  // F[le_33]: 1:1
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_33 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_33_wd),
+    .we     (le_1_le_33_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[33].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_33_qs)
+  );
+
+
+  // F[le_34]: 2:2
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_34 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_34_wd),
+    .we     (le_1_le_34_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[34].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_34_qs)
+  );
+
+
+  // F[le_35]: 3:3
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_35 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_35_wd),
+    .we     (le_1_le_35_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[35].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_35_qs)
+  );
+
+
+  // F[le_36]: 4:4
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_36 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_36_wd),
+    .we     (le_1_le_36_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[36].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_36_qs)
+  );
+
+
+  // F[le_37]: 5:5
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_37 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_37_wd),
+    .we     (le_1_le_37_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[37].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_37_qs)
+  );
+
+
+  // F[le_38]: 6:6
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_38 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_38_wd),
+    .we     (le_1_le_38_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[38].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_38_qs)
+  );
+
+
+  // F[le_39]: 7:7
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_39 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_39_wd),
+    .we     (le_1_le_39_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[39].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_39_qs)
+  );
+
+
+  // F[le_40]: 8:8
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_40 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_40_wd),
+    .we     (le_1_le_40_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[40].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_40_qs)
+  );
+
+
+  // F[le_41]: 9:9
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_41 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_41_wd),
+    .we     (le_1_le_41_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[41].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_41_qs)
+  );
+
+
+  // F[le_42]: 10:10
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_42 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_42_wd),
+    .we     (le_1_le_42_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[42].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_42_qs)
+  );
+
+
+  // F[le_43]: 11:11
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_le_1_le_43 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (le_1_le_43_wd),
+    .we     (le_1_le_43_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // field value exported to internal hardware; qe is left open
+    .q      (reg2hw.le[43].q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (le_1_le_43_qs)
+  );
+
+  // R[prio0]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio0 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio0_wd),
+    .we     (prio0_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio0.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio0_qs)
+  );
+
+
+  // R[prio1]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio1 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio1_wd),
+    .we     (prio1_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio1.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio1_qs)
+  );
+
+
+  // R[prio2]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio2 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio2_wd),
+    .we     (prio2_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio2.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio2_qs)
+  );
+
+
+  // R[prio3]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio3 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio3_wd),
+    .we     (prio3_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio3.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio3_qs)
+  );
+
+
+  // R[prio4]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio4 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio4_wd),
+    .we     (prio4_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio4.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio4_qs)
+  );
+
+
+  // R[prio5]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio5 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio5_wd),
+    .we     (prio5_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio5.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio5_qs)
+  );
+
+
+  // R[prio6]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio6 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio6_wd),
+    .we     (prio6_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio6.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio6_qs)
+  );
+
+
+  // R[prio7]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio7 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio7_wd),
+    .we     (prio7_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio7.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio7_qs)
+  );
+
+
+  // R[prio8]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio8 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio8_wd),
+    .we     (prio8_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio8.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio8_qs)
+  );
+
+
+  // R[prio9]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio9 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio9_wd),
+    .we     (prio9_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio9.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio9_qs)
+  );
+
+
+  // R[prio10]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio10 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio10_wd),
+    .we     (prio10_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio10.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio10_qs)
+  );
+
+
+  // R[prio11]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio11 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio11_wd),
+    .we     (prio11_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio11.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio11_qs)
+  );
+
+
+  // R[prio12]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio12 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio12_wd),
+    .we     (prio12_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio12.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio12_qs)
+  );
+
+
+  // R[prio13]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio13 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio13_wd),
+    .we     (prio13_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio13.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio13_qs)
+  );
+
+
+  // R[prio14]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio14 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio14_wd),
+    .we     (prio14_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio14.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio14_qs)
+  );
+
+
+  // R[prio15]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio15 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio15_wd),
+    .we     (prio15_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio15.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio15_qs)
+  );
+
+
+  // R[prio16]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio16 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio16_wd),
+    .we     (prio16_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio16.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio16_qs)
+  );
+
+
+  // R[prio17]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio17 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio17_wd),
+    .we     (prio17_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio17.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio17_qs)
+  );
+
+
+  // R[prio18]: V(False)
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio18 (
+    .rst_ni  (rst_ni),
+    .clk_i   (clk_i),
+
+    // write data and write enable from the register interface
+    .wd     (prio18_wd),
+    .we     (prio18_we),
+
+    // hardware-side update port is tied off
+    .d      ('0),
+    .de     (1'b0),
+
+    // register value exported to internal hardware; qe is left open
+    .q      (reg2hw.prio18.q),
+    .qe     (),
+
+    // read-back value for the register interface
+    .qs     (prio18_qs)
+  );
+
+
+  // R[prio19]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio19 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio19_wd),
+    .we     (prio19_we),
+
+    // read-back output to the register interface
+    .qs     (prio19_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio19.q),
+    .qe     ()
+  );
+
+
+  // R[prio20]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio20 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio20_wd),
+    .we     (prio20_we),
+
+    // read-back output to the register interface
+    .qs     (prio20_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio20.q),
+    .qe     ()
+  );
+
+
+  // R[prio21]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio21 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio21_wd),
+    .we     (prio21_we),
+
+    // read-back output to the register interface
+    .qs     (prio21_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio21.q),
+    .qe     ()
+  );
+
+
+  // R[prio22]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio22 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio22_wd),
+    .we     (prio22_we),
+
+    // read-back output to the register interface
+    .qs     (prio22_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio22.q),
+    .qe     ()
+  );
+
+
+  // R[prio23]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio23 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio23_wd),
+    .we     (prio23_we),
+
+    // read-back output to the register interface
+    .qs     (prio23_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio23.q),
+    .qe     ()
+  );
+
+
+  // R[prio24]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio24 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio24_wd),
+    .we     (prio24_we),
+
+    // read-back output to the register interface
+    .qs     (prio24_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio24.q),
+    .qe     ()
+  );
+
+
+  // R[prio25]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio25 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio25_wd),
+    .we     (prio25_we),
+
+    // read-back output to the register interface
+    .qs     (prio25_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio25.q),
+    .qe     ()
+  );
+
+
+  // R[prio26]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio26 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio26_wd),
+    .we     (prio26_we),
+
+    // read-back output to the register interface
+    .qs     (prio26_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio26.q),
+    .qe     ()
+  );
+
+
+  // R[prio27]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio27 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio27_wd),
+    .we     (prio27_we),
+
+    // read-back output to the register interface
+    .qs     (prio27_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio27.q),
+    .qe     ()
+  );
+
+
+  // R[prio28]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio28 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio28_wd),
+    .we     (prio28_we),
+
+    // read-back output to the register interface
+    .qs     (prio28_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio28.q),
+    .qe     ()
+  );
+
+
+  // R[prio29]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio29 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio29_wd),
+    .we     (prio29_we),
+
+    // read-back output to the register interface
+    .qs     (prio29_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio29.q),
+    .qe     ()
+  );
+
+
+  // R[prio30]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio30 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio30_wd),
+    .we     (prio30_we),
+
+    // read-back output to the register interface
+    .qs     (prio30_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio30.q),
+    .qe     ()
+  );
+
+
+  // R[prio31]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio31 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio31_wd),
+    .we     (prio31_we),
+
+    // read-back output to the register interface
+    .qs     (prio31_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio31.q),
+    .qe     ()
+  );
+
+
+  // R[prio32]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio32 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio32_wd),
+    .we     (prio32_we),
+
+    // read-back output to the register interface
+    .qs     (prio32_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio32.q),
+    .qe     ()
+  );
+
+
+  // R[prio33]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio33 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio33_wd),
+    .we     (prio33_we),
+
+    // read-back output to the register interface
+    .qs     (prio33_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio33.q),
+    .qe     ()
+  );
+
+
+  // R[prio34]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio34 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio34_wd),
+    .we     (prio34_we),
+
+    // read-back output to the register interface
+    .qs     (prio34_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio34.q),
+    .qe     ()
+  );
+
+
+  // R[prio35]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio35 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio35_wd),
+    .we     (prio35_we),
+
+    // read-back output to the register interface
+    .qs     (prio35_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio35.q),
+    .qe     ()
+  );
+
+
+  // R[prio36]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio36 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio36_wd),
+    .we     (prio36_we),
+
+    // read-back output to the register interface
+    .qs     (prio36_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio36.q),
+    .qe     ()
+  );
+
+
+  // R[prio37]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio37 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio37_wd),
+    .we     (prio37_we),
+
+    // read-back output to the register interface
+    .qs     (prio37_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio37.q),
+    .qe     ()
+  );
+
+
+  // R[prio38]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio38 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio38_wd),
+    .we     (prio38_we),
+
+    // read-back output to the register interface
+    .qs     (prio38_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio38.q),
+    .qe     ()
+  );
+
+
+  // R[prio39]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio39 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio39_wd),
+    .we     (prio39_we),
+
+    // read-back output to the register interface
+    .qs     (prio39_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio39.q),
+    .qe     ()
+  );
+
+
+  // R[prio40]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio40 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio40_wd),
+    .we     (prio40_we),
+
+    // read-back output to the register interface
+    .qs     (prio40_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio40.q),
+    .qe     ()
+  );
+
+
+  // R[prio41]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio41 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio41_wd),
+    .we     (prio41_we),
+
+    // read-back output to the register interface
+    .qs     (prio41_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio41.q),
+    .qe     ()
+  );
+
+
+  // R[prio42]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio42 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio42_wd),
+    .we     (prio42_we),
+
+    // read-back output to the register interface
+    .qs     (prio42_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio42.q),
+    .qe     ()
+  );
+
+
+  // R[prio43]: V(False) -- DW=2, SWACCESS="RW", RESVAL=2'h0
+
+  prim_subreg #(
+    .RESVAL  (2'h0),
+    .SWACCESS("RW"),
+    .DW      (2)
+  ) u_prio43 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (prio43_wd),
+    .we     (prio43_we),
+
+    // read-back output to the register interface
+    .qs     (prio43_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.prio43.q),
+    .qe     ()
+  );
+
+  // Subregister 0 of Multireg ie0
+  // R[ie0_0]: V(False)
+
+  // F[e_0]: 0:0 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_0 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_0_wd),
+    .we     (ie0_0_e_0_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_0_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[0].q),
+    .qe     ()
+  );
+
+
+  // F[e_1]: 1:1 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_1 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_1_wd),
+    .we     (ie0_0_e_1_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_1_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[1].q),
+    .qe     ()
+  );
+
+
+  // F[e_2]: 2:2 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_2 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_2_wd),
+    .we     (ie0_0_e_2_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_2_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[2].q),
+    .qe     ()
+  );
+
+
+  // F[e_3]: 3:3 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_3 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_3_wd),
+    .we     (ie0_0_e_3_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_3_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[3].q),
+    .qe     ()
+  );
+
+
+  // F[e_4]: 4:4 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_4 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_4_wd),
+    .we     (ie0_0_e_4_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_4_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[4].q),
+    .qe     ()
+  );
+
+
+  // F[e_5]: 5:5 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_5 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_5_wd),
+    .we     (ie0_0_e_5_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_5_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[5].q),
+    .qe     ()
+  );
+
+
+  // F[e_6]: 6:6 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_6 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_6_wd),
+    .we     (ie0_0_e_6_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_6_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[6].q),
+    .qe     ()
+  );
+
+
+  // F[e_7]: 7:7 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_7 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_7_wd),
+    .we     (ie0_0_e_7_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_7_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[7].q),
+    .qe     ()
+  );
+
+
+  // F[e_8]: 8:8 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_8 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_8_wd),
+    .we     (ie0_0_e_8_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_8_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[8].q),
+    .qe     ()
+  );
+
+
+  // F[e_9]: 9:9 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_9 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_9_wd),
+    .we     (ie0_0_e_9_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_9_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[9].q),
+    .qe     ()
+  );
+
+
+  // F[e_10]: 10:10 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_10 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_10_wd),
+    .we     (ie0_0_e_10_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_10_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[10].q),
+    .qe     ()
+  );
+
+
+  // F[e_11]: 11:11 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_11 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_11_wd),
+    .we     (ie0_0_e_11_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_11_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[11].q),
+    .qe     ()
+  );
+
+
+  // F[e_12]: 12:12 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_12 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_12_wd),
+    .we     (ie0_0_e_12_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_12_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[12].q),
+    .qe     ()
+  );
+
+
+  // F[e_13]: 13:13 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_13 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_13_wd),
+    .we     (ie0_0_e_13_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_13_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[13].q),
+    .qe     ()
+  );
+
+
+  // F[e_14]: 14:14 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_14 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_14_wd),
+    .we     (ie0_0_e_14_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_14_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[14].q),
+    .qe     ()
+  );
+
+
+  // F[e_15]: 15:15 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_15 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_15_wd),
+    .we     (ie0_0_e_15_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_15_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[15].q),
+    .qe     ()
+  );
+
+
+  // F[e_16]: 16:16 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_16 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_16_wd),
+    .we     (ie0_0_e_16_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_16_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[16].q),
+    .qe     ()
+  );
+
+
+  // F[e_17]: 17:17 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_17 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_17_wd),
+    .we     (ie0_0_e_17_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_17_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[17].q),
+    .qe     ()
+  );
+
+
+  // F[e_18]: 18:18 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_18 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_18_wd),
+    .we     (ie0_0_e_18_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_18_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[18].q),
+    .qe     ()
+  );
+
+
+  // F[e_19]: 19:19 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_19 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_19_wd),
+    .we     (ie0_0_e_19_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_19_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[19].q),
+    .qe     ()
+  );
+
+
+  // F[e_20]: 20:20 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_20 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_20_wd),
+    .we     (ie0_0_e_20_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_20_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[20].q),
+    .qe     ()
+  );
+
+
+  // F[e_21]: 21:21 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_21 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_21_wd),
+    .we     (ie0_0_e_21_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_21_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[21].q),
+    .qe     ()
+  );
+
+
+  // F[e_22]: 22:22 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_22 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_22_wd),
+    .we     (ie0_0_e_22_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_22_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[22].q),
+    .qe     ()
+  );
+
+
+  // F[e_23]: 23:23 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_23 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_23_wd),
+    .we     (ie0_0_e_23_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_23_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[23].q),
+    .qe     ()
+  );
+
+
+  // F[e_24]: 24:24 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_24 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_24_wd),
+    .we     (ie0_0_e_24_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_24_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[24].q),
+    .qe     ()
+  );
+
+
+  // F[e_25]: 25:25 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_25 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_25_wd),
+    .we     (ie0_0_e_25_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_25_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[25].q),
+    .qe     ()
+  );
+
+
+  // F[e_26]: 26:26 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_26 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_26_wd),
+    .we     (ie0_0_e_26_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_26_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[26].q),
+    .qe     ()
+  );
+
+
+  // F[e_27]: 27:27 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_27 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_27_wd),
+    .we     (ie0_0_e_27_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_27_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[27].q),
+    .qe     ()
+  );
+
+
+  // F[e_28]: 28:28 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_28 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_28_wd),
+    .we     (ie0_0_e_28_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_28_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[28].q),
+    .qe     ()
+  );
+
+
+  // F[e_29]: 29:29 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_29 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_29_wd),
+    .we     (ie0_0_e_29_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_29_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[29].q),
+    .qe     ()
+  );
+
+
+  // F[e_30]: 30:30 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_30 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_30_wd),
+    .we     (ie0_0_e_30_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_30_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[30].q),
+    .qe     ()
+  );
+
+
+  // F[e_31]: 31:31 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_0_e_31 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_0_e_31_wd),
+    .we     (ie0_0_e_31_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_0_e_31_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[31].q),
+    .qe     ()
+  );
+
+
+  // Subregister 32 of Multireg ie0
+  // R[ie0_1]: V(False)
+
+  // F[e_32]: 0:0 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_1_e_32 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_1_e_32_wd),
+    .we     (ie0_1_e_32_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_1_e_32_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[32].q),
+    .qe     ()
+  );
+
+
+  // F[e_33]: 1:1 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_1_e_33 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_1_e_33_wd),
+    .we     (ie0_1_e_33_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_1_e_33_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[33].q),
+    .qe     ()
+  );
+
+
+  // F[e_34]: 2:2 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_1_e_34 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_1_e_34_wd),
+    .we     (ie0_1_e_34_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_1_e_34_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[34].q),
+    .qe     ()
+  );
+
+
+  // F[e_35]: 3:3 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_1_e_35 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_1_e_35_wd),
+    .we     (ie0_1_e_35_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_1_e_35_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[35].q),
+    .qe     ()
+  );
+
+
+  // F[e_36]: 4:4 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_1_e_36 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_1_e_36_wd),
+    .we     (ie0_1_e_36_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_1_e_36_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[36].q),
+    .qe     ()
+  );
+
+
+  // F[e_37]: 5:5 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_1_e_37 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_1_e_37_wd),
+    .we     (ie0_1_e_37_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_1_e_37_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[37].q),
+    .qe     ()
+  );
+
+
+  // F[e_38]: 6:6 -- DW=1, SWACCESS="RW", RESVAL=1'h0
+  prim_subreg #(
+    .RESVAL  (1'h0),
+    .SWACCESS("RW"),
+    .DW      (1)
+  ) u_ie0_1_e_38 (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // hardware update inputs: tied to constants
+    .d      ('0),
+    .de     (1'b0),
+
+    // write inputs from the register interface
+    .wd     (ie0_1_e_38_wd),
+    .we     (ie0_1_e_38_we),
+
+    // read-back output to the register interface
+    .qs     (ie0_1_e_38_qs),
+
+    // outputs to internal hardware (qe left unconnected)
+    .q      (reg2hw.ie0[38].q),
+    .qe     ()
+  );
+
+
+  // IE0_1 field e_39 (register bit 7): 1-bit SW read/write storage, RESVAL 0.
+  prim_subreg #(.DW(1), .SWACCESS("RW"), .RESVAL(1'h0)) u_ie0_1_e_39 (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // software write strobe and data from the register interface
+    .we     (ie0_1_e_39_we),
+    .wd     (ie0_1_e_39_wd),
+    // hardware-side update port is tied off
+    .de     (1'b0),
+    .d      ('0),
+    // value presented to hardware; qe is left unconnected
+    .qe     (),
+    .q      (reg2hw.ie0[39].q),
+    // value returned on software reads
+    .qs     (ie0_1_e_39_qs)
+  );
+
+
+  // IE0_1 field e_40 (register bit 8): 1-bit SW read/write storage, RESVAL 0.
+  prim_subreg #(.DW(1), .SWACCESS("RW"), .RESVAL(1'h0)) u_ie0_1_e_40 (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // software write strobe and data from the register interface
+    .we     (ie0_1_e_40_we),
+    .wd     (ie0_1_e_40_wd),
+    // hardware-side update port is tied off
+    .de     (1'b0),
+    .d      ('0),
+    // value presented to hardware; qe is left unconnected
+    .qe     (),
+    .q      (reg2hw.ie0[40].q),
+    // value returned on software reads
+    .qs     (ie0_1_e_40_qs)
+  );
+
+
+  // IE0_1 field e_41 (register bit 9): 1-bit SW read/write storage, RESVAL 0.
+  prim_subreg #(.DW(1), .SWACCESS("RW"), .RESVAL(1'h0)) u_ie0_1_e_41 (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // software write strobe and data from the register interface
+    .we     (ie0_1_e_41_we),
+    .wd     (ie0_1_e_41_wd),
+    // hardware-side update port is tied off
+    .de     (1'b0),
+    .d      ('0),
+    // value presented to hardware; qe is left unconnected
+    .qe     (),
+    .q      (reg2hw.ie0[41].q),
+    // value returned on software reads
+    .qs     (ie0_1_e_41_qs)
+  );
+
+
+  // IE0_1 field e_42 (register bit 10): 1-bit SW read/write storage, RESVAL 0.
+  prim_subreg #(.DW(1), .SWACCESS("RW"), .RESVAL(1'h0)) u_ie0_1_e_42 (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // software write strobe and data from the register interface
+    .we     (ie0_1_e_42_we),
+    .wd     (ie0_1_e_42_wd),
+    // hardware-side update port is tied off
+    .de     (1'b0),
+    .d      ('0),
+    // value presented to hardware; qe is left unconnected
+    .qe     (),
+    .q      (reg2hw.ie0[42].q),
+    // value returned on software reads
+    .qs     (ie0_1_e_42_qs)
+  );
+
+
+  // IE0_1 field e_43 (register bit 11): 1-bit SW read/write storage, RESVAL 0.
+  prim_subreg #(.DW(1), .SWACCESS("RW"), .RESVAL(1'h0)) u_ie0_1_e_43 (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // software write strobe and data from the register interface
+    .we     (ie0_1_e_43_we),
+    .wd     (ie0_1_e_43_wd),
+    // hardware-side update port is tied off
+    .de     (1'b0),
+    .d      ('0),
+    // value presented to hardware; qe is left unconnected
+    .qe     (),
+    .q      (reg2hw.ie0[43].q),
+    // value returned on software reads
+    .qs     (ie0_1_e_43_qs)
+  );
+
+  // THRESHOLD0 register: single 2-bit SW read/write field, RESVAL 0.
+  prim_subreg #(.DW(2), .SWACCESS("RW"), .RESVAL(2'h0)) u_threshold0 (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // software write strobe and data from the register interface
+    .we     (threshold0_we),
+    .wd     (threshold0_wd),
+    // hardware-side update port is tied off
+    .de     (1'b0),
+    .d      ('0),
+    // value presented to hardware; qe is left unconnected
+    .qe     (),
+    .q      (reg2hw.threshold0.q),
+    // value returned on software reads
+    .qs     (threshold0_qs)
+  );
+
+
+  // CC0 register: 8-bit field built from prim_subreg_ext. No clock or reset is
+  // connected here; the read/write strobes are forwarded to hardware through
+  // reg2hw.cc0 and the read value is sourced from hw2reg.cc0.d.
+  prim_subreg_ext #(.DW(8)) u_cc0 (
+    // software access from the register interface
+    .re  (cc0_re),
+    .we  (cc0_we),
+    .wd  (cc0_wd),
+    // data supplied by hardware
+    .d   (hw2reg.cc0.d),
+    // strobes and data handed to hardware
+    .qre (reg2hw.cc0.re),
+    .qe  (reg2hw.cc0.qe),
+    .q   (reg2hw.cc0.q),
+    // value returned on software reads
+    .qs  (cc0_qs)
+  );
+
+
+  // MSIP0 register: single 1-bit SW read/write field, RESVAL 0.
+  prim_subreg #(.DW(1), .SWACCESS("RW"), .RESVAL(1'h0)) u_msip0 (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    // software write strobe and data from the register interface
+    .we     (msip0_we),
+    .wd     (msip0_wd),
+    // hardware-side update port is tied off
+    .de     (1'b0),
+    .d      ('0),
+    // value presented to hardware; qe is left unconnected
+    .qe     (),
+    .q      (reg2hw.msip0.q),
+    // value returned on software reads
+    .qs     (msip0_qs)
+  );
+
+
+
+
+  // Per-register address match vector: bit i is the result of comparing
+  // reg_addr with the offset of register i. The concatenation is written
+  // MSB-first, so the first entry drives addr_hit[52] and the last addr_hit[0].
+  logic [52:0] addr_hit;
+  always_comb begin
+    addr_hit = {
+      (reg_addr == RV_PLIC_MSIP0_OFFSET),      // [52]
+      (reg_addr == RV_PLIC_CC0_OFFSET),        // [51]
+      (reg_addr == RV_PLIC_THRESHOLD0_OFFSET), // [50]
+      (reg_addr == RV_PLIC_IE0_1_OFFSET),      // [49]
+      (reg_addr == RV_PLIC_IE0_0_OFFSET),      // [48]
+      (reg_addr == RV_PLIC_PRIO43_OFFSET),     // [47]
+      (reg_addr == RV_PLIC_PRIO42_OFFSET),     // [46]
+      (reg_addr == RV_PLIC_PRIO41_OFFSET),     // [45]
+      (reg_addr == RV_PLIC_PRIO40_OFFSET),     // [44]
+      (reg_addr == RV_PLIC_PRIO39_OFFSET),     // [43]
+      (reg_addr == RV_PLIC_PRIO38_OFFSET),     // [42]
+      (reg_addr == RV_PLIC_PRIO37_OFFSET),     // [41]
+      (reg_addr == RV_PLIC_PRIO36_OFFSET),     // [40]
+      (reg_addr == RV_PLIC_PRIO35_OFFSET),     // [39]
+      (reg_addr == RV_PLIC_PRIO34_OFFSET),     // [38]
+      (reg_addr == RV_PLIC_PRIO33_OFFSET),     // [37]
+      (reg_addr == RV_PLIC_PRIO32_OFFSET),     // [36]
+      (reg_addr == RV_PLIC_PRIO31_OFFSET),     // [35]
+      (reg_addr == RV_PLIC_PRIO30_OFFSET),     // [34]
+      (reg_addr == RV_PLIC_PRIO29_OFFSET),     // [33]
+      (reg_addr == RV_PLIC_PRIO28_OFFSET),     // [32]
+      (reg_addr == RV_PLIC_PRIO27_OFFSET),     // [31]
+      (reg_addr == RV_PLIC_PRIO26_OFFSET),     // [30]
+      (reg_addr == RV_PLIC_PRIO25_OFFSET),     // [29]
+      (reg_addr == RV_PLIC_PRIO24_OFFSET),     // [28]
+      (reg_addr == RV_PLIC_PRIO23_OFFSET),     // [27]
+      (reg_addr == RV_PLIC_PRIO22_OFFSET),     // [26]
+      (reg_addr == RV_PLIC_PRIO21_OFFSET),     // [25]
+      (reg_addr == RV_PLIC_PRIO20_OFFSET),     // [24]
+      (reg_addr == RV_PLIC_PRIO19_OFFSET),     // [23]
+      (reg_addr == RV_PLIC_PRIO18_OFFSET),     // [22]
+      (reg_addr == RV_PLIC_PRIO17_OFFSET),     // [21]
+      (reg_addr == RV_PLIC_PRIO16_OFFSET),     // [20]
+      (reg_addr == RV_PLIC_PRIO15_OFFSET),     // [19]
+      (reg_addr == RV_PLIC_PRIO14_OFFSET),     // [18]
+      (reg_addr == RV_PLIC_PRIO13_OFFSET),     // [17]
+      (reg_addr == RV_PLIC_PRIO12_OFFSET),     // [16]
+      (reg_addr == RV_PLIC_PRIO11_OFFSET),     // [15]
+      (reg_addr == RV_PLIC_PRIO10_OFFSET),     // [14]
+      (reg_addr == RV_PLIC_PRIO9_OFFSET),      // [13]
+      (reg_addr == RV_PLIC_PRIO8_OFFSET),      // [12]
+      (reg_addr == RV_PLIC_PRIO7_OFFSET),      // [11]
+      (reg_addr == RV_PLIC_PRIO6_OFFSET),      // [10]
+      (reg_addr == RV_PLIC_PRIO5_OFFSET),      // [ 9]
+      (reg_addr == RV_PLIC_PRIO4_OFFSET),      // [ 8]
+      (reg_addr == RV_PLIC_PRIO3_OFFSET),      // [ 7]
+      (reg_addr == RV_PLIC_PRIO2_OFFSET),      // [ 6]
+      (reg_addr == RV_PLIC_PRIO1_OFFSET),      // [ 5]
+      (reg_addr == RV_PLIC_PRIO0_OFFSET),      // [ 4]
+      (reg_addr == RV_PLIC_LE_1_OFFSET),       // [ 3]
+      (reg_addr == RV_PLIC_LE_0_OFFSET),       // [ 2]
+      (reg_addr == RV_PLIC_IP_1_OFFSET),       // [ 1]
+      (reg_addr == RV_PLIC_IP_0_OFFSET)        // [ 0]
+    };
+  end
+
+  // Address miss: a read or write is in progress and no register offset matched.
+  assign addrmiss = (reg_re || reg_we) && !(|addr_hit);
+
+  // Sub-word write check: flag an error when a write hits register idx and the
+  // byte enables do not cover every bit set in RV_PLIC_PERMIT[idx].
+  always_comb begin
+    wr_err = 1'b0;
+    for (int unsigned idx = 0; idx < 53; idx++) begin
+      if (addr_hit[idx] && reg_we &&
+          (RV_PLIC_PERMIT[idx] != (RV_PLIC_PERMIT[idx] & reg_be))) begin
+        wr_err = 1'b1;
+      end
+    end
+  end
+
+
+
+
+
+
+  // LE_0 software write decode (fields le_0..le_31).
+  // Write strobes: every field shares the address-index-2 hit, qualified by
+  // reg_we and the absence of a sub-word write error.
+  assign le_0_le_0_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_1_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_2_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_3_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_4_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_5_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_6_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_7_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_8_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_9_we  = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_10_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_11_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_12_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_13_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_14_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_15_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_16_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_17_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_18_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_19_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_20_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_21_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_22_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_23_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_24_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_25_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_26_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_27_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_28_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_29_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_30_we = addr_hit[2] & reg_we & ~wr_err;
+  assign le_0_le_31_we = addr_hit[2] & reg_we & ~wr_err;
+
+  // Write data: field le_N takes bit N of the write word.
+  assign le_0_le_0_wd  = reg_wdata[0];
+  assign le_0_le_1_wd  = reg_wdata[1];
+  assign le_0_le_2_wd  = reg_wdata[2];
+  assign le_0_le_3_wd  = reg_wdata[3];
+  assign le_0_le_4_wd  = reg_wdata[4];
+  assign le_0_le_5_wd  = reg_wdata[5];
+  assign le_0_le_6_wd  = reg_wdata[6];
+  assign le_0_le_7_wd  = reg_wdata[7];
+  assign le_0_le_8_wd  = reg_wdata[8];
+  assign le_0_le_9_wd  = reg_wdata[9];
+  assign le_0_le_10_wd = reg_wdata[10];
+  assign le_0_le_11_wd = reg_wdata[11];
+  assign le_0_le_12_wd = reg_wdata[12];
+  assign le_0_le_13_wd = reg_wdata[13];
+  assign le_0_le_14_wd = reg_wdata[14];
+  assign le_0_le_15_wd = reg_wdata[15];
+  assign le_0_le_16_wd = reg_wdata[16];
+  assign le_0_le_17_wd = reg_wdata[17];
+  assign le_0_le_18_wd = reg_wdata[18];
+  assign le_0_le_19_wd = reg_wdata[19];
+  assign le_0_le_20_wd = reg_wdata[20];
+  assign le_0_le_21_wd = reg_wdata[21];
+  assign le_0_le_22_wd = reg_wdata[22];
+  assign le_0_le_23_wd = reg_wdata[23];
+  assign le_0_le_24_wd = reg_wdata[24];
+  assign le_0_le_25_wd = reg_wdata[25];
+  assign le_0_le_26_wd = reg_wdata[26];
+  assign le_0_le_27_wd = reg_wdata[27];
+  assign le_0_le_28_wd = reg_wdata[28];
+  assign le_0_le_29_wd = reg_wdata[29];
+  assign le_0_le_30_wd = reg_wdata[30];
+  assign le_0_le_31_wd = reg_wdata[31];
+
+  // LE_1 software write decode (fields le_32..le_43).
+  // Write strobes: every field shares the address-index-3 hit, qualified by
+  // reg_we and the absence of a sub-word write error.
+  assign le_1_le_32_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_33_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_34_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_35_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_36_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_37_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_38_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_39_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_40_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_41_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_42_we = addr_hit[3] & reg_we & ~wr_err;
+  assign le_1_le_43_we = addr_hit[3] & reg_we & ~wr_err;
+
+  // Write data: field le_(32+N) takes bit N of the write word.
+  assign le_1_le_32_wd = reg_wdata[0];
+  assign le_1_le_33_wd = reg_wdata[1];
+  assign le_1_le_34_wd = reg_wdata[2];
+  assign le_1_le_35_wd = reg_wdata[3];
+  assign le_1_le_36_wd = reg_wdata[4];
+  assign le_1_le_37_wd = reg_wdata[5];
+  assign le_1_le_38_wd = reg_wdata[6];
+  assign le_1_le_39_wd = reg_wdata[7];
+  assign le_1_le_40_wd = reg_wdata[8];
+  assign le_1_le_41_wd = reg_wdata[9];
+  assign le_1_le_42_wd = reg_wdata[10];
+  assign le_1_le_43_wd = reg_wdata[11];
+
+  // PRIO0..PRIO43 software write decode.
+  // Write strobes: PRIOn is selected by address index n + 4, qualified by
+  // reg_we and the absence of a sub-word write error.
+  assign prio0_we  = addr_hit[4]  & reg_we & ~wr_err;
+  assign prio1_we  = addr_hit[5]  & reg_we & ~wr_err;
+  assign prio2_we  = addr_hit[6]  & reg_we & ~wr_err;
+  assign prio3_we  = addr_hit[7]  & reg_we & ~wr_err;
+  assign prio4_we  = addr_hit[8]  & reg_we & ~wr_err;
+  assign prio5_we  = addr_hit[9]  & reg_we & ~wr_err;
+  assign prio6_we  = addr_hit[10] & reg_we & ~wr_err;
+  assign prio7_we  = addr_hit[11] & reg_we & ~wr_err;
+  assign prio8_we  = addr_hit[12] & reg_we & ~wr_err;
+  assign prio9_we  = addr_hit[13] & reg_we & ~wr_err;
+  assign prio10_we = addr_hit[14] & reg_we & ~wr_err;
+  assign prio11_we = addr_hit[15] & reg_we & ~wr_err;
+  assign prio12_we = addr_hit[16] & reg_we & ~wr_err;
+  assign prio13_we = addr_hit[17] & reg_we & ~wr_err;
+  assign prio14_we = addr_hit[18] & reg_we & ~wr_err;
+  assign prio15_we = addr_hit[19] & reg_we & ~wr_err;
+  assign prio16_we = addr_hit[20] & reg_we & ~wr_err;
+  assign prio17_we = addr_hit[21] & reg_we & ~wr_err;
+  assign prio18_we = addr_hit[22] & reg_we & ~wr_err;
+  assign prio19_we = addr_hit[23] & reg_we & ~wr_err;
+  assign prio20_we = addr_hit[24] & reg_we & ~wr_err;
+  assign prio21_we = addr_hit[25] & reg_we & ~wr_err;
+  assign prio22_we = addr_hit[26] & reg_we & ~wr_err;
+  assign prio23_we = addr_hit[27] & reg_we & ~wr_err;
+  assign prio24_we = addr_hit[28] & reg_we & ~wr_err;
+  assign prio25_we = addr_hit[29] & reg_we & ~wr_err;
+  assign prio26_we = addr_hit[30] & reg_we & ~wr_err;
+  assign prio27_we = addr_hit[31] & reg_we & ~wr_err;
+  assign prio28_we = addr_hit[32] & reg_we & ~wr_err;
+  assign prio29_we = addr_hit[33] & reg_we & ~wr_err;
+  assign prio30_we = addr_hit[34] & reg_we & ~wr_err;
+  assign prio31_we = addr_hit[35] & reg_we & ~wr_err;
+  assign prio32_we = addr_hit[36] & reg_we & ~wr_err;
+  assign prio33_we = addr_hit[37] & reg_we & ~wr_err;
+  assign prio34_we = addr_hit[38] & reg_we & ~wr_err;
+  assign prio35_we = addr_hit[39] & reg_we & ~wr_err;
+  assign prio36_we = addr_hit[40] & reg_we & ~wr_err;
+  assign prio37_we = addr_hit[41] & reg_we & ~wr_err;
+  assign prio38_we = addr_hit[42] & reg_we & ~wr_err;
+  assign prio39_we = addr_hit[43] & reg_we & ~wr_err;
+  assign prio40_we = addr_hit[44] & reg_we & ~wr_err;
+  assign prio41_we = addr_hit[45] & reg_we & ~wr_err;
+  assign prio42_we = addr_hit[46] & reg_we & ~wr_err;
+  assign prio43_we = addr_hit[47] & reg_we & ~wr_err;
+
+  // Write data: every PRIOn field takes the low two bits of the write word.
+  assign prio0_wd  = reg_wdata[1:0];
+  assign prio1_wd  = reg_wdata[1:0];
+  assign prio2_wd  = reg_wdata[1:0];
+  assign prio3_wd  = reg_wdata[1:0];
+  assign prio4_wd  = reg_wdata[1:0];
+  assign prio5_wd  = reg_wdata[1:0];
+  assign prio6_wd  = reg_wdata[1:0];
+  assign prio7_wd  = reg_wdata[1:0];
+  assign prio8_wd  = reg_wdata[1:0];
+  assign prio9_wd  = reg_wdata[1:0];
+  assign prio10_wd = reg_wdata[1:0];
+  assign prio11_wd = reg_wdata[1:0];
+  assign prio12_wd = reg_wdata[1:0];
+  assign prio13_wd = reg_wdata[1:0];
+  assign prio14_wd = reg_wdata[1:0];
+  assign prio15_wd = reg_wdata[1:0];
+  assign prio16_wd = reg_wdata[1:0];
+  assign prio17_wd = reg_wdata[1:0];
+  assign prio18_wd = reg_wdata[1:0];
+  assign prio19_wd = reg_wdata[1:0];
+  assign prio20_wd = reg_wdata[1:0];
+  assign prio21_wd = reg_wdata[1:0];
+  assign prio22_wd = reg_wdata[1:0];
+  assign prio23_wd = reg_wdata[1:0];
+  assign prio24_wd = reg_wdata[1:0];
+  assign prio25_wd = reg_wdata[1:0];
+  assign prio26_wd = reg_wdata[1:0];
+  assign prio27_wd = reg_wdata[1:0];
+  assign prio28_wd = reg_wdata[1:0];
+  assign prio29_wd = reg_wdata[1:0];
+  assign prio30_wd = reg_wdata[1:0];
+  assign prio31_wd = reg_wdata[1:0];
+  assign prio32_wd = reg_wdata[1:0];
+  assign prio33_wd = reg_wdata[1:0];
+  assign prio34_wd = reg_wdata[1:0];
+  assign prio35_wd = reg_wdata[1:0];
+  assign prio36_wd = reg_wdata[1:0];
+  assign prio37_wd = reg_wdata[1:0];
+  assign prio38_wd = reg_wdata[1:0];
+  assign prio39_wd = reg_wdata[1:0];
+  assign prio40_wd = reg_wdata[1:0];
+  assign prio41_wd = reg_wdata[1:0];
+  assign prio42_wd = reg_wdata[1:0];
+  assign prio43_wd = reg_wdata[1:0];
+
+  // IE0_0 software write decode (fields e_0..e_31).
+  // Write strobes: every field shares the address-index-48 hit, qualified by
+  // reg_we and the absence of a sub-word write error.
+  assign ie0_0_e_0_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_1_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_2_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_3_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_4_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_5_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_6_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_7_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_8_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_9_we  = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_10_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_11_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_12_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_13_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_14_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_15_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_16_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_17_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_18_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_19_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_20_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_21_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_22_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_23_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_24_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_25_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_26_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_27_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_28_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_29_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_30_we = addr_hit[48] & reg_we & ~wr_err;
+  assign ie0_0_e_31_we = addr_hit[48] & reg_we & ~wr_err;
+
+  // Write data: field e_N takes bit N of the write word.
+  assign ie0_0_e_0_wd  = reg_wdata[0];
+  assign ie0_0_e_1_wd  = reg_wdata[1];
+  assign ie0_0_e_2_wd  = reg_wdata[2];
+  assign ie0_0_e_3_wd  = reg_wdata[3];
+  assign ie0_0_e_4_wd  = reg_wdata[4];
+  assign ie0_0_e_5_wd  = reg_wdata[5];
+  assign ie0_0_e_6_wd  = reg_wdata[6];
+  assign ie0_0_e_7_wd  = reg_wdata[7];
+  assign ie0_0_e_8_wd  = reg_wdata[8];
+  assign ie0_0_e_9_wd  = reg_wdata[9];
+  assign ie0_0_e_10_wd = reg_wdata[10];
+  assign ie0_0_e_11_wd = reg_wdata[11];
+  assign ie0_0_e_12_wd = reg_wdata[12];
+  assign ie0_0_e_13_wd = reg_wdata[13];
+  assign ie0_0_e_14_wd = reg_wdata[14];
+  assign ie0_0_e_15_wd = reg_wdata[15];
+  assign ie0_0_e_16_wd = reg_wdata[16];
+  assign ie0_0_e_17_wd = reg_wdata[17];
+  assign ie0_0_e_18_wd = reg_wdata[18];
+  assign ie0_0_e_19_wd = reg_wdata[19];
+  assign ie0_0_e_20_wd = reg_wdata[20];
+  assign ie0_0_e_21_wd = reg_wdata[21];
+  assign ie0_0_e_22_wd = reg_wdata[22];
+  assign ie0_0_e_23_wd = reg_wdata[23];
+  assign ie0_0_e_24_wd = reg_wdata[24];
+  assign ie0_0_e_25_wd = reg_wdata[25];
+  assign ie0_0_e_26_wd = reg_wdata[26];
+  assign ie0_0_e_27_wd = reg_wdata[27];
+  assign ie0_0_e_28_wd = reg_wdata[28];
+  assign ie0_0_e_29_wd = reg_wdata[29];
+  assign ie0_0_e_30_wd = reg_wdata[30];
+  assign ie0_0_e_31_wd = reg_wdata[31];
+
+  // IE0_1 software write decode (fields e_32..e_43, register bits 0..11).
+  // All twelve fields belong to the same register, so each write strobe must be
+  // derived from addr_hit[49] (the RV_PLIC_IE0_1_OFFSET match), qualified by
+  // reg_we and the absence of a sub-word write error. Using any other addr_hit
+  // index (e.g. addr_hit[6], the PRIO2 match) would update the field on writes
+  // to a different register and ignore writes to IE0_1.
+  assign ie0_1_e_32_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_32_wd = reg_wdata[0];
+
+  assign ie0_1_e_33_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_33_wd = reg_wdata[1];
+
+  assign ie0_1_e_34_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_34_wd = reg_wdata[2];
+
+  assign ie0_1_e_35_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_35_wd = reg_wdata[3];
+
+  assign ie0_1_e_36_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_36_wd = reg_wdata[4];
+
+  assign ie0_1_e_37_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_37_wd = reg_wdata[5];
+
+  assign ie0_1_e_38_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_38_wd = reg_wdata[6];
+
+  assign ie0_1_e_39_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_39_wd = reg_wdata[7];
+
+  assign ie0_1_e_40_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_40_wd = reg_wdata[8];
+
+  assign ie0_1_e_41_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_41_wd = reg_wdata[9];
+
+  assign ie0_1_e_42_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_42_wd = reg_wdata[10];
+
+  assign ie0_1_e_43_we = addr_hit[49] & reg_we & ~wr_err;
+  assign ie0_1_e_43_wd = reg_wdata[11];
+
+
+  // Software access decode for the three single-field registers.
+  // Write strobes: address hit, qualified by reg_we and no sub-word write error.
+  assign threshold0_we = addr_hit[50] & reg_we & ~wr_err;
+  assign cc0_we        = addr_hit[51] & reg_we & ~wr_err;
+  assign msip0_we      = addr_hit[52] & reg_we & ~wr_err;
+
+  // CC0 additionally gets a read strobe whenever its address is read.
+  assign cc0_re        = addr_hit[51] && reg_re;
+
+  // Write data: low 2, 8 and 1 bits of the write word respectively.
+  assign threshold0_wd = reg_wdata[1:0];
+  assign cc0_wd        = reg_wdata[7:0];
+  assign msip0_wd      = reg_wdata[0];
+
+  // Read data return
+  always_comb begin
+    reg_rdata_next = '0;
+    unique case (1'b1)
+      addr_hit[0]: begin
+        reg_rdata_next[0] = ip_0_p_0_qs;
+        reg_rdata_next[1] = ip_0_p_1_qs;
+        reg_rdata_next[2] = ip_0_p_2_qs;
+        reg_rdata_next[3] = ip_0_p_3_qs;
+        reg_rdata_next[4] = ip_0_p_4_qs;
+        reg_rdata_next[5] = ip_0_p_5_qs;
+        reg_rdata_next[6] = ip_0_p_6_qs;
+        reg_rdata_next[7] = ip_0_p_7_qs;
+        reg_rdata_next[8] = ip_0_p_8_qs;
+        reg_rdata_next[9] = ip_0_p_9_qs;
+        reg_rdata_next[10] = ip_0_p_10_qs;
+        reg_rdata_next[11] = ip_0_p_11_qs;
+        reg_rdata_next[12] = ip_0_p_12_qs;
+        reg_rdata_next[13] = ip_0_p_13_qs;
+        reg_rdata_next[14] = ip_0_p_14_qs;
+        reg_rdata_next[15] = ip_0_p_15_qs;
+        reg_rdata_next[16] = ip_0_p_16_qs;
+        reg_rdata_next[17] = ip_0_p_17_qs;
+        reg_rdata_next[18] = ip_0_p_18_qs;
+        reg_rdata_next[19] = ip_0_p_19_qs;
+        reg_rdata_next[20] = ip_0_p_20_qs;
+        reg_rdata_next[21] = ip_0_p_21_qs;
+        reg_rdata_next[22] = ip_0_p_22_qs;
+        reg_rdata_next[23] = ip_0_p_23_qs;
+        reg_rdata_next[24] = ip_0_p_24_qs;
+        reg_rdata_next[25] = ip_0_p_25_qs;
+        reg_rdata_next[26] = ip_0_p_26_qs;
+        reg_rdata_next[27] = ip_0_p_27_qs;
+        reg_rdata_next[28] = ip_0_p_28_qs;
+        reg_rdata_next[29] = ip_0_p_29_qs;
+        reg_rdata_next[30] = ip_0_p_30_qs;
+        reg_rdata_next[31] = ip_0_p_31_qs;
+      end
+
+      addr_hit[1]: begin
+        reg_rdata_next[0] = ip_1_p_32_qs;
+        reg_rdata_next[1] = ip_1_p_33_qs;
+        reg_rdata_next[2] = ip_1_p_34_qs;
+        reg_rdata_next[3] = ip_1_p_35_qs;
+        reg_rdata_next[4] = ip_1_p_36_qs;
+        reg_rdata_next[5] = ip_1_p_37_qs;
+        reg_rdata_next[6] = ip_1_p_38_qs;
+        reg_rdata_next[7] = ip_1_p_39_qs;
+        reg_rdata_next[8] = ip_1_p_40_qs;
+        reg_rdata_next[9] = ip_1_p_41_qs;
+        reg_rdata_next[10] = ip_1_p_42_qs;
+        reg_rdata_next[11] = ip_1_p_43_qs;
+      end
+      addr_hit[2]: begin
+        reg_rdata_next[0] = le_0_le_0_qs;
+        reg_rdata_next[1] = le_0_le_1_qs;
+        reg_rdata_next[2] = le_0_le_2_qs;
+        reg_rdata_next[3] = le_0_le_3_qs;
+        reg_rdata_next[4] = le_0_le_4_qs;
+        reg_rdata_next[5] = le_0_le_5_qs;
+        reg_rdata_next[6] = le_0_le_6_qs;
+        reg_rdata_next[7] = le_0_le_7_qs;
+        reg_rdata_next[8] = le_0_le_8_qs;
+        reg_rdata_next[9] = le_0_le_9_qs;
+        reg_rdata_next[10] = le_0_le_10_qs;
+        reg_rdata_next[11] = le_0_le_11_qs;
+        reg_rdata_next[12] = le_0_le_12_qs;
+        reg_rdata_next[13] = le_0_le_13_qs;
+        reg_rdata_next[14] = le_0_le_14_qs;
+        reg_rdata_next[15] = le_0_le_15_qs;
+        reg_rdata_next[16] = le_0_le_16_qs;
+        reg_rdata_next[17] = le_0_le_17_qs;
+        reg_rdata_next[18] = le_0_le_18_qs;
+        reg_rdata_next[19] = le_0_le_19_qs;
+        reg_rdata_next[20] = le_0_le_20_qs;
+        reg_rdata_next[21] = le_0_le_21_qs;
+        reg_rdata_next[22] = le_0_le_22_qs;
+        reg_rdata_next[23] = le_0_le_23_qs;
+        reg_rdata_next[24] = le_0_le_24_qs;
+        reg_rdata_next[25] = le_0_le_25_qs;
+        reg_rdata_next[26] = le_0_le_26_qs;
+        reg_rdata_next[27] = le_0_le_27_qs;
+        reg_rdata_next[28] = le_0_le_28_qs;
+        reg_rdata_next[29] = le_0_le_29_qs;
+        reg_rdata_next[30] = le_0_le_30_qs;
+        reg_rdata_next[31] = le_0_le_31_qs;
+      end
+
+      addr_hit[3]: begin
+        reg_rdata_next[0] = le_1_le_32_qs;
+        reg_rdata_next[1] = le_1_le_33_qs;
+        reg_rdata_next[2] = le_1_le_34_qs;
+        reg_rdata_next[3] = le_1_le_35_qs;
+        reg_rdata_next[4] = le_1_le_36_qs;
+        reg_rdata_next[5] = le_1_le_37_qs;
+        reg_rdata_next[6] = le_1_le_38_qs;
+        reg_rdata_next[7] = le_1_le_39_qs;
+        reg_rdata_next[8] = le_1_le_40_qs;
+        reg_rdata_next[9] = le_1_le_41_qs;
+        reg_rdata_next[10] = le_1_le_42_qs;
+        reg_rdata_next[11] = le_1_le_43_qs;
+      end
+      addr_hit[4]: begin
+        reg_rdata_next[1:0] = prio0_qs;
+      end
+
+      addr_hit[5]: begin
+        reg_rdata_next[1:0] = prio1_qs;
+      end
+
+      addr_hit[6]: begin
+        reg_rdata_next[1:0] = prio2_qs;
+      end
+
+      addr_hit[7]: begin
+        reg_rdata_next[1:0] = prio3_qs;
+      end
+
+      addr_hit[8]: begin
+        reg_rdata_next[1:0] = prio4_qs;
+      end
+
+      addr_hit[9]: begin
+        reg_rdata_next[1:0] = prio5_qs;
+      end
+
+      addr_hit[10]: begin
+        reg_rdata_next[1:0] = prio6_qs;
+      end
+
+      addr_hit[11]: begin
+        reg_rdata_next[1:0] = prio7_qs;
+      end
+
+      addr_hit[12]: begin
+        reg_rdata_next[1:0] = prio8_qs;
+      end
+
+      addr_hit[13]: begin
+        reg_rdata_next[1:0] = prio9_qs;
+      end
+
+      addr_hit[14]: begin
+        reg_rdata_next[1:0] = prio10_qs;
+      end
+
+      addr_hit[15]: begin
+        reg_rdata_next[1:0] = prio11_qs;
+      end
+
+      addr_hit[16]: begin
+        reg_rdata_next[1:0] = prio12_qs;
+      end
+
+      addr_hit[17]: begin
+        reg_rdata_next[1:0] = prio13_qs;
+      end
+
+      addr_hit[18]: begin
+        reg_rdata_next[1:0] = prio14_qs;
+      end
+
+      addr_hit[19]: begin
+        reg_rdata_next[1:0] = prio15_qs;
+      end
+
+      addr_hit[20]: begin
+        reg_rdata_next[1:0] = prio16_qs;
+      end
+
+      addr_hit[21]: begin
+        reg_rdata_next[1:0] = prio17_qs;
+      end
+
+      addr_hit[22]: begin
+        reg_rdata_next[1:0] = prio18_qs;
+      end
+
+      addr_hit[23]: begin
+        reg_rdata_next[1:0] = prio19_qs;
+      end
+
+      addr_hit[24]: begin
+        reg_rdata_next[1:0] = prio20_qs;
+      end
+
+      addr_hit[25]: begin
+        reg_rdata_next[1:0] = prio21_qs;
+      end
+
+      addr_hit[26]: begin
+        reg_rdata_next[1:0] = prio22_qs;
+      end
+
+      addr_hit[27]: begin
+        reg_rdata_next[1:0] = prio23_qs;
+      end
+
+      addr_hit[28]: begin
+        reg_rdata_next[1:0] = prio24_qs;
+      end
+
+      addr_hit[29]: begin
+        reg_rdata_next[1:0] = prio25_qs;
+      end
+
+      addr_hit[30]: begin
+        reg_rdata_next[1:0] = prio26_qs;
+      end
+
+      addr_hit[31]: begin
+        reg_rdata_next[1:0] = prio27_qs;
+      end
+
+      addr_hit[32]: begin
+        reg_rdata_next[1:0] = prio28_qs;
+      end
+
+      addr_hit[33]: begin
+        reg_rdata_next[1:0] = prio29_qs;
+      end
+
+      addr_hit[34]: begin
+        reg_rdata_next[1:0] = prio30_qs;
+      end
+
+      addr_hit[35]: begin
+        reg_rdata_next[1:0] = prio31_qs;
+      end
+
+      addr_hit[36]: begin
+        reg_rdata_next[1:0] = prio32_qs;
+      end
+
+      addr_hit[37]: begin
+        reg_rdata_next[1:0] = prio33_qs;
+      end
+
+      addr_hit[38]: begin
+        reg_rdata_next[1:0] = prio34_qs;
+      end
+
+      addr_hit[39]: begin
+        reg_rdata_next[1:0] = prio35_qs;
+      end
+
+      addr_hit[40]: begin
+        reg_rdata_next[1:0] = prio36_qs;
+      end
+
+      addr_hit[41]: begin
+        reg_rdata_next[1:0] = prio37_qs;
+      end
+
+      addr_hit[42]: begin
+        reg_rdata_next[1:0] = prio38_qs;
+      end
+
+      addr_hit[43]: begin
+        reg_rdata_next[1:0] = prio39_qs;
+      end
+
+      addr_hit[44]: begin
+        reg_rdata_next[1:0] = prio40_qs;
+      end
+
+      addr_hit[45]: begin
+        reg_rdata_next[1:0] = prio41_qs;
+      end
+
+      addr_hit[46]: begin
+        reg_rdata_next[1:0] = prio42_qs;
+      end
+
+      addr_hit[47]: begin
+        reg_rdata_next[1:0] = prio43_qs;
+      end
+
+      addr_hit[48]: begin
+        reg_rdata_next[0] = ie0_0_e_0_qs;
+        reg_rdata_next[1] = ie0_0_e_1_qs;
+        reg_rdata_next[2] = ie0_0_e_2_qs;
+        reg_rdata_next[3] = ie0_0_e_3_qs;
+        reg_rdata_next[4] = ie0_0_e_4_qs;
+        reg_rdata_next[5] = ie0_0_e_5_qs;
+        reg_rdata_next[6] = ie0_0_e_6_qs;
+        reg_rdata_next[7] = ie0_0_e_7_qs;
+        reg_rdata_next[8] = ie0_0_e_8_qs;
+        reg_rdata_next[9] = ie0_0_e_9_qs;
+        reg_rdata_next[10] = ie0_0_e_10_qs;
+        reg_rdata_next[11] = ie0_0_e_11_qs;
+        reg_rdata_next[12] = ie0_0_e_12_qs;
+        reg_rdata_next[13] = ie0_0_e_13_qs;
+        reg_rdata_next[14] = ie0_0_e_14_qs;
+        reg_rdata_next[15] = ie0_0_e_15_qs;
+        reg_rdata_next[16] = ie0_0_e_16_qs;
+        reg_rdata_next[17] = ie0_0_e_17_qs;
+        reg_rdata_next[18] = ie0_0_e_18_qs;
+        reg_rdata_next[19] = ie0_0_e_19_qs;
+        reg_rdata_next[20] = ie0_0_e_20_qs;
+        reg_rdata_next[21] = ie0_0_e_21_qs;
+        reg_rdata_next[22] = ie0_0_e_22_qs;
+        reg_rdata_next[23] = ie0_0_e_23_qs;
+        reg_rdata_next[24] = ie0_0_e_24_qs;
+        reg_rdata_next[25] = ie0_0_e_25_qs;
+        reg_rdata_next[26] = ie0_0_e_26_qs;
+        reg_rdata_next[27] = ie0_0_e_27_qs;
+        reg_rdata_next[28] = ie0_0_e_28_qs;
+        reg_rdata_next[29] = ie0_0_e_29_qs;
+        reg_rdata_next[30] = ie0_0_e_30_qs;
+        reg_rdata_next[31] = ie0_0_e_31_qs;
+      end
+
+      addr_hit[49]: begin
+        reg_rdata_next[0] = ie0_1_e_32_qs;
+        reg_rdata_next[1] = ie0_1_e_33_qs;
+        reg_rdata_next[2] = ie0_1_e_34_qs;
+        reg_rdata_next[3] = ie0_1_e_35_qs;
+        reg_rdata_next[4] = ie0_1_e_36_qs;
+        reg_rdata_next[5] = ie0_1_e_37_qs;
+        reg_rdata_next[6] = ie0_1_e_38_qs;
+        reg_rdata_next[7] = ie0_1_e_39_qs;
+        reg_rdata_next[8] = ie0_1_e_40_qs;
+        reg_rdata_next[9] = ie0_1_e_41_qs;
+        reg_rdata_next[10] = ie0_1_e_42_qs;
+        reg_rdata_next[11] = ie0_1_e_43_qs;
+      end
+
+     
+      addr_hit[50]: begin
+        reg_rdata_next[1:0] = threshold0_qs;
+      end
+
+      addr_hit[51]: begin
+        reg_rdata_next[7:0] = cc0_qs;
+      end
+
+      addr_hit[52]: begin
+        reg_rdata_next[0] = msip0_qs;
+      end
+
+      default: begin
+        reg_rdata_next = '1;
+      end
+    endcase
+  end
+
+endmodule
diff --git a/verilog/rtl/rv_plic_target.sv b/verilog/rtl/rv_plic_target.sv
new file mode 100644
index 0000000..045c202
--- /dev/null
+++ b/verilog/rtl/rv_plic_target.sv
@@ -0,0 +1,128 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// RISC-V Platform-Level Interrupt Generator for Target
+//
+// This module gates IP with IE and arbitrates the result by priority and threshold_i.
+// Keep in mind that increasing MAX_PRIO affects logic size a lot.
+//
+// The module implements a binary tree to find the maximal entry. The solution
+// has O(N) area and O(log(N)) delay complexity, and thus scales well with
+// many input sources.
+//
+
+
+module rv_plic_target #(
+  parameter int N_SOURCE = 32,  // number of interrupt sources (width of ip_i / ie_i)
+  parameter int MAX_PRIO = 7,   // largest priority value; determines PrioWidth
+
+  // Local params (do not override these through parameter assignment)
+  localparam int SrcWidth  = $clog2(N_SOURCE+1),  // derived parameter
+  localparam int PrioWidth = $clog2(MAX_PRIO+1)   // derived parameter
+) (
+  input clk_i,
+  input rst_ni,  // asynchronous, active-low reset of the output registers
+
+  input [N_SOURCE-1:0]  ip_i,  // per-source pending bits
+  input [N_SOURCE-1:0]  ie_i,  // per-source enable bits; a source competes only if ip & ie
+
+  input [PrioWidth-1:0] prio_i [N_SOURCE],  // priority of each source
+  input [PrioWidth-1:0] threshold_i,        // winner must be strictly above this for irq_o
+
+  output logic            irq_o,            // registered interrupt request
+  output logic [SrcWidth-1:0] irq_id_o      // registered index of the winning source, 0 if none
+);
+
+  // this only works with 2 or more sources
+  // NOTE(review): no elaboration-time check enforces this here; the instantiator must.
+
+  // align to powers of 2 for simplicity
+  // a full binary tree with N levels has 2**N + 2**N-1 nodes
+  localparam int NumLevels = $clog2(N_SOURCE);
+  logic [2**(NumLevels+1)-2:0]            is_tree;   // node carries a pending & enabled source
+  logic [2**(NumLevels+1)-2:0][SrcWidth-1:0]  id_tree;   // source index forwarded by the node
+  logic [2**(NumLevels+1)-2:0][PrioWidth-1:0] max_tree;  // priority forwarded by the node
+
+  for (genvar level = 0; level < NumLevels+1; level++) begin : gen_tree
+    //
+    // level+1   C0   C1   <- "Base1" points to the first node on "level+1",
+    //            \  /         these nodes are the children of the nodes one level below
+    // level       Pa      <- "Base0", points to the first node on "level",
+    //                         these nodes are the parents of the nodes one level above
+    //
+    // hence we have the following indices for the Pa, C0, C1 nodes:
+    // Pa = 2**level     - 1 + offset       = Base0 + offset
+    // C0 = 2**(level+1) - 1 + 2*offset     = Base1 + 2*offset
+    // C1 = 2**(level+1) - 1 + 2*offset + 1 = Base1 + 2*offset + 1
+    //
+    localparam int Base0 = (2**level)-1;
+    localparam int Base1 = (2**(level+1))-1;
+
+    for (genvar offset = 0; offset < 2**level; offset++) begin : gen_level
+      localparam int Pa = Base0 + offset;
+      localparam int C0 = Base1 + 2*offset;
+      localparam int C1 = Base1 + 2*offset + 1;
+
+      // this assigns the gated interrupt source signals, their
+      // corresponding IDs and priorities to the tree leaves
+      if (level == NumLevels) begin : gen_leafs
+        if (offset < N_SOURCE) begin : gen_assign
+          assign is_tree[Pa]  = ip_i[offset] & ie_i[offset];
+          assign id_tree[Pa]  = offset;
+          assign max_tree[Pa] = prio_i[offset];
+        end else begin : gen_tie_off
+          assign is_tree[Pa]  = '0;
+          assign id_tree[Pa]  = '0;
+          assign max_tree[Pa] = '0;
+        end
+      // this creates the node assignments
+      end else begin : gen_nodes
+        // NOTE: the code below has been written in this way in order to work
+        // around a synthesis issue in Vivado 2018.3 and 2019.2 where the whole
+        // module would be optimized away if these assign statements contained
+        // ternary statements to implement the muxes.
+        //
+        // TODO: rewrite these lines with ternary statements once the problem
+        // has been fixed in the tool.
+        //
+        // See also originating issue:
+        // https://github.com/lowRISC/opentitan/issues/1355
+        // Xilinx issue:
+        // https://forums.xilinx.com/t5/Synthesis/Simulation-Synthesis-Mismatch-with-Vivado-2018-3/m-p/1065923#M33849
+
+        logic sel; // local helper variable
+        // in case only one of the children has a pending irq, forward that one
+        // in case both irqs are pending, forward the one with higher priority (C0 wins ties)
+        assign sel = (~is_tree[C0] & is_tree[C1]) |
+                     (is_tree[C0] & is_tree[C1] & logic'(max_tree[C1] > max_tree[C0]));
+        // forwarding muxes
+        assign is_tree[Pa]  = (sel              & is_tree[C1])  | ((~sel)            & is_tree[C0]);
+        assign id_tree[Pa]  = ({SrcWidth{sel}}  & id_tree[C1])  | ({SrcWidth{~sel}}  & id_tree[C0]);
+        assign max_tree[Pa] = ({PrioWidth{sel}} & max_tree[C1]) | ({PrioWidth{~sel}} & max_tree[C0]);
+      end
+    end : gen_level
+  end : gen_tree
+
+  logic irq_d, irq_q;
+  logic [SrcWidth-1:0] irq_id_d, irq_id_q;
+
+  // the results can be found at the tree root (node 0); the threshold only gates irq, not the ID
+  assign irq_d    = (max_tree[0] > threshold_i) ? is_tree[0] : 1'b0;
+  assign irq_id_d = (is_tree[0]) ? id_tree[0] : '0;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : gen_regs
+    if (!rst_ni) begin
+      irq_q    <= 1'b0;
+      irq_id_q <= '0;
+    end else begin
+      irq_q    <= irq_d;
+      irq_id_q <= irq_id_d;
+    end
+  end
+
+  assign irq_o    = irq_q;
+  assign irq_id_o = irq_id_q;
+
+endmodule
+
diff --git a/verilog/rtl/rv_timer.sv b/verilog/rtl/rv_timer.sv
new file mode 100644
index 0000000..d88c28e
--- /dev/null
+++ b/verilog/rtl/rv_timer.sv
@@ -0,0 +1,129 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+
+
+module rv_timer (
+  input clk_i,
+  input rst_ni,
+
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+
+  output logic intr_timer_expired_0_0_o
+);
+  // Timer block: rv_timer_reg_top CSRs plus one timer_core / prim_intr_hw pair per hart.
+  localparam int N_HARTS  = 1;
+  localparam int N_TIMERS = 1;
+
+  import rv_timer_reg_pkg::*;
+
+  rv_timer_reg2hw_t reg2hw;
+  rv_timer_hw2reg_t hw2reg;
+
+  logic [N_HARTS-1:0] active;
+
+  logic [11:0] prescaler [N_HARTS];
+  logic [7:0]  step      [N_HARTS];
+
+  logic [N_HARTS-1:0] tick;
+
+  logic [63:0] mtime_d  [N_HARTS];
+  logic [63:0] mtime    [N_HARTS];
+  logic [63:0] mtimecmp [N_HARTS][N_TIMERS]; // Only [harts][0] is connected to mtimecmp CSRs
+  logic        mtimecmp_update [N_HARTS][N_TIMERS];
+
+  logic [N_HARTS*N_TIMERS-1:0] intr_timer_set;
+  logic [N_HARTS*N_TIMERS-1:0] intr_timer_en;
+  logic [N_HARTS*N_TIMERS-1:0] intr_timer_test_q;
+  logic [N_HARTS-1:0]          intr_timer_test_qe;
+  logic [N_HARTS*N_TIMERS-1:0] intr_timer_state_q;
+  logic [N_HARTS-1:0]          intr_timer_state_de;
+  logic [N_HARTS*N_TIMERS-1:0] intr_timer_state_d;
+
+  logic [N_HARTS*N_TIMERS-1:0] intr_out;
+
+  /////////////////////////////////////////////////
+  // Connecting register interface to the signal //
+  /////////////////////////////////////////////////
+
+  // Once reggen supports nested multireg, the following can be automated. For the moment, it must
+  // be connected manually.
+  assign active[0]  = reg2hw.ctrl[0].q;
+  assign prescaler = '{reg2hw.cfg0.prescale.q};
+  assign step      = '{reg2hw.cfg0.step.q};
+
+  assign hw2reg.timer_v_upper0.de = tick[0];
+  assign hw2reg.timer_v_lower0.de = tick[0];
+  assign hw2reg.timer_v_upper0.d = mtime_d[0][63:32];
+  assign hw2reg.timer_v_lower0.d = mtime_d[0][31: 0];
+  assign mtime[0] = {reg2hw.timer_v_upper0.q, reg2hw.timer_v_lower0.q};
+  assign mtimecmp = '{'{{reg2hw.compare_upper0_0.q,reg2hw.compare_lower0_0.q}}};
+  assign mtimecmp_update[0][0] = reg2hw.compare_upper0_0.qe | reg2hw.compare_lower0_0.qe;
+
+  assign intr_timer_expired_0_0_o = intr_out[0];
+  assign intr_timer_en            = reg2hw.intr_enable0[0].q;
+  assign intr_timer_state_q       = reg2hw.intr_state0[0].q;
+  assign intr_timer_test_q        = reg2hw.intr_test0[0].q;
+  assign intr_timer_test_qe       = reg2hw.intr_test0[0].qe;
+  // A qe on either compare CSR forces an intr_state update with d masked to 0.
+  assign hw2reg.intr_state0[0].de = intr_timer_state_de | mtimecmp_update[0][0];
+  assign hw2reg.intr_state0[0].d  = intr_timer_state_d & ~mtimecmp_update[0][0];
+
+  for (genvar h = 0 ; h < N_HARTS ; h++) begin : gen_harts
+    // Every per-hart vector is sliced by h so that each iteration only touches its own bits.
+    prim_intr_hw #(
+      .Width(N_TIMERS)
+    ) u_intr_hw (
+      .clk_i,
+      .rst_ni,
+      .event_intr_i           (intr_timer_set[h*N_TIMERS+:N_TIMERS]),
+
+      .reg2hw_intr_enable_q_i (intr_timer_en[h*N_TIMERS+:N_TIMERS]),
+      .reg2hw_intr_test_q_i   (intr_timer_test_q[h*N_TIMERS+:N_TIMERS]),
+      .reg2hw_intr_test_qe_i  (intr_timer_test_qe[h]),
+      .reg2hw_intr_state_q_i  (intr_timer_state_q[h*N_TIMERS+:N_TIMERS]),
+      .hw2reg_intr_state_de_o (intr_timer_state_de[h]),
+      .hw2reg_intr_state_d_o  (intr_timer_state_d[h*N_TIMERS+:N_TIMERS]),
+
+      .intr_o                 (intr_out[h*N_TIMERS+:N_TIMERS])
+    );
+
+    timer_core #(
+      .N (N_TIMERS)
+    ) u_core (
+      .clk_i,
+      .rst_ni,
+
+      .active    (active[h]),
+      .prescaler (prescaler[h]),
+      .step      (step[h]),
+
+      .tick      (tick[h]),
+
+      .mtime_d   (mtime_d[h]),
+      .mtime     (mtime[h]),
+      .mtimecmp  (mtimecmp[h]),
+
+      .intr      (intr_timer_set[h*N_TIMERS+:N_TIMERS])
+    );
+  end : gen_harts
+
+  // Register module
+  rv_timer_reg_top u_reg (
+    .clk_i,
+    .rst_ni,
+
+    .tl_i,
+    .tl_o,
+
+    .reg2hw,
+    .hw2reg,
+
+    .devmode_i  (1'b1)  // always return an error for accesses to unmapped registers
+  );
+
+endmodule
diff --git a/verilog/rtl/rv_timer_reg_pkg.sv b/verilog/rtl/rv_timer_reg_pkg.sv
new file mode 100644
index 0000000..30cfb44
--- /dev/null
+++ b/verilog/rtl/rv_timer_reg_pkg.sv
@@ -0,0 +1,142 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Package auto-generated by `reggen` containing data structure
+
+package rv_timer_reg_pkg;
+
+  // Param list
+  parameter int N_HARTS = 1;
+  parameter int N_TIMERS = 1;
+
+  // Address width within the block
+  parameter int BlockAw = 9;
+
+  ////////////////////////////
+  // Typedefs for registers //
+  ////////////////////////////
+  typedef struct packed {
+    logic        q;
+  } rv_timer_reg2hw_ctrl_mreg_t;
+
+  typedef struct packed {
+    struct packed {
+      logic [11:0] q;
+    } prescale;
+    struct packed {
+      logic [7:0]  q;
+    } step;
+  } rv_timer_reg2hw_cfg0_reg_t;
+
+  typedef struct packed {
+    logic [31:0] q;
+  } rv_timer_reg2hw_timer_v_lower0_reg_t;
+
+  typedef struct packed {
+    logic [31:0] q;
+  } rv_timer_reg2hw_timer_v_upper0_reg_t;
+
+  typedef struct packed {
+    logic [31:0] q;
+    logic        qe;  // NOTE(review): presumably a write qualifier from prim_subreg - confirm
+  } rv_timer_reg2hw_compare_lower0_0_reg_t;
+
+  typedef struct packed {
+    logic [31:0] q;
+    logic        qe;  // NOTE(review): presumably a write qualifier from prim_subreg - confirm
+  } rv_timer_reg2hw_compare_upper0_0_reg_t;
+
+  typedef struct packed {
+    logic        q;
+  } rv_timer_reg2hw_intr_enable0_mreg_t;
+
+  typedef struct packed {
+    logic        q;
+  } rv_timer_reg2hw_intr_state0_mreg_t;
+
+  typedef struct packed {
+    logic        q;
+    logic        qe;  // NOTE(review): presumably a write qualifier from prim_subreg_ext - confirm
+  } rv_timer_reg2hw_intr_test0_mreg_t;
+
+  // hw2reg typedefs: data (d) and enable (de) driven by hardware into the register file
+  typedef struct packed {
+    logic [31:0] d;
+    logic        de;
+  } rv_timer_hw2reg_timer_v_lower0_reg_t;
+
+  typedef struct packed {
+    logic [31:0] d;
+    logic        de;
+  } rv_timer_hw2reg_timer_v_upper0_reg_t;
+
+  typedef struct packed {
+    logic        d;
+    logic        de;
+  } rv_timer_hw2reg_intr_state0_mreg_t;
+
+
+  ///////////////////////////////////////
+  // Register to internal design logic //
+  ///////////////////////////////////////
+  // Trailing [msb:lsb] comments give each member's position in the 155-bit packed struct.
+  typedef struct packed {
+    rv_timer_reg2hw_ctrl_mreg_t [0:0] ctrl; // [154:154]
+    rv_timer_reg2hw_cfg0_reg_t cfg0; // [153:134]
+    rv_timer_reg2hw_timer_v_lower0_reg_t timer_v_lower0; // [133:102]
+    rv_timer_reg2hw_timer_v_upper0_reg_t timer_v_upper0; // [101:70]
+    rv_timer_reg2hw_compare_lower0_0_reg_t compare_lower0_0; // [69:37]
+    rv_timer_reg2hw_compare_upper0_0_reg_t compare_upper0_0; // [36:4]
+    rv_timer_reg2hw_intr_enable0_mreg_t [0:0] intr_enable0; // [3:3]
+    rv_timer_reg2hw_intr_state0_mreg_t [0:0] intr_state0; // [2:2]
+    rv_timer_reg2hw_intr_test0_mreg_t [0:0] intr_test0; // [1:0]
+  } rv_timer_reg2hw_t;
+
+  ///////////////////////////////////////
+  // Internal design logic to register //
+  ///////////////////////////////////////
+  typedef struct packed {
+    rv_timer_hw2reg_timer_v_lower0_reg_t timer_v_lower0; // [67:35]
+    rv_timer_hw2reg_timer_v_upper0_reg_t timer_v_upper0; // [34:2]
+    rv_timer_hw2reg_intr_state0_mreg_t [0:0] intr_state0; // [1:0]
+  } rv_timer_hw2reg_t;
+
+  // Register Address (BlockAw-bit offsets; CTRL sits at 0x0, all other registers start at 0x100)
+  parameter logic [BlockAw-1:0] RV_TIMER_CTRL_OFFSET = 9'h 0;
+  parameter logic [BlockAw-1:0] RV_TIMER_CFG0_OFFSET = 9'h 100;
+  parameter logic [BlockAw-1:0] RV_TIMER_TIMER_V_LOWER0_OFFSET = 9'h 104;
+  parameter logic [BlockAw-1:0] RV_TIMER_TIMER_V_UPPER0_OFFSET = 9'h 108;
+  parameter logic [BlockAw-1:0] RV_TIMER_COMPARE_LOWER0_0_OFFSET = 9'h 10c;
+  parameter logic [BlockAw-1:0] RV_TIMER_COMPARE_UPPER0_0_OFFSET = 9'h 110;
+  parameter logic [BlockAw-1:0] RV_TIMER_INTR_ENABLE0_OFFSET = 9'h 114;
+  parameter logic [BlockAw-1:0] RV_TIMER_INTR_STATE0_OFFSET = 9'h 118;
+  parameter logic [BlockAw-1:0] RV_TIMER_INTR_TEST0_OFFSET = 9'h 11c;
+
+  // Enumerators are listed in the same order as the offsets above and the RV_TIMER_PERMIT entries.
+  // Register Index
+  typedef enum int {
+    RV_TIMER_CTRL,
+    RV_TIMER_CFG0,
+    RV_TIMER_TIMER_V_LOWER0,
+    RV_TIMER_TIMER_V_UPPER0,
+    RV_TIMER_COMPARE_LOWER0_0,
+    RV_TIMER_COMPARE_UPPER0_0,
+    RV_TIMER_INTR_ENABLE0,
+    RV_TIMER_INTR_STATE0,
+    RV_TIMER_INTR_TEST0
+  } rv_timer_id_e;
+
+  // Register width information to check illegal writes (one bit per byte lane the register uses)
+  parameter logic [3:0] RV_TIMER_PERMIT [9] = '{
+    4'b 0001, // index[0] RV_TIMER_CTRL
+    4'b 0111, // index[1] RV_TIMER_CFG0
+    4'b 1111, // index[2] RV_TIMER_TIMER_V_LOWER0
+    4'b 1111, // index[3] RV_TIMER_TIMER_V_UPPER0
+    4'b 1111, // index[4] RV_TIMER_COMPARE_LOWER0_0
+    4'b 1111, // index[5] RV_TIMER_COMPARE_UPPER0_0
+    4'b 0001, // index[6] RV_TIMER_INTR_ENABLE0
+    4'b 0001, // index[7] RV_TIMER_INTR_STATE0
+    4'b 0001  // index[8] RV_TIMER_INTR_TEST0
+  };
+endpackage
+
diff --git a/verilog/rtl/rv_timer_reg_top.sv b/verilog/rtl/rv_timer_reg_top.sv
new file mode 100644
index 0000000..62f1c51
--- /dev/null
+++ b/verilog/rtl/rv_timer_reg_top.sv
@@ -0,0 +1,482 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Top module auto-generated by `reggen`
+
+
+
+module rv_timer_reg_top (
+  input clk_i,
+  input rst_ni,
+
+  // Below Register interface can be changed
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  // To HW
+  output rv_timer_reg_pkg::rv_timer_reg2hw_t reg2hw, // Write
+  input  rv_timer_reg_pkg::rv_timer_hw2reg_t hw2reg, // Read
+
+  // Config
+  input devmode_i // If 1, explicit error return for unmapped register access
+);
+  // Register file of rv_timer: tlul_adapter_reg turns tl_i/tl_o into the reg_* bus decoded below.
+  import rv_timer_reg_pkg::* ;
+
+  localparam int AW = 9;
+  localparam int DW = 32;
+  localparam int DBW = DW/8;                    // Byte Width
+
+  // register signals
+  logic           reg_we;
+  logic           reg_re;
+  logic [AW-1:0]  reg_addr;
+  logic [DW-1:0]  reg_wdata;
+  logic [DBW-1:0] reg_be;
+  logic [DW-1:0]  reg_rdata;
+  logic           reg_error;
+
+  logic          addrmiss, wr_err;  // no register at reg_addr / write lacks a required byte lane
+
+  logic [DW-1:0] reg_rdata_next;
+
+  tlul_pkg::tl_h2d_t tl_reg_h2d;
+  tlul_pkg::tl_d2h_t tl_reg_d2h;
+
+  assign tl_reg_h2d = tl_i;
+  assign tl_o       = tl_reg_d2h;
+
+  tlul_adapter_reg #(
+    .RegAw(AW),
+    .RegDw(DW)
+  ) u_reg_if (
+    .clk_i,
+    .rst_ni,
+
+    .tl_i (tl_reg_h2d),
+    .tl_o (tl_reg_d2h),
+
+    .we_o    (reg_we),
+    .re_o    (reg_re),
+    .addr_o  (reg_addr),
+    .wdata_o (reg_wdata),
+    .be_o    (reg_be),
+    .rdata_i (reg_rdata),
+    .error_i (reg_error)
+  );
+
+  assign reg_rdata = reg_rdata_next ;
+  assign reg_error = (devmode_i & addrmiss) | wr_err ;  // addrmiss is reported only in devmode
+
+  // Define SW related signals
+  // Format: <reg>_<field>_{wd|we|qs}
+  //        or <reg>_{wd|we|qs} if field == 1 or 0
+  logic ctrl_qs;
+  logic ctrl_wd;
+  logic ctrl_we;
+  logic [11:0] cfg0_prescale_qs;
+  logic [11:0] cfg0_prescale_wd;
+  logic cfg0_prescale_we;
+  logic [7:0] cfg0_step_qs;
+  logic [7:0] cfg0_step_wd;
+  logic cfg0_step_we;
+  logic [31:0] timer_v_lower0_qs;
+  logic [31:0] timer_v_lower0_wd;
+  logic timer_v_lower0_we;
+  logic [31:0] timer_v_upper0_qs;
+  logic [31:0] timer_v_upper0_wd;
+  logic timer_v_upper0_we;
+  logic [31:0] compare_lower0_0_qs;
+  logic [31:0] compare_lower0_0_wd;
+  logic compare_lower0_0_we;
+  logic [31:0] compare_upper0_0_qs;
+  logic [31:0] compare_upper0_0_wd;
+  logic compare_upper0_0_we;
+  logic intr_enable0_qs;
+  logic intr_enable0_wd;
+  logic intr_enable0_we;
+  logic intr_state0_qs;
+  logic intr_state0_wd;
+  logic intr_state0_we;
+  logic intr_test0_wd;
+  logic intr_test0_we;
+
+  // Register instances
+
+  // Subregister 0 of Multireg ctrl
+  // R[ctrl]: V(False)
+
+  prim_subreg #(
+    .DW      (1),
+    .SWACCESS("RW"),
+    .RESVAL  (1'h0)
+  ) u_ctrl (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (ctrl_we),
+    .wd     (ctrl_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.ctrl[0].q ),
+
+    // to register interface (read)
+    .qs     (ctrl_qs)
+  );
+
+
+  // R[cfg0]: V(False)
+
+  //   F[prescale]: 11:0
+  prim_subreg #(
+    .DW      (12),
+    .SWACCESS("RW"),
+    .RESVAL  (12'h0)
+  ) u_cfg0_prescale (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (cfg0_prescale_we),
+    .wd     (cfg0_prescale_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.cfg0.prescale.q ),
+
+    // to register interface (read)
+    .qs     (cfg0_prescale_qs)
+  );
+
+
+  //   F[step]: 23:16
+  prim_subreg #(
+    .DW      (8),
+    .SWACCESS("RW"),
+    .RESVAL  (8'h1)
+  ) u_cfg0_step (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (cfg0_step_we),
+    .wd     (cfg0_step_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.cfg0.step.q ),
+
+    // to register interface (read)
+    .qs     (cfg0_step_qs)
+  );
+
+
+  // R[timer_v_lower0]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_timer_v_lower0 (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (timer_v_lower0_we),
+    .wd     (timer_v_lower0_wd),
+
+    // from internal hardware
+    .de     (hw2reg.timer_v_lower0.de),
+    .d      (hw2reg.timer_v_lower0.d ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.timer_v_lower0.q ),
+
+    // to register interface (read)
+    .qs     (timer_v_lower0_qs)
+  );
+
+
+  // R[timer_v_upper0]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'h0)
+  ) u_timer_v_upper0 (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (timer_v_upper0_we),
+    .wd     (timer_v_upper0_wd),
+
+    // from internal hardware
+    .de     (hw2reg.timer_v_upper0.de),
+    .d      (hw2reg.timer_v_upper0.d ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.timer_v_upper0.q ),
+
+    // to register interface (read)
+    .qs     (timer_v_upper0_qs)
+  );
+
+
+  // R[compare_lower0_0]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'hffffffff)
+  ) u_compare_lower0_0 (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (compare_lower0_0_we),
+    .wd     (compare_lower0_0_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (reg2hw.compare_lower0_0.qe),
+    .q      (reg2hw.compare_lower0_0.q ),
+
+    // to register interface (read)
+    .qs     (compare_lower0_0_qs)
+  );
+
+
+  // R[compare_upper0_0]: V(False)
+
+  prim_subreg #(
+    .DW      (32),
+    .SWACCESS("RW"),
+    .RESVAL  (32'hffffffff)
+  ) u_compare_upper0_0 (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (compare_upper0_0_we),
+    .wd     (compare_upper0_0_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (reg2hw.compare_upper0_0.qe),
+    .q      (reg2hw.compare_upper0_0.q ),
+
+    // to register interface (read)
+    .qs     (compare_upper0_0_qs)
+  );
+
+
+
+  // Subregister 0 of Multireg intr_enable0
+  // R[intr_enable0]: V(False)
+
+  prim_subreg #(
+    .DW      (1),
+    .SWACCESS("RW"),
+    .RESVAL  (1'h0)
+  ) u_intr_enable0 (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_enable0_we),
+    .wd     (intr_enable0_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_enable0[0].q ),
+
+    // to register interface (read)
+    .qs     (intr_enable0_qs)
+  );
+
+
+
+  // Subregister 0 of Multireg intr_state0
+  // R[intr_state0]: V(False)
+
+  prim_subreg #(
+    .DW      (1),
+    .SWACCESS("W1C"),
+    .RESVAL  (1'h0)
+  ) u_intr_state0 (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (intr_state0_we),
+    .wd     (intr_state0_wd),
+
+    // from internal hardware
+    .de     (hw2reg.intr_state0[0].de),
+    .d      (hw2reg.intr_state0[0].d ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.intr_state0[0].q ),
+
+    // to register interface (read)
+    .qs     (intr_state0_qs)
+  );
+
+
+
+  // Subregister 0 of Multireg intr_test0
+  // R[intr_test0]: V(True)
+  // No readback: re is tied low, qs is unconnected and the read mux returns 0 for this address.
+  prim_subreg_ext #(
+    .DW    (1)
+  ) u_intr_test0 (
+    .re     (1'b0),
+    .we     (intr_test0_we),
+    .wd     (intr_test0_wd),
+    .d      ('0),
+    .qre    (),
+    .qe     (reg2hw.intr_test0[0].qe),
+    .q      (reg2hw.intr_test0[0].q ),
+    .qs     ()
+  );
+
+
+
+  // Address decode: addr_hit[i] is set when reg_addr equals the offset of register index i.
+  logic [8:0] addr_hit;
+  always_comb begin
+    addr_hit = '0;
+    addr_hit[0] = (reg_addr == RV_TIMER_CTRL_OFFSET);
+    addr_hit[1] = (reg_addr == RV_TIMER_CFG0_OFFSET);
+    addr_hit[2] = (reg_addr == RV_TIMER_TIMER_V_LOWER0_OFFSET);
+    addr_hit[3] = (reg_addr == RV_TIMER_TIMER_V_UPPER0_OFFSET);
+    addr_hit[4] = (reg_addr == RV_TIMER_COMPARE_LOWER0_0_OFFSET);
+    addr_hit[5] = (reg_addr == RV_TIMER_COMPARE_UPPER0_0_OFFSET);
+    addr_hit[6] = (reg_addr == RV_TIMER_INTR_ENABLE0_OFFSET);
+    addr_hit[7] = (reg_addr == RV_TIMER_INTR_STATE0_OFFSET);
+    addr_hit[8] = (reg_addr == RV_TIMER_INTR_TEST0_OFFSET);
+  end
+  // A read or write that matches none of the register offsets.
+  assign addrmiss = (reg_re || reg_we) ? ~|addr_hit : 1'b0 ;
+
+  // Check sub-word write is permitted: all byte lanes set in RV_TIMER_PERMIT[i] must be in reg_be
+  always_comb begin
+    wr_err = 1'b0;
+    if (addr_hit[0] && reg_we && (RV_TIMER_PERMIT[0] != (RV_TIMER_PERMIT[0] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[1] && reg_we && (RV_TIMER_PERMIT[1] != (RV_TIMER_PERMIT[1] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[2] && reg_we && (RV_TIMER_PERMIT[2] != (RV_TIMER_PERMIT[2] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[3] && reg_we && (RV_TIMER_PERMIT[3] != (RV_TIMER_PERMIT[3] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[4] && reg_we && (RV_TIMER_PERMIT[4] != (RV_TIMER_PERMIT[4] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[5] && reg_we && (RV_TIMER_PERMIT[5] != (RV_TIMER_PERMIT[5] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[6] && reg_we && (RV_TIMER_PERMIT[6] != (RV_TIMER_PERMIT[6] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[7] && reg_we && (RV_TIMER_PERMIT[7] != (RV_TIMER_PERMIT[7] & reg_be))) wr_err = 1'b1 ;
+    if (addr_hit[8] && reg_we && (RV_TIMER_PERMIT[8] != (RV_TIMER_PERMIT[8] & reg_be))) wr_err = 1'b1 ;
+  end
+  // Per-field write enable and data; a write flagged by wr_err does not assert any enable.
+  assign ctrl_we = addr_hit[0] & reg_we & ~wr_err;
+  assign ctrl_wd = reg_wdata[0];
+
+  assign cfg0_prescale_we = addr_hit[1] & reg_we & ~wr_err;
+  assign cfg0_prescale_wd = reg_wdata[11:0];
+
+  assign cfg0_step_we = addr_hit[1] & reg_we & ~wr_err;
+  assign cfg0_step_wd = reg_wdata[23:16];
+
+  assign timer_v_lower0_we = addr_hit[2] & reg_we & ~wr_err;
+  assign timer_v_lower0_wd = reg_wdata[31:0];
+
+  assign timer_v_upper0_we = addr_hit[3] & reg_we & ~wr_err;
+  assign timer_v_upper0_wd = reg_wdata[31:0];
+
+  assign compare_lower0_0_we = addr_hit[4] & reg_we & ~wr_err;
+  assign compare_lower0_0_wd = reg_wdata[31:0];
+
+  assign compare_upper0_0_we = addr_hit[5] & reg_we & ~wr_err;
+  assign compare_upper0_0_wd = reg_wdata[31:0];
+
+  assign intr_enable0_we = addr_hit[6] & reg_we & ~wr_err;
+  assign intr_enable0_wd = reg_wdata[0];
+
+  assign intr_state0_we = addr_hit[7] & reg_we & ~wr_err;
+  assign intr_state0_wd = reg_wdata[0];
+
+  assign intr_test0_we = addr_hit[8] & reg_we & ~wr_err;
+  assign intr_test0_wd = reg_wdata[0];
+
+  // Read data return: unlisted bits read as 0; all ones are returned when no address hits
+  always_comb begin
+    reg_rdata_next = '0;
+    unique case (1'b1)
+      addr_hit[0]: begin
+        reg_rdata_next[0] = ctrl_qs;
+      end
+
+      addr_hit[1]: begin
+        reg_rdata_next[11:0] = cfg0_prescale_qs;
+        reg_rdata_next[23:16] = cfg0_step_qs;
+      end
+
+      addr_hit[2]: begin
+        reg_rdata_next[31:0] = timer_v_lower0_qs;
+      end
+
+      addr_hit[3]: begin
+        reg_rdata_next[31:0] = timer_v_upper0_qs;
+      end
+
+      addr_hit[4]: begin
+        reg_rdata_next[31:0] = compare_lower0_0_qs;
+      end
+
+      addr_hit[5]: begin
+        reg_rdata_next[31:0] = compare_upper0_0_qs;
+      end
+
+      addr_hit[6]: begin
+        reg_rdata_next[0] = intr_enable0_qs;
+      end
+
+      addr_hit[7]: begin
+        reg_rdata_next[0] = intr_state0_qs;
+      end
+
+      addr_hit[8]: begin
+        reg_rdata_next[0] = '0;  // intr_test0 has no readback value
+      end
+
+      default: begin
+        reg_rdata_next = '1;
+      end
+    endcase
+  end
+
+
+endmodule
diff --git a/verilog/rtl/spi_clgen.v b/verilog/rtl/spi_clgen.v
new file mode 100644
index 0000000..2086eaa
--- /dev/null
+++ b/verilog/rtl/spi_clgen.v
@@ -0,0 +1,66 @@
+
+
+module spi_clgen (
+  input                            clk_i,   // input clock (system clock)
+  input                            rst_ni,      // asynchronous reset, active low
+  input                            enable,   // clock enable; while low, cnt is reloaded from divider every cycle
+  input                            go,       // start transfer (used only by the divider == 0 pos_edge term)
+  input                            last_clk, // last clock: while set, clk_out can only toggle from 1 to 0
+  input     [`SPI_DIVIDER_LEN-1:0] divider,  // clock divider: cnt counts divider..0, i.e. divider+1 clk_i cycles per half period
+  output    reg                    clk_out,  // output clock
+  output    reg                    pos_edge, // pulse marking positive edge of clk_out
+  output    reg                    neg_edge // pulse marking negative edge of clk_out
+
+); 
+                            
+  // SPI serial clock generator. A down-counter reloaded from divider paces
+  // clk_out. pos_edge/neg_edge are registered strobes; their cnt_one terms are
+  // high during the clk_i cycle that ends with the matching clk_out toggle.
+                            
+  reg       [`SPI_DIVIDER_LEN-1:0] cnt;      // clock counter 
+  wire                             cnt_zero; // counter is equal to zero
+  wire                             cnt_one;  // counter is equal to one
+  
+  
+  assign cnt_zero = cnt == {`SPI_DIVIDER_LEN{1'b0}};
+  assign cnt_one  = cnt == {{`SPI_DIVIDER_LEN-1{1'b0}}, 1'b1};
+  
+  // Half-period down-counter: reloaded from divider while idle or when it reaches zero
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      cnt <=  {`SPI_DIVIDER_LEN{1'b1}};
+    else
+      begin
+        if(!enable || cnt_zero)
+          cnt <=  divider;
+        else
+          cnt <=  cnt - {{`SPI_DIVIDER_LEN-1{1'b0}}, 1'b1};
+      end
+  end
+  
+  // clk_out toggles when cnt is zero; with last_clk set only the 1 -> 0 toggle is allowed
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      clk_out <=  1'b0;
+    else
+      clk_out <=  (enable && cnt_zero && (!last_clk || clk_out)) ? ~clk_out : clk_out;
+  end
+   
+  // Pos and neg edge strobes; the !(|divider) terms are extra conditions that apply when divider is zero
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      begin
+        pos_edge  <=  1'b0;
+        neg_edge  <=  1'b0;
+      end
+    else
+      begin
+        pos_edge  <=  (enable && !clk_out && cnt_one) || (!(|divider) && clk_out) || (!(|divider) && go && !enable);
+        neg_edge  <=  (enable && clk_out && cnt_one) || (!(|divider) && !clk_out && enable);
+      end
+  end
+endmodule
+ 
diff --git a/verilog/rtl/spi_core.sv b/verilog/rtl/spi_core.sv
new file mode 100644
index 0000000..b1c2453
--- /dev/null
+++ b/verilog/rtl/spi_core.sv
@@ -0,0 +1,199 @@
+
+module spi_core
+(
+  // Register interface (tlul side)
+  input         clk_i,              // system clock
+  input         rst_ni,             // asynchronous reset, active low
+  input  [7:0]  addr_i,             // register address; bits `SPI_OFS_BITS select the register
+  input  [31:0] wdata_i,            // write data
+  output reg [31:0] rdata_o,        // read data, registered from the read mux every clock
+  input  [3:0]  be_i,               // byte enables for writes
+  input         we_i,               // write strobe
+  input         re_i,               // read strobe; writes are decoded only while it is low
+  output reg    error_o,            // error response, constant 0
+  output reg    intr_rx_o,          // pulses after the last bit when IE and RX_SEL are set
+  output reg    intr_tx_o,          // pulses after the last bit when IE and TX_SEL are set
+
+  // SPI signals
+  output     [`SPI_SS_NB-1:0] ss_o,         // slave select, active low
+  output                      sclk_o,       // serial clock
+  output                      sd_o,         // serial data out
+  output     reg              sd_oe,        // output enable for sd_o
+  input                       sd_i          // serial data in
+);
+
+  // Every register in this module is cleared by the asynchronous, active-low
+  // rst_ni, the same reset style used by sd_oe and the clgen/shift instances.
+  reg       [`SPI_DIVIDER_LEN-1:0] divider;          // Divider register
+  reg       [`SPI_CTRL_BIT_NB-1:0] ctrl;             // Control and status register
+  reg             [`SPI_SS_NB-1:0] ss;               // Slave select register
+  reg                     [32-1:0] wb_dat;           // read mux output
+  wire         [`SPI_MAX_CHAR-1:0] rx;               // Rx register
+  wire                             rx_negedge;       // miso is sampled on negative edge
+  wire                             tx_negedge;       // mosi is driven on negative edge
+  wire    [`SPI_CHAR_LEN_BITS-1:0] char_len;         // char len
+  wire                             go;               // go
+  wire                             lsb;              // lsb first on line
+  wire                             ie;               // interrupt enable
+  wire                             ass;              // automatic slave select
+  wire                             spi_divider_sel;  // divider register select
+  wire                             spi_ctrl_sel;     // ctrl register select
+  wire                             spi_tx_sel;       // tx_l register select
+  wire                             spi_ss_sel;       // ss register select
+  wire                             tip;              // transfer in progress
+  wire                             pos_edge;         // recognize posedge of sclk
+  wire                             neg_edge;         // recognize negedge of sclk
+  wire                             last_bit;         // marks last character bit
+  wire                             tx_en;            // enables spi transmission
+  wire                             rx_en;            // enables spi reception
+
+  // Address decoder: write selects (the TX select additionally requires tx_en)
+  assign spi_divider_sel = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_DEVIDE);
+  assign spi_ctrl_sel    = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_CTRL);
+  assign spi_tx_sel      = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_TX_0) & tx_en;
+  assign spi_ss_sel      = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_SS);
+
+  // Read mux: narrower registers are zero-extended to 32 bits
+  always @(addr_i or rx or ctrl or divider or ss)
+  begin
+    case (addr_i[`SPI_OFS_BITS])
+      `SPI_RX_0:    wb_dat =  rx[`SPI_MAX_CHAR-1:0];
+      `SPI_CTRL:    wb_dat =  ctrl;
+      `SPI_DEVIDE:  wb_dat =  divider;
+      `SPI_SS:      wb_dat =  ss;
+      default:      wb_dat =  32'b0;
+    endcase
+  end
+
+  // Registered read data
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+      rdata_o <=  32'b0;
+    else
+      rdata_o <=  wb_dat;
+  end
+
+
+  // Error response is never raised
+  assign error_o = 1'b0;
+
+  // Interrupts: high for the cycle after the last bit's pos_edge, low otherwise
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+      intr_tx_o <=  1'b0;
+    else if (ie && tip && last_bit && pos_edge && tx_en)
+      intr_tx_o <=  1'b1;
+    else
+      intr_tx_o <=  1'b0;
+  end
+
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+      intr_rx_o <=  1'b0;
+    else if (ie && tip && last_bit && pos_edge && rx_en)
+      intr_rx_o <=  1'b1;
+    else
+      intr_rx_o <=  1'b0;
+  end
+
+  // Divider register: byte-wise writable, ignored while a transfer is in progress
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+        divider <=  {`SPI_DIVIDER_LEN{1'b0}};
+    else if (spi_divider_sel && we_i && !tip)
+      begin
+        if (be_i[0])
+          divider[7:0] <=  wdata_i[7:0];
+        if (be_i[1])
+          divider[`SPI_DIVIDER_LEN-1:8] <=  wdata_i[`SPI_DIVIDER_LEN-1:8];
+      end
+  end
+
+  // Ctrl register: writable while idle; GO is cleared by hardware at the end of a transfer
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+      ctrl <=  {`SPI_CTRL_BIT_NB{1'b0}};
+    else if(spi_ctrl_sel && we_i && !tip)
+      begin
+        if (be_i[0])
+          ctrl[7:0] <=  wdata_i[7:0] | {7'b0, ctrl[0]};  // ctrl[0] is OR-ed back in, so a write cannot clear it
+        if (be_i[1])
+          ctrl[`SPI_CTRL_BIT_NB-1:8] <=  wdata_i[`SPI_CTRL_BIT_NB-1:8];
+      end
+    else if(tip && last_bit && pos_edge)
+      ctrl[`SPI_CTRL_GO] <=  1'b0;
+  end
+
+  assign rx_negedge = ctrl[`SPI_CTRL_RX_NEGEDGE];
+  assign tx_negedge = ctrl[`SPI_CTRL_TX_NEGEDGE];
+  assign go         = ctrl[`SPI_CTRL_GO];
+  assign char_len   = ctrl[`SPI_CTRL_CHAR_LEN];
+  assign lsb        = ctrl[`SPI_CTRL_LSB];
+  assign ie         = ctrl[`SPI_CTRL_IE];
+  assign ass        = ctrl[`SPI_CTRL_ASS];
+  assign rx_en      = ctrl[`SPI_RX_SEL];
+  assign tx_en      = ctrl[`SPI_TX_SEL];
+  // sd_o output enable: asserted only when TX is selected and RX is not
+  always @(posedge clk_i or negedge rst_ni) begin
+    if(~rst_ni) begin
+        sd_oe <= 1'b0;
+    end else if (tx_en & !rx_en) begin
+        sd_oe <= 1'b1;
+    end else begin
+        sd_oe <= 1'b0;
+    end
+  end
+  // Slave select register: written through byte lane 0 while idle
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+      ss <=  {`SPI_SS_NB{1'b0}};
+    else if(spi_ss_sel && we_i && !tip)
+      begin
+        if (be_i[0])
+          ss <=  wdata_i[`SPI_SS_NB-1:0];
+      end
+  end
+
+  assign ss_o = ~((ss & {`SPI_SS_NB{tip & ass}}) | (ss & {`SPI_SS_NB{!ass}}));
+
+  spi_clgen clgen (
+    .clk_i       (clk_i),
+    .rst_ni      (rst_ni),
+    .go          (go),
+    .enable      (tip),
+    .last_clk    (last_bit),
+    .divider     (divider),
+    .clk_out     (sclk_o),
+    .pos_edge    (pos_edge),
+    .neg_edge    (neg_edge)
+    );
+
+  spi_shift shift (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .len          (char_len[`SPI_CHAR_LEN_BITS-1:0]),
+    .latch        (spi_tx_sel & we_i),
+    .byte_sel     (be_i),
+    .lsb          (lsb),
+    .go           (go),
+    .pos_edge     (pos_edge),
+    .neg_edge     (neg_edge),
+    .rx_negedge   (rx_negedge),
+    .tx_negedge   (tx_negedge),
+    .tip          (tip),
+    .last         (last_bit),
+    .p_in         (wdata_i),
+    .p_out        (rx),
+    .s_clk        (sclk_o),
+    .s_in         (sd_i),
+    .s_out        (sd_o),
+    .rx_en        (rx_en)
+    );
+endmodule
+  
diff --git a/verilog/rtl/spi_defines.v b/verilog/rtl/spi_defines.v
new file mode 100644
index 0000000..3e43ac9
--- /dev/null
+++ b/verilog/rtl/spi_defines.v
@@ -0,0 +1,117 @@
+
+// Number of bits used for the divider register. If used in a system with
+// a low system clock frequency this can be reduced.
+// Use SPI_DIVIDER_LEN for fine tuning the exact number.
+//
+//`define SPI_DIVIDER_LEN_8
+`define SPI_DIVIDER_LEN_16
+//`define SPI_DIVIDER_LEN_24
+//`define SPI_DIVIDER_LEN_32
+
+`ifdef SPI_DIVIDER_LEN_8
+  `define SPI_DIVIDER_LEN       8    // Can be set from 1 to 8
+`endif                                                          
+`ifdef SPI_DIVIDER_LEN_16                                       
+  `define SPI_DIVIDER_LEN       16   // Can be set from 9 to 16
+`endif                                                          
+`ifdef SPI_DIVIDER_LEN_24                                       
+  `define SPI_DIVIDER_LEN       24   // Can be set from 17 to 24
+`endif                                                          
+`ifdef SPI_DIVIDER_LEN_32                                       
+  `define SPI_DIVIDER_LEN       32   // Can be set from 25 to 32 
+`endif
+
+//
+// Maximum number of bits that can be sent/received at once.
+// Use SPI_MAX_CHAR for fine tuning the exact number, when using
+// SPI_MAX_CHAR_32, SPI_MAX_CHAR_24, SPI_MAX_CHAR_16, SPI_MAX_CHAR_8.
+//
+`define SPI_MAX_CHAR_32
+//`define SPI_MAX_CHAR_128
+//`define SPI_MAX_CHAR_64
+//`define SPI_MAX_CHAR_24
+//`define SPI_MAX_CHAR_16
+//`define SPI_MAX_CHAR_8
+
+`ifdef SPI_MAX_CHAR_128
+  `define SPI_MAX_CHAR          128  // Can only be set to 128 
+  `define SPI_CHAR_LEN_BITS     7
+`endif
+`ifdef SPI_MAX_CHAR_64
+  `define SPI_MAX_CHAR          64   // Can only be set to 64 
+  `define SPI_CHAR_LEN_BITS     6
+`endif
+`ifdef SPI_MAX_CHAR_32
+  `define SPI_MAX_CHAR          32   // Can be set from 25 to 32 
+  `define SPI_CHAR_LEN_BITS     5
+`endif
+`ifdef SPI_MAX_CHAR_24
+  `define SPI_MAX_CHAR          24   // Can be set from 17 to 24 
+  `define SPI_CHAR_LEN_BITS     5
+`endif
+`ifdef SPI_MAX_CHAR_16
+  `define SPI_MAX_CHAR          16   // Can be set from 9 to 16 
+  `define SPI_CHAR_LEN_BITS     4
+`endif
+`ifdef SPI_MAX_CHAR_8
+  `define SPI_MAX_CHAR          8    // Can be set from 1 to 8 
+  `define SPI_CHAR_LEN_BITS     3
+`endif
+
+//
+// Number of device select signals. Use SPI_SS_NB for fine tuning the 
+// exact number. (An SPI_SS_NB_8 option is also handled below.)
+//
+`define SPI_SS_NB_4
+//`define SPI_SS_NB_16
+//`define SPI_SS_NB_24
+//`define SPI_SS_NB_32
+`ifdef SPI_SS_NB_4
+  `define SPI_SS_NB             4    // Can be set from 1 to 4
+`endif
+`ifdef SPI_SS_NB_8
+  `define SPI_SS_NB             8    // Can be set from 1 to 8
+`endif
+`ifdef SPI_SS_NB_16
+  `define SPI_SS_NB             16   // Can be set from 9 to 16
+`endif
+`ifdef SPI_SS_NB_24
+  `define SPI_SS_NB             24   // Can be set from 17 to 24
+`endif
+`ifdef SPI_SS_NB_32
+  `define SPI_SS_NB             32   // Can be set from 25 to 32
+`endif
+
+//
+// Bits of the register address (addr_i) used for partial decoding of SPI registers.
+//
+`define SPI_OFS_BITS	          6:2
+
+//
+// Register offsets, compared against addr_i[SPI_OFS_BITS] (word offsets: byte address = offset * 4)
+//
+`define SPI_RX_0                8
+`define SPI_TX_0                0
+`define SPI_CTRL                4
+`define SPI_DEVIDE              5
+`define SPI_SS                  6
+
+//
+// Number of bits in ctrl register
+//
+`define SPI_CTRL_BIT_NB         16
+
+//
+// Control register bit positions. NOTE(review): CHAR_LEN is 7 bits wide but char_len is only SPI_CHAR_LEN_BITS wide - confirm upper bits are unused
+//
+`define SPI_RX_SEL              15
+`define SPI_TX_SEL              14
+`define SPI_CTRL_ASS            13
+`define SPI_CTRL_IE             12
+`define SPI_CTRL_LSB            11
+`define SPI_CTRL_TX_NEGEDGE     10
+`define SPI_CTRL_RX_NEGEDGE     9
+`define SPI_CTRL_GO             8
+`define SPI_CTRL_RES_1          7
+`define SPI_CTRL_CHAR_LEN       6:0
+
diff --git a/verilog/rtl/spi_shift.v b/verilog/rtl/spi_shift.v
new file mode 100644
index 0000000..9bc6067
--- /dev/null
+++ b/verilog/rtl/spi_shift.v
@@ -0,0 +1,103 @@
+
+
+module spi_shift (
+  input                          clk_i,        // system clock
+  input                          rst_ni,       // asynchronous reset, active low
+  input                          latch,        // latch signal for storing the data in shift register
+  input                    [3:0] byte_sel,     // byte select signals for storing the data in shift register
+  input [`SPI_CHAR_LEN_BITS-1:0] len,          // bits per transfer; 0 selects the maximum (2**SPI_CHAR_LEN_BITS)
+  input                          lsb,          // lsb first on the line
+  input                          go,           // start transfer
+  input                          pos_edge,     // recognize posedge of s_clk
+  input                          neg_edge,     // recognize negedge of s_clk
+  input                          rx_negedge,   // s_in is sampled on negative edge
+  input                          tx_negedge,   // s_out is driven on negative edge
+  output   reg                   tip,          // transfer in progress
+  output                         last,         // last bit
+  input                   [31:0] p_in,         // parallel in
+  output     [`SPI_MAX_CHAR-1:0] p_out,        // parallel out
+  input                          s_clk,        // serial clock
+  input                          s_in,         // serial in
+  output   reg                   s_out,        // serial out
+  input                          rx_en         // serial rx enable
+);
+
+  // Bit counter plus separate transmit (data) and receive (data_rx) registers.
+  reg     [`SPI_CHAR_LEN_BITS:0] cnt;          // data bit count
+  reg        [`SPI_MAX_CHAR-1:0] data;         // transmit data, loaded from p_in
+  reg        [`SPI_MAX_CHAR-1:0] data_rx;      // receive data, presented on p_out
+  wire    [`SPI_CHAR_LEN_BITS:0] tx_bit_pos;   // next bit position
+  wire    [`SPI_CHAR_LEN_BITS:0] rx_bit_pos;   // next bit position
+  wire                           rx_clk_i;     // rx clock enable
+  wire                           tx_clk_i;     // tx clock enable
+
+  assign p_out = data_rx;
+
+  assign tx_bit_pos = lsb ? {!(|len), len} - cnt : cnt - {{`SPI_CHAR_LEN_BITS{1'b0}},1'b1};
+  assign rx_bit_pos = lsb ? {!(|len), len} - (rx_negedge ? cnt + {{`SPI_CHAR_LEN_BITS{1'b0}},1'b1} : cnt) :
+                            (rx_negedge ? cnt : cnt - {{`SPI_CHAR_LEN_BITS{1'b0}},1'b1});
+
+  assign last = !(|cnt);
+
+  assign rx_clk_i = (rx_negedge ? neg_edge : pos_edge) && (!last || s_clk);
+  assign tx_clk_i = (tx_negedge ? neg_edge : pos_edge) && !last;
+
+  // Character bit counter: loaded from len while idle, decremented on pos_edge during a transfer
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      cnt <=  {`SPI_CHAR_LEN_BITS+1{1'b0}};
+    else
+      begin
+        if(tip)
+          cnt <=  pos_edge ? (cnt - {{`SPI_CHAR_LEN_BITS{1'b0}}, 1'b1}) : cnt;
+        else
+          cnt <=  !(|len) ? {1'b1, {`SPI_CHAR_LEN_BITS{1'b0}}} : {1'b0, len};
+      end
+  end
+
+  // Transfer in progress: set by go while idle, cleared on the pos_edge of the last bit
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      tip <=  1'b0;
+    else if(go && ~tip)
+      tip <=  1'b1;
+    else if(tip && last && pos_edge)
+      tip <=  1'b0;
+  end
+
+  // Sending bits to the line
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+      s_out   <=  1'b0;
+    else
+      s_out <=  (tx_clk_i || !tip) ? data[tx_bit_pos[`SPI_CHAR_LEN_BITS-1:0]] : s_out;
+  end
+
+  // Transmit data latch and receive capture; both registers are cleared on reset
+  always @(posedge clk_i or negedge rst_ni)
+  begin
+    if (~rst_ni)
+      begin
+        data    <=  {`SPI_MAX_CHAR{1'b0}};
+        data_rx <=  {`SPI_MAX_CHAR{1'b0}};  // without this p_out reads X until a receive completes
+      end
+    else if (latch && !tip)
+      begin
+        if (byte_sel[0])
+          data[7:0] <=   p_in[7:0];
+        if (byte_sel[1])
+          data[15:8] <=  p_in[15:8];
+        if (byte_sel[2])
+          data[23:16] <= p_in[23:16];
+        if (byte_sel[3])
+          data[`SPI_MAX_CHAR-1:24] <=  p_in[`SPI_MAX_CHAR-1:24];
+      end
+    else if (rx_en && tip) begin
+      data_rx[rx_bit_pos[`SPI_CHAR_LEN_BITS-1:0]] <=  rx_clk_i ? s_in : data_rx[rx_bit_pos[`SPI_CHAR_LEN_BITS-1:0]];
+    end
+  end
+endmodule
+
diff --git a/verilog/rtl/spi_top.sv b/verilog/rtl/spi_top.sv
new file mode 100644
index 0000000..6c65601
--- /dev/null
+++ b/verilog/rtl/spi_top.sv
@@ -0,0 +1,75 @@
+
+module spi_top(
+  // TL-UL wrapper: a register adapter in front of spi_core
+  input clk_i,
+  input rst_ni,
+
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+
+  // SPI signals
+  output     intr_rx_o,
+  output     intr_tx_o,
+  output     [`SPI_SS_NB-1:0] ss_o,
+  output                      sclk_o,
+  output                      sd_o,
+  output                      sd_oe,
+  input                       sd_i
+
+);
+
+localparam int AW = 8;
+localparam int DW = 32;
+
+logic [AW-1:0]   reg_addr;
+logic [DW-1:0]   reg_wdata;
+logic [DW-1:0]   reg_rdata;
+logic [DW/8-1:0] reg_be;
+logic            reg_we;
+logic            reg_re;
+logic            reg_err;
+
+tlul_adapter_reg #(
+  .RegAw (AW),
+  .RegDw (DW)
+) u_reg_if (
+  .clk_i   (clk_i),
+  .rst_ni  (rst_ni),
+  // TL-UL side
+  .tl_i    (tl_i),
+  .tl_o    (tl_o),
+  // register side
+  .re_o    (reg_re),
+  .we_o    (reg_we),
+  .be_o    (reg_be),
+  .addr_o  (reg_addr),
+  .wdata_o (reg_wdata),
+  .rdata_i (reg_rdata),
+  .error_i (reg_err)
+);
+
+
+spi_core spi_host(
+  // clock and reset
+  .clk_i       (clk_i),
+  .rst_ni      (rst_ni),
+  // register interface
+  .re_i        (reg_re),
+  .we_i        (reg_we),
+  .be_i        (reg_be),
+  .addr_i      (reg_addr),
+  .wdata_i     (reg_wdata),
+  .rdata_o     (reg_rdata),
+  .error_o     (reg_err),
+  // interrupts
+  .intr_rx_o   (intr_rx_o),
+  .intr_tx_o   (intr_tx_o),
+  // SPI pins
+  .ss_o        (ss_o),
+  .sclk_o      (sclk_o),
+  .sd_o        (sd_o),
+  .sd_oe       (sd_oe),
+  .sd_i        (sd_i)
+);
+
+endmodule
diff --git a/verilog/rtl/sram.v b/verilog/rtl/sram.v
new file mode 100644
index 0000000..8fac671
--- /dev/null
+++ b/verilog/rtl/sram.v
@@ -0,0 +1,126 @@
+// OpenRAM SRAM model
+// Words: 1024
+// Word size: 32
+// Write size: 8
+
+module sram #(
+  parameter NUM_WMASKS = 4 ,
+  parameter DATA_WIDTH = 32 ,
+  parameter ADDR_WIDTH = 10 ,
+  parameter RAM_DEPTH = 1 << ADDR_WIDTH,
+  // NOTE: VERBOSE, T_HOLD, IZERO and IFILE are only referenced by commented-out code below.
+  parameter VERBOSE = 1 , //Set to 0 to only display warnings
+  parameter T_HOLD = 1 ,//Delay to hold dout value after posedge. Value is arbitrary
+  parameter IZERO   = 0 , // binary / Initial RAM with zeros (has priority over IFILE)
+  parameter IFILE   = "" 
+)
+(
+// Port 0: RW
+    clk0,csb0,web0,wmask0,addr0,din0,dout0,
+// Port 1: R
+    clk1,csb1,addr1,dout1
+  );
+
+  /* Behavioral dual-port RAM: port 0 is read/write, port 1 is read-only.
+   *
+   * Timing as coded below:
+   *   - control, address and write data are sampled on the rising clock edge;
+   *   - the memory write and the dout update happen on the following falling edge.
+   * dout0/dout1 keep their previous value when no read was sampled.
+   *
+   * The port 0 write path is written for four 8-bit byte lanes (32-bit words);
+   * it does not scale with NUM_WMASKS or DATA_WIDTH.
+   * NOTE(review): mem has no reset or initialisation in the active code.
+   */
+  input  clk0; // clock
+  input   csb0; // active low chip select
+  input  web0; // active low write control
+  input [NUM_WMASKS-1:0]   wmask0; // write mask
+  input [ADDR_WIDTH-1:0]  addr0;
+  input [DATA_WIDTH-1:0]  din0;
+  output [DATA_WIDTH-1:0] dout0;
+  input  clk1; // clock
+  input   csb1; // active low chip select
+  input [ADDR_WIDTH-1:0]  addr1;
+  output [DATA_WIDTH-1:0] dout1;
+
+  reg  csb0_reg;
+  reg  web0_reg;
+  reg [NUM_WMASKS-1:0]   wmask0_reg;
+  reg [ADDR_WIDTH-1:0]  addr0_reg;
+  reg [DATA_WIDTH-1:0]  din0_reg;
+  reg [DATA_WIDTH-1:0]  dout0;
+
+  // Port 0 inputs are sampled on the rising edge of clk0 (blocking assignments)
+  always @(posedge clk0)
+  begin
+    csb0_reg = csb0;
+    web0_reg = web0;
+    wmask0_reg = wmask0;
+    addr0_reg = addr0;
+    din0_reg = din0;
+    //dout0 = 32'bx;
+    //if ( !csb0_reg && web0_reg && VERBOSE ) 
+      //$display($time," Reading %m addr0=%b dout0=%b",addr0_reg,mem[addr0_reg]);
+    //if ( !csb0_reg && !web0_reg && VERBOSE )
+      //$display($time," Writing %m addr0=%b din0=%b wmask0=%b",addr0_reg,din0_reg,wmask0_reg);
+  end
+
+  reg  csb1_reg;
+  reg [ADDR_WIDTH-1:0]  addr1_reg;
+  reg [DATA_WIDTH-1:0]  dout1;
+
+
+  // Port 1 inputs are sampled on the rising edge of clk1 (blocking assignments)
+  always @(posedge clk1)
+  begin
+    csb1_reg = csb1;
+    addr1_reg = addr1;
+    //if (!csb0 && !web0 && !csb1 && (addr0 == addr1))
+         //$display($time," WARNING: Writing and reading addr0=%b and addr1=%b simultaneously!",addr0,addr1);
+    //#(T_HOLD) dout1 = 32'bx;
+    //if ( !csb1_reg && VERBOSE ) 
+      //$display($time," Reading %m addr1=%b dout1=%b",addr1_reg,mem[addr1_reg]);
+  end
+
+integer i;
+reg [DATA_WIDTH-1:0]    mem [0:RAM_DEPTH-1];
+//initial
+    //if (IZERO)
+      //for (i=0; i<RAM_DEPTH; i=i+1) mem[i] = {DATA_WIDTH{1'b0}};
+    //else
+      //if (IFILE != "") $readmemh({IFILE,".hex"}, mem);
+      
+  // Memory Write Block Port 0 (falling edge of clk0, using the values sampled on the rising edge)
+  // Write Operation : When web0 = 0, csb0 = 0; each set wmask0 bit writes one byte lane
+  always @ (negedge clk0)
+  begin : MEM_WRITE0
+    if ( !csb0_reg && !web0_reg ) begin
+        if (wmask0_reg[0])
+                mem[addr0_reg][7:0] = din0_reg[7:0];
+        if (wmask0_reg[1])
+                mem[addr0_reg][15:8] = din0_reg[15:8];
+        if (wmask0_reg[2])
+                mem[addr0_reg][23:16] = din0_reg[23:16];
+        if (wmask0_reg[3])
+                mem[addr0_reg][31:24] = din0_reg[31:24];
+    end
+  end
+
+  // Memory Read Block Port 0 (falling edge of clk0)
+  // Read Operation : When web0 = 1, csb0 = 0
+  always @ (negedge clk0)
+  begin : MEM_READ0
+    if (!csb0_reg && web0_reg)
+       dout0 <=  mem[addr0_reg];
+  end
+
+  // Memory Read Block Port 1 (falling edge of clk1)
+  // Read Operation : When csb1 = 0 (port 1 is read-only and has no write control)
+  always @ (negedge clk1)
+  begin : MEM_READ1
+    if (!csb1_reg)
+       dout1 <=  mem[addr1_reg];
+  end
+
+endmodule
diff --git a/verilog/rtl/timer_core.sv b/verilog/rtl/timer_core.sv
new file mode 100644
index 0000000..7cd1c62
--- /dev/null
+++ b/verilog/rtl/timer_core.sv
@@ -0,0 +1,49 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+module timer_core #(
+  parameter int N = 1
+) (
+  input clk_i,
+  input rst_ni,
+
+  input        active,
+  input [11:0] prescaler,
+  input [ 7:0] step,
+
+  output logic        tick,
+  output logic [63:0] mtime_d,
+  input        [63:0] mtime,
+  input        [63:0] mtimecmp [N],
+
+  output logic [N-1:0] intr
+);
+
+  // Prescale counter: counts clk_i cycles up to the prescaler value.
+  logic [11:0] prescale_cnt;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : generate_tick
+    if (!rst_ni) begin
+      prescale_cnt <= '0;
+    end else if (!active || (prescale_cnt == prescaler)) begin
+      // restart while the timer is inactive or once the count matches prescaler
+      prescale_cnt <= '0;
+    end else begin
+      prescale_cnt <= prescale_cnt + 12'd1;
+    end
+  end
+
+  // tick is combinational and only asserted while active
+  assign tick = (prescale_cnt >= prescaler) & active;
+
+  // Candidate next timer value: current mtime plus the zero-extended step
+  assign mtime_d = mtime + {56'h0, step};
+
+  // intr[k] is high while active and mtime >= mtimecmp[k]. TODO: Check if it must consider overflow case
+  for (genvar k = 0; k < N; k++) begin : gen_intr
+    assign intr[k] = active & (mtime >= mtimecmp[k]);
+  end
+endmodule : timer_core
diff --git a/verilog/rtl/tl_main_pkg.sv b/verilog/rtl/tl_main_pkg.sv
new file mode 100644
index 0000000..4421478
--- /dev/null
+++ b/verilog/rtl/tl_main_pkg.sv
@@ -0,0 +1,50 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// tl_main package generated by `tlgen.py` tool
+
+package tl_main_pkg;
+  // Address map of the main TL-UL crossbar: ADDR_SPACE_* is the base address of each device window.
+  localparam logic [31:0] ADDR_SPACE_ICCM       = 32'h 20000000;
+  localparam logic [31:0] ADDR_SPACE_DEBUG_ROM  = 32'h 10040000;
+  localparam logic [31:0] ADDR_SPACE_DCCM       = 32'h 10000000;
+  localparam logic [31:0] ADDR_SPACE_TIMER0     = 32'h 40000000;
+  localparam logic [31:0] ADDR_SPACE_UART0      = 32'h 40060000;
+  localparam logic [31:0] ADDR_SPACE_SPI0       = 32'h 40080000;
+  localparam logic [31:0] ADDR_SPACE_PWM        = 32'h 400b0000; 
+  localparam logic [31:0] ADDR_SPACE_GPIO       = 32'h 400c0000;
+  localparam logic [31:0] ADDR_SPACE_PLIC       = 32'h 40050000;
+  // ADDR_MASK_* marks the offset bits inside a window; 32'h0000ffff gives every device 64 KiB.
+  localparam logic [31:0] ADDR_MASK_ICCM       = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_DEBUG_ROM  = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_DCCM       = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_TIMER0     = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_UART0      = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_SPI0       = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_PWM        = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_GPIO       = 32'h 0000ffff;
+  localparam logic [31:0] ADDR_MASK_PLIC       = 32'h 0000ffff;
+  // Port counts: N_HOST and N_DEVICE equal the number of entries in the two enums below.
+  localparam int N_HOST   = 3;
+  localparam int N_DEVICE = 9;
+  // Device indices. NOTE(review): tl_xbar_main selects devices with literal indices - confirm these values are what users expect.
+  typedef enum int {
+    TlIccm = 0,
+    TlDebugRom = 1,
+    TlDccm = 2,
+    TlTimer0 = 3,
+    TlUart0 = 4,
+    TlSpi0 = 5,
+    TlPwm = 6,
+    TlGpio = 7,
+    TlPlic = 8
+  } tl_device_e;
+  // Host indices: instruction fetch, load/store unit, debug-module system bus access.
+  typedef enum int {
+    TlBrqif = 0,
+    TlBrqlsu = 1,
+    TlDmSba = 2
+  } tl_host_e;
+
+endpackage
diff --git a/verilog/rtl/tl_xbar_main.sv b/verilog/rtl/tl_xbar_main.sv
new file mode 100644
index 0000000..65f3a0b
--- /dev/null
+++ b/verilog/rtl/tl_xbar_main.sv
@@ -0,0 +1,424 @@
+
+// main XBAR 
+
+module tl_xbar_main (
+
+  input clk_i,
+  input rst_ni,
+
+
+  // Host interfaces
+  input  tlul_pkg::tl_h2d_t tl_brqif_i,
+  output tlul_pkg::tl_d2h_t tl_brqif_o,
+  input  tlul_pkg::tl_h2d_t tl_brqlsu_i,
+  output tlul_pkg::tl_d2h_t tl_brqlsu_o,
+  input  tlul_pkg::tl_h2d_t tl_dm_sba_i,
+  output tlul_pkg::tl_d2h_t tl_dm_sba_o,
+
+  // Device interfaces
+  output tlul_pkg::tl_h2d_t tl_iccm_o,
+  input  tlul_pkg::tl_d2h_t tl_iccm_i,
+  output tlul_pkg::tl_h2d_t tl_debug_rom_o,
+  input  tlul_pkg::tl_d2h_t tl_debug_rom_i,
+  output tlul_pkg::tl_h2d_t tl_dccm_o,
+  input  tlul_pkg::tl_d2h_t tl_dccm_i,
+  output tlul_pkg::tl_h2d_t tl_timer0_o,
+  input  tlul_pkg::tl_d2h_t tl_timer0_i,
+  output tlul_pkg::tl_h2d_t tl_uart_o,
+  input  tlul_pkg::tl_d2h_t tl_uart_i,
+  output tlul_pkg::tl_h2d_t tl_spi_o,
+  input  tlul_pkg::tl_d2h_t tl_spi_i,
+  output tlul_pkg::tl_h2d_t tl_pwm_o,
+  input  tlul_pkg::tl_d2h_t tl_pwm_i,
+  output tlul_pkg::tl_h2d_t tl_gpio_o,
+  input  tlul_pkg::tl_d2h_t tl_gpio_i,
+  output tlul_pkg::tl_h2d_t tl_plic_o,
+  input  tlul_pkg::tl_d2h_t tl_plic_i
+
+
+);
+
+  import tlul_pkg::*;
+  import tl_main_pkg::*;
+
+  // scanmode_i is currently not used, but provisioned for future use
+  // this assignment prevents lint warnings
+
+
+// host 1 IFU
+  tlul_pkg::tl_h2d_t brqifu_to_s1n; 
+  tlul_pkg::tl_d2h_t s1n_to_brqifu;
+  logic [1:0] device_sel_1;
+
+// host 2 LSU
+  tlul_pkg::tl_h2d_t brqlsu_to_s1n;
+  tlul_pkg::tl_d2h_t s1n_to_brqlsu;
+  logic [3:0] device_sel_2;
+
+// host 3 debug system bus access
+  tlul_pkg::tl_h2d_t dbg_to_s1n;
+  tlul_pkg::tl_d2h_t s1n_to_dbg;
+  logic [3:0] device_sel_3;
+
+// Dveice connections
+
+  tlul_pkg::tl_h2d_t  h1_dv_i[2];
+  tlul_pkg::tl_d2h_t  h1_dv_o[2];
+  tlul_pkg::tl_h2d_t  h2_dv_i[9];
+  tlul_pkg::tl_d2h_t  h2_dv_o[9];
+  tlul_pkg::tl_h2d_t  h3_dv_i[8];
+  tlul_pkg::tl_d2h_t  h3_dv_o[8];
+
+// ICCM
+  tlul_pkg::tl_h2d_t s1n_sm1_1[3];
+  tlul_pkg::tl_d2h_t sm1_s1n_1[3];
+
+// DCCM
+  tlul_pkg::tl_h2d_t s1n_sm1_2[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_2[2];
+
+// DEBUG ROM
+  tlul_pkg::tl_h2d_t s1n_sm1_4[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_4[2];
+
+// TIMER 
+  tlul_pkg::tl_h2d_t s1n_sm1_5[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_5[2];
+
+// UART
+  tlul_pkg::tl_h2d_t s1n_sm1_6[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_6[2];
+
+// SPI
+  tlul_pkg::tl_h2d_t s1n_sm1_7[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_7[2];
+
+// PWM
+  tlul_pkg::tl_h2d_t s1n_sm1_8[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_8[2];
+
+// GPIO
+  tlul_pkg::tl_h2d_t s1n_sm1_9[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_9[2];
+
+// PLIC
+  tlul_pkg::tl_h2d_t s1n_sm1_10[2];
+  tlul_pkg::tl_d2h_t sm1_s1n_10[2];
+
+// Device 1 host connections (ICCM)
+  assign h1_dv_o[0]   = sm1_s1n_1[0];
+  assign h3_dv_o[1]   = sm1_s1n_1[1];
+  assign h2_dv_o[8]  = sm1_s1n_1[2];
+  assign s1n_sm1_1[0] = h1_dv_i[0];
+  assign s1n_sm1_1[1] = h3_dv_i[1];
+  assign s1n_sm1_1[2] = h2_dv_i[8];
+
+// Device 2 host connections (DCCM)
+  assign h2_dv_o[0] = sm1_s1n_2[0];
+  assign h3_dv_o[0] = sm1_s1n_2[1];
+  assign s1n_sm1_2[0] = h2_dv_i[0];
+  assign s1n_sm1_2[1] = h3_dv_i[0];
+
+// Device 3 host connections (DEBUG ROM)
+  assign h1_dv_o[1] = sm1_s1n_4[0];
+  assign h2_dv_o[1] = sm1_s1n_4[1];
+  assign s1n_sm1_4[0]    = h1_dv_i[1];
+  assign s1n_sm1_4[1]    = h2_dv_i[1];
+
+// Device 4 host connections (TIMER0) 
+  assign h2_dv_o[2] = sm1_s1n_5[0];
+  assign h3_dv_o[2] = sm1_s1n_5[1];
+  assign s1n_sm1_5[0]   = h2_dv_i[2];
+  assign s1n_sm1_5[1]   = h3_dv_i[2];
+
+// Device 5 host connections (UART)
+  assign h2_dv_o[3] = sm1_s1n_6[0];
+  assign h3_dv_o[3] = sm1_s1n_6[1];
+  assign s1n_sm1_6[0]   = h2_dv_i[3];
+  assign s1n_sm1_6[1]   = h3_dv_i[3];
+
+// Device 6 host connections (SPI)
+  assign h2_dv_o[4] = sm1_s1n_7[0];
+  assign h3_dv_o[4] = sm1_s1n_7[1];
+  assign s1n_sm1_7[0]   = h2_dv_i[4];
+  assign s1n_sm1_7[1]   = h3_dv_i[4];
+
+// Device 7 host connections (PWM)
+  assign h2_dv_o[5] = sm1_s1n_8[0];
+  assign h3_dv_o[5] = sm1_s1n_8[1];
+  assign s1n_sm1_8[0]   = h2_dv_i[5];
+  assign s1n_sm1_8[1]   = h3_dv_i[5];
+
+// Device 8 host connections (GPIO)
+  assign h2_dv_o[6] = sm1_s1n_9[0];
+  assign h3_dv_o[6] = sm1_s1n_9[1];
+  assign s1n_sm1_9[0]   = h2_dv_i[6];
+  assign s1n_sm1_9[1]   = h3_dv_i[6];
+
+// Device 9 host connections (PLIC)
+  assign h2_dv_o[7] = sm1_s1n_10[0];
+  assign h3_dv_o[7] = sm1_s1n_10[1];
+  assign s1n_sm1_10[0]   = h2_dv_i[7];
+  assign s1n_sm1_10[1]   = h3_dv_i[7];
+
+
+// hostv 1 connections
+  assign brqifu_to_s1n  = tl_brqif_i;
+  assign tl_brqif_o     = s1n_to_brqifu;
+// hostv 2 connections
+  assign brqlsu_to_s1n  = tl_brqlsu_i;
+  assign tl_brqlsu_o    = s1n_to_brqlsu;
+// host 3 connections
+  assign dbg_to_s1n     = tl_dm_sba_i;
+  assign tl_dm_sba_o    = s1n_to_dbg;
+
+// host 1 device selection
+  always_comb begin 
+      device_sel_1 = 2'd2;
+    if((brqifu_to_s1n.a_address & ~(ADDR_MASK_ICCM)) == ADDR_SPACE_ICCM) begin
+      device_sel_1 = 2'd0;
+    end else if ((brqifu_to_s1n.a_address & ~(ADDR_MASK_DEBUG_ROM)) == ADDR_SPACE_DEBUG_ROM) begin
+      device_sel_1 = 2'd1;
+    end
+  end
+
+// host 1 socket: fans host 1 out to its two devices, steered by device_sel_1
+  tlul_socket_1n #(
+    .HReqDepth (4'h0),   // depth 0 FIFOs; relies on the socket's default Pass = 1
+    .HRspDepth (4'h0),
+    .DReqDepth (8'h0),   // one 4-bit depth per device: N*4 = 8 bits for N = 2
+    .DRspDepth (8'h0),
+    .N         (2)
+  ) host_1 (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (brqifu_to_s1n),
+    .tl_h_o       (s1n_to_brqifu),
+    .tl_d_o       (h1_dv_i),        // requests leaving the socket, one per device
+    .tl_d_i       (h1_dv_o),        // responses returning from the devices
+    .dev_select_i (device_sel_1)
+  );
+
+// host 2 device selection: the first matching address window wins.
+// The value is the device slot on host_2; 4'd9 (== N of host_2) means no window matched.
+  always_comb begin
+    if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_DCCM)) == ADDR_SPACE_DCCM) begin
+      device_sel_2 = 4'd0;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_DEBUG_ROM)) == ADDR_SPACE_DEBUG_ROM) begin
+      device_sel_2 = 4'd1;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_TIMER0))    == ADDR_SPACE_TIMER0) begin
+      device_sel_2 = 4'd2;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_UART0))     == ADDR_SPACE_UART0) begin
+      device_sel_2 = 4'd3;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_SPI0))      == ADDR_SPACE_SPI0) begin
+      device_sel_2 = 4'd4;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_PWM))       == ADDR_SPACE_PWM) begin
+      device_sel_2 = 4'd5;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_GPIO))      == ADDR_SPACE_GPIO) begin
+      device_sel_2 = 4'd6;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_PLIC))      == ADDR_SPACE_PLIC) begin
+      device_sel_2 = 4'd7;
+    end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_ICCM))      == ADDR_SPACE_ICCM) begin
+      device_sel_2 = 4'd8;
+    end else begin
+      device_sel_2 = 4'd9;
+    end
+  end
+
+// host 2 socket
+// Fans host 2 out to nine device slots; the slot is chosen by device_sel_2.
+  tlul_socket_1n #(
+    .HReqDepth (4'h0),
+    .HRspDepth (4'h0),
+    .DReqDepth (36'h0),  // one 4-bit depth per device: N*4 = 36 bits for N = 9
+    .DRspDepth (36'h0),
+    .N         (9)
+  ) host_2 (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (brqlsu_to_s1n),
+    .tl_h_o       (s1n_to_brqlsu),
+    .tl_d_o       (h2_dv_i),        // requests leaving the socket, one per device slot
+    .tl_d_i       (h2_dv_o),        // responses returning from the device slots
+    .dev_select_i (device_sel_2)
+  );
+
+// host 3 device selection
+// Decodes the address driven by host 3 itself (dbg_to_s1n, the bus fed into host_3).
+  always_comb begin
+    // Default 4'd8 (== N of host_3): no window matched, so no device slot is selected.
+     device_sel_3 = 4'd8;
+    // The first matching window wins; the value is the device slot on host_3.
+    if ((dbg_to_s1n.a_address & ~(ADDR_MASK_DCCM)) == ADDR_SPACE_DCCM) begin
+     device_sel_3 = 4'd0; 
+    end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_ICCM))   == ADDR_SPACE_ICCM) begin
+      device_sel_3 = 4'd1;
+    end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_TIMER0)) == ADDR_SPACE_TIMER0) begin
+      device_sel_3 = 4'd2;
+    end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_UART0))  == ADDR_SPACE_UART0) begin
+      device_sel_3 = 4'd3;
+    end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_SPI0))   == ADDR_SPACE_SPI0) begin
+      device_sel_3 = 4'd4;
+    end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_PWM))    == ADDR_SPACE_PWM) begin
+      device_sel_3 = 4'd5;
+    end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_GPIO))   == ADDR_SPACE_GPIO) begin
+      device_sel_3 = 4'd6;
+    end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_PLIC))   == ADDR_SPACE_PLIC) begin
+      device_sel_3 = 4'd7;
+    end
+  end
+// host 3 socket: fans host 3 out to eight device slots, steered by device_sel_3
+  tlul_socket_1n #(
+    .HReqDepth (4'h0),
+    .HRspDepth (4'h0),
+    .DReqDepth (32'h0),  // one 4-bit depth per device: N*4 = 32 bits for N = 8
+    .DRspDepth (32'h0),
+    .N         (8)
+  ) host_3 (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (dbg_to_s1n),
+    .tl_h_o       (s1n_to_dbg),
+    .tl_d_o       (h3_dv_i),        // requests leaving the socket, one per device slot
+    .tl_d_i       (h3_dv_o),        // responses returning from the device slots
+    .dev_select_i (device_sel_3)
+  );
+
+
+// Devices: one M:1 socket per device merges the hosts that can reach it. ICCM has 3 hosts.
+  tlul_socket_m1 #(
+    .HReqDepth (12'h0),  // one 4-bit depth per host port: M*4 = 12 bits for M = 3
+    .HRspDepth (12'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (3)
+  ) ICCM (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_1),      // requests from the hosts, one entry per host port
+    .tl_h_o       (sm1_s1n_1),      // responses back to the hosts
+    .tl_d_o       (tl_iccm_o),
+    .tl_d_i       (tl_iccm_i)
+  );
+// DCCM device socket, 2 host ports (presumably host 2 and host 3; wiring is outside this view)
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) DCCM (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_2),
+    .tl_h_o       (sm1_s1n_2),
+    .tl_d_o       (tl_dccm_o),
+    .tl_d_i       (tl_dccm_i)
+  );
+// Debug ROM device socket, 2 host ports (presumably host 1 and host 2; wiring is outside this view)
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) DEBUG_ROM (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_4),
+    .tl_h_o       (sm1_s1n_4),
+    .tl_d_o       (tl_debug_rom_o),
+    .tl_d_i       (tl_debug_rom_i)
+  );
+// Timer device socket: host port 1 <- host 3 (port 0 presumably host 2; not visible here)
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) TIMER (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_5),
+    .tl_h_o       (sm1_s1n_5),
+    .tl_d_o       (tl_timer0_o),
+    .tl_d_i       (tl_timer0_i)
+  );
+// UART device socket: host port 0 <- host 2, host port 1 <- host 3
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) UART (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_6),
+    .tl_h_o       (sm1_s1n_6),
+    .tl_d_o       (tl_uart_o),
+    .tl_d_i       (tl_uart_i)
+  );
+// SPI device socket: host port 0 <- host 2, host port 1 <- host 3
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) SPI (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_7),
+    .tl_h_o       (sm1_s1n_7),
+    .tl_d_o       (tl_spi_o),
+    .tl_d_i       (tl_spi_i)
+  );
+// PWM device socket: host port 0 <- host 2, host port 1 <- host 3
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) PWM (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_8),
+    .tl_h_o       (sm1_s1n_8),
+    .tl_d_o       (tl_pwm_o),
+    .tl_d_i       (tl_pwm_i)
+  );
+// GPIO device socket: host port 0 <- host 2, host port 1 <- host 3
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) GPIO (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_9),
+    .tl_h_o       (sm1_s1n_9),
+    .tl_d_o       (tl_gpio_o),
+    .tl_d_i       (tl_gpio_i)
+  );
+// PLIC device socket: host port 0 <- host 2, host port 1 <- host 3
+  tlul_socket_m1 #(
+    .HReqDepth (8'h0),   // one 4-bit depth per host port: M*4 = 8 bits for M = 2
+    .HRspDepth (8'h0),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (2)
+  ) PLIC (
+    .clk_i        (clk_i),
+    .rst_ni       (rst_ni),
+    .tl_h_i       (s1n_sm1_10),
+    .tl_h_o       (sm1_s1n_10),
+    .tl_d_o       (tl_plic_o),
+    .tl_d_i       (tl_plic_i)
+  );
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/tlul_adapter_reg.sv b/verilog/rtl/tlul_adapter_reg.sv
new file mode 100644
index 0000000..c801bc5
--- /dev/null
+++ b/verilog/rtl/tlul_adapter_reg.sv
@@ -0,0 +1,125 @@
+module tlul_adapter_reg import tlul_pkg::*; #( // TL-UL device port -> register read/write port
+  parameter  int RegAw = 8,
+  parameter  int RegDw = 32, // Shall be matched with TL_DW
+  localparam int RegBw = RegDw/8
+) (
+  input clk_i,
+  input rst_ni,
+
+  // TL-UL interface
+  input  tl_h2d_t tl_i,
+  output tl_d2h_t tl_o,
+
+  // Register interface
+  output logic             re_o,     // read strobe, high in the cycle a Get is accepted
+  output logic             we_o,     // write strobe, high in the cycle a Put* is accepted
+  output logic [RegAw-1:0] addr_o,   // word-aligned register address
+  output logic [RegDw-1:0] wdata_o,
+  output logic [RegBw-1:0] be_o,
+  input        [RegDw-1:0] rdata_i,  // sampled in the cycle the request is accepted
+  input                    error_i   // sampled together with rdata_i
+);
+
+  localparam int IW  = $bits(tl_i.a_source);
+  localparam int SZW = $bits(tl_i.a_size);
+
+  logic outstanding;    // Indicates current request is pending
+  logic a_ack, d_ack;
+
+  logic [RegDw-1:0] rdata;
+  logic             error, err_internal;
+
+  logic addr_align_err;     // Size and alignment
+//  logic malformed_meta_err; // User signal format error or unsupported
+  logic tl_err;             // Common TL-UL error checker
+
+  logic [IW-1:0]  reqid;
+  logic [SZW-1:0] reqsz;
+  tlul_pkg::tl_d_m_op       rspop;
+
+  logic rd_req, wr_req;
+
+  // Channel handshakes: a_ack = request accepted, d_ack = response accepted
+  assign a_ack   = tl_i.a_valid & tl_o.a_ready;
+  assign d_ack   = tl_o.d_valid & tl_i.d_ready;
+  // Request signal
+  assign wr_req  = a_ack & ((tl_i.a_opcode == PutFullData) | (tl_i.a_opcode == PutPartialData));
+  assign rd_req  = a_ack & (tl_i.a_opcode == Get);
+
+  // Requests flagged by the internal checks never reach the register side
+  assign we_o    = wr_req & ~err_internal;
+  assign re_o    = rd_req & ~err_internal;
+  assign addr_o  = {tl_i.a_address[RegAw-1:2], 2'b00}; // generate always word-align
+  assign wdata_o = tl_i.a_data;
+  assign be_o    = tl_i.a_mask;
+
+  // Set when a request is accepted, cleared when its response is accepted
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni)    outstanding <= 1'b0;
+    else if (a_ack) outstanding <= 1'b1;
+    else if (d_ack) outstanding <= 1'b0;
+  end
+
+  // Remember the request attributes that are echoed back in the response
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      reqid <= '0;
+      reqsz <= '0;
+      rspop <= AccessAck;
+    end else if (a_ack) begin
+      reqid <= tl_i.a_source;
+      reqsz <= tl_i.a_size;
+      // Return AccessAckData regardless of error
+      rspop <= (rd_req) ? AccessAckData : AccessAck ;
+    end
+  end
+
+  // Capture read data and error status when the request is accepted
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rdata  <= '0;
+      error <= 1'b0;
+    end else if (a_ack) begin
+      rdata <= (err_internal) ? '1 : rdata_i;  // all ones on an internally detected error
+      error <= error_i | err_internal;
+    end
+  end
+
+  // A new request is accepted only while none is pending; the response stays valid meanwhile
+  assign tl_o = '{
+    a_ready:  ~outstanding,
+    d_valid:  outstanding,
+    d_opcode: rspop,
+    d_param:  '0,
+    d_size:   reqsz,
+    d_source: reqid,
+    d_sink:   '0,
+    d_data:   rdata,
+    d_error: error
+  };
+
+  ////////////////////
+  // Error Handling //
+  ////////////////////
+  assign err_internal = addr_align_err | tl_err ;
+
+  // malformed_meta_err
+  //    Raised if not supported feature is turned on or user signal has malformed
+ // assign malformed_meta_err = (tl_i.a_user.parity_en == 1'b1);
+
+  // addr_align_err
+  //    Raised if addr isn't aligned with the size
+  //    Read size error is checked in tlul_assert.sv
+  //    Here it is added due to the limitation of the register interface.
+  always_comb begin
+    if (wr_req) begin
+      // Only word-align is accepted based on comportability spec
+      addr_align_err = |tl_i.a_address[1:0];
+    end else begin
+      // No request
+      addr_align_err = 1'b0;
+    end
+  end
+
+  // tl_err : separate checker
+  tlul_err u_err (
+    .tl_i (tl_i),
+    .err_o (tl_err)
+  );
+
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/tlul_err.sv b/verilog/rtl/tlul_err.sv
new file mode 100644
index 0000000..32d7b67
--- /dev/null
+++ b/verilog/rtl/tlul_err.sv
@@ -0,0 +1,86 @@
+
+
+module tlul_err import tlul_pkg::*; (
+
+  input tl_h2d_t tl_i,
+
+  output logic err_o
+);
+
+  // Combinational request checker: err_o is low only while tl_i carries a valid
+  // request whose opcode, size, address alignment and byte mask agree with
+  // each other. In every other case (including a_valid low) err_o is high.
+
+  localparam int IW  = $bits(tl_i.a_source);
+  localparam int SZW = $bits(tl_i.a_size);
+  localparam int DW  = $bits(tl_i.a_data);
+  localparam int MW  = $bits(tl_i.a_mask);
+  localparam int SubAW = $clog2(DW/8);  // address bits that select a byte within a word
+
+  // Opcode decode
+  logic op_full, op_partial, op_get;
+  assign op_get     = (tl_i.a_opcode == Get);
+  assign op_full    = (tl_i.a_opcode == PutFullData);
+  assign op_partial = (tl_i.a_opcode == PutPartialData);
+
+  // Only the three opcodes decoded above are permitted
+  logic opcode_allowed, a_config_allowed;
+  assign opcode_allowed = op_full | op_partial | op_get;
+
+  // Individual A-channel checks, combined into a_config_allowed below
+  logic addr_sz_chk;    // address is aligned to the access size
+  logic mask_chk;       // no a_mask bit is set on an inactive byte lane
+  logic fulldata_chk;   // PutFullData: a_mask covers the whole access
+
+  // Byte lane addressed by the low address bits
+  logic [MW-1:0] mask;
+  assign mask = (1 << tl_i.a_address[SubAW-1:0]);
+
+  always_comb begin
+    // Every check fails by default; only a valid request with a supported
+    // a_size can turn the individual checks on.
+    addr_sz_chk  = 1'b0;
+    mask_chk     = 1'b0;
+    fulldata_chk = 1'b0;
+
+    if (tl_i.a_valid) begin
+      unique case (tl_i.a_size)
+        'h0: begin // 1 Byte: every address is aligned
+          addr_sz_chk  = 1'b1;
+          mask_chk     = ~|(tl_i.a_mask & ~mask);
+          fulldata_chk = |(tl_i.a_mask & mask);
+        end
+
+        'h1: begin // 2 Byte: must start on an even address
+          addr_sz_chk  = ~tl_i.a_address[0];
+          // a_address[1] picks the half word: for the upper half lanes [1:0]
+          // must be idle, for the lower half lanes [3:2] must be idle.
+          mask_chk     = (tl_i.a_address[1]) ? ~|tl_i.a_mask[1:0]
+                       : ~|tl_i.a_mask[3:2];
+          fulldata_chk = (tl_i.a_address[1]) ? &tl_i.a_mask[3:2] : &tl_i.a_mask[1:0] ;
+        end
+
+        'h2: begin // 4 Byte: must be word aligned
+          addr_sz_chk  = ~|tl_i.a_address[SubAW-1:0];
+          mask_chk     = 1'b1;
+          fulldata_chk = &tl_i.a_mask[3:0];
+        end
+
+        default: begin // unsupported size: keep the failing defaults
+        end
+      endcase
+    end
+  end
+
+  // A request is well-formed when it is aligned, its mask is clean and, for
+  // PutFullData only, the mask covers the whole access.
+  assign a_config_allowed = (op_get | op_partial | fulldata_chk)
+                          & addr_sz_chk
+                          & mask_chk ;
+
+  // Anything that doesn't fall into the permitted category raises an error
+  assign err_o = ~opcode_allowed | ~a_config_allowed;
+
+
+endmodule
+
diff --git a/verilog/rtl/tlul_err_resp.sv b/verilog/rtl/tlul_err_resp.sv
new file mode 100644
index 0000000..bd5a73c
--- /dev/null
+++ b/verilog/rtl/tlul_err_resp.sv
@@ -0,0 +1,56 @@
+
+// TL-UL error responder module, used by tlul_socket_1n to help response
+// to requests to no correct address space. Responses are always one cycle
+// after request with no stalling unless response is stuck on the way out.
+//`include "/home/sajjad/Shaheen-sv/src/buraq_core_top/ibex_core/tlul_pkg.sv"
+module tlul_err_resp (  // answers every accepted request with an error response
+  input                     clk_i,
+  input                     rst_ni,
+  input  tlul_pkg::tl_h2d_t tl_h_i,
+  output tlul_pkg::tl_d2h_t tl_h_o
+);
+  import tlul_pkg::*;
+
+  tlul_pkg::tl_a_m_op        err_opcode;  // attributes of the request being answered
+//  tlul_pkg::tl_a_m_op    get;
+  logic [$bits(tl_h_i.a_source)-1:0] err_source;
+  logic [$bits(tl_h_i.a_size)-1:0]   err_size;
+  logic                              err_req_pending, err_rsp_pending;
+
+  // err_req_pending: a request was accepted in the previous cycle and must be answered
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      err_req_pending <= 1'b0;
+      err_source      <= {tlul_pkg::TL_AIW{1'b0}};
+      err_opcode      <= tlul_pkg::Get;
+      err_size        <= '0;
+    end else if (tl_h_i.a_valid && tl_h_o.a_ready) begin
+      err_req_pending <= 1'b1;
+      err_source      <= tl_h_i.a_source;
+      err_opcode      <= tl_h_i.a_opcode;
+      err_size        <= tl_h_i.a_size;
+    end else if (!err_rsp_pending) begin
+      err_req_pending <= 1'b0;
+    end
+  end
+
+  // Not ready while a response is stuck, or while one is being offered without d_ready
+  assign tl_h_o.a_ready  = ~err_rsp_pending & ~(err_req_pending & ~tl_h_i.d_ready);
+  assign tl_h_o.d_valid  = err_req_pending | err_rsp_pending;
+  assign tl_h_o.d_data   = '1; // Return all F
+  assign tl_h_o.d_source = err_source;
+  assign tl_h_o.d_sink   = '0;
+  assign tl_h_o.d_param  = '0;
+  assign tl_h_o.d_size   = err_size;
+  assign tl_h_o.d_opcode = (err_opcode == tlul_pkg::Get) ? AccessAckData : AccessAck;
+  assign tl_h_o.d_error  = 1'b1;  // every response from this module is an error
+
+  // err_rsp_pending: a response was offered but d_ready was low, so it must be held
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      err_rsp_pending <= 1'b0;
+    end else if ((err_req_pending || err_rsp_pending) && !tl_h_i.d_ready) begin
+      err_rsp_pending <= 1'b1;
+    end else begin
+      err_rsp_pending <= 1'b0;
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/tlul_fifo_sync.sv b/verilog/rtl/tlul_fifo_sync.sv
new file mode 100644
index 0000000..917e059
--- /dev/null
+++ b/verilog/rtl/tlul_fifo_sync.sv
@@ -0,0 +1,86 @@
+
+// TL-UL fifo, used to add elasticity to a TL-UL bus within a single clock
+// domain (clk_i).  This instantiates two synchronous FIFOs, one for the
+// request side, and one for the response side.
+
+module tlul_fifo_sync #(
+  parameter bit          ReqPass = 1'b1,
+  parameter bit          RspPass = 1'b1,
+  parameter int unsigned ReqDepth = 0,
+  parameter int unsigned RspDepth = 0,
+  parameter int unsigned SpareReqW = 1,   // width of the side-band bits carried with requests
+  parameter int unsigned SpareRspW = 1    // width of the side-band bits carried with responses
+) (
+  input                     clk_i,
+  input                     rst_ni,
+  input  tlul_pkg::tl_h2d_t tl_h_i,
+  output tlul_pkg::tl_d2h_t tl_h_o,
+  output tlul_pkg::tl_h2d_t tl_d_o,
+  input  tlul_pkg::tl_d2h_t tl_d_i,
+  input  [SpareReqW-1:0]    spare_req_i,
+  output [SpareReqW-1:0]    spare_req_o,
+  input  [SpareRspW-1:0]    spare_rsp_i,
+  output [SpareRspW-1:0]    spare_rsp_o
+);
+
+  // Put everything on the request side into one FIFO
+  // (-2: a_valid and d_ready are not stored, they are handled as handshake signals)
+  localparam int unsigned REQFIFO_WIDTH = $bits(tlul_pkg::tl_h2d_t) -2 + SpareReqW;
+
+  fifo_sync #(.Width(REQFIFO_WIDTH), .Pass(ReqPass), .Depth(ReqDepth)) reqfifo (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .clr_i         (1'b0          ),
+    .wvalid_i      (tl_h_i.a_valid),
+    .wready_o      (tl_h_o.a_ready),
+    .wdata_i       ({tl_h_i.a_opcode ,  // field order must match rdata_o below
+                     tl_h_i.a_param  ,
+                     tl_h_i.a_size   ,
+                     tl_h_i.a_source ,
+                     tl_h_i.a_address,
+                     tl_h_i.a_mask   ,
+                     tl_h_i.a_data   ,
+                     spare_req_i}),
+    .depth_o       (),
+    .rvalid_o      (tl_d_o.a_valid),
+    .rready_i      (tl_d_i.a_ready),
+    .rdata_o       ({tl_d_o.a_opcode ,
+                     tl_d_o.a_param  ,
+                     tl_d_o.a_size   ,
+                     tl_d_o.a_source ,
+                     tl_d_o.a_address,
+                     tl_d_o.a_mask   ,
+                     tl_d_o.a_data   ,
+                     spare_req_o}));
+
+  // Put everything on the response side into the other FIFO
+  // (-2: d_valid and a_ready are not stored, they are handled as handshake signals)
+  localparam int unsigned RSPFIFO_WIDTH = $bits(tlul_pkg::tl_d2h_t) -2 + SpareRspW;
+
+  fifo_sync #(.Width(RSPFIFO_WIDTH), .Pass(RspPass), .Depth(RspDepth)) rspfifo (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .clr_i         (1'b0          ),
+    .wvalid_i      (tl_d_i.d_valid),
+    .wready_o      (tl_d_o.d_ready),
+    .wdata_i       ({tl_d_i.d_opcode,   // field order must match rdata_o below
+                     tl_d_i.d_param ,
+                     tl_d_i.d_size  ,
+                     tl_d_i.d_source,
+                     tl_d_i.d_sink  ,
+                     (tl_d_i.d_opcode == tlul_pkg::AccessAckData) ? tl_d_i.d_data :  // data is
+                                                                    {tlul_pkg::TL_DW{1'b0}} , // zeroed otherwise
+                     tl_d_i.d_error ,
+                     spare_rsp_i}),
+    .depth_o       (),
+    .rvalid_o      (tl_h_o.d_valid),
+    .rready_i      (tl_h_i.d_ready),
+    .rdata_o       ({tl_h_o.d_opcode,
+                     tl_h_o.d_param ,
+                     tl_h_o.d_size  ,
+                     tl_h_o.d_source,
+                     tl_h_o.d_sink  ,
+                     tl_h_o.d_data  ,
+                     tl_h_o.d_error ,
+                     spare_rsp_o}));
+
+endmodule
diff --git a/verilog/rtl/tlul_host_adapter.sv b/verilog/rtl/tlul_host_adapter.sv
new file mode 100644
index 0000000..8d18df5
--- /dev/null
+++ b/verilog/rtl/tlul_host_adapter.sv
@@ -0,0 +1,95 @@
+// tlul_adapter (Host adapter) converts basic req/grant/rvalid into TL-UL interface. If
+// MAX_REQS == 1 it is purely combinational logic. If MAX_REQS > 1 flops are required.
+//
+// The host driving the adapter is responsible for ensuring it doesn't have more requests in flight
+// than the specified MAX_REQS.
+//
+// The outgoing address is always word aligned. The access size is always the word size (as
+// specified by TL_DW). For write accesses that occupy all lanes the operation is PutFullData,
+// otherwise it is PutPartialData, mask is generated from be_i. For reads all lanes are enabled as
+// required by TL-UL (every bit in mask set).
+//
+// When MAX_REQS > 1 tlul_adapter_host does not do anything to order responses from the TL-UL
+// interface which could return them out of order. It is the host's responsibility to either only
+// have outstanding requests to an address space it knows will return responses in order or to not
+// care about out of order responses (note that if read data is returned out of order there is no
+// way to determine this).
+
+module tlul_host_adapter #(
+    parameter int unsigned MAX_REQS = 1   // max requests the host keeps in flight
+) (
+    input clk_i,
+    input rst_ni,
+// interface with host agent (req/gnt request handshake, valid/err response flags)
+    input                               req_i,
+    output logic                        gnt_o,
+    input logic [tlul_pkg::TL_AW-1:0]  addr_i,
+    input logic                         we_i,
+    input logic [tlul_pkg::TL_DW-1:0]  wdata_i,
+    input logic [tlul_pkg::TL_DBW-1:0] be_i,
+    output logic                        valid_o,
+    output logic [tlul_pkg::TL_DW-1:0] rdata_o,
+    output logic                        err_o,
+// interface with other tilelink agents or tlul interface
+    output tlul_pkg::tl_h2d_t          tl_h_c_a, // tilelink host channel A
+    input  tlul_pkg::tl_d2h_t          tl_h_c_d  // tilelink host channel D
+);
+
+    localparam int WordSize = $clog2(tlul_pkg::TL_DBW);  // byte-offset address bits
+
+    logic [tlul_pkg::TL_AIW-1:0] tl_source;
+    logic [tlul_pkg::TL_DBW-1:0] tl_be;
+
+    if(MAX_REQS == 1) begin
+        // Single request in flight: a constant source ID is sufficient
+        assign tl_source = '0;
+    end else begin
+        // Source ID is a counter that advances on every granted request and
+        // wraps after MAX_REQS-1
+        localparam int ReqNumW = $clog2(MAX_REQS);
+        logic [ReqNumW-1:0] source_d, source_q;
+
+        // Asynchronous active-low reset, matching the rst_ni convention of the
+        // other TL-UL modules
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if(!rst_ni) begin
+                source_q <= '0;
+            end else begin
+                source_q <= source_d;
+            end
+        end
+
+        always_comb begin
+            source_d = source_q;
+            if(req_i && gnt_o) begin
+                if(source_q == MAX_REQS -1) source_d = '0;
+                else source_d = source_q + 1;
+            end
+        end
+        assign tl_source = tlul_pkg::TL_AIW'(source_q);
+    end
+
+// For TL-UL Get opcode all active bytes must have their mask bit set, so all reads get all tl_be
+// bits set. For writes the supplied be_i is used as the mask.
+    assign tl_be = ~we_i ? {tlul_pkg::TL_DBW{1'b1}} : be_i;
+
+    assign tl_h_c_a = '{
+        a_valid:    req_i,
+        a_opcode:   (~we_i) ? tlul_pkg::Get         :
+                    (&be_i) ? tlul_pkg::PutFullData :
+                              tlul_pkg::PutPartialData,
+        a_param:    3'h0,
+        a_size:     tlul_pkg::TL_SZW'(WordSize),
+        a_mask:     tl_be,
+        a_source:   tl_source,
+        // Word-align the address; the upper bound follows TL_AW rather than a literal
+        a_address:  {addr_i[tlul_pkg::TL_AW-1:WordSize], {WordSize{1'b0}}},
+        a_data:     wdata_i,
+        d_ready:    1'b1       // responses are always accepted immediately
+    };
+
+    // Channel D and the request grant are forwarded to the host unmodified
+    assign gnt_o   = tl_h_c_d.a_ready;
+    assign err_o   = tl_h_c_d.d_error;
+    assign valid_o = tl_h_c_d.d_valid;
+    assign rdata_o = tl_h_c_d.d_data;
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/tlul_pkg.sv b/verilog/rtl/tlul_pkg.sv
new file mode 100644
index 0000000..9da373f
--- /dev/null
+++ b/verilog/rtl/tlul_pkg.sv
@@ -0,0 +1,120 @@
+package tlul_pkg;
+
+
+    parameter ArbiterImpl = "PPC";
+function automatic integer _clog2(integer value);  // ceil(log2(value)) without $clog2
+    integer bits;
+    bits = 0;
+    for (value = value - 1; value > 0; value = value >> 1) begin
+      bits = bits + 1;  // one result bit per shift needed to clear (value - 1)
+    end
+    return bits;
+  endfunction
+
+
+  /**
+   * Math function: Number of bits needed to address |value| items.
+   *
+   *                  0        for value == 0
+   * vbits =          1        for value == 1
+   *         ceil(log2(value)) for value > 1
+   *
+   *
+   * The primary use case for this function is the definition of registers/arrays
+   * which are wide enough to contain |value| items.
+   *
+   * This function identical to $clog2() for all input values except the value 1;
+   * it could be considered an "enhanced" $clog2() function.
+   *
+   *
+   * Example 1:
+   *   parameter Items = 1;
+   *   localparam ItemsWidth = vbits(Items); // 1
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [0:0]
+   *
+   * Example 2:
+   *   parameter Items = 64;
+   *   localparam ItemsWidth = vbits(Items); // 6
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [5:0]
+   *
+   * Note: If you want to store the number "value" inside a register, you need
+   * a register with size vbits(value + 1), since you also need to store
+   * the number 0.
+   *
+   * Example 3:
+   *   logic [vbits(64)-1:0]     store_64_logic_values; // width is [5:0]
+   *   logic [vbits(64 + 1)-1:0] store_number_64;       // width is [6:0]
+   */
+  function automatic integer vbits(integer value);
+`ifdef XCELIUM
+    // Xcelium < 19.10 does not support $clog2() in this context, so fall back
+    // to the pure-function implementation. _clog2() is the copy declared
+    // earlier in this package (tlul_pkg); the fallback must not reach into
+    // prim_util_pkg, which this file neither imports nor otherwise depends
+    // on. Remove this workaround once a newer Xcelium version is required.
+    // See #2579 and #2597.
+    // (value == 1) is special-cased so that a single item still gets 1 bit.
+    return (value == 1) ? 1 : _clog2(value);
+`else
+    return (value == 1) ? 1 : $clog2(value);
+`endif
+  endfunction
+
+    localparam int TL_AW=32;            // width of a_address
+    localparam int TL_DW=32;            // width of a_data / d_data
+    localparam int TL_AIW=8;            // width of a_source / d_source
+    localparam int TL_DIW=1;            // width of d_sink
+    localparam int TL_DBW=(TL_DW>>3);   // bytes per data word = width of a_mask
+    localparam int TL_SZW=$clog2($clog2(TL_DBW)+1);  // width of a_size / d_size
+
+// opcodes for channel A messages/operations defined in official TileLink spec
+    typedef enum logic [2:0] {
+        PutFullData     = 3'h0,
+        PutPartialData  = 3'h1,
+        Get             = 3'h4
+    } tl_a_m_op;
+// opcodes for channel D messages/operations defined in official TileLink spec
+    typedef enum logic [2:0] {
+        AccessAck     = 3'h0,
+        AccessAckData = 3'h1
+    } tl_d_m_op;
+
+    typedef struct packed {  // host -> device: channel A fields plus the D-channel ready
+        logic                        a_valid;
+        tl_a_m_op                    a_opcode;
+        logic           [2:0]        a_param;
+        logic           [TL_SZW-1:0] a_size;
+        logic           [TL_AIW-1:0] a_source;
+        logic           [TL_AW-1:0]  a_address;
+        logic           [TL_DBW-1:0] a_mask;
+        logic           [TL_DW-1:0]  a_data;
+        logic                        d_ready;
+    } tl_h2d_t;
+
+    // Default host -> device value: no request (a_valid = 0), responses accepted
+    localparam tl_h2d_t TL_H2D_DEFAULT = '{
+        d_ready:  1'b1,
+        a_opcode: tl_a_m_op'('0),
+        default:  '0
+    };
+
+    typedef struct packed {  // device -> host: channel D fields plus the A-channel ready
+        logic                   d_valid;
+        tl_d_m_op               d_opcode;
+        logic             [2:0] d_param;
+        logic      [TL_SZW-1:0] d_size;
+        logic      [TL_AIW-1:0] d_source;
+        logic      [TL_DIW-1:0] d_sink;
+        logic      [TL_DW-1:0]  d_data;
+        logic                   d_error;
+        logic                   a_ready;
+    } tl_d2h_t;
+
+    // Default device -> host value: no response (d_valid = 0), requests accepted
+    localparam tl_d2h_t TL_D2H_DEFAULT = '{
+        a_ready:  1'b1,
+        d_opcode: tl_d_m_op'('0),
+        default:  '0
+    };
+
+
+
+endpackage
diff --git a/verilog/rtl/tlul_socket_1n.sv b/verilog/rtl/tlul_socket_1n.sv
new file mode 100644
index 0000000..8ab6bdb
--- /dev/null
+++ b/verilog/rtl/tlul_socket_1n.sv
@@ -0,0 +1,209 @@
+
+// TL-UL socket 1:N module
+//
+// configuration settings
+//   device_count: 4
+//
+// Verilog parameters
+//   HReqPass:      if 1 then host requests can pass through on empty fifo,
+//                  default 1
+//   HRspPass:      if 1 then host responses can pass through on empty fifo,
+//                  default 1
+//   DReqPass:      (one per device_count) if 1 then device i requests can
+//                  pass through on empty fifo, default 1
+//   DRspPass:      (one per device_count) if 1 then device i responses can
+//                  pass through on empty fifo, default 1
+//   HReqDepth:     Depth of host request FIFO, default 2
+//   HRspDepth:     Depth of host response FIFO, default 2
+//   DReqDepth:     (one per device_count) Depth of device i request FIFO,
+//                  default 2
+//   DRspDepth:     (one per device_count) Depth of device i response FIFO,
+//                  default 2
+//
+// Requests must stall to one device until all responses from other devices
+// have returned.  Need to keep a counter of all outstanding requests and
+// wait until that counter is zero before switching devices.
+//
+// This module will return a request error if the input value of 'dev_select_i'
+// is not within the range 0..N-1. Thus the instantiator of the socket
+// can indicate error by any illegal value of dev_select_i. 4'b1111 is
+// recommended for visibility
+//
+// The maximum value of N is 15
+
+
+module tlul_socket_1n #(
+  parameter int unsigned  N         = 4,
+  parameter bit           HReqPass  = 1'b1,
+  parameter bit           HRspPass  = 1'b1,
+  parameter bit [N-1:0]   DReqPass  = {N{1'b1}},
+  parameter bit [N-1:0]   DRspPass  = {N{1'b1}},
+  parameter bit [3:0]     HReqDepth = 4'h2,
+  parameter bit [3:0]     HRspDepth = 4'h2,
+  parameter bit [N*4-1:0] DReqDepth = {N{4'h2}},  // bits [i*4+:4] = depth for device i
+  parameter bit [N*4-1:0] DRspDepth = {N{4'h2}},
+  localparam int unsigned NWD       = $clog2(N+1) // derived parameter
+) (
+  input                     clk_i,
+  input                     rst_ni,
+  input  tlul_pkg::tl_h2d_t tl_h_i,
+  output tlul_pkg::tl_d2h_t tl_h_o,
+  output tlul_pkg::tl_h2d_t tl_d_o    [N],
+  input  tlul_pkg::tl_d2h_t tl_d_i    [N],
+  input  [NWD-1:0]          dev_select_i   // 0..N-1 selects a device, N the error responder
+);
+
+  // Since our steering is done after potential FIFOing, we need to
+  // shove our device select bits into spare bits of reqfifo
+
+  // instantiate the host fifo, create intermediate bus 't'
+
+  // FIFO'd version of device select
+  logic [NWD-1:0] dev_select_t;
+
+  tlul_pkg::tl_h2d_t   tl_t_o;
+  tlul_pkg::tl_d2h_t   tl_t_i;
+
+  tlul_fifo_sync #(
+    .ReqPass(HReqPass),
+    .RspPass(HRspPass),
+    .ReqDepth(HReqDepth),
+    .RspDepth(HRspDepth),
+    .SpareReqW(NWD)
+  ) fifo_h (
+    .clk_i,
+    .rst_ni,
+    .tl_h_i,
+    .tl_h_o,
+    .tl_d_o     (tl_t_o),
+    .tl_d_i     (tl_t_i),
+    .spare_req_i (dev_select_i),
+    .spare_req_o (dev_select_t),
+    .spare_rsp_i (1'b0),
+    .spare_rsp_o ());
+
+
+  // We need to keep track of how many requests are outstanding,
+  // and to which device. New requests are compared to this and
+  // stall until that number is zero.
+  localparam int MaxOutstanding = 2**tlul_pkg::TL_AIW; // Up to 256 outstanding
+  localparam int OutstandingW = $clog2(MaxOutstanding+1);
+  logic [OutstandingW-1:0] num_req_outstanding;
+  logic [NWD-1:0]          dev_select_outstanding;
+  logic                    hold_all_requests;
+  logic                    accept_t_req, accept_t_rsp;
+
+  assign  accept_t_req = tl_t_o.a_valid & tl_t_i.a_ready;
+  assign  accept_t_rsp = tl_t_i.d_valid & tl_t_o.d_ready;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      num_req_outstanding <= '0;
+      dev_select_outstanding <= '0;
+    end else if (accept_t_req) begin
+      if (!accept_t_rsp) begin
+        // request accepted without a response in the same cycle: one more in flight
+        num_req_outstanding <= num_req_outstanding + 1'b1;
+      end
+      dev_select_outstanding <= dev_select_t;
+    end else if (accept_t_rsp) begin
+      num_req_outstanding <= num_req_outstanding - 1'b1;
+    end
+  end
+
+  // Stall a request to a different device while responses are still owed
+  assign hold_all_requests =
+      (num_req_outstanding != '0) &
+      (dev_select_t != dev_select_outstanding);
+
+  // Make N copies of 't' request side with modified reqvalid, call
+  // them 'u[0]' .. 'u[n-1]'.
+  // Entry N of the arrays below belongs to the error responder.
+  tlul_pkg::tl_h2d_t   tl_u_o [N+1];
+  tlul_pkg::tl_d2h_t   tl_u_i [N+1];
+
+  for (genvar i = 0 ; i < N ; i++) begin : gen_u_o
+    assign tl_u_o[i].a_valid   = tl_t_o.a_valid &
+                                 (dev_select_t == NWD'(i)) &
+                                 ~hold_all_requests;
+    assign tl_u_o[i].a_opcode  = tl_t_o.a_opcode;
+    assign tl_u_o[i].a_param   = tl_t_o.a_param;
+    assign tl_u_o[i].a_size    = tl_t_o.a_size;
+    assign tl_u_o[i].a_source  = tl_t_o.a_source;
+    assign tl_u_o[i].a_address = tl_t_o.a_address;
+    assign tl_u_o[i].a_mask    = tl_t_o.a_mask;
+    assign tl_u_o[i].a_data    = tl_t_o.a_data;
+  end
+
+  tlul_pkg::tl_d2h_t tl_t_p ;
+
+  // for the returning reqready, only look at the device we're addressing
+  logic hfifo_reqready;
+  always_comb begin
+    hfifo_reqready = tl_u_i[N].a_ready; // default to error
+    for (int idx = 0 ; idx < N ; idx++) begin
+      //if (dev_select_outstanding == NWD'(idx)) hfifo_reqready = tl_u_i[idx].a_ready;
+      if (dev_select_t == NWD'(idx)) hfifo_reqready = tl_u_i[idx].a_ready;
+    end
+    if (hold_all_requests) hfifo_reqready = 1'b0;
+  end
+  // Adding a_valid as a qualifier. This prevents the a_ready from having unknown value
+  // when the address is unknown and the Host TL-UL FIFO is bypass mode.
+  assign tl_t_i.a_ready = tl_t_o.a_valid & hfifo_reqready;
+
+  // Responses are taken from the device that received the most recent request
+  always_comb begin
+    tl_t_p = tl_u_i[N];
+    for (int idx = 0 ; idx < N ; idx++) begin
+      if (dev_select_outstanding == NWD'(idx)) tl_t_p = tl_u_i[idx];
+    end
+  end
+  assign tl_t_i.d_valid  = tl_t_p.d_valid ;
+  assign tl_t_i.d_opcode = tl_t_p.d_opcode;
+  assign tl_t_i.d_param  = tl_t_p.d_param ;
+  assign tl_t_i.d_size   = tl_t_p.d_size  ;
+  assign tl_t_i.d_source = tl_t_p.d_source;
+  assign tl_t_i.d_sink   = tl_t_p.d_sink  ;
+  assign tl_t_i.d_data   = tl_t_p.d_data  ;
+  assign tl_t_i.d_error  = tl_t_p.d_error ;
+
+
+  // accept responses from devices when selected if upstream is accepting
+  for (genvar i = 0 ; i < N+1 ; i++) begin : gen_u_o_d_ready
+    assign tl_u_o[i].d_ready = tl_t_o.d_ready;
+  end
+
+  // finally instantiate all device FIFOs and the error responder
+  for (genvar i = 0 ; i < N ; i++) begin : gen_dfifo
+    tlul_fifo_sync #(
+      .ReqPass(DReqPass[i]),
+      .RspPass(DRspPass[i]),
+      .ReqDepth(DReqDepth[i*4+:4]),
+      .RspDepth(DRspDepth[i*4+:4])
+    ) fifo_d (
+      .clk_i,
+      .rst_ni,
+      .tl_h_i      (tl_u_o[i]),
+      .tl_h_o      (tl_u_i[i]),
+      .tl_d_o      (tl_d_o[i]),
+      .tl_d_i      (tl_d_i[i]),
+      .spare_req_i (1'b0),
+      .spare_req_o (),
+      .spare_rsp_i (1'b0),
+      .spare_rsp_o ());
+  end
+
+  // Requests whose select equals N are routed to the error responder
+  assign tl_u_o[N].a_valid     = tl_t_o.a_valid &
+                                 (dev_select_t == NWD'(N)) &
+                                 ~hold_all_requests;
+  assign tl_u_o[N].a_opcode    = tl_t_o.a_opcode;
+  assign tl_u_o[N].a_param     = tl_t_o.a_param;
+  assign tl_u_o[N].a_size      = tl_t_o.a_size;
+  assign tl_u_o[N].a_source    = tl_t_o.a_source;
+  assign tl_u_o[N].a_address   = tl_t_o.a_address;
+  assign tl_u_o[N].a_mask      = tl_t_o.a_mask;
+  assign tl_u_o[N].a_data      = tl_t_o.a_data;
+  tlul_err_resp err_resp (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .tl_h_i     (tl_u_o[N]),
+    .tl_h_o     (tl_u_i[N]));
+
+endmodule
diff --git a/verilog/rtl/tlul_socket_m1.sv b/verilog/rtl/tlul_socket_m1.sv
new file mode 100644
index 0000000..2a9d335
--- /dev/null
+++ b/verilog/rtl/tlul_socket_m1.sv
@@ -0,0 +1,245 @@
+
+// TL-UL socket M:1 module
+//
+// Verilog parameters
+//   M:             Number of host ports.
+//   HReqPass:      M bit array to allow requests to pass through the host i
+//                  FIFO with no clock delay if the request FIFO is empty. If
+//                  1'b0, at least one clock cycle of latency is created.
+//                  Default is 1'b1.
+//   HRspPass:      Same as HReqPass but for host response FIFO.
+//   HReqDepth:     Mx4 bit array. bit[i*4+:4] is depth of host i request FIFO.
+//                  Depth of zero is allowed if ReqPass is true. A maximum value
+//                  of 15 (4-bit field) is allowed, default is 2.
+//   HRspDepth:     Same as HReqDepth but for host response FIFO.
+//   DReqPass:      Same as HReqPass but for device request FIFO.
+//   DRspPass:      Same as HReqPass but for device response FIFO.
+//   DReqDepth:     Same as HReqDepth but for device request FIFO.
+//   DRspDepth:     Same as HReqDepth but for device response FIFO.
+
+module tlul_socket_m1 #(
+  parameter int unsigned  M         = 4,
+  parameter bit [M-1:0]   HReqPass  = {M{1'b1}},
+  parameter bit [M-1:0]   HRspPass  = {M{1'b1}},
+  parameter bit [M*4-1:0] HReqDepth = {M{4'h2}},
+  parameter bit [M*4-1:0] HRspDepth = {M{4'h2}},
+  parameter bit           DReqPass  = 1'b1,
+  parameter bit           DRspPass  = 1'b1,
+  parameter bit [3:0]     DReqDepth = 4'h2,
+  parameter bit [3:0]     DRspDepth = 4'h2
+) (
+  input                     clk_i,
+  input                     rst_ni,
+
+  input  tlul_pkg::tl_h2d_t tl_h_i [M],
+  output tlul_pkg::tl_d2h_t tl_h_o [M],
+
+  output tlul_pkg::tl_h2d_t tl_d_o,
+  input  tlul_pkg::tl_d2h_t tl_d_i
+);
+
+  // Signals
+  //
+  //  tl_h_i/o[0] |  tl_h_i/o[1] | ... |  tl_h_i/o[M-1]
+  //      |              |                    |
+  // u_hostfifo[0]  u_hostfifo[1]        u_hostfifo[M-1]
+  //      |              |                    |
+  //       hreq_fifo_o(i) / hrsp_fifo_i(i)
+  //     ---------------------------------------
+  //     |       request/grant/req_data        |
+  //     |                                     |
+  //     |           PRIM_ARBITER              |
+  //     |                                     |
+  //     |  arb_valid / arb_ready / arb_data   |
+  //     ---------------------------------------
+  //                     |
+  //                dreq_fifo_i / drsp_fifo_o
+  //                     |
+  //                u_devicefifo
+  //                     |
+  //                  tl_d_o/i
+  //
+  // Required ID width to distinguish between host ports
+  //  Used in response steering
+  localparam int unsigned IDW   = tlul_pkg::TL_AIW;
+  localparam int unsigned STIDW = $clog2(M); // low source-ID bits that carry the host index
+
+  tlul_pkg::tl_h2d_t hreq_fifo_o [M];
+  tlul_pkg::tl_d2h_t hrsp_fifo_i [M];
+
+  logic [M-1:0] hrequest;
+  logic [M-1:0] hgrant;
+
+  tlul_pkg::tl_h2d_t dreq_fifo_i;
+  tlul_pkg::tl_d2h_t drsp_fifo_o;
+
+  logic arb_valid;
+  logic arb_ready;
+  tlul_pkg::tl_h2d_t arb_data;
+
+  // Host Req/Rsp FIFO
+  for (genvar i = 0 ; i < M ; i++) begin : gen_host_fifo
+    tlul_pkg::tl_h2d_t hreq_fifo_i;
+
+    // ID Shifting: append host index i as the low STIDW bits of the source ID
+    logic [STIDW-1:0] reqid_sub;
+    logic [IDW-1:0] shifted_id;
+    assign reqid_sub = i;   // genvar i is truncated to STIDW bits (may raise a width-conversion lint warning)
+    assign shifted_id = {
+      tl_h_i[i].a_source[0+:(IDW-STIDW)],
+      reqid_sub
+    };
+
+ 
+    // tie the dropped upper STIDW source bits to an unused_* signal to make lint happy
+    logic [IDW-1 : IDW-STIDW] unused_tl_h_source;
+    assign unused_tl_h_source = tl_h_i[i].a_source[IDW-1 -: STIDW];
+
+    // Put shifted ID
+    assign hreq_fifo_i = '{
+      a_valid:    tl_h_i[i].a_valid,
+      a_opcode:   tl_h_i[i].a_opcode,
+      a_param:    tl_h_i[i].a_param,
+      a_size:     tl_h_i[i].a_size,
+      a_source:   shifted_id,
+      a_address:  tl_h_i[i].a_address,
+      a_mask:     tl_h_i[i].a_mask,
+      a_data:     tl_h_i[i].a_data,
+      d_ready:    tl_h_i[i].d_ready
+    };
+
+    tlul_fifo_sync #(
+      .ReqPass    (HReqPass[i]),
+      .RspPass    (HRspPass[i]),
+      .ReqDepth   (HReqDepth[i*4+:4]),
+      .RspDepth   (HRspDepth[i*4+:4]),
+      .SpareReqW  (1)
+    ) u_hostfifo (
+      .clk_i,
+      .rst_ni,
+      .tl_h_i      (hreq_fifo_i),
+      .tl_h_o      (tl_h_o[i]),
+      .tl_d_o      (hreq_fifo_o[i]),
+      .tl_d_i      (hrsp_fifo_i[i]),
+      .spare_req_i (1'b0),
+      .spare_req_o (),
+      .spare_rsp_i (1'b0),
+      .spare_rsp_o ()
+    );
+  end
+
+  // Device Req/Rsp FIFO
+  tlul_fifo_sync #(
+    .ReqPass    (DReqPass),
+    .RspPass    (DRspPass),
+    .ReqDepth   (DReqDepth),
+    .RspDepth   (DRspDepth),
+    .SpareReqW  (1)
+  ) u_devicefifo (
+    .clk_i,
+    .rst_ni,
+    .tl_h_i      (dreq_fifo_i),
+    .tl_h_o      (drsp_fifo_o),
+    .tl_d_o      (tl_d_o),
+    .tl_d_i      (tl_d_i),
+    .spare_req_i (1'b0),
+    .spare_req_o (),
+    .spare_rsp_i (1'b0),
+    .spare_rsp_o ()
+  );
+
+  // Request Arbiter: host i requests whenever its FIFO output presents a valid A-channel beat
+  for (genvar i = 0 ; i < M ; i++) begin : gen_arbreqgnt
+    assign hrequest[i] = hreq_fifo_o[i].a_valid;
+  end
+
+  assign arb_ready = drsp_fifo_o.a_ready; // device FIFO request-side ready feeds the arbiter's ready_i
+
+  if (tlul_pkg::ArbiterImpl == "PPC") begin : gen_arb_ppc
+    prim_arbiter_ppc #(
+      .N          (M),
+      .DW         ($bits(tlul_pkg::tl_h2d_t)),
+      .EnReqStabA (0)
+    ) u_reqarb (
+      .clk_i,
+      .rst_ni,
+      .req_i   ( hrequest    ),
+      .data_i  ( hreq_fifo_o ),
+      .gnt_o   ( hgrant      ),
+      .idx_o   (             ),
+      .valid_o ( arb_valid   ),
+      .data_o  ( arb_data    ),
+      .ready_i ( arb_ready   )
+    );
+  end else if (tlul_pkg::ArbiterImpl == "BINTREE") begin : gen_tree_arb
+    prim_arbiter_tree #(
+      .N          (M),
+      .DW         ($bits(tlul_pkg::tl_h2d_t)),
+      .EnReqStabA (0)
+    ) u_reqarb (
+      .clk_i,
+      .rst_ni,
+      .req_i   ( hrequest    ),
+      .data_i  ( hreq_fifo_o ),
+      .gnt_o   ( hgrant      ),
+      .idx_o   (             ),
+      .valid_o ( arb_valid   ),
+      .data_o  ( arb_data    ),
+      .ready_i ( arb_ready   )
+    );
+  end else begin : gen_unknown
+    // NOTE(review): no arbiter is instantiated here, so arb_valid/arb_data/hgrant are left undriven
+  end
+
+  logic [  M-1:0] hfifo_rspvalid;
+  logic [  M-1:0] dfifo_rspready;
+  logic [IDW-1:0] hfifo_rspid;
+  logic dfifo_rspready_merged;
+
+  // arb_data --> dreq_fifo_i
+  //   dreq_fifo_i.d_ready <= dfifo_rspready_merged (OR of the per-host dfifo_rspready bits)
+
+  assign dfifo_rspready_merged = |dfifo_rspready;
+  assign dreq_fifo_i = '{
+    a_valid:   arb_valid,
+    a_opcode:  arb_data.a_opcode,
+    a_param:   arb_data.a_param,
+    a_size:    arb_data.a_size,
+    a_source:  arb_data.a_source,
+    a_address: arb_data.a_address,
+    a_mask:    arb_data.a_mask,
+    a_data:    arb_data.a_data,
+
+    d_ready:   dfifo_rspready_merged
+  };
+
+  // Response ID steering
+  // drsp_fifo_o --> hrsp_fifo_i[i]
+
+  // Response ID shifting before putting into host fifo: drop the low STIDW routing bits
+  assign hfifo_rspid = {
+    {STIDW{1'b0}},
+    drsp_fifo_o.d_source[IDW-1:STIDW]
+  };
+  for (genvar i = 0 ; i < M ; i++) begin : gen_idrouting
+    assign hfifo_rspvalid[i] = drsp_fifo_o.d_valid &
+                               (drsp_fifo_o.d_source[0+:STIDW] == i);
+    assign dfifo_rspready[i] = hreq_fifo_o[i].d_ready                &
+                               (drsp_fifo_o.d_source[0+:STIDW] == i) &
+                              drsp_fifo_o.d_valid;
+
+    assign hrsp_fifo_i[i] = '{
+      d_valid:  hfifo_rspvalid[i],
+      d_opcode: drsp_fifo_o.d_opcode,
+      d_param:  drsp_fifo_o.d_param,
+      d_size:   drsp_fifo_o.d_size,
+      d_source: hfifo_rspid,
+      d_sink:   drsp_fifo_o.d_sink,
+      d_data:   drsp_fifo_o.d_data,
+      d_error:  drsp_fifo_o.d_error,
+      a_ready:  hgrant[i]
+    };
+  end
+
+
+endmodule
diff --git a/verilog/rtl/tlul_sram_adapter.sv b/verilog/rtl/tlul_sram_adapter.sv
new file mode 100644
index 0000000..dc77555
--- /dev/null
+++ b/verilog/rtl/tlul_sram_adapter.sv
@@ -0,0 +1,338 @@
+/**
+ * Tile-Link UL adapter for SRAM-like devices
+ *
+ * - BaseAddr is intentionally omitted so that multiple memory maps can be used in a SoC;
+ *   this means that aliasing can happen if the target device size in the TL-UL crossbar
+ *   is bigger than the SRAM size
+ */
+module tlul_sram_adapter #(
+  parameter int SramAw      = 12,
+  parameter int SramDw      = 32, // Must be multiple of the TL width
+  parameter int Outstanding = 1,  // Only one request is accepted
+  parameter bit ByteAccess  = 1,  // 1: true, 0: false
+  parameter bit ErrOnWrite  = 0,  // 1: Writes not allowed, automatically error
+  parameter bit ErrOnRead   = 0   // 1: Reads not allowed, automatically error
+) (
+  input   clk_i,
+  input   rst_ni,
+
+  // TL-UL interface
+  input   tlul_pkg::tl_h2d_t  tl_i,
+  output  tlul_pkg::tl_d2h_t  tl_o,
+
+  // SRAM interface
+  output logic              req_o,
+  input                     gnt_i,
+  output logic              we_o,
+  output logic [SramAw-1:0] addr_o,
+  output logic [SramDw-1:0] wdata_o,
+  output logic [SramDw-1:0] wmask_o,
+  input        [SramDw-1:0] rdata_i,
+  input                     rvalid_i,
+  input        [1:0]        rerror_i // 2 bit error [1]: Uncorrectable, [0]: Correctable
+);
+
+  import tlul_pkg::*;
+
+  localparam int SramByte = SramDw/8;
+  localparam int DataBitWidth = tlul_pkg::vbits(SramByte);
+  localparam int WidthMult = SramDw / tlul_pkg::TL_DW;
+  localparam int WoffsetWidth = (SramByte == tlul_pkg::TL_DBW) ? 1 :
+                                DataBitWidth - tlul_pkg::vbits(tlul_pkg::TL_DBW);
+
+  typedef struct packed {
+    logic [tlul_pkg::TL_DBW-1:0] mask ; // Byte mask within the TL-UL word
+    logic [WoffsetWidth-1:0]    woffset ; // Offset of the TL-UL word within the SRAM word
+  } sram_req_t ;
+
+  typedef enum logic [1:0] {
+    OpWrite,
+    OpRead,
+    OpUnknown
+  } req_op_e ;
+
+  typedef struct packed {
+    req_op_e                    op ;
+    logic                       error ;
+    logic [tlul_pkg::TL_SZW-1:0] size ;
+    logic [tlul_pkg::TL_AIW-1:0] source ;
+  } req_t ;
+
+  typedef struct packed {
+    logic [SramDw-1:0] data ;
+    logic              error ;
+  } rsp_t ;
+
+  localparam int SramReqFifoWidth = $bits(sram_req_t) ;
+  localparam int ReqFifoWidth = $bits(req_t) ;
+  localparam int RspFifoWidth = $bits(rsp_t) ;
+
+  // FIFO signals in case Outstanding is greater than 1
+  // If request is latched, {write, source} is pushed to req fifo.
+  // Req fifo is popped when D channel is acknowledged (v & r)
+  // D channel valid is asserted if it is write request or rsp fifo not empty if read.
+  logic reqfifo_wvalid, reqfifo_wready;
+  logic reqfifo_rvalid, reqfifo_rready;
+  req_t reqfifo_wdata,  reqfifo_rdata;
+
+  logic sramreqfifo_wvalid, sramreqfifo_wready;
+  logic sramreqfifo_rready;
+  sram_req_t sramreqfifo_wdata, sramreqfifo_rdata;
+
+  logic rspfifo_wvalid, rspfifo_wready;
+  logic rspfifo_rvalid, rspfifo_rready;
+  rsp_t rspfifo_wdata,  rspfifo_rdata;
+
+  logic error_internal; // Internal protocol error checker
+  logic wr_attr_error;
+  logic wr_vld_error;
+  logic rd_vld_error;
+  logic tlul_error;     // Error from `tlul_err` module
+
+  logic a_ack, d_ack, sram_ack;
+  assign a_ack    = tl_i.a_valid & tl_o.a_ready ;
+  assign d_ack    = tl_o.d_valid & tl_i.d_ready ;
+  assign sram_ack = req_o        & gnt_i ;
+
+  // Valid handling
+  logic d_valid, d_error;
+  always_comb begin
+    d_valid = 1'b0;
+
+    if (reqfifo_rvalid) begin
+      if (reqfifo_rdata.error) begin
+        // Return error response. Assume no request went out to SRAM
+        d_valid = 1'b1;
+      end else if (reqfifo_rdata.op == OpRead) begin
+        d_valid = rspfifo_rvalid;
+      end else begin
+        // Write without error
+        d_valid = 1'b1;
+      end
+    end else begin
+      d_valid = 1'b0;
+    end
+  end
+
+  always_comb begin
+    d_error = 1'b0;
+
+    if (reqfifo_rvalid) begin
+      if (reqfifo_rdata.op == OpRead) begin
+        d_error = rspfifo_rdata.error | reqfifo_rdata.error;
+      end else begin
+        d_error = reqfifo_rdata.error;
+      end
+    end else begin
+      d_error = 1'b0;
+    end
+  end
+
+  assign tl_o = '{
+      d_valid  : d_valid ,
+      d_opcode : (d_valid && reqfifo_rdata.op != OpRead) ? AccessAck : AccessAckData,
+      d_param  : '0,
+      d_size   : (d_valid) ? reqfifo_rdata.size : '0,
+      d_source : (d_valid) ? reqfifo_rdata.source : '0,
+      d_sink   : 1'b0,
+      d_data   : (d_valid && rspfifo_rvalid && reqfifo_rdata.op == OpRead)
+                 ? rspfifo_rdata.data : '0,
+      d_error  : d_valid && d_error,
+
+      a_ready  : (gnt_i | error_internal) & reqfifo_wready & sramreqfifo_wready
+  };
+
+  // a_ready depends on the FIFO full condition and grant from SRAM (or SRAM arbiter)
+  // assemble response, including read response, write response, and error for unsupported stuff
+
+  // Output to SRAM:
+  //    Generate request only when no internal error occurs. If error occurs, the request should be
+  //    dropped and returned error response to the host. So, error to be pushed to reqfifo.
+  //    In this case, it is assumed the request is granted (may cause ordering issue later?)
+  assign req_o    = tl_i.a_valid & reqfifo_wready & ~error_internal;
+  assign we_o     = tl_i.a_valid & logic'(tl_i.a_opcode inside {PutFullData, PutPartialData});
+  assign addr_o   = (tl_i.a_valid) ? tl_i.a_address[DataBitWidth+:SramAw] : '0;
+
+  // Support SRAMs wider than the TL-UL word width by mapping the parts of the
+  // TL-UL address which are more fine-granular than the SRAM width to the
+  // SRAM write mask.
+  logic [WoffsetWidth-1:0] woffset;
+  if (tlul_pkg::TL_DW != SramDw) begin : gen_wordwidthadapt
+    assign woffset = tl_i.a_address[DataBitWidth-1:tlul_pkg::vbits(tlul_pkg::TL_DBW)];
+  end else begin : gen_no_wordwidthadapt
+    assign woffset = '0;
+  end
+
+  // Convert byte mask to SRAM bit mask for writes, and only forward valid data
+  logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] wmask_int;
+  logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] wdata_int;
+
+  always_comb begin
+    wmask_int = '0;
+    wdata_int = '0;
+
+    if (tl_i.a_valid) begin
+      for (int i = 0 ; i < tlul_pkg::TL_DW/8 ; i++) begin
+        wmask_int[woffset][8*i +: 8] = {8{tl_i.a_mask[i]}};
+        wdata_int[woffset][8*i +: 8] = (tl_i.a_mask[i] && we_o) ? tl_i.a_data[8*i+:8] : '0;
+      end
+    end
+  end
+
+  assign wmask_o = wmask_int;
+  assign wdata_o = wdata_int;
+
+  // Begin: Request Error Detection
+
+  // wr_attr_error: Check if the request size,mask are permitted.
+  //    Basic check of size, mask, addr align is done in tlul_err module.
+  //    Here it checks any partial write if ByteAccess isn't allowed.
+  assign wr_attr_error = (tl_i.a_opcode == PutFullData || tl_i.a_opcode == PutPartialData) ?
+                         (ByteAccess == 0) ? (tl_i.a_mask != '1 || tl_i.a_size != 2'h2) : 1'b0 :
+                         1'b0;
+
+  if (ErrOnWrite == 1) begin : gen_no_writes
+    assign wr_vld_error = tl_i.a_opcode != Get;
+  end else begin : gen_writes_allowed
+    assign wr_vld_error = 1'b0;
+  end
+
+  if (ErrOnRead == 1) begin: gen_no_reads
+    assign rd_vld_error = tl_i.a_opcode == Get;
+  end else begin : gen_reads_allowed
+    assign rd_vld_error = 1'b0;
+  end
+
+  tlul_err u_err (
+    .tl_i   (tl_i),
+    .err_o (tlul_error)
+  );
+
+  assign error_internal = wr_attr_error | wr_vld_error | rd_vld_error | tlul_error;
+  // End: Request Error Detection
+
+  assign reqfifo_wvalid = a_ack ; // Push to FIFO only when granted
+  assign reqfifo_wdata  = '{
+    op:     (tl_i.a_opcode != Get) ? OpWrite : OpRead, // To return AccessAck for opcode error
+    error:  error_internal,
+    size:   tl_i.a_size,
+    source: tl_i.a_source
+  }; // Store the request only. Doesn't have to store data
+  assign reqfifo_rready = d_ack ;
+
+  // push together with ReqFIFO, pop upon returning read
+  assign sramreqfifo_wdata = '{
+    mask    : tl_i.a_mask,
+    woffset : woffset
+  };
+  assign sramreqfifo_wvalid = sram_ack & ~we_o;
+  assign sramreqfifo_rready = rspfifo_wvalid;
+
+  assign rspfifo_wvalid = rvalid_i & reqfifo_rvalid;
+
+  // Make sure only requested bytes are forwarded
+  logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] rdata;
+  logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] rmask;
+  //logic [SramDw-1:0] rmask;
+  logic [tlul_pkg::TL_DW-1:0] rdata_tlword;
+
+  always_comb begin
+    rmask = '0;
+    for (int i = 0 ; i < tlul_pkg::TL_DW/8 ; i++) begin
+      rmask[sramreqfifo_rdata.woffset][8*i +: 8] = {8{sramreqfifo_rdata.mask[i]}};
+    end
+  end
+
+  assign rdata = rdata_i & rmask;
+  assign rdata_tlword = rdata[sramreqfifo_rdata.woffset];
+
+  assign rspfifo_wdata  = '{
+    data : rdata_tlword,
+    error: rerror_i[1] // Only care for Uncorrectable error
+  };
+  assign rspfifo_rready = (reqfifo_rdata.op == OpRead & ~reqfifo_rdata.error)
+                        ? reqfifo_rready : 1'b0 ;
+
+  // This module only cares about uncorrectable errors.
+  logic unused_rerror;
+  assign unused_rerror = rerror_i[0];
+
+  // FIFO instance: REQ, RSP
+
+  // ReqFIFO is to store the Access type to match to the Response data.
+  //    For instance, SRAM accepts the write request but doesn't return the
+  //    acknowledge. In this case, it may be hard to determine when the D
+  //    response for the write data should send out if reads/writes are
+  //    interleaved. So, to make it in-order (even TL-UL allows out-of-order
+  //    responses), storing the request is necessary. And if the read entry
+  //    is write op, it is safe to return the response right away. If it is
+  //    read request, then D response is waiting until read data arrives.
+
+  // Notes (NOTE(review): Depth below is Outstanding, not Outstanding+1 - confirm intent):
+  // The outstanding+1 allows the reqfifo to absorb back to back transactions
+  // without any wait states.  Alternatively, the depth can be kept as
+  // outstanding as long as the outgoing ready is qualified with the acceptance
+  // of the response in the same cycle.  Doing so however creates a path from
+  // ready_i to ready_o, which may not be desirable.
+  fifo_sync #(
+    .Width   (ReqFifoWidth),
+    .Pass    (1'b0),
+    .Depth   (Outstanding)
+  ) u_reqfifo (
+    .clk_i,
+    .rst_ni,
+    .clr_i   (1'b0),
+    .wvalid_i(reqfifo_wvalid),
+    .wready_o(reqfifo_wready),
+    .wdata_i (reqfifo_wdata),
+    .depth_o (),
+    .rvalid_o(reqfifo_rvalid),
+    .rready_i(reqfifo_rready),
+    .rdata_o (reqfifo_rdata)
+  );
+
+  // sramreqfifo:
+  //    While the ReqFIFO holds the request until it is sent back via TL-UL, the
+  //    sramreqfifo only needs to hold the mask and word offset until the read
+  //    data returns from memory.
+  fifo_sync #(
+    .Width   (SramReqFifoWidth),
+    .Pass    (1'b0),
+    .Depth   (Outstanding)
+  ) u_sramreqfifo (
+    .clk_i,
+    .rst_ni,
+    .clr_i   (1'b0),
+    .wvalid_i(sramreqfifo_wvalid),
+    .wready_o(sramreqfifo_wready),
+    .wdata_i (sramreqfifo_wdata),
+    .depth_o (),
+    .rvalid_o(),
+    .rready_i(sramreqfifo_rready),
+    .rdata_o (sramreqfifo_rdata)
+  );
+
+  // Rationale for having #Outstanding depth in response FIFO.
+  //    In normal case, if the host or the crossbar accepts the response data,
+  //    response FIFO isn't needed. But if in any case it has a chance to be
+  //    back pressured, the response FIFO should store the returned data not to
+  //    lose the data from the SRAM interface. Remember, SRAM interface doesn't
+  //    have back-pressure signal such as read_ready.
+  fifo_sync #(
+    .Width   (RspFifoWidth),
+    .Pass    (1'b1),
+    .Depth   (Outstanding)
+  ) u_rspfifo (
+    .clk_i,
+    .rst_ni,
+    .clr_i   (1'b0),
+    .wvalid_i(rspfifo_wvalid),
+    .wready_o(rspfifo_wready),
+    .wdata_i (rspfifo_wdata),
+    .depth_o (),
+    .rvalid_o(rspfifo_rvalid),
+    .rready_i(rspfifo_rready),
+    .rdata_o (rspfifo_rdata)
+  );
+
+endmodule
diff --git a/verilog/rtl/uart.sv b/verilog/rtl/uart.sv
new file mode 100644
index 0000000..090b02b
--- /dev/null
+++ b/verilog/rtl/uart.sv
@@ -0,0 +1,85 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Description: UART top level wrapper file
+
+// `include "prim_assert.sv"
+
+module uart (
+  input           clk_i,
+  input           rst_ni,
+
+  // Bus Interface
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+
+  // Generic IO
+  input           cio_rx_i,
+  output logic    cio_tx_o,
+  output logic    cio_tx_en_o,
+
+  // Interrupts
+  output logic    intr_tx_watermark_o ,
+  output logic    intr_rx_watermark_o ,
+  output logic    intr_tx_empty_o  ,
+  output logic    intr_rx_overflow_o  ,
+  output logic    intr_rx_frame_err_o ,
+  output logic    intr_rx_break_err_o ,
+  output logic    intr_rx_timeout_o   ,
+  output logic    intr_rx_parity_err_o
+);
+
+  import uart_reg_pkg::*;
+
+  uart_reg2hw_t reg2hw; // shared by name between u_reg and uart_core (an input of uart_core)
+  uart_hw2reg_t hw2reg; // shared by name between u_reg and uart_core (an output of uart_core)
+
+  uart_reg_top u_reg (
+    .clk_i,
+    .rst_ni,
+    .tl_i,
+    .tl_o,
+    .reg2hw,
+    .hw2reg,
+
+    .devmode_i  (1'b1)
+  );
+
+  uart_core uart_core (
+    .clk_i,
+    .rst_ni,
+    .reg2hw,
+    .hw2reg,
+
+    .rx    (cio_rx_i   ),
+    .tx    (cio_tx_o   ),
+
+    .intr_tx_watermark_o,
+    .intr_rx_watermark_o,
+    .intr_tx_empty_o,
+    .intr_rx_overflow_o,
+    .intr_rx_frame_err_o,
+    .intr_rx_break_err_o,
+    .intr_rx_timeout_o,
+    .intr_rx_parity_err_o
+  );
+
+  // TX output enable is tied high: cio_tx_o is always driven out
+  assign cio_tx_en_o = 1'b1;
+
+  // // Assert Known for outputs
+  // `ASSERT_KNOWN(txenKnown, cio_tx_en_o)
+  // `ASSERT_KNOWN(txKnown, cio_tx_o, clk_i, !rst_ni || !cio_tx_en_o)
+
+  // // Assert Known for interrupts
+  // `ASSERT_KNOWN(txWatermarkKnown, intr_tx_watermark_o)
+  // `ASSERT_KNOWN(rxWatermarkKnown, intr_rx_watermark_o)
+  // `ASSERT_KNOWN(txEmptyKnown, intr_tx_empty_o)
+  // `ASSERT_KNOWN(rxOverflowKnown, intr_rx_overflow_o)
+  // `ASSERT_KNOWN(rxFrameErrKnown, intr_rx_frame_err_o)
+  // `ASSERT_KNOWN(rxBreakErrKnown, intr_rx_break_err_o)
+  // `ASSERT_KNOWN(rxTimeoutKnown, intr_rx_timeout_o)
+  // `ASSERT_KNOWN(rxParityErrKnown, intr_rx_parity_err_o)
+
+endmodule
diff --git a/verilog/rtl/uart_core.sv b/verilog/rtl/uart_core.sv
new file mode 100644
index 0000000..c205d90
--- /dev/null
+++ b/verilog/rtl/uart_core.sv
@@ -0,0 +1,490 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Description: UART core module
+//
+
+module uart_core (
+  input                  clk_i,
+  input                  rst_ni,
+
+  input  uart_reg_pkg::uart_reg2hw_t reg2hw,
+  output uart_reg_pkg::uart_hw2reg_t hw2reg,
+
+  input                  rx,
+  output logic           tx,
+
+  output logic           intr_tx_watermark_o,
+  output logic           intr_rx_watermark_o,
+  output logic           intr_tx_empty_o,
+  output logic           intr_rx_overflow_o,
+  output logic           intr_rx_frame_err_o,
+  output logic           intr_rx_break_err_o,
+  output logic           intr_rx_timeout_o,
+  output logic           intr_rx_parity_err_o
+);
+
+  import uart_reg_pkg::*;
+
+  localparam int NcoWidth = $bits(reg2hw.ctrl.nco.q);
+
+  logic   [15:0]  rx_val_q;
+  logic   [7:0]   uart_rdata;
+  logic           tick_baud_x16, rx_tick_baud;
+  logic   [5:0]   tx_fifo_depth, rx_fifo_depth;
+  logic   [5:0]   rx_fifo_depth_prev_q;
+  logic   [23:0]  rx_timeout_count_d, rx_timeout_count_q, uart_rxto_val;
+  logic           rx_fifo_depth_changed, uart_rxto_en;
+  logic           tx_enable, rx_enable;
+  logic           sys_loopback, line_loopback, rxnf_enable;
+  logic           uart_fifo_rxrst, uart_fifo_txrst;
+  logic   [2:0]   uart_fifo_rxilvl;
+  logic   [1:0]   uart_fifo_txilvl;
+  logic           ovrd_tx_en, ovrd_tx_val;
+  logic   [7:0]   tx_fifo_data;
+  logic           tx_fifo_rready, tx_fifo_rvalid;
+  logic           tx_fifo_wready, tx_uart_idle;
+  logic           tx_out;
+  logic           tx_out_q;
+  logic   [7:0]   rx_fifo_data;
+  logic           rx_valid, rx_fifo_wvalid, rx_fifo_rvalid;
+  logic           rx_fifo_wready, rx_uart_idle;
+  logic           rx_sync;
+  logic           rx_in;
+  logic           break_err;
+  logic   [4:0]   allzero_cnt_d, allzero_cnt_q;
+  logic           allzero_err, not_allzero_char;
+  logic           event_tx_watermark, event_rx_watermark, event_tx_empty, event_rx_overflow;
+  logic           event_rx_frame_err, event_rx_break_err, event_rx_timeout, event_rx_parity_err;
+  logic           tx_watermark_d, tx_watermark_prev_q;
+  logic           rx_watermark_d, rx_watermark_prev_q;
+  logic           tx_uart_idle_q;
+
+  assign tx_enable        = reg2hw.ctrl.tx.q;
+  assign rx_enable        = reg2hw.ctrl.rx.q;
+  assign rxnf_enable      = reg2hw.ctrl.nf.q;
+  assign sys_loopback     = reg2hw.ctrl.slpbk.q;
+  assign line_loopback    = reg2hw.ctrl.llpbk.q;
+
+  assign uart_fifo_rxrst  = reg2hw.fifo_ctrl.rxrst.q & reg2hw.fifo_ctrl.rxrst.qe;
+  assign uart_fifo_txrst  = reg2hw.fifo_ctrl.txrst.q & reg2hw.fifo_ctrl.txrst.qe;
+  assign uart_fifo_rxilvl = reg2hw.fifo_ctrl.rxilvl.q;
+  assign uart_fifo_txilvl = reg2hw.fifo_ctrl.txilvl.q;
+
+  assign ovrd_tx_en       = reg2hw.ovrd.txen.q;
+  assign ovrd_tx_val      = reg2hw.ovrd.txval.q;
+
+  typedef enum logic {
+    BRK_CHK,
+    BRK_WAIT
+  } break_st_e ;
+
+  break_st_e break_st_q;
+
+  assign not_allzero_char = rx_valid & (~event_rx_frame_err | (rx_fifo_data != 8'h0));
+  assign allzero_err = event_rx_frame_err & (rx_fifo_data == 8'h0);
+
+
+  assign allzero_cnt_d = (break_st_q == BRK_WAIT || not_allzero_char) ? 5'h0 :
+                          //allzero_cnt_q[4] never be 1b without break_st_q as BRK_WAIT
+                          //allzero_cnt_q[4] ? allzero_cnt_q :
+                          allzero_err ? allzero_cnt_q + 5'd1 :
+                          allzero_cnt_q;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni)        allzero_cnt_q <= '0;
+    else if (rx_enable) allzero_cnt_q <= allzero_cnt_d;
+  end
+
+  // break_err edges in same cycle as event_rx_frame_err edges ; that way the
+  // reset-on-read works the same way for break and frame error interrupts.
+
+  always_comb begin
+    unique case (reg2hw.ctrl.rxblvl.q)
+      2'h0:    break_err = allzero_cnt_d >= 5'd2;
+      2'h1:    break_err = allzero_cnt_d >= 5'd4;
+      2'h2:    break_err = allzero_cnt_d >= 5'd8;
+      default: break_err = allzero_cnt_d >= 5'd16;
+    endcase
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) break_st_q <= BRK_CHK;
+    else begin
+      unique case (break_st_q)
+        BRK_CHK: begin
+          if (event_rx_break_err) break_st_q <= BRK_WAIT;
+        end
+
+        BRK_WAIT: begin
+          if (rx_in) break_st_q <= BRK_CHK;
+        end
+
+       // default: begin
+         // break_st_q <= BRK_CHK;
+        //end
+      endcase
+    end
+  end
+
+  assign hw2reg.val.d  = rx_val_q;
+
+  assign hw2reg.rdata.d = uart_rdata;
+
+  assign hw2reg.status.rxempty.d     = ~rx_fifo_rvalid;
+  assign hw2reg.status.rxidle.d      = rx_uart_idle;
+  assign hw2reg.status.txidle.d      = tx_uart_idle & ~tx_fifo_rvalid;
+  assign hw2reg.status.txempty.d     = ~tx_fifo_rvalid;
+  assign hw2reg.status.rxfull.d      = ~rx_fifo_wready;
+  assign hw2reg.status.txfull.d      = ~tx_fifo_wready;
+
+  assign hw2reg.fifo_status.txlvl.d  = tx_fifo_depth;
+  assign hw2reg.fifo_status.rxlvl.d  = rx_fifo_depth;
+
+  // resets are self-clearing, so need to update FIFO_CTRL
+  assign hw2reg.fifo_ctrl.rxilvl.de = 1'b0;
+  assign hw2reg.fifo_ctrl.rxilvl.d  = 3'h0;
+  assign hw2reg.fifo_ctrl.txilvl.de = 1'b0;
+  assign hw2reg.fifo_ctrl.txilvl.d  = 2'h0;
+
+  //              NCO 16x Baud Generator
+  // output clock rate is:
+  //      Fin * (NCO/2**NcoWidth)
+  logic   [NcoWidth:0]     nco_sum_q; // extra bit to get the carry
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      nco_sum_q <= 17'h0;
+    end else if (tx_enable || rx_enable) begin
+      nco_sum_q <= {1'b0,nco_sum_q[NcoWidth-1:0]} + {1'b0,reg2hw.ctrl.nco.q[NcoWidth-1:0]};
+    end
+  end
+
+  assign tick_baud_x16 = nco_sum_q[16];
+
+  //////////////
+  // TX Logic //
+  //////////////
+
+  assign tx_fifo_rready = tx_uart_idle & tx_fifo_rvalid & tx_enable;
+
+  fifo_sync #(
+    .Width   (8),
+    .Pass    (1'b0),
+    .Depth   (32)
+  ) u_uart_txfifo (
+    .clk_i,
+    .rst_ni,
+    .clr_i   (uart_fifo_txrst),
+    .wvalid_i(reg2hw.wdata.qe),
+    .wready_o(tx_fifo_wready),
+    .wdata_i (reg2hw.wdata.q),
+    .depth_o (tx_fifo_depth),
+    .rvalid_o(tx_fifo_rvalid),
+    .rready_i(tx_fifo_rready),
+    .rdata_o (tx_fifo_data)
+  );
+
+  uart_tx uart_tx (
+    .clk_i,
+    .rst_ni,
+    .tx_enable,
+    .tick_baud_x16,
+    .parity_enable  (reg2hw.ctrl.parity_en.q),
+    .wr             (tx_fifo_rready),
+    .wr_parity      ((^tx_fifo_data) ^ reg2hw.ctrl.parity_odd.q),
+    .wr_data        (tx_fifo_data),
+    .idle           (tx_uart_idle),
+    .tx             (tx_out)
+  );
+
+  assign tx = line_loopback ? rx : tx_out_q ;
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      tx_out_q <= 1'b1;
+    end else if (ovrd_tx_en) begin
+      tx_out_q <= ovrd_tx_val ;
+    end else if (sys_loopback) begin
+      tx_out_q <= 1'b1;
+    end else begin
+      tx_out_q <= tx_out;
+    end
+  end
+
+  //////////////
+  // RX Logic //
+  //////////////
+
+  //      sync the incoming data
+  prim_generic_flop_2sync #(
+    .Width(1),
+    .ResetValue(1'b1)
+  ) sync_rx (
+    .clk_i,
+    .rst_ni,
+    .d_i(rx),
+    .q_o(rx_sync)
+  );
+
+  // Based on: en.wikipedia.org/wiki/Repetition_code mentions the use of a majority filter
+  // in UART to ignore brief noise spikes
+  logic   rx_sync_q1, rx_sync_q2, rx_in_mx, rx_in_maj;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rx_sync_q1 <= 1'b1;
+      rx_sync_q2 <= 1'b1;
+    end else begin
+      rx_sync_q1 <= rx_sync;
+      rx_sync_q2 <= rx_sync_q1;
+    end
+  end
+
+  assign rx_in_maj = (rx_sync    & rx_sync_q1) |
+                     (rx_sync    & rx_sync_q2) |
+                     (rx_sync_q1 & rx_sync_q2);
+  assign rx_in_mx  = rxnf_enable ? rx_in_maj : rx_sync;
+
+  assign rx_in =  sys_loopback ? tx_out   :
+                  line_loopback ? 1'b1 :
+                  rx_in_mx;
+
+  uart_rx uart_rx (
+    .clk_i          (clk_i),
+    .rst_ni         (rst_ni),
+    .rx_enable      (rx_enable),
+    .tick_baud_x16  (tick_baud_x16),
+    .parity_enable  (reg2hw.ctrl.parity_en.q),
+    .parity_odd     (reg2hw.ctrl.parity_odd.q),
+    .tick_baud      (rx_tick_baud),
+    .rx_valid       (rx_valid),
+    .rx_data        (rx_fifo_data),
+    .idle           (rx_uart_idle),
+    .frame_err      (event_rx_frame_err),
+    .rx             (rx_in),
+    .rx_parity_err  (event_rx_parity_err)
+  );
+
+  assign rx_fifo_wvalid = rx_valid & ~event_rx_frame_err & ~event_rx_parity_err;
+
+  fifo_sync #(
+    .Width   (8),
+    .Pass    (1'b0),
+    .Depth   (32)
+  ) u_uart_rxfifo (
+    .clk_i,
+    .rst_ni,
+    .clr_i   (uart_fifo_rxrst),
+    .wvalid_i(rx_fifo_wvalid),
+    .wready_o(rx_fifo_wready),
+    .wdata_i (rx_fifo_data),
+    .depth_o (rx_fifo_depth),
+    .rvalid_o(rx_fifo_rvalid),
+    .rready_i(reg2hw.rdata.re),
+    .rdata_o (uart_rdata)
+  );
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni)            rx_val_q <= 16'h0;
+    else if (tick_baud_x16) rx_val_q <= {rx_val_q[14:0], rx_in};
+  end
+
+  ////////////////////////
+  // Interrupt & Status //
+  ////////////////////////
+
+  always_comb begin
+    unique case(uart_fifo_txilvl)
+      2'h0:    tx_watermark_d = (tx_fifo_depth < 6'd2);
+      2'h1:    tx_watermark_d = (tx_fifo_depth < 6'd4);
+      2'h2:    tx_watermark_d = (tx_fifo_depth < 6'd8);
+      default: tx_watermark_d = (tx_fifo_depth < 6'd16);
+    endcase
+  end
+
+  // Rising edge of the tx watermark condition.
+  assign event_tx_watermark = ~tx_watermark_prev_q & tx_watermark_d;
+
+  // tx_empty is deliberately not a plain "FIFO became empty" edge detect: the uart_tx
+  // fsm pulls a new entry out of the FIFO right away, so the first write of every burst
+  // would already report empty. The event is therefore additionally gated with the
+  // transmitter going idle (rising edge of tx_uart_idle), which widens the window in
+  // which software can queue more data. If software refills too slowly the event can
+  // still fire early.
+  //
+  // A software-side alternative is to keep tx_enable cleared until the whole burst has
+  // been written.
+  assign event_tx_empty     = tx_uart_idle & ~tx_uart_idle_q & ~tx_fifo_rvalid;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin  // previous-cycle copies for edge detection
+    if (!rst_ni) begin
+      tx_uart_idle_q       <= 1'b1;
+      rx_watermark_prev_q  <= 1'b0; // rx watermark condition is false out of reset
+      tx_watermark_prev_q  <= 1'b1; // tx watermark condition is true out of reset
+    end else begin
+      tx_uart_idle_q       <= tx_uart_idle;
+      rx_watermark_prev_q  <= rx_watermark_d;
+      tx_watermark_prev_q  <= tx_watermark_d;
+    end
+  end
+
+  always_comb begin  // watermark is true once rx_fifo_depth reaches the selected level
+    unique case (uart_fifo_rxilvl)
+      3'd0:    rx_watermark_d = rx_fifo_depth >= 6'd1;
+      3'd1:    rx_watermark_d = rx_fifo_depth >= 6'd4;
+      3'd2:    rx_watermark_d = rx_fifo_depth >= 6'd8;
+      3'd3:    rx_watermark_d = rx_fifo_depth >= 6'd16;
+      3'd4:    rx_watermark_d = rx_fifo_depth >= 6'd30;
+      default: rx_watermark_d = 1'b0;  // other encodings never raise the watermark
+    endcase
+  end
+
+  assign event_rx_watermark = ~rx_watermark_prev_q & rx_watermark_d;  // rising edge only
+
+  // rx timeout: configuration taken from the timeout_ctrl register fields
+  assign uart_rxto_val = reg2hw.timeout_ctrl.val.q;
+  assign uart_rxto_en  = reg2hw.timeout_ctrl.en.q;
+  // high whenever the rx FIFO level differs from the value registered on the previous clock
+  assign rx_fifo_depth_changed = !(rx_fifo_depth == rx_fifo_depth_prev_q);
+
+  assign rx_timeout_count_d =  // next value of the rx timeout counter; first matching term wins
+              // don't count if the timeout feature is not enabled:
+              // the counter then never reaches the timeout value, which also saves power
+              (uart_rxto_en == 1'b0)              ? 24'd0 :
+              // reset count if timeout interrupt is set
+              event_rx_timeout                    ? 24'd0 :
+              // reset count upon change in fifo level: covers both read and receiving a new byte
+              rx_fifo_depth_changed               ? 24'd0 :
+              // reset count if no bytes are pending (6-bit literal, like every other depth compare)
+              (rx_fifo_depth == 6'd0)             ? 24'd0 :
+              // stop the count at timeout value (this will set the interrupt)
+              //   Removed below line as when the timeout reaches the value,
+              //   event occurred, and timeout value reset to 0h.
+              //(rx_timeout_count_q == uart_rxto_val) ? rx_timeout_count_q :
+              // increment if at rx baud tick
+              rx_tick_baud                        ? (rx_timeout_count_q + 24'd1) :
+              rx_timeout_count_q;
+
+  assign event_rx_timeout = uart_rxto_en & (rx_timeout_count_q == uart_rxto_val);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin  // timeout counter and FIFO-level history
+    if (!rst_ni) begin
+      rx_fifo_depth_prev_q <= 6'd0;
+      rx_timeout_count_q   <= 24'd0;
+    end else begin
+      rx_fifo_depth_prev_q  <= rx_fifo_depth;
+      rx_timeout_count_q    <= rx_timeout_count_d;
+    end
+  end
+
+  assign event_rx_overflow  = ~rx_fifo_wready & rx_fifo_wvalid;  // write offered while FIFO not ready
+  assign event_rx_break_err = (break_st_q == BRK_CHK) & break_err;
+
+  // instantiate interrupt hardware primitives
+
+  prim_intr_hw #(.Width(1)) intr_hw_tx_watermark (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_tx_watermark_o),
+    .event_intr_i           (event_tx_watermark),   // event computed in this module
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.tx_watermark.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.tx_watermark.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.tx_watermark.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.tx_watermark.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.tx_watermark.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.tx_watermark.qe)
+  );
+
+  prim_intr_hw #(.Width(1)) intr_hw_rx_watermark (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_rx_watermark_o),
+    .event_intr_i           (event_rx_watermark),   // event computed in this module
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_watermark.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_watermark.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_watermark.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_watermark.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_watermark.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_watermark.qe)
+  );
+
+  prim_intr_hw #(.Width(1)) intr_hw_tx_empty (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_tx_empty_o),
+    .event_intr_i           (event_tx_empty),       // event computed in this module
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.tx_empty.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.tx_empty.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.tx_empty.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.tx_empty.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.tx_empty.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.tx_empty.qe)
+  );
+
+  prim_intr_hw #(.Width(1)) intr_hw_rx_overflow (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_rx_overflow_o),
+    .event_intr_i           (event_rx_overflow),    // event computed in this module
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_overflow.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_overflow.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_overflow.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_overflow.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_overflow.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_overflow.qe)
+  );
+
+  prim_intr_hw #(.Width(1)) intr_hw_rx_frame_err (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_rx_frame_err_o),
+    .event_intr_i           (event_rx_frame_err),   // driven by the uart_rx frame_err output
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_frame_err.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_frame_err.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_frame_err.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_frame_err.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_frame_err.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_frame_err.qe)
+  );
+
+  prim_intr_hw #(.Width(1)) intr_hw_rx_break_err (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_rx_break_err_o),
+    .event_intr_i           (event_rx_break_err),   // event computed in this module
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_break_err.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_break_err.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_break_err.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_break_err.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_break_err.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_break_err.qe)
+  );
+
+  prim_intr_hw #(.Width(1)) intr_hw_rx_timeout (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_rx_timeout_o),
+    .event_intr_i           (event_rx_timeout),     // event computed in this module
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_timeout.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_timeout.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_timeout.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_timeout.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_timeout.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_timeout.qe)
+  );
+
+  prim_intr_hw #(.Width(1)) intr_hw_rx_parity_err (
+    .clk_i                  (clk_i),
+    .rst_ni                 (rst_ni),
+    .intr_o                 (intr_rx_parity_err_o),
+    .event_intr_i           (event_rx_parity_err),  // driven by the uart_rx rx_parity_err output
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_parity_err.q),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_parity_err.d),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_parity_err.de),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_parity_err.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_parity_err.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_parity_err.qe)
+  );
+
+endmodule
diff --git a/verilog/rtl/uart_reg_pkg.sv b/verilog/rtl/uart_reg_pkg.sv
new file mode 100644
index 0000000..22ae7ac
--- /dev/null
+++ b/verilog/rtl/uart_reg_pkg.sv
@@ -0,0 +1,369 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Package auto-generated by `reggen` containing data structure
+
+package uart_reg_pkg;  // struct types, offsets and indices shared by uart_reg_top and the UART core
+
+  // Address width within the block
+  parameter int BlockAw = 6;
+
+  ////////////////////////////
+  // Typedefs for registers //
+  ////////////////////////////
+  typedef struct packed {  // intr_state, register -> hardware: one q bit per interrupt
+    struct packed {
+      logic        q;
+    } tx_watermark;
+    struct packed {
+      logic        q;
+    } rx_watermark;
+    struct packed {
+      logic        q;
+    } tx_empty;
+    struct packed {
+      logic        q;
+    } rx_overflow;
+    struct packed {
+      logic        q;
+    } rx_frame_err;
+    struct packed {
+      logic        q;
+    } rx_break_err;
+    struct packed {
+      logic        q;
+    } rx_timeout;
+    struct packed {
+      logic        q;
+    } rx_parity_err;
+  } uart_reg2hw_intr_state_reg_t;
+
+  typedef struct packed {  // intr_enable, register -> hardware: one q bit per interrupt
+    struct packed {
+      logic        q;
+    } tx_watermark;
+    struct packed {
+      logic        q;
+    } rx_watermark;
+    struct packed {
+      logic        q;
+    } tx_empty;
+    struct packed {
+      logic        q;
+    } rx_overflow;
+    struct packed {
+      logic        q;
+    } rx_frame_err;
+    struct packed {
+      logic        q;
+    } rx_break_err;
+    struct packed {
+      logic        q;
+    } rx_timeout;
+    struct packed {
+      logic        q;
+    } rx_parity_err;
+  } uart_reg2hw_intr_enable_reg_t;
+
+  typedef struct packed {  // intr_test, register -> hardware: q plus a qe bit per interrupt
+    struct packed {
+      logic        q;
+      logic        qe;
+    } tx_watermark;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } rx_watermark;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } tx_empty;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } rx_overflow;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } rx_frame_err;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } rx_break_err;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } rx_timeout;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } rx_parity_err;
+  } uart_reg2hw_intr_test_reg_t;
+
+  typedef struct packed {  // ctrl, register -> hardware: 7 single-bit fields, rxblvl[1:0], nco[15:0]
+    struct packed {
+      logic        q;
+    } tx;
+    struct packed {
+      logic        q;
+    } rx;
+    struct packed {
+      logic        q;
+    } nf;
+    struct packed {
+      logic        q;
+    } slpbk;
+    struct packed {
+      logic        q;
+    } llpbk;
+    struct packed {
+      logic        q;
+    } parity_en;
+    struct packed {
+      logic        q;
+    } parity_odd;
+    struct packed {
+      logic [1:0]  q;
+    } rxblvl;
+    struct packed {
+      logic [15:0] q;
+    } nco;
+  } uart_reg2hw_ctrl_reg_t;
+
+  typedef struct packed {  // status, register -> hardware: q plus an re bit per field
+    struct packed {
+      logic        q;
+      logic        re;
+    } txfull;
+    struct packed {
+      logic        q;
+      logic        re;
+    } rxfull;
+    struct packed {
+      logic        q;
+      logic        re;
+    } txempty;
+    struct packed {
+      logic        q;
+      logic        re;
+    } txidle;
+    struct packed {
+      logic        q;
+      logic        re;
+    } rxidle;
+    struct packed {
+      logic        q;
+      logic        re;
+    } rxempty;
+  } uart_reg2hw_status_reg_t;
+
+  typedef struct packed {  // rdata, register -> hardware: 8-bit q plus re
+    logic [7:0]  q;
+    logic        re;
+  } uart_reg2hw_rdata_reg_t;
+
+  typedef struct packed {  // wdata, register -> hardware: 8-bit q plus qe
+    logic [7:0]  q;
+    logic        qe;
+  } uart_reg2hw_wdata_reg_t;
+
+  typedef struct packed {  // fifo_ctrl, register -> hardware: every field carries q and qe
+    struct packed {
+      logic        q;
+      logic        qe;
+    } rxrst;
+    struct packed {
+      logic        q;
+      logic        qe;
+    } txrst;
+    struct packed {
+      logic [2:0]  q;
+      logic        qe;
+    } rxilvl;
+    struct packed {
+      logic [1:0]  q;
+      logic        qe;
+    } txilvl;
+  } uart_reg2hw_fifo_ctrl_reg_t;
+
+  typedef struct packed {  // ovrd, register -> hardware: txen and txval bits
+    struct packed {
+      logic        q;
+    } txen;
+    struct packed {
+      logic        q;
+    } txval;
+  } uart_reg2hw_ovrd_reg_t;
+
+  typedef struct packed {  // timeout_ctrl, register -> hardware: val[23:0] and en
+    struct packed {
+      logic [23:0] q;
+    } val;
+    struct packed {
+      logic        q;
+    } en;
+  } uart_reg2hw_timeout_ctrl_reg_t;
+
+
+  typedef struct packed {  // intr_state, hardware -> register: d plus a de bit per interrupt
+    struct packed {
+      logic        d;
+      logic        de;
+    } tx_watermark;
+    struct packed {
+      logic        d;
+      logic        de;
+    } rx_watermark;
+    struct packed {
+      logic        d;
+      logic        de;
+    } tx_empty;
+    struct packed {
+      logic        d;
+      logic        de;
+    } rx_overflow;
+    struct packed {
+      logic        d;
+      logic        de;
+    } rx_frame_err;
+    struct packed {
+      logic        d;
+      logic        de;
+    } rx_break_err;
+    struct packed {
+      logic        d;
+      logic        de;
+    } rx_timeout;
+    struct packed {
+      logic        d;
+      logic        de;
+    } rx_parity_err;
+  } uart_hw2reg_intr_state_reg_t;
+
+  typedef struct packed {  // status, hardware -> register: one d bit per field
+    struct packed {
+      logic        d;
+    } txfull;
+    struct packed {
+      logic        d;
+    } rxfull;
+    struct packed {
+      logic        d;
+    } txempty;
+    struct packed {
+      logic        d;
+    } txidle;
+    struct packed {
+      logic        d;
+    } rxidle;
+    struct packed {
+      logic        d;
+    } rxempty;
+  } uart_hw2reg_status_reg_t;
+
+  typedef struct packed {  // rdata, hardware -> register: 8-bit d
+    logic [7:0]  d;
+  } uart_hw2reg_rdata_reg_t;
+
+  typedef struct packed {  // fifo_ctrl, hardware -> register: rxilvl and txilvl with d and de
+    struct packed {
+      logic [2:0]  d;
+      logic        de;
+    } rxilvl;
+    struct packed {
+      logic [1:0]  d;
+      logic        de;
+    } txilvl;
+  } uart_hw2reg_fifo_ctrl_reg_t;
+
+  typedef struct packed {  // fifo_status, hardware -> register: two 6-bit level fields
+    struct packed {
+      logic [5:0]  d;
+    } txlvl;
+    struct packed {
+      logic [5:0]  d;
+    } rxlvl;
+  } uart_hw2reg_fifo_status_reg_t;
+
+  typedef struct packed {  // val, hardware -> register: 16-bit d
+    logic [15:0] d;
+  } uart_hw2reg_val_reg_t;
+
+
+  ///////////////////////////////////////
+  // Register to internal design logic //
+  ///////////////////////////////////////
+  typedef struct packed {  // 125 bits in total; trailing comments give each member's bit range
+    uart_reg2hw_intr_state_reg_t intr_state; // [124:117]
+    uart_reg2hw_intr_enable_reg_t intr_enable; // [116:109]
+    uart_reg2hw_intr_test_reg_t intr_test; // [108:93]
+    uart_reg2hw_ctrl_reg_t ctrl; // [92:68]
+    uart_reg2hw_status_reg_t status; // [67:56]
+    uart_reg2hw_rdata_reg_t rdata; // [55:47]
+    uart_reg2hw_wdata_reg_t wdata; // [46:38]
+    uart_reg2hw_fifo_ctrl_reg_t fifo_ctrl; // [37:27]
+    uart_reg2hw_ovrd_reg_t ovrd; // [26:25]
+    uart_reg2hw_timeout_ctrl_reg_t timeout_ctrl; // [24:0]
+  } uart_reg2hw_t;
+
+  ///////////////////////////////////////
+  // Internal design logic to register //
+  ///////////////////////////////////////
+  typedef struct packed {  // 65 bits in total; trailing comments give each member's bit range
+    uart_hw2reg_intr_state_reg_t intr_state; // [64:49]
+    uart_hw2reg_status_reg_t status; // [48:43]
+    uart_hw2reg_rdata_reg_t rdata; // [42:35]
+    uart_hw2reg_fifo_ctrl_reg_t fifo_ctrl; // [34:28]
+    uart_hw2reg_fifo_status_reg_t fifo_status; // [27:16]
+    uart_hw2reg_val_reg_t val; // [15:0]
+  } uart_hw2reg_t;
+
+  // Register Address (offsets within the block, BlockAw bits wide, 4 apart)
+  parameter logic [BlockAw-1:0] UART_INTR_STATE_OFFSET = 6'h 0;
+  parameter logic [BlockAw-1:0] UART_INTR_ENABLE_OFFSET = 6'h 4;
+  parameter logic [BlockAw-1:0] UART_INTR_TEST_OFFSET = 6'h 8;
+  parameter logic [BlockAw-1:0] UART_CTRL_OFFSET = 6'h c;
+  parameter logic [BlockAw-1:0] UART_STATUS_OFFSET = 6'h 10;
+  parameter logic [BlockAw-1:0] UART_RDATA_OFFSET = 6'h 14;
+  parameter logic [BlockAw-1:0] UART_WDATA_OFFSET = 6'h 18;
+  parameter logic [BlockAw-1:0] UART_FIFO_CTRL_OFFSET = 6'h 1c;
+  parameter logic [BlockAw-1:0] UART_FIFO_STATUS_OFFSET = 6'h 20;
+  parameter logic [BlockAw-1:0] UART_OVRD_OFFSET = 6'h 24;
+  parameter logic [BlockAw-1:0] UART_VAL_OFFSET = 6'h 28;
+  parameter logic [BlockAw-1:0] UART_TIMEOUT_CTRL_OFFSET = 6'h 2c;
+
+
+  // Register Index (same order as the offsets above and the UART_PERMIT entries below)
+  typedef enum int {
+    UART_INTR_STATE,
+    UART_INTR_ENABLE,
+    UART_INTR_TEST,
+    UART_CTRL,
+    UART_STATUS,
+    UART_RDATA,
+    UART_WDATA,
+    UART_FIFO_CTRL,
+    UART_FIFO_STATUS,
+    UART_OVRD,
+    UART_VAL,
+    UART_TIMEOUT_CTRL
+  } uart_id_e;
+
+  // Register width information to check illegal writes
+  // NOTE(review): each entry is presumably a per-byte-lane mask of the 32-bit word - confirm
+  parameter logic [3:0] UART_PERMIT [12] = '{
+    4'b 0001, // index[ 0] UART_INTR_STATE
+    4'b 0001, // index[ 1] UART_INTR_ENABLE
+    4'b 0001, // index[ 2] UART_INTR_TEST
+    4'b 1111, // index[ 3] UART_CTRL
+    4'b 0001, // index[ 4] UART_STATUS
+    4'b 0001, // index[ 5] UART_RDATA
+    4'b 0001, // index[ 6] UART_WDATA
+    4'b 0001, // index[ 7] UART_FIFO_CTRL
+    4'b 0111, // index[ 8] UART_FIFO_STATUS
+    4'b 0001, // index[ 9] UART_OVRD
+    4'b 0011, // index[10] UART_VAL
+    4'b 1111  // index[11] UART_TIMEOUT_CTRL
+  };
+endpackage
+
diff --git a/verilog/rtl/uart_reg_top.sv b/verilog/rtl/uart_reg_top.sv
new file mode 100644
index 0000000..4342e2a
--- /dev/null
+++ b/verilog/rtl/uart_reg_top.sv
@@ -0,0 +1,1677 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Top module auto-generated by `reggen`
+
+// `include "prim_assert.sv"
+
+module uart_reg_top (
+  input clk_i,
+  input rst_ni,
+
+  // Below Register interface can be changed
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  // To HW
+  output uart_reg_pkg::uart_reg2hw_t reg2hw, // Write
+  input  uart_reg_pkg::uart_hw2reg_t hw2reg, // Read
+
+  // Config
+  input devmode_i // If 1, explicit error return for unmapped register access
+);
+
+  import uart_reg_pkg::*;
+
+  localparam int AW  = 6;     // passed to the TL-UL adapter as RegAw
+  localparam int DW  = 32;    // passed to the TL-UL adapter as RegDw
+  localparam int DBW = DW/8;  // Byte Width
+
+  // Register-side signals of the TL-UL adapter
+  logic           reg_we, reg_re;
+  logic [AW-1:0]  reg_addr;
+  logic [DBW-1:0] reg_be;
+  logic [DW-1:0]  reg_wdata;
+  logic [DW-1:0]  reg_rdata;
+  logic [DW-1:0]  reg_rdata_next;
+  logic           reg_error;
+
+  // Error terms that are combined into reg_error
+  logic           addrmiss;
+  logic           wr_err;
+
+  // Local copies of the TL-UL request and response
+  tlul_pkg::tl_h2d_t tl_reg_h2d;
+  tlul_pkg::tl_d2h_t tl_reg_d2h;
+  assign tl_reg_h2d = tl_i;
+  assign tl_o       = tl_reg_d2h;
+
+  tlul_adapter_reg #(
+    .RegDw (DW),
+    .RegAw (AW)
+  ) u_reg_if (
+    .clk_i   (clk_i),
+    .rst_ni  (rst_ni),
+    // TL-UL side
+    .tl_o    (tl_reg_d2h),
+    .tl_i    (tl_reg_h2d),
+    // register side
+    .re_o    (reg_re),
+    .we_o    (reg_we),
+    .addr_o  (reg_addr),
+    .be_o    (reg_be),
+    .wdata_o (reg_wdata),
+    .error_i (reg_error),
+    .rdata_i (reg_rdata)
+  );
+
+  assign reg_rdata = reg_rdata_next;                   // read data is reg_rdata_next, unmodified
+  assign reg_error = wr_err | (addrmiss & devmode_i);  // address misses only count when devmode_i is set
+
+  // Define SW related signals (one group per register, in register order)
+  // Format: <reg>_<field>_{wd|we|qs}
+  //        or <reg>_{wd|we|qs} if field == 1 or 0
+  logic intr_state_tx_watermark_qs;  // intr_state: qs/wd/we per interrupt bit
+  logic intr_state_tx_watermark_wd;
+  logic intr_state_tx_watermark_we;
+  logic intr_state_rx_watermark_qs;
+  logic intr_state_rx_watermark_wd;
+  logic intr_state_rx_watermark_we;
+  logic intr_state_tx_empty_qs;
+  logic intr_state_tx_empty_wd;
+  logic intr_state_tx_empty_we;
+  logic intr_state_rx_overflow_qs;
+  logic intr_state_rx_overflow_wd;
+  logic intr_state_rx_overflow_we;
+  logic intr_state_rx_frame_err_qs;
+  logic intr_state_rx_frame_err_wd;
+  logic intr_state_rx_frame_err_we;
+  logic intr_state_rx_break_err_qs;
+  logic intr_state_rx_break_err_wd;
+  logic intr_state_rx_break_err_we;
+  logic intr_state_rx_timeout_qs;
+  logic intr_state_rx_timeout_wd;
+  logic intr_state_rx_timeout_we;
+  logic intr_state_rx_parity_err_qs;
+  logic intr_state_rx_parity_err_wd;
+  logic intr_state_rx_parity_err_we;
+  logic intr_enable_tx_watermark_qs;  // intr_enable: qs/wd/we per interrupt bit
+  logic intr_enable_tx_watermark_wd;
+  logic intr_enable_tx_watermark_we;
+  logic intr_enable_rx_watermark_qs;
+  logic intr_enable_rx_watermark_wd;
+  logic intr_enable_rx_watermark_we;
+  logic intr_enable_tx_empty_qs;
+  logic intr_enable_tx_empty_wd;
+  logic intr_enable_tx_empty_we;
+  logic intr_enable_rx_overflow_qs;
+  logic intr_enable_rx_overflow_wd;
+  logic intr_enable_rx_overflow_we;
+  logic intr_enable_rx_frame_err_qs;
+  logic intr_enable_rx_frame_err_wd;
+  logic intr_enable_rx_frame_err_we;
+  logic intr_enable_rx_break_err_qs;
+  logic intr_enable_rx_break_err_wd;
+  logic intr_enable_rx_break_err_we;
+  logic intr_enable_rx_timeout_qs;
+  logic intr_enable_rx_timeout_wd;
+  logic intr_enable_rx_timeout_we;
+  logic intr_enable_rx_parity_err_qs;
+  logic intr_enable_rx_parity_err_wd;
+  logic intr_enable_rx_parity_err_we;
+  logic intr_test_tx_watermark_wd;  // intr_test: wd/we only, no qs signal is declared
+  logic intr_test_tx_watermark_we;
+  logic intr_test_rx_watermark_wd;
+  logic intr_test_rx_watermark_we;
+  logic intr_test_tx_empty_wd;
+  logic intr_test_tx_empty_we;
+  logic intr_test_rx_overflow_wd;
+  logic intr_test_rx_overflow_we;
+  logic intr_test_rx_frame_err_wd;
+  logic intr_test_rx_frame_err_we;
+  logic intr_test_rx_break_err_wd;
+  logic intr_test_rx_break_err_we;
+  logic intr_test_rx_timeout_wd;
+  logic intr_test_rx_timeout_we;
+  logic intr_test_rx_parity_err_wd;
+  logic intr_test_rx_parity_err_we;
+  logic ctrl_tx_qs;  // ctrl: qs/wd/we per field
+  logic ctrl_tx_wd;
+  logic ctrl_tx_we;
+  logic ctrl_rx_qs;
+  logic ctrl_rx_wd;
+  logic ctrl_rx_we;
+  logic ctrl_nf_qs;
+  logic ctrl_nf_wd;
+  logic ctrl_nf_we;
+  logic ctrl_slpbk_qs;
+  logic ctrl_slpbk_wd;
+  logic ctrl_slpbk_we;
+  logic ctrl_llpbk_qs;
+  logic ctrl_llpbk_wd;
+  logic ctrl_llpbk_we;
+  logic ctrl_parity_en_qs;
+  logic ctrl_parity_en_wd;
+  logic ctrl_parity_en_we;
+  logic ctrl_parity_odd_qs;
+  logic ctrl_parity_odd_wd;
+  logic ctrl_parity_odd_we;
+  logic [1:0] ctrl_rxblvl_qs;
+  logic [1:0] ctrl_rxblvl_wd;
+  logic ctrl_rxblvl_we;
+  logic [15:0] ctrl_nco_qs;
+  logic [15:0] ctrl_nco_wd;
+  logic ctrl_nco_we;
+  logic status_txfull_qs;  // status: qs/re per field, no write signals are declared
+  logic status_txfull_re;
+  logic status_rxfull_qs;
+  logic status_rxfull_re;
+  logic status_txempty_qs;
+  logic status_txempty_re;
+  logic status_txidle_qs;
+  logic status_txidle_re;
+  logic status_rxidle_qs;
+  logic status_rxidle_re;
+  logic status_rxempty_qs;
+  logic status_rxempty_re;
+  logic [7:0] rdata_qs;  // rdata: 8-bit qs plus re
+  logic rdata_re;
+  logic [7:0] wdata_wd;  // wdata: 8-bit wd plus we
+  logic wdata_we;
+  logic fifo_ctrl_rxrst_wd;  // fifo_ctrl: rxrst/txrst have wd/we only; rxilvl/txilvl also have qs
+  logic fifo_ctrl_rxrst_we;
+  logic fifo_ctrl_txrst_wd;
+  logic fifo_ctrl_txrst_we;
+  logic [2:0] fifo_ctrl_rxilvl_qs;
+  logic [2:0] fifo_ctrl_rxilvl_wd;
+  logic fifo_ctrl_rxilvl_we;
+  logic [1:0] fifo_ctrl_txilvl_qs;
+  logic [1:0] fifo_ctrl_txilvl_wd;
+  logic fifo_ctrl_txilvl_we;
+  logic [5:0] fifo_status_txlvl_qs;  // fifo_status: 6-bit qs plus re per level field
+  logic fifo_status_txlvl_re;
+  logic [5:0] fifo_status_rxlvl_qs;
+  logic fifo_status_rxlvl_re;
+  logic ovrd_txen_qs;  // ovrd: qs/wd/we per field
+  logic ovrd_txen_wd;
+  logic ovrd_txen_we;
+  logic ovrd_txval_qs;
+  logic ovrd_txval_wd;
+  logic ovrd_txval_we;
+  logic [15:0] val_qs;  // val: 16-bit qs plus re
+  logic val_re;
+  logic [23:0] timeout_ctrl_val_qs;  // timeout_ctrl: 24-bit val and 1-bit en, each with qs/wd/we
+  logic [23:0] timeout_ctrl_val_wd;
+  logic timeout_ctrl_val_we;
+  logic timeout_ctrl_en_qs;
+  logic timeout_ctrl_en_wd;
+  logic timeout_ctrl_en_we;
+
+  // Register instances
+  // R[intr_state]: V(False)
+
+  //   F[tx_watermark]: 0:0
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_tx_watermark (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_tx_watermark_we),
+    .wd     (intr_state_tx_watermark_wd),
+    .qs     (intr_state_tx_watermark_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.tx_watermark.de),
+    .d      (hw2reg.intr_state.tx_watermark.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.tx_watermark.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_watermark]: 1:1
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_rx_watermark (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_rx_watermark_we),
+    .wd     (intr_state_rx_watermark_wd),
+    .qs     (intr_state_rx_watermark_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.rx_watermark.de),
+    .d      (hw2reg.intr_state.rx_watermark.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.rx_watermark.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[tx_empty]: 2:2
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_tx_empty (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_tx_empty_we),
+    .wd     (intr_state_tx_empty_wd),
+    .qs     (intr_state_tx_empty_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.tx_empty.de),
+    .d      (hw2reg.intr_state.tx_empty.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.tx_empty.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_overflow]: 3:3
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_rx_overflow (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_rx_overflow_we),
+    .wd     (intr_state_rx_overflow_wd),
+    .qs     (intr_state_rx_overflow_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.rx_overflow.de),
+    .d      (hw2reg.intr_state.rx_overflow.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.rx_overflow.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_frame_err]: 4:4
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_rx_frame_err (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_rx_frame_err_we),
+    .wd     (intr_state_rx_frame_err_wd),
+    .qs     (intr_state_rx_frame_err_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.rx_frame_err.de),
+    .d      (hw2reg.intr_state.rx_frame_err.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.rx_frame_err.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_break_err]: 5:5
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_rx_break_err (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_rx_break_err_we),
+    .wd     (intr_state_rx_break_err_wd),
+    .qs     (intr_state_rx_break_err_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.rx_break_err.de),
+    .d      (hw2reg.intr_state.rx_break_err.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.rx_break_err.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_timeout]: 6:6
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_rx_timeout (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_rx_timeout_we),
+    .wd     (intr_state_rx_timeout_wd),
+    .qs     (intr_state_rx_timeout_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.rx_timeout.de),
+    .d      (hw2reg.intr_state.rx_timeout.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.rx_timeout.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_parity_err]: 7:7
+  prim_subreg #(
+    .SWACCESS ("W1C"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_state_rx_parity_err (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_state_rx_parity_err_we),
+    .wd     (intr_state_rx_parity_err_wd),
+    .qs     (intr_state_rx_parity_err_qs),
+
+    // update coming from the internal hardware through hw2reg
+    .de     (hw2reg.intr_state.rx_parity_err.de),
+    .d      (hw2reg.intr_state.rx_parity_err.d),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_state.rx_parity_err.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  // R[intr_enable]: V(False)
+
+  //   F[tx_watermark]: 0:0
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_enable_tx_watermark (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_enable_tx_watermark_we),
+    .wd     (intr_enable_tx_watermark_wd),
+    .qs     (intr_enable_tx_watermark_qs),
+
+    // no update from the internal hardware: de/d are tied off
+    .de     (1'b0),
+    .d      ('0),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_enable.tx_watermark.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_watermark]: 1:1
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_enable_rx_watermark (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_enable_rx_watermark_we),
+    .wd     (intr_enable_rx_watermark_wd),
+    .qs     (intr_enable_rx_watermark_qs),
+
+    // no update from the internal hardware: de/d are tied off
+    .de     (1'b0),
+    .d      ('0),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_enable.rx_watermark.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[tx_empty]: 2:2
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_enable_tx_empty (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_enable_tx_empty_we),
+    .wd     (intr_enable_tx_empty_wd),
+    .qs     (intr_enable_tx_empty_qs),
+
+    // no update from the internal hardware: de/d are tied off
+    .de     (1'b0),
+    .d      ('0),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_enable.tx_empty.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_overflow]: 3:3
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_enable_rx_overflow (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_enable_rx_overflow_we),
+    .wd     (intr_enable_rx_overflow_wd),
+    .qs     (intr_enable_rx_overflow_qs),
+
+    // no update from the internal hardware: de/d are tied off
+    .de     (1'b0),
+    .d      ('0),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_enable.rx_overflow.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_frame_err]: 4:4
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_enable_rx_frame_err (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_enable_rx_frame_err_we),
+    .wd     (intr_enable_rx_frame_err_wd),
+    .qs     (intr_enable_rx_frame_err_qs),
+
+    // no update from the internal hardware: de/d are tied off
+    .de     (1'b0),
+    .d      ('0),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_enable.rx_frame_err.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_break_err]: 5:5
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .RESVAL   (1'h0),
+    .DW       (1)
+  ) u_intr_enable_rx_break_err (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+
+    // register interface: write enable/data in, read value out
+    .we     (intr_enable_rx_break_err_we),
+    .wd     (intr_enable_rx_break_err_wd),
+    .qs     (intr_enable_rx_break_err_qs),
+
+    // no update from the internal hardware: de/d are tied off
+    .de     (1'b0),
+    .d      ('0),
+
+    // value handed to the internal hardware through reg2hw
+    .q      (reg2hw.intr_enable.rx_break_err.q),
+
+    // qe is left unconnected
+    .qe     ()
+  );
+
+
+  //   F[rx_timeout]: 6:6 -- intr_enable field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_intr_enable_rx_timeout (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (intr_enable_rx_timeout_wd),
+    .we     (intr_enable_rx_timeout_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.intr_enable.rx_timeout.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (intr_enable_rx_timeout_qs)
+  );
+
+
+  //   F[rx_parity_err]: 7:7 -- intr_enable field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_intr_enable_rx_parity_err (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (intr_enable_rx_parity_err_wd),
+    .we     (intr_enable_rx_parity_err_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.intr_enable.rx_parity_err.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (intr_enable_rx_parity_err_qs)
+  );
+
+
+  // R[intr_test]: V(True)
+
+  //   F[tx_watermark]: 0:0 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_tx_watermark (
+    .wd  (intr_test_tx_watermark_wd),
+    .we  (intr_test_tx_watermark_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.tx_watermark.q),
+    .qe  (reg2hw.intr_test.tx_watermark.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  //   F[rx_watermark]: 1:1 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_rx_watermark (
+    .wd  (intr_test_rx_watermark_wd),
+    .we  (intr_test_rx_watermark_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.rx_watermark.q),
+    .qe  (reg2hw.intr_test.rx_watermark.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  //   F[tx_empty]: 2:2 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_tx_empty (
+    .wd  (intr_test_tx_empty_wd),
+    .we  (intr_test_tx_empty_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.tx_empty.q),
+    .qe  (reg2hw.intr_test.tx_empty.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  //   F[rx_overflow]: 3:3 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_rx_overflow (
+    .wd  (intr_test_rx_overflow_wd),
+    .we  (intr_test_rx_overflow_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.rx_overflow.q),
+    .qe  (reg2hw.intr_test.rx_overflow.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  //   F[rx_frame_err]: 4:4 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_rx_frame_err (
+    .wd  (intr_test_rx_frame_err_wd),
+    .we  (intr_test_rx_frame_err_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.rx_frame_err.q),
+    .qe  (reg2hw.intr_test.rx_frame_err.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  //   F[rx_break_err]: 5:5 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_rx_break_err (
+    .wd  (intr_test_rx_break_err_wd),
+    .we  (intr_test_rx_break_err_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.rx_break_err.q),
+    .qe  (reg2hw.intr_test.rx_break_err.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  //   F[rx_timeout]: 6:6 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_rx_timeout (
+    .wd  (intr_test_rx_timeout_wd),
+    .we  (intr_test_rx_timeout_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.rx_timeout.q),
+    .qe  (reg2hw.intr_test.rx_timeout.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  //   F[rx_parity_err]: 7:7 -- intr_test field, 1 bit; re and d tied off, qs unconnected.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_intr_test_rx_parity_err (
+    .wd  (intr_test_rx_parity_err_wd),
+    .we  (intr_test_rx_parity_err_we),
+    .re  (1'b0),
+    .d   ('0),
+    .q   (reg2hw.intr_test.rx_parity_err.q),
+    .qe  (reg2hw.intr_test.rx_parity_err.qe),
+    .qre (),
+    .qs  ()
+  );
+
+
+  // R[ctrl]: V(False)
+
+  //   F[tx]: 0:0 -- ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ctrl_tx (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_tx_wd),
+    .we     (ctrl_tx_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.tx.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_tx_qs)
+  );
+
+
+  //   F[rx]: 1:1 -- ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ctrl_rx (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_rx_wd),
+    .we     (ctrl_rx_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.rx.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_rx_qs)
+  );
+
+
+  //   F[nf]: 2:2 -- ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ctrl_nf (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_nf_wd),
+    .we     (ctrl_nf_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.nf.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_nf_qs)
+  );
+
+
+  //   F[slpbk]: 4:4 -- ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ctrl_slpbk (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_slpbk_wd),
+    .we     (ctrl_slpbk_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.slpbk.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_slpbk_qs)
+  );
+
+
+  //   F[llpbk]: 5:5 -- ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ctrl_llpbk (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_llpbk_wd),
+    .we     (ctrl_llpbk_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.llpbk.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_llpbk_qs)
+  );
+
+
+  //   F[parity_en]: 6:6 -- ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ctrl_parity_en (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_parity_en_wd),
+    .we     (ctrl_parity_en_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.parity_en.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_parity_en_qs)
+  );
+
+
+  //   F[parity_odd]: 7:7 -- ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ctrl_parity_odd (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_parity_odd_wd),
+    .we     (ctrl_parity_odd_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.parity_odd.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_parity_odd_qs)
+  );
+
+
+  //   F[rxblvl]: 9:8 -- ctrl field, 2 bits, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (2),
+    .RESVAL   (2'h0)
+  ) u_ctrl_rxblvl (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_rxblvl_wd),
+    .we     (ctrl_rxblvl_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.rxblvl.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_rxblvl_qs)
+  );
+
+
+  //   F[nco]: 31:16 -- ctrl field, 16 bits, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (16),
+    .RESVAL   (16'h0)
+  ) u_ctrl_nco (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ctrl_nco_wd),
+    .we     (ctrl_nco_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ctrl.nco.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ctrl_nco_qs)
+  );
+
+
+  // R[status]: V(True)
+
+  //   F[txfull]: 0:0 -- status field, 1 bit, fed by hw2reg; we and wd are tied off.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_status_txfull (
+    .d   (hw2reg.status.txfull.d),
+    .re  (status_txfull_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (reg2hw.status.txfull.q),
+    .qre (reg2hw.status.txfull.re),
+    .qe  (),
+    .qs  (status_txfull_qs)
+  );
+
+
+  //   F[rxfull]: 1:1 -- status field, 1 bit, fed by hw2reg; we and wd are tied off.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_status_rxfull (
+    .d   (hw2reg.status.rxfull.d),
+    .re  (status_rxfull_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (reg2hw.status.rxfull.q),
+    .qre (reg2hw.status.rxfull.re),
+    .qe  (),
+    .qs  (status_rxfull_qs)
+  );
+
+
+  //   F[txempty]: 2:2 -- status field, 1 bit, fed by hw2reg; we and wd are tied off.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_status_txempty (
+    .d   (hw2reg.status.txempty.d),
+    .re  (status_txempty_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (reg2hw.status.txempty.q),
+    .qre (reg2hw.status.txempty.re),
+    .qe  (),
+    .qs  (status_txempty_qs)
+  );
+
+
+  //   F[txidle]: 3:3 -- status field, 1 bit, fed by hw2reg; we and wd are tied off.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_status_txidle (
+    .d   (hw2reg.status.txidle.d),
+    .re  (status_txidle_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (reg2hw.status.txidle.q),
+    .qre (reg2hw.status.txidle.re),
+    .qe  (),
+    .qs  (status_txidle_qs)
+  );
+
+
+  //   F[rxidle]: 4:4 -- status field, 1 bit, fed by hw2reg; we and wd are tied off.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_status_rxidle (
+    .d   (hw2reg.status.rxidle.d),
+    .re  (status_rxidle_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (reg2hw.status.rxidle.q),
+    .qre (reg2hw.status.rxidle.re),
+    .qe  (),
+    .qs  (status_rxidle_qs)
+  );
+
+
+  //   F[rxempty]: 5:5 -- status field, 1 bit, fed by hw2reg; we and wd are tied off.
+  prim_subreg_ext #(
+    .DW (1)
+  ) u_status_rxempty (
+    .d   (hw2reg.status.rxempty.d),
+    .re  (status_rxempty_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (reg2hw.status.rxempty.q),
+    .qre (reg2hw.status.rxempty.re),
+    .qe  (),
+    .qs  (status_rxempty_qs)
+  );
+
+
+  // R[rdata]: V(True) -- 8-bit prim_subreg_ext fed by hw2reg.rdata.d; the write
+  // port is tied off (we = 0, wd = 0) and the qre output goes to reg2hw.rdata.re.
+  prim_subreg_ext #(
+    .DW (8)
+  ) u_rdata (
+    .d   (hw2reg.rdata.d),
+    .re  (rdata_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (reg2hw.rdata.q),
+    .qre (reg2hw.rdata.re),
+    .qe  (),
+    .qs  (rdata_qs)
+  );
+
+
+  // R[wdata]: V(False) -- 8-bit field, SW access "WO", reset value 0. Both q and
+  // qe are routed to the hardware through reg2hw.wdata.
+  prim_subreg #(
+    .SWACCESS ("WO"),
+    .DW       (8),
+    .RESVAL   (8'h0)
+  ) u_wdata (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (wdata_wd),
+    .we     (wdata_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value and qe output towards the hardware.
+    .q      (reg2hw.wdata.q),
+    .qe     (reg2hw.wdata.qe),
+
+    .qs     ()  // read-back output is not connected
+  );
+
+
+  // R[fifo_ctrl]: V(False)
+
+  //   F[rxrst]: 0:0 -- fifo_ctrl field, 1 bit, SW access "WO", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("WO"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_fifo_ctrl_rxrst (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (fifo_ctrl_rxrst_wd),
+    .we     (fifo_ctrl_rxrst_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value and qe output towards the hardware.
+    .q      (reg2hw.fifo_ctrl.rxrst.q),
+    .qe     (reg2hw.fifo_ctrl.rxrst.qe),
+
+    .qs     ()  // read-back output is not connected
+  );
+
+
+  //   F[txrst]: 1:1 -- fifo_ctrl field, 1 bit, SW access "WO", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("WO"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_fifo_ctrl_txrst (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (fifo_ctrl_txrst_wd),
+    .we     (fifo_ctrl_txrst_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value and qe output towards the hardware.
+    .q      (reg2hw.fifo_ctrl.txrst.q),
+    .qe     (reg2hw.fifo_ctrl.txrst.qe),
+
+    .qs     ()  // read-back output is not connected
+  );
+
+
+  //   F[rxilvl]: 4:2 -- fifo_ctrl field, 3 bits, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (3),
+    .RESVAL   (3'h0)
+  ) u_fifo_ctrl_rxilvl (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (fifo_ctrl_rxilvl_wd),
+    .we     (fifo_ctrl_rxilvl_we),
+
+    // Hardware write port, driven from hw2reg.
+    .d      (hw2reg.fifo_ctrl.rxilvl.d),
+    .de     (hw2reg.fifo_ctrl.rxilvl.de),
+
+    // Field value and qe output towards the hardware.
+    .q      (reg2hw.fifo_ctrl.rxilvl.q),
+    .qe     (reg2hw.fifo_ctrl.rxilvl.qe),
+
+    // Read value returned to the register interface.
+    .qs     (fifo_ctrl_rxilvl_qs)
+  );
+
+
+  //   F[txilvl]: 6:5 -- fifo_ctrl field, 2 bits, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (2),
+    .RESVAL   (2'h0)
+  ) u_fifo_ctrl_txilvl (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (fifo_ctrl_txilvl_wd),
+    .we     (fifo_ctrl_txilvl_we),
+
+    // Hardware write port, driven from hw2reg.
+    .d      (hw2reg.fifo_ctrl.txilvl.d),
+    .de     (hw2reg.fifo_ctrl.txilvl.de),
+
+    // Field value and qe output towards the hardware.
+    .q      (reg2hw.fifo_ctrl.txilvl.q),
+    .qe     (reg2hw.fifo_ctrl.txilvl.qe),
+
+    // Read value returned to the register interface.
+    .qs     (fifo_ctrl_txilvl_qs)
+  );
+
+
+  // R[fifo_status]: V(True)
+
+  //   F[txlvl]: 5:0 -- fifo_status field, 6 bits, fed by hw2reg; q/qe/qre unconnected.
+  prim_subreg_ext #(
+    .DW (6)
+  ) u_fifo_status_txlvl (
+    .d   (hw2reg.fifo_status.txlvl.d),
+    .re  (fifo_status_txlvl_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (),
+    .qre (),
+    .qe  (),
+    .qs  (fifo_status_txlvl_qs)
+  );
+
+
+  //   F[rxlvl]: 21:16 -- fifo_status field, 6 bits, fed by hw2reg; q/qe/qre unconnected.
+  prim_subreg_ext #(
+    .DW (6)
+  ) u_fifo_status_rxlvl (
+    .d   (hw2reg.fifo_status.rxlvl.d),
+    .re  (fifo_status_rxlvl_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (),
+    .qre (),
+    .qe  (),
+    .qs  (fifo_status_rxlvl_qs)
+  );
+
+
+  // R[ovrd]: V(False)
+
+  //   F[txen]: 0:0 -- ovrd field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ovrd_txen (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ovrd_txen_wd),
+    .we     (ovrd_txen_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ovrd.txen.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ovrd_txen_qs)
+  );
+
+
+  //   F[txval]: 1:1 -- ovrd field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_ovrd_txval (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (ovrd_txval_wd),
+    .we     (ovrd_txval_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.ovrd.txval.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (ovrd_txval_qs)
+  );
+
+
+  // R[val]: V(True) -- 16-bit prim_subreg_ext fed by hw2reg.val.d; the write port
+  // is tied off (we = 0, wd = 0) and the q/qe/qre outputs are left unconnected.
+  prim_subreg_ext #(
+    .DW (16)
+  ) u_val (
+    .d   (hw2reg.val.d),
+    .re  (val_re),
+    .wd  ('0),
+    .we  (1'b0),
+    .q   (),
+    .qre (),
+    .qe  (),
+    .qs  (val_qs)
+  );
+
+
+  // R[timeout_ctrl]: V(False)
+
+  //   F[val]: 23:0 -- timeout_ctrl field, 24 bits, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (24),
+    .RESVAL   (24'h0)
+  ) u_timeout_ctrl_val (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (timeout_ctrl_val_wd),
+    .we     (timeout_ctrl_val_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.timeout_ctrl.val.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (timeout_ctrl_val_qs)
+  );
+
+
+  //   F[en]: 31:31 -- timeout_ctrl field, 1 bit, SW access "RW", reset value 0.
+  prim_subreg #(
+    .SWACCESS ("RW"),
+    .DW       (1),
+    .RESVAL   (1'h0)
+  ) u_timeout_ctrl_en (
+    .rst_ni (rst_ni),
+    .clk_i  (clk_i),
+
+    // Write port from the register interface.
+    .wd     (timeout_ctrl_en_wd),
+    .we     (timeout_ctrl_en_we),
+
+    // Hardware write port is tied off (de = 0, d = 0).
+    .d      ('0),
+    .de     (1'b0),
+
+    // Field value towards the hardware; qe is left unconnected.
+    .q      (reg2hw.timeout_ctrl.en.q),
+    .qe     (),
+
+    // Read value returned to the register interface.
+    .qs     (timeout_ctrl_en_qs)
+  );
+
+
+
+
+  // Address decode: bit i of addr_hit is set when reg_addr equals the offset
+  // constant of register i. addrmiss is raised when a read or a write access
+  // is in progress and none of the twelve offsets matches.
+  logic [11:0] addr_hit;
+  assign addr_hit[ 0] = (reg_addr == UART_INTR_STATE_OFFSET);
+  assign addr_hit[ 1] = (reg_addr == UART_INTR_ENABLE_OFFSET);
+  assign addr_hit[ 2] = (reg_addr == UART_INTR_TEST_OFFSET);
+  assign addr_hit[ 3] = (reg_addr == UART_CTRL_OFFSET);
+  assign addr_hit[ 4] = (reg_addr == UART_STATUS_OFFSET);
+  assign addr_hit[ 5] = (reg_addr == UART_RDATA_OFFSET);
+  assign addr_hit[ 6] = (reg_addr == UART_WDATA_OFFSET);
+  assign addr_hit[ 7] = (reg_addr == UART_FIFO_CTRL_OFFSET);
+  assign addr_hit[ 8] = (reg_addr == UART_FIFO_STATUS_OFFSET);
+  assign addr_hit[ 9] = (reg_addr == UART_OVRD_OFFSET);
+  assign addr_hit[10] = (reg_addr == UART_VAL_OFFSET);
+  assign addr_hit[11] = (reg_addr == UART_TIMEOUT_CTRL_OFFSET);
+
+  assign addrmiss = (reg_re || reg_we) & (~|addr_hit);
+
+  // Sub-word write check.
+  // UART_PERMIT[i] is the byte-enable mask that a write to register i has to
+  // cover. wr_err is raised when a write hits register i while at least one
+  // bit that is set in UART_PERMIT[i] is clear in reg_be.
+  always_comb begin
+    wr_err = 1'b0;
+    // Same test for each of the 12 registers decoded in addr_hit.
+    for (int unsigned idx = 0; idx < 12; idx++) begin
+      // Only a write that actually addresses register idx can be flagged;
+      // reads and non-matching addresses leave wr_err untouched.
+      if (reg_we && addr_hit[idx] &&
+          ((UART_PERMIT[idx] & reg_be) != UART_PERMIT[idx])) begin
+        wr_err = 1'b1;
+      end
+    end
+  end
+
+  assign intr_state_tx_watermark_wd = reg_wdata[0];  // intr_state: addr_hit[0]
+  assign intr_state_tx_watermark_we = reg_we & addr_hit[0] & ~wr_err;
+
+  assign intr_state_rx_watermark_wd = reg_wdata[1];
+  assign intr_state_rx_watermark_we = reg_we & addr_hit[0] & ~wr_err;
+
+  assign intr_state_tx_empty_wd = reg_wdata[2];
+  assign intr_state_tx_empty_we = reg_we & addr_hit[0] & ~wr_err;
+
+  assign intr_state_rx_overflow_wd = reg_wdata[3];
+  assign intr_state_rx_overflow_we = reg_we & addr_hit[0] & ~wr_err;
+
+  assign intr_state_rx_frame_err_wd = reg_wdata[4];
+  assign intr_state_rx_frame_err_we = reg_we & addr_hit[0] & ~wr_err;
+
+  assign intr_state_rx_break_err_wd = reg_wdata[5];
+  assign intr_state_rx_break_err_we = reg_we & addr_hit[0] & ~wr_err;
+
+  assign intr_state_rx_timeout_wd = reg_wdata[6];
+  assign intr_state_rx_timeout_we = reg_we & addr_hit[0] & ~wr_err;
+
+  assign intr_state_rx_parity_err_wd = reg_wdata[7];
+  assign intr_state_rx_parity_err_we = reg_we & addr_hit[0] & ~wr_err;
+  // intr_enable: write strobes qualified by addr_hit[1]
+  assign intr_enable_tx_watermark_wd = reg_wdata[0];
+  assign intr_enable_tx_watermark_we = reg_we & addr_hit[1] & ~wr_err;
+
+  assign intr_enable_rx_watermark_wd = reg_wdata[1];
+  assign intr_enable_rx_watermark_we = reg_we & addr_hit[1] & ~wr_err;
+
+  assign intr_enable_tx_empty_wd = reg_wdata[2];
+  assign intr_enable_tx_empty_we = reg_we & addr_hit[1] & ~wr_err;
+
+  assign intr_enable_rx_overflow_wd = reg_wdata[3];
+  assign intr_enable_rx_overflow_we = reg_we & addr_hit[1] & ~wr_err;
+
+  assign intr_enable_rx_frame_err_wd = reg_wdata[4];
+  assign intr_enable_rx_frame_err_we = reg_we & addr_hit[1] & ~wr_err;
+
+  assign intr_enable_rx_break_err_wd = reg_wdata[5];
+  assign intr_enable_rx_break_err_we = reg_we & addr_hit[1] & ~wr_err;
+
+  assign intr_enable_rx_timeout_wd = reg_wdata[6];
+  assign intr_enable_rx_timeout_we = reg_we & addr_hit[1] & ~wr_err;
+
+  assign intr_enable_rx_parity_err_wd = reg_wdata[7];
+  assign intr_enable_rx_parity_err_we = reg_we & addr_hit[1] & ~wr_err;
+  // intr_test: write strobes qualified by addr_hit[2]
+  assign intr_test_tx_watermark_wd = reg_wdata[0];
+  assign intr_test_tx_watermark_we = reg_we & addr_hit[2] & ~wr_err;
+
+  assign intr_test_rx_watermark_wd = reg_wdata[1];
+  assign intr_test_rx_watermark_we = reg_we & addr_hit[2] & ~wr_err;
+
+  assign intr_test_tx_empty_wd = reg_wdata[2];
+  assign intr_test_tx_empty_we = reg_we & addr_hit[2] & ~wr_err;
+
+  assign intr_test_rx_overflow_wd = reg_wdata[3];
+  assign intr_test_rx_overflow_we = reg_we & addr_hit[2] & ~wr_err;
+
+  assign intr_test_rx_frame_err_wd = reg_wdata[4];
+  assign intr_test_rx_frame_err_we = reg_we & addr_hit[2] & ~wr_err;
+
+  assign intr_test_rx_break_err_wd = reg_wdata[5];
+  assign intr_test_rx_break_err_we = reg_we & addr_hit[2] & ~wr_err;
+
+  assign intr_test_rx_timeout_wd = reg_wdata[6];
+  assign intr_test_rx_timeout_we = reg_we & addr_hit[2] & ~wr_err;
+
+  assign intr_test_rx_parity_err_wd = reg_wdata[7];
+  assign intr_test_rx_parity_err_we = reg_we & addr_hit[2] & ~wr_err;
+  // ctrl: write strobes qualified by addr_hit[3]
+  assign ctrl_tx_wd = reg_wdata[0];
+  assign ctrl_tx_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_rx_wd = reg_wdata[1];
+  assign ctrl_rx_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_nf_wd = reg_wdata[2];
+  assign ctrl_nf_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_slpbk_wd = reg_wdata[4];
+  assign ctrl_slpbk_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_llpbk_wd = reg_wdata[5];
+  assign ctrl_llpbk_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_parity_en_wd = reg_wdata[6];
+  assign ctrl_parity_en_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_parity_odd_wd = reg_wdata[7];
+  assign ctrl_parity_odd_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_rxblvl_wd = reg_wdata[9:8];
+  assign ctrl_rxblvl_we = reg_we & addr_hit[3] & ~wr_err;
+
+  assign ctrl_nco_wd = reg_wdata[31:16];
+  assign ctrl_nco_we = reg_we & addr_hit[3] & ~wr_err;
+  // status: read strobes qualified by addr_hit[4]
+  assign status_txfull_re = reg_re && addr_hit[4];
+
+  assign status_rxfull_re = reg_re && addr_hit[4];
+
+  assign status_txempty_re = reg_re && addr_hit[4];
+
+  assign status_txidle_re = reg_re && addr_hit[4];
+
+  assign status_rxidle_re = reg_re && addr_hit[4];
+
+  assign status_rxempty_re = reg_re && addr_hit[4];
+  // rdata: read strobe qualified by addr_hit[5]
+  assign rdata_re = reg_re && addr_hit[5];
+  // wdata: write strobe qualified by addr_hit[6]
+  assign wdata_wd = reg_wdata[7:0];
+  assign wdata_we = reg_we & addr_hit[6] & ~wr_err;
+  // fifo_ctrl: write strobes qualified by addr_hit[7]
+  assign fifo_ctrl_rxrst_wd = reg_wdata[0];
+  assign fifo_ctrl_rxrst_we = reg_we & addr_hit[7] & ~wr_err;
+
+  assign fifo_ctrl_txrst_wd = reg_wdata[1];
+  assign fifo_ctrl_txrst_we = reg_we & addr_hit[7] & ~wr_err;
+
+  assign fifo_ctrl_rxilvl_wd = reg_wdata[4:2];
+  assign fifo_ctrl_rxilvl_we = reg_we & addr_hit[7] & ~wr_err;
+
+  assign fifo_ctrl_txilvl_wd = reg_wdata[6:5];
+  assign fifo_ctrl_txilvl_we = reg_we & addr_hit[7] & ~wr_err;
+  // fifo_status: read strobes qualified by addr_hit[8]
+  assign fifo_status_txlvl_re = reg_re && addr_hit[8];
+
+  assign fifo_status_rxlvl_re = reg_re && addr_hit[8];
+  // ovrd: write strobes qualified by addr_hit[9]
+  assign ovrd_txen_wd = reg_wdata[0];
+  assign ovrd_txen_we = reg_we & addr_hit[9] & ~wr_err;
+
+  assign ovrd_txval_wd = reg_wdata[1];
+  assign ovrd_txval_we = reg_we & addr_hit[9] & ~wr_err;
+  // val: read strobe qualified by addr_hit[10]
+  assign val_re = reg_re && addr_hit[10];
+  // timeout_ctrl: write strobes qualified by addr_hit[11]
+  assign timeout_ctrl_val_wd = reg_wdata[23:0];
+  assign timeout_ctrl_val_we = reg_we & addr_hit[11] & ~wr_err;
+
+  assign timeout_ctrl_en_wd = reg_wdata[31];
+  assign timeout_ctrl_en_we = reg_we & addr_hit[11] & ~wr_err;
+
+  // Read-data mux: bits not assigned in the selected arm read as 0; no hit reads as all ones.
+  always_comb begin
+    reg_rdata_next = '0;
+    unique case (1'b1)
+      addr_hit[0]: begin
+        reg_rdata_next[7] = intr_state_rx_parity_err_qs;
+        reg_rdata_next[6] = intr_state_rx_timeout_qs;
+        reg_rdata_next[5] = intr_state_rx_break_err_qs;
+        reg_rdata_next[4] = intr_state_rx_frame_err_qs;
+        reg_rdata_next[3] = intr_state_rx_overflow_qs;
+        reg_rdata_next[2] = intr_state_tx_empty_qs;
+        reg_rdata_next[1] = intr_state_rx_watermark_qs;
+        reg_rdata_next[0] = intr_state_tx_watermark_qs;
+      end
+      // intr_enable
+      addr_hit[1]: begin
+        reg_rdata_next[7] = intr_enable_rx_parity_err_qs;
+        reg_rdata_next[6] = intr_enable_rx_timeout_qs;
+        reg_rdata_next[5] = intr_enable_rx_break_err_qs;
+        reg_rdata_next[4] = intr_enable_rx_frame_err_qs;
+        reg_rdata_next[3] = intr_enable_rx_overflow_qs;
+        reg_rdata_next[2] = intr_enable_tx_empty_qs;
+        reg_rdata_next[1] = intr_enable_rx_watermark_qs;
+        reg_rdata_next[0] = intr_enable_tx_watermark_qs;
+      end
+      // intr_test: every field reads back as 0
+      addr_hit[2]: begin
+        reg_rdata_next[7] = 1'b0;
+        reg_rdata_next[6] = 1'b0;
+        reg_rdata_next[5] = 1'b0;
+        reg_rdata_next[4] = 1'b0;
+        reg_rdata_next[3] = 1'b0;
+        reg_rdata_next[2] = 1'b0;
+        reg_rdata_next[1] = 1'b0;
+        reg_rdata_next[0] = 1'b0;
+      end
+      // ctrl: bit 3 and bits 15:10 are not assigned here
+      addr_hit[3]: begin
+        reg_rdata_next[31:16] = ctrl_nco_qs;
+        reg_rdata_next[9:8] = ctrl_rxblvl_qs;
+        reg_rdata_next[7] = ctrl_parity_odd_qs;
+        reg_rdata_next[6] = ctrl_parity_en_qs;
+        reg_rdata_next[5] = ctrl_llpbk_qs;
+        reg_rdata_next[4] = ctrl_slpbk_qs;
+        reg_rdata_next[2] = ctrl_nf_qs;
+        reg_rdata_next[1] = ctrl_rx_qs;
+        reg_rdata_next[0] = ctrl_tx_qs;
+      end
+      // status
+      addr_hit[4]: begin
+        reg_rdata_next[5] = status_rxempty_qs;
+        reg_rdata_next[4] = status_rxidle_qs;
+        reg_rdata_next[3] = status_txidle_qs;
+        reg_rdata_next[2] = status_txempty_qs;
+        reg_rdata_next[1] = status_rxfull_qs;
+        reg_rdata_next[0] = status_txfull_qs;
+      end
+      // rdata
+      addr_hit[5]: begin
+        reg_rdata_next[7:0] = rdata_qs;
+      end
+      // wdata: reads back as 0
+      addr_hit[6]: begin
+        reg_rdata_next[7:0] = 8'h00;
+      end
+      // fifo_ctrl: rxrst (bit 0) and txrst (bit 1) read back as 0
+      addr_hit[7]: begin
+        reg_rdata_next[6:5] = fifo_ctrl_txilvl_qs;
+        reg_rdata_next[4:2] = fifo_ctrl_rxilvl_qs;
+        reg_rdata_next[1] = 1'b0;
+        reg_rdata_next[0] = 1'b0;
+      end
+      // fifo_status
+      addr_hit[8]: begin
+        reg_rdata_next[21:16] = fifo_status_rxlvl_qs;
+        reg_rdata_next[5:0] = fifo_status_txlvl_qs;
+      end
+      // ovrd
+      addr_hit[9]: begin
+        reg_rdata_next[1] = ovrd_txval_qs;
+        reg_rdata_next[0] = ovrd_txen_qs;
+      end
+      // val
+      addr_hit[10]: begin
+        reg_rdata_next[15:0] = val_qs;
+      end
+      // timeout_ctrl
+      addr_hit[11]: begin
+        reg_rdata_next[31] = timeout_ctrl_en_qs;
+        reg_rdata_next[23:0] = timeout_ctrl_val_qs;
+      end
+      // no register selected
+      default: begin
+        reg_rdata_next = '1;
+      end
+    endcase
+  end
+
+  // // Assertions for Register Interface
+  // `ASSERT_PULSE(wePulse, reg_we)
+  // `ASSERT_PULSE(rePulse, reg_re)
+
+  // `ASSERT(reAfterRv, $rose(reg_re || reg_we) |=> tl_o.d_valid)
+
+  // `ASSERT(en2addrHit, (reg_we || reg_re) |-> $onehot0(addr_hit))
+
+  // // this is formulated as an assumption such that the FPV testbenches do not disprove this
+  // // property by mistake
+  // `ASSUME(reqParity, tl_reg_h2d.a_valid |-> tl_reg_h2d.a_user.parity_en == 1'b0)
+
+endmodule
diff --git a/verilog/rtl/uart_rx.sv b/verilog/rtl/uart_rx.sv
new file mode 100644
index 0000000..829895a
--- /dev/null
+++ b/verilog/rtl/uart_rx.sv
@@ -0,0 +1,105 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Description: UART Receive Module
+//
+
+module uart_rx (
+  input           clk_i,
+  input           rst_ni,
+
+  input           rx_enable,
+  input           tick_baud_x16,
+  input           parity_enable,
+  input           parity_odd,
+
+  output logic    tick_baud,
+  output logic    rx_valid,
+  output [7:0]    rx_data,
+  output logic    idle,
+  output          frame_err,
+  output          rx_parity_err,
+
+  input           rx
+);
+
+  // Frame shift register: new bits enter at [10] and move towards [0].
+  logic [10:0] sreg_q, sreg_d;
+  logic  [3:0] bit_cnt_q, bit_cnt_d;    // bits still to be sampled in this frame
+  logic  [3:0] baud_div_q, baud_div_d;  // counts tick_baud_x16 pulses
+  logic        tick_baud_q, tick_baud_d;
+  logic        idle_q, idle_d;
+  logic        rx_valid_q;
+  logic  [3:0] frame_bits;              // bit_cnt start value for one frame
+  logic        last_bit;                // bit_cnt_q has reached 1
+
+  assign frame_bits = parity_enable ? 4'd11 : 4'd10;
+  assign last_bit   = (bit_cnt_q == 4'h1);
+
+  // State registers, asynchronously reset by rst_ni (active low).
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      idle_q      <= 1'b1;
+      tick_baud_q <= 1'b0;
+      baud_div_q  <= 4'h0;
+      bit_cnt_q   <= 4'h0;
+      sreg_q      <= 11'h0;
+      rx_valid_q  <= 1'b0;
+    end else begin
+      idle_q      <= idle_d;
+      tick_baud_q <= tick_baud_d;
+      baud_div_q  <= baud_div_d;
+      bit_cnt_q   <= bit_cnt_d;
+      sreg_q      <= sreg_d;
+      rx_valid_q  <= tick_baud_q & last_bit;
+    end
+  end
+
+  always_comb begin
+    // Defaults: hold every register, no sample tick.
+    tick_baud_d = 1'b0;
+    idle_d      = idle_q;
+    baud_div_d  = baud_div_q;
+    bit_cnt_d   = bit_cnt_q;
+    sreg_d      = sreg_q;
+    // Count x16 ticks; the carry out of the 4-bit divider is the sample tick.
+    if (tick_baud_x16) {tick_baud_d, baud_div_d} = {1'b0, baud_div_q} + 5'h1;
+
+    if (idle_q && !rx) begin
+      // Start of a character: preload the divider with 8 to sample mid-bit.
+      idle_d      = 1'b0;
+      sreg_d      = 11'h0;
+      bit_cnt_d   = frame_bits;
+      tick_baud_d = 1'b0;
+      baud_div_d  = 4'd8;
+    end else if (!idle_q && tick_baud_q) begin
+      if (rx && (bit_cnt_q == frame_bits)) begin
+        // rx is high at the first sample: the start bit was a glitch, abort.
+        bit_cnt_d = 4'h0;
+        idle_d    = 1'b1;
+      end else begin
+        idle_d    = last_bit;
+        bit_cnt_d = bit_cnt_q - 4'h1;
+        sreg_d    = {rx, sreg_q[10:1]};
+      end
+    end
+
+    // A disabled receiver is forced to its reset values, overriding the above.
+    if (!rx_enable) begin
+      idle_d      = 1'b1;
+      tick_baud_d = 1'b0;
+      baud_div_d  = 4'h0;
+      bit_cnt_d   = 4'h0;
+      sreg_d      = 11'h0;
+    end
+  end
+
+  assign tick_baud     = tick_baud_q;
+  assign idle          = idle_q;
+  assign rx_valid      = rx_valid_q;
+  assign rx_data       = parity_enable ? sreg_q[8:1] : sreg_q[9:2];  // data sits one bit lower with parity
+  assign frame_err     = rx_valid_q & ~sreg_q[10];                   // bit 10 holds the last sampled bit
+  assign rx_parity_err = rx_valid_q & parity_enable & (^{sreg_q[9:1], parity_odd});  // rx_parity = sreg_q[9]
+
+endmodule
diff --git a/verilog/rtl/uart_rx_prog.v b/verilog/rtl/uart_rx_prog.v
new file mode 100644
index 0000000..fb43991
--- /dev/null
+++ b/verilog/rtl/uart_rx_prog.v
@@ -0,0 +1,156 @@
+
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Drive the CLKS_PER_BIT input as follows:
+// CLKS_PER_BIT = (Frequency of clk_i)/(Frequency of UART)
+// Example: 10 MHz Clock, 115200 baud UART
+// (10000000)/(115200) = 87
+  
+// UART receiver with a run-time programmable bit period: 8 data bits, first
+// received bit in bit 0, no parity. The stop bit is timed but not checked.
+//   clk_i        - clock; all registers update on the rising edge
+//   rst_ni       - asynchronous reset, active low
+//   i_Rx_Serial  - serial input (idle high), synchronised inside this module
+//   CLKS_PER_BIT - clk_i cycles per bit (clk_i frequency / baud rate)
+//   o_Rx_DV      - high for one clk_i cycle once a full frame has been received
+//   o_Rx_Byte    - received byte; complete while o_Rx_DV is high, 0 after reset
+module uart_rx_prog (
+   input         clk_i,
+   input         rst_ni,
+   input         i_Rx_Serial,
+   input  [15:0] CLKS_PER_BIT,
+   output        o_Rx_DV,
+   output  [7:0] o_Rx_Byte
+   );
+
+  parameter s_IDLE         = 3'b000;
+  parameter s_RX_START_BIT = 3'b001;
+  parameter s_RX_DATA_BITS = 3'b010;
+  parameter s_RX_STOP_BIT  = 3'b011;
+  parameter s_CLEANUP      = 3'b100;
+
+  reg           r_Rx_Data_R ;   // first synchroniser stage
+  reg           r_Rx_Data   ;   // second stage, the value the FSM samples
+  reg [15:0]    r_Clock_Count ; // clk_i cycles counted within the current bit
+  reg [2:0]     r_Bit_Index  ;  // 8 bits total
+  reg [7:0]     r_Rx_Byte   ;
+  reg           r_Rx_DV     ;
+  reg [2:0]     r_SM_Main   ;
+
+  // Purpose: Double-register the incoming data so it can be used in the clk_i
+  // domain (guards against metastability). Both stages reset to 1, the idle
+  // line level, using the same asynchronous reset as the state machine.
+  always @(posedge clk_i or negedge rst_ni)
+    begin
+      if (~rst_ni) begin
+        r_Rx_Data_R <= 1'b1;
+        r_Rx_Data   <= 1'b1;
+      end else begin
+        r_Rx_Data_R <= i_Rx_Serial;
+        r_Rx_Data   <= r_Rx_Data_R;
+      end
+    end
+
+  // Purpose: Control RX state machine
+  always @(posedge clk_i or negedge rst_ni)
+    begin
+      if (~rst_ni) begin
+        r_SM_Main     <= s_IDLE;
+        r_Rx_DV       <= 1'b0;
+        r_Clock_Count <= 0;
+        r_Bit_Index   <= 0;
+        r_Rx_Byte     <= 8'h00;  // give o_Rx_Byte a defined value after reset
+      end else begin
+      case (r_SM_Main)
+        s_IDLE :
+          begin
+            r_Rx_DV       <= 1'b0;
+            r_Clock_Count <= 0;
+            r_Bit_Index   <= 0;
+            if (r_Rx_Data == 1'b0)          // Start bit detected
+              r_SM_Main <= s_RX_START_BIT;
+            else
+              r_SM_Main <= s_IDLE;
+          end
+
+        // Check middle of start bit to make sure it's still low
+        s_RX_START_BIT :
+          begin
+            if (r_Clock_Count == ((CLKS_PER_BIT-1)>>1))
+              begin
+                if (r_Rx_Data == 1'b0)
+                  begin
+                    r_Clock_Count <= 0;  // reset counter, found the middle
+                    r_SM_Main     <= s_RX_DATA_BITS;
+                  end
+                else
+                  r_SM_Main <= s_IDLE;   // line is high again: false start
+              end
+            else
+              begin
+                r_Clock_Count <= r_Clock_Count + 1;
+                r_SM_Main     <= s_RX_START_BIT;
+              end
+          end // case: s_RX_START_BIT
+
+        // Wait CLKS_PER_BIT-1 clock cycles to sample serial data
+        s_RX_DATA_BITS :
+          begin
+            if (r_Clock_Count < CLKS_PER_BIT-1)
+              begin
+                r_Clock_Count <= r_Clock_Count + 1;
+                r_SM_Main     <= s_RX_DATA_BITS;
+              end
+            else
+              begin
+                r_Clock_Count          <= 0;
+                r_Rx_Byte[r_Bit_Index] <= r_Rx_Data;
+                // Check if we have received all bits
+                if (r_Bit_Index < 7)
+                  begin
+                    r_Bit_Index <= r_Bit_Index + 1;
+                    r_SM_Main   <= s_RX_DATA_BITS;
+                  end
+                else
+                  begin
+                    r_Bit_Index <= 0;
+                    r_SM_Main   <= s_RX_STOP_BIT;
+                  end
+              end
+          end // case: s_RX_DATA_BITS
+
+        // Receive Stop bit.  Stop bit = 1
+        s_RX_STOP_BIT :
+          begin
+            // Wait CLKS_PER_BIT-1 clock cycles for Stop bit to finish
+            if (r_Clock_Count < CLKS_PER_BIT-1)
+              begin
+                r_Clock_Count <= r_Clock_Count + 1;
+                r_SM_Main     <= s_RX_STOP_BIT;
+              end
+            else
+              begin
+                r_Rx_DV       <= 1'b1;
+                r_Clock_Count <= 0;
+                r_SM_Main     <= s_CLEANUP;
+              end
+          end // case: s_RX_STOP_BIT
+
+        // Stay here 1 clock
+        s_CLEANUP :
+          begin
+            r_SM_Main <= s_IDLE;
+            r_Rx_DV   <= 1'b0;
+          end
+
+        default :
+          r_SM_Main <= s_IDLE;
+      endcase
+      end
+    end
+
+  assign o_Rx_DV   = r_Rx_DV;
+  assign o_Rx_Byte = r_Rx_Byte;
+
+endmodule // uart_rx_prog
diff --git a/verilog/rtl/uart_tx.sv b/verilog/rtl/uart_tx.sv
new file mode 100644
index 0000000..d10d16a
--- /dev/null
+++ b/verilog/rtl/uart_tx.sv
@@ -0,0 +1,79 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Description: UART Transmit Module
+//
+
+module uart_tx (
+  input               clk_i,
+  input               rst_ni,
+
+  input               tx_enable,
+  input               tick_baud_x16,
+  input  logic        parity_enable,
+
+  input               wr,
+  input  logic        wr_parity,
+  input   [7:0]       wr_data,
+  output              idle,
+
+  output logic        tx
+);
+
+  // Serialises one frame per write: start bit (0), wr_data LSB first, wr_parity
+  // when parity_enable is set, then one stop bit (1). One bit is driven per
+  // bit_tick_q pulse, i.e. once per 16 clk_i cycles in which tick_baud_x16 is set.
+  logic  [3:0] div16_q;                   // counts tick_baud_x16 cycles, wraps at 16
+  logic        bit_tick_q;                // carry out of div16_q, one clk_i cycle wide
+  logic  [3:0] bits_left_q, bits_left_d;  // bit times still to be driven
+  logic [10:0] shift_q, shift_d;          // frame shift register, bit 0 goes out next
+  logic        line_q, line_d;            // registered level of the tx output
+
+  assign tx   = line_q;
+  assign idle = tx_enable ? (bits_left_q == 4'h0) : 1'b1;
+
+  // Divide-by-16 prescaler; the tick is cleared whenever tick_baud_x16 is low.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      {bit_tick_q, div16_q} <= 5'h00;
+    end else if (!tick_baud_x16) begin
+      bit_tick_q <= 1'b0;
+    end else begin
+      {bit_tick_q, div16_q} <= {1'b0, div16_q} + 5'h1;
+    end
+  end
+
+  // State registers; reset leaves the line high with nothing left to send.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      bits_left_q <= 4'h0;
+      shift_q     <= 11'h7ff;
+      line_q      <= 1'b1;
+    end else begin
+      bits_left_q <= bits_left_d;
+      shift_q     <= shift_d;
+      line_q      <= line_d;
+    end
+  end
+
+  // Next state: hold by default; priority is disable, then write, then shift.
+  always_comb begin
+    bits_left_d = bits_left_q;
+    shift_d     = shift_q;
+    line_d      = line_q;
+    if (!tx_enable) begin
+      bits_left_d = 4'h0;
+      shift_d     = 11'h7ff;
+      line_d      = 1'b1;
+    end else if (wr) begin
+      shift_d     = {1'b1, (parity_enable ? wr_parity : 1'b1), wr_data, 1'b0};
+      bits_left_d = (parity_enable ? 4'd11 : 4'd10);
+    end else if (bit_tick_q && (bits_left_q != 4'h0)) begin
+      shift_d     = {1'b1, shift_q[10:1]};
+      line_d      = shift_q[0];
+      bits_left_d = bits_left_q - 4'h1;
+    end
+  end
+
+endmodule