added azadi_soc design files
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..cb81277
--- /dev/null
+++ b/verilog/rtl/
Binary files differ
diff --git a/verilog/rtl/.user_project_wrapper.v.swp b/verilog/rtl/.user_project_wrapper.v.swp
new file mode 100644
index 0000000..ca62df7
--- /dev/null
+++ b/verilog/rtl/.user_project_wrapper.v.swp
Binary files differ
diff --git a/verilog/rtl/PWM.v b/verilog/rtl/PWM.v
new file mode 100644
index 0000000..44078a0
--- /dev/null
+++ b/verilog/rtl/PWM.v
@@ -0,0 +1,251 @@
+// Control register [7:0] ctrl:
+// bit 0: When set, the external clock is chosen for PWM/timer. When cleared, the wb clock is used for PWM/timer.
+// bit 1: When set, PWM is enabled. When cleared, timer is enabled.
+// bit 2: When set, PWM/timer starts. When cleared, PWM/timer stops.
+// bit 3: When set, timer runs continuously. When cleared, timer runs one time.
+// bit 4: When set, o_pwm is enabled.
+// bit 5: Timer interrupt bit. When it is written with 0, the interrupt request is cleared.
+// bit 6: When set, the 16-bit external signal i_DC is used as the duty cycle. When cleared, register DC is used.
+// bit 7: When set, the PWM/timer counter is reset; its output and bit 5 are also cleared. When changing from PWM mode to timer mode, a reset is needed before the timer starts.
+// PWM: two-channel PWM generator behind a simple register interface.
+//
+// Register map (value compared against addr_i; data fields are 16 bit,
+// control registers are 8 bit):
+//   0  ctrl      4  divisor      8  period     12  DC        (channel 1)
+//   16 ctrl_2    20 divisor_2    24 period_2   28 DC_2      (channel 2)
+//
+// Control bits as implemented in this module:
+//   bit 1: forced to 1 on every clk_i edge; bus writes never reach it.
+//   bit 2: run. When 0 the counter, PWM signal and output enable are held at 0.
+//   bit 4: gates the PWM signal onto o_pwm / o_pwm_2.
+//   bit 5: no logic behind it here; cleared by reset so it reads back as 0.
+//   bit 6: selects the captured external duty cycle (i_DC) instead of DC.
+//   bit 7: synchronous reset of the channel counter and PWM signal.
+//   bits 0 and 3 are stored and read back but drive no logic in this module.
+module PWM(
+  //tlul interface
+  input         clk_i,
+  input         rst_ni,      // active-low reset
+  input         re_i,
+  input         we_i,
+  input  [7:0]  addr_i,
+  input  [31:0] wdata_i,
+  input  [3:0]  be_i,        // not used inside this module
+  output [31:0] rdata_o,
+  //output error_o,
+  input         i_extclk,    // not used inside this module
+  input  [15:0] i_DC,        // external duty cycle, shared by both channels
+  input         i_valid_DC,  // when high, i_DC is captured into extDC / extDC_2
+  output        o_pwm,
+  output        o_pwm_2,
+  output reg    oe_pwm1,
+  output reg    oe_pwm2
+);
+
+  ////////////////////control logic////////////////////////////
+  parameter adr_ctrl_1    = 0,
+            adr_divisor_1 = 4,
+            adr_period_1  = 8,
+            adr_DC_1      = 12;
+  parameter adr_ctrl_2    = 16,
+            adr_divisor_2 = 20,
+            adr_period_2  = 24,
+            adr_DC_2      = 28;
+
+  reg [7:0]  ctrl;
+  reg [15:0] period;
+  reg [15:0] DC;
+  reg [15:0] divisor;
+  reg [7:0]  ctrl_2;
+  reg [15:0] period_2;
+  reg [15:0] DC_2;
+  reg [15:0] divisor_2;
+
+  // A write is accepted only when we_i is high and re_i is low.
+  wire write;
+  assign write = we_i & ~re_i;
+
+  // Register write path with synchronous, active-low reset.
+  // Bit 1 of each control register is driven by its own always block below.
+  always @(posedge clk_i)
+    if (~rst_ni) begin
+      ctrl[4:2]   <= 0;
+      ctrl[0]     <= 0;
+      ctrl[5]     <= 0;   // otherwise undriven: give the read-back a defined value
+      ctrl[7:6]   <= 0;
+      DC          <= 0;
+      period      <= 0;
+      divisor     <= 0;
+      ctrl_2[4:2] <= 0;
+      ctrl_2[0]   <= 0;
+      ctrl_2[5]   <= 0;   // otherwise undriven: give the read-back a defined value
+      ctrl_2[7:6] <= 0;
+      DC_2        <= 0;
+      period_2    <= 0;
+      divisor_2   <= 0;
+    end
+    else if (write) begin
+      case (addr_i)
+        adr_ctrl_1: begin
+          ctrl[0]   <= wdata_i[0];
+          ctrl[4:2] <= wdata_i[4:2];
+          ctrl[7:6] <= wdata_i[7:6];
+        end
+        adr_ctrl_2: begin
+          ctrl_2[0]   <= wdata_i[0];
+          ctrl_2[4:2] <= wdata_i[4:2];
+          ctrl_2[7:6] <= wdata_i[7:6];
+        end
+        adr_divisor_1 : divisor   <= wdata_i[15:0];
+        adr_period_1  : period    <= wdata_i[15:0];
+        adr_DC_1      : DC        <= wdata_i[15:0];
+        adr_divisor_2 : divisor_2 <= wdata_i[15:0];
+        adr_period_2  : period_2  <= wdata_i[15:0];
+        adr_DC_2      : DC_2      <= wdata_i[15:0];
+      endcase
+    end
+
+  // Mode bit of channel 1 is tied to 1 (independent of reset and bus writes).
+  wire pwm;
+  always @(posedge clk_i) begin
+    ctrl[1] <= 1'b1;
+  end
+  assign pwm = ctrl[1];
+
+  // Mode bit of channel 2 is tied to 1 (independent of reset and bus writes).
+  wire pwm_1;
+  always @(posedge clk_i) begin
+    ctrl_2[1] <= 1'b1;
+  end
+  assign pwm_1 = ctrl_2[1];
+
+  wire eclk_2,oclk_2;
+
+  //////down clocking for pwm///////////////////
+  wire clk_source;
+  wire eclk,oclk;
+  assign clk_source = clk_i;
+
+  // Both dividers of a channel receive divisor >> 1; bit 0 of the divisor
+  // picks which divider output clocks the channel counter.
+  down_clocking_even clock_div_ev(
+    .clk_i     (clk_source),
+    .rst_ni    (rst_ni),
+    .i_divisor ({1'b0,divisor[15:1]}),
+    .o_clk     (eclk)
+  );
+
+  down_clocking_odd clock_div_od(
+    .clk_i     (clk_source),
+    .rst_ni    (rst_ni),
+    .i_divisor ({1'b0,divisor[15:1]}),
+    .o_clk     (oclk)
+  );
+
+  wire clk;
+  assign clk = divisor[0]? oclk: eclk;
+
+  down_clocking_even clock_div_ev_2(
+    .clk_i     (clk_source),
+    .rst_ni    (rst_ni),
+    .i_divisor ({1'b0,divisor_2[15:1]}),
+    .o_clk     (eclk_2)
+  );
+
+  down_clocking_odd clock_div_od_2(
+    .clk_i     (clk_source),
+    .rst_ni    (rst_ni),
+    .i_divisor ({1'b0,divisor_2[15:1]}),
+    .o_clk     (oclk_2)
+  );
+
+  wire clk_2;
+  assign clk_2 = divisor_2[0]? oclk_2: eclk_2;
+
+  /////////////////main counter //////////////////////////
+  reg [15:0] ct;
+  reg pts; //PWM signal
+  reg [15:0] extDC;
+  wire [15:0] DC_1;
+  assign DC_1 = ctrl[6]? extDC: DC; //external or internal duty cycle toggle
+  // Counter wraps once it reaches period-1 (a period of 0 is treated as 0).
+  wire [15:0] period_1;
+  assign period_1 = (period==0)? 0: (period-1);
+  // Counter reset: hardware reset or control bit 7.
+  wire rst_ct;
+  assign rst_ct = ~rst_ni|ctrl[7];
+
+  reg [15:0] ct_2;
+  reg pts_2; //PWM signal
+  reg [15:0] extDC_2;
+  wire [15:0] DCw_2;
+  assign DCw_2 = ctrl_2[6]? extDC_2: DC_2; //external or internal duty cycle toggle
+  // Counter wraps once it reaches period_2-1 (a period of 0 is treated as 0).
+  wire [15:0] period_P2;
+  assign period_P2 = (period_2==0)? 0: (period_2-1);
+  // Counter reset: hardware reset or control bit 7.
+  wire rst_ct_2;
+  assign rst_ct_2 = ~rst_ni|ctrl_2[7];
+
+  // Channel 1 counter and PWM signal, clocked by the divided clock.
+  always @(posedge clk)
+    if (rst_ct) begin
+      pts   <= 0;
+      ct    <= 0;
+      extDC <= 0;
+      // Give the output enable a defined value on hardware reset only, so a
+      // counter reset through ctrl[7] keeps its previous effect on oe_pwm1.
+      if (~rst_ni) oe_pwm1 <= 1'b0;
+    end
+    else begin
+      if (i_valid_DC) extDC <= i_DC;
+      if (ctrl[2]) begin
+        if (pwm) begin
+          oe_pwm1 <= 1'b1;
+          if (ct >= period_1) ct <= 0;
+          else                ct <= ct+1;
+          if (ct < DC_1) pts <= 1'b1;
+          else           pts <= 1'b0;
+        end
+      end
+      else begin
+        pts     <= 1'b0;
+        ct      <= 0;
+        oe_pwm1 <= 0;
+      end
+    end
+
+  // Channel 2 counter and PWM signal, clocked by the divided clock.
+  always @(posedge clk_2)
+    if (rst_ct_2) begin
+      pts_2   <= 0;
+      ct_2    <= 0;
+      extDC_2 <= 0;
+      // Same reasoning as channel 1: defined value on hardware reset only.
+      if (~rst_ni) oe_pwm2 <= 1'b0;
+    end
+    else begin
+      if (i_valid_DC) extDC_2 <= i_DC;
+      if (ctrl_2[2]) begin
+        if (pwm_1) begin
+          oe_pwm2 <= 1'b1;
+          if (ct_2 >= period_P2) ct_2 <= 0;
+          else                   ct_2 <= ct_2+1;
+          if (ct_2 < DCw_2) pts_2 <= 1'b1;
+          else              pts_2 <= 1'b0;
+        end
+      end
+      else begin
+        pts_2   <= 1'b0;
+        ct_2    <= 0;
+        oe_pwm2 <= 1'b0;
+      end
+    end
+
+  // PWM outputs are forced low unless control bit 4 of the channel is set.
+  assign o_pwm   = ctrl[4]?   pts:   0;
+  assign o_pwm_2 = ctrl_2[4]? pts_2: 0;
+
+  // Combinational read-back selected by addr_i; unknown addresses read 0.
+  assign rdata_o = (addr_i == adr_ctrl_1)   ? {8'h0,ctrl}   :
+                   (addr_i == adr_divisor_1)? divisor       :
+                   (addr_i == adr_period_1) ? period        :
+                   (addr_i == adr_DC_1)     ? DC            :
+                   (addr_i == adr_DC_2)     ? DC_2          :
+                   (addr_i == adr_period_2) ? period_2      :
+                   (addr_i == adr_divisor_2)? divisor_2     :
+                   (addr_i == adr_ctrl_2)   ? {8'h0,ctrl_2} : 0;
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..da32e03
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,520 @@
+// azadi_soc_top: SoC top level.
+//
+// Instantiates the brq core (u_top), the debug module, the main TL-UL
+// crossbar, the peripherals (timer, PWM, SPI, GPIO, UART, PLIC), the reset
+// manager, the ICCM programming path (uart_rx_prog feeding iccm_controller)
+// and the instruction/data SRAMs with their TL-UL adapters.
+//
+// Reset usage: the debug module, reset manager, iccm_controller and
+// uart_rx_prog take rst_ni directly; every other block is reset by
+// system_rst_ni, which the reset manager drives.
+// Clock usage: iccm_controller, uart_rx_prog and both SRAMs are clocked
+// with the inverted clk_i; everything else uses clk_i.
+module azadi_soc_top (
+  input clk_i,
+  input rst_ni,
+  input prog,
+  //output system_rst_ni,
+  //output prog_rst_ni,
+  input  logic [31:0] gpio_i,
+  output logic [31:0] gpio_o,
+  output logic [31:0] gpio_oe,
+  // jtag interface
+  input  logic jtag_tck_i,
+  input  logic jtag_tms_i,
+  input  logic jtag_trst_ni,
+  input  logic jtag_tdi_i,
+  output logic jtag_tdo_o,
+  output logic jtag_tdo_oe_o,
+  // uart-periph interface
+  output logic uart_tx,
+  output logic tx_en_o,
+  input  logic uart_rx,
+  // PWM interface
+  output logic pwm_o,
+  output logic pwm_o_2,
+  output logic pwm1_oe,
+  output logic pwm2_oe,
+  // SPI interface
+  output logic [`SPI_SS_NB-1:0] ss_o,
+  output logic sclk_o,
+  output logic sd_o,
+  output logic sd_oe,
+  input  logic sd_i
+);
+
+  localparam logic [31:0] JTAG_ID = {
+    4'h0,     // Version
+    16'h4F54, // Part Number: "OT"
+    11'h426,  // Manufacturer Identity: Google
+    1'b1      // (fixed)
+  };
+
+  // Driven by iccm_controller.reset_o; consumed by the reset manager and the
+  // ICCM adapter. Declared explicitly instead of relying on an implicit net.
+  logic prog_rst_ni;
+  logic system_rst_ni;
+  logic [31:0] gpio_in;
+  logic [31:0] gpio_out;
+  assign gpio_in = gpio_i;
+  assign gpio_o  = gpio_out;
+  logic instr_valid;
+  logic [11:0] tlul_addr;
+  logic req_i;
+  logic [31:0] tlul_data;
+  logic dbg_req;
+  logic dbg_rst;
+  // instruction sram interface
+  logic instr_csb;
+  logic [11:0] instr_addr;
+  logic [31:0] instr_wdata;
+  logic [3:0] instr_wmask;
+  logic instr_we;
+  logic [31:0] instr_rdata;
+  // data sram interface
+  logic data_csb;
+  logic [11:0] data_addr;
+  logic [31:0] data_wdata;
+  logic [3:0] data_wmask;
+  logic data_we;
+  logic [31:0] data_rdata;
+  logic [31:0] iccm_ctrl_data;
+  logic iccm_ctrl_we;
+  logic [11:0] iccm_ctrl_addr_o;
+  tlul_pkg::tl_h2d_t ifu_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_ifu;
+  tlul_pkg::tl_h2d_t xbar_to_iccm;
+  tlul_pkg::tl_d2h_t iccm_to_xbar;
+  tlul_pkg::tl_h2d_t lsu_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_lsu;
+  tlul_pkg::tl_h2d_t xbar_to_dccm;
+  tlul_pkg::tl_d2h_t dccm_to_xbar;
+  tlul_pkg::tl_h2d_t xbarp_to_gpio;
+  tlul_pkg::tl_d2h_t gpio_to_xbarp;
+  tlul_pkg::tl_h2d_t dm_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_dm;
+  tlul_pkg::tl_h2d_t dbgrom_to_xbar;
+  tlul_pkg::tl_d2h_t xbar_to_dbgrom;
+  tlul_pkg::tl_h2d_t plic_req;
+  tlul_pkg::tl_d2h_t plic_resp;
+  tlul_pkg::tl_h2d_t xbar_to_uart;
+  tlul_pkg::tl_d2h_t uart_to_xbar;
+  tlul_pkg::tl_h2d_t xbar_to_timer;
+  tlul_pkg::tl_d2h_t timer_to_xbar;
+  tlul_pkg::tl_h2d_t xbar_to_pwm;
+  tlul_pkg::tl_d2h_t pwm_to_xbar;
+  tlul_pkg::tl_h2d_t xbar_to_spi;
+  tlul_pkg::tl_d2h_t spi_to_xbar;
+  // interrupt vector
+  logic [43:0] intr_vector;
+  // Interrupt source list
+  logic [31:0] intr_gpio;
+  logic intr_uart0_tx_watermark;
+  logic intr_uart0_rx_watermark;
+  logic intr_uart0_tx_empty;
+  logic intr_uart0_rx_overflow;
+  logic intr_uart0_rx_frame_err;
+  logic intr_uart0_rx_break_err;
+  logic intr_uart0_rx_timeout;
+  logic intr_uart0_rx_parity_err;
+  logic intr_req;
+  logic intr_srx;
+  logic intr_stx;
+  logic intr_timer;
+
+  // NOTE(review): the concatenation below is 43 bits wide (2 SPI + 8 UART +
+  // 32 GPIO + 1 constant) while intr_vector is 44 bits, so bit 43 is
+  // zero-extended. Confirm that no interrupt source is missing.
+  assign intr_vector = {
+    intr_srx,
+    intr_stx,
+    intr_uart0_rx_parity_err,
+    intr_uart0_rx_timeout,
+    intr_uart0_rx_break_err,
+    intr_uart0_rx_frame_err,
+    intr_uart0_rx_overflow,
+    intr_uart0_tx_empty,
+    intr_uart0_rx_watermark,
+    intr_uart0_tx_watermark,
+    intr_gpio,
+    1'b0
+  };
+
+  // jtag interface
+  jtag_pkg::jtag_req_t jtag_req;
+  jtag_pkg::jtag_rsp_t jtag_rsp;
+  assign jtag_req.tck    = jtag_tck_i;
+  assign jtag_req.tms    = jtag_tms_i;
+  assign jtag_req.trst_n = jtag_trst_ni;
+  assign jtag_req.tdi    = jtag_tdi_i;
+  assign jtag_tdo_o      = jtag_rsp.tdo;
+  assign jtag_tdo_oe_o   = jtag_rsp.tdo_oe;
+
+  brq_core_top #(
+    .PMPEnable        (1'b0),
+    .PMPGranularity   (0),
+    .PMPNumRegions    (4),
+    .MHPMCounterNum   (0),
+    .MHPMCounterWidth (40),
+    .RV32E            (1'b0),
+    .RV32M            (brq_pkg::RV32MSlow),
+    .RV32B            (brq_pkg::RV32BNone),
+    .RegFile          (brq_pkg::RegFileFF),
+    .BranchTargetALU  (1'b0),
+    .WritebackStage   (1'b1),
+    .ICache           (1'b0),
+    .ICacheECC        (1'b0),
+    .BranchPredictor  (1'b0),
+    .DbgTriggerEn     (1'b1),
+    .DbgHwBreakNum    (1),
+    .Securebrq        (1'b0),
+    .DmHaltAddr       (tl_main_pkg::ADDR_SPACE_DEBUG_ROM + 32'h 800),
+    .DmExceptionAddr  (tl_main_pkg::ADDR_SPACE_DEBUG_ROM + dm::ExceptionAddress)
+  ) u_top (
+    .clk_i          (clk_i),
+    .rst_ni         (system_rst_ni),
+    // instruction memory interface
+    .tl_i_i         (xbar_to_ifu),
+    .tl_i_o         (ifu_to_xbar),
+    // data memory interface
+    .tl_d_i         (xbar_to_lsu),
+    .tl_d_o         (lsu_to_xbar),
+    .test_en_i      (1'b0), // enable all clk_i gates for testing
+    .hart_id_i      (32'b0),
+    .boot_addr_i    (32'h20000000),
+    // Interrupt inputs
+    .irq_software_i (1'b0),
+    .irq_timer_i    (intr_timer),
+    .irq_external_i (intr_req),
+    .irq_fast_i     ('0),
+    .irq_nm_i       (1'b0), // non-maskeable interrupt
+    // Debug Interface
+    .debug_req_i    (dbg_req),
+    // CPU Control Signals
+    .fetch_enable_i (1'b1),
+    .alert_minor_o  (),
+    .alert_major_o  (),
+    .core_sleep_o   ()
+  );
+
+  // Debug module
+  rv_dm #(
+    .NrHarts(1),
+    .IdcodeValue(JTAG_ID)
+    // .DirectDmiTap (DirectDmiTap)
+  ) debug_module (
+    .clk_i(clk_i), // clk_i
+    .rst_ni(rst_ni), // asynchronous reset active low, connect PoR
+                     // here, not the system reset
+    .testmode_i('0),
+    .ndmreset_o(dbg_rst), // non-debug module reset
+    .dmactive_o(), // debug module is active
+    .debug_req_o(dbg_req), // async debug request
+    .unavailable_i(1'b0), // communicate whether the hart is unavailable
+                          // (e.g.: power down)
+    // bus device with debug memory, for an execution based technique
+    .tl_d_i(dbgrom_to_xbar),
+    .tl_d_o(xbar_to_dbgrom),
+    // bus host, for system bus accesses
+    .tl_h_o(dm_to_xbar),
+    .tl_h_i(xbar_to_dm),
+    .jtag_req_i(jtag_req),
+    .jtag_rsp_o(jtag_rsp)
+  );
+
+  // main xbar module
+  tl_xbar_main main_swith (
+    .clk_i          (clk_i),
+    .rst_ni         (system_rst_ni),
+    // Host interfaces
+    .tl_brqif_i     (ifu_to_xbar),
+    .tl_brqif_o     (xbar_to_ifu),
+    .tl_brqlsu_i    (lsu_to_xbar),
+    .tl_brqlsu_o    (xbar_to_lsu),
+    .tl_dm_sba_i    (dm_to_xbar),
+    .tl_dm_sba_o    (xbar_to_dm),
+    // Device interfaces
+    .tl_iccm_o      (xbar_to_iccm),
+    .tl_iccm_i      (iccm_to_xbar),
+    .tl_debug_rom_o (dbgrom_to_xbar),
+    .tl_debug_rom_i (xbar_to_dbgrom),
+    .tl_dccm_o      (xbar_to_dccm),
+    .tl_dccm_i      (dccm_to_xbar),
+    .tl_timer0_o    (xbar_to_timer),
+    .tl_timer0_i    (timer_to_xbar),
+    .tl_uart_o      (xbar_to_uart),
+    .tl_uart_i      (uart_to_xbar),
+    .tl_spi_o       (xbar_to_spi),
+    .tl_spi_i       (spi_to_xbar),
+    .tl_pwm_o       (xbar_to_pwm),
+    .tl_pwm_i       (pwm_to_xbar),
+    .tl_gpio_o      (xbarp_to_gpio),
+    .tl_gpio_i      (gpio_to_xbarp),
+    .tl_plic_o      (plic_req),
+    .tl_plic_i      (plic_resp)
+  );
+
+  // timer
+  rv_timer timer0(
+    .clk_i  (clk_i),
+    .rst_ni (system_rst_ni),
+    .tl_i   (xbar_to_timer),
+    .tl_o   (timer_to_xbar),
+    .intr_timer_expired_0_0_o (intr_timer)
+  );
+
+  // PWM module
+  pwm_top u_pwm(
+    .clk_i   (clk_i),
+    .rst_ni  (system_rst_ni),
+    .tl_i    (xbar_to_pwm),
+    .tl_o    (pwm_to_xbar),
+    .pwm_o   (pwm_o),
+    .pwm_o_2 (pwm_o_2),
+    .pwm1_oe (pwm1_oe),
+    .pwm2_oe (pwm2_oe)
+  );
+
+  // spi module
+  spi_top u_spi_host(
+    .clk_i     (clk_i),
+    .rst_ni    (system_rst_ni),
+    .tl_i      (xbar_to_spi),
+    .tl_o      (spi_to_xbar),
+    // SPI signals
+    .intr_rx_o (intr_srx),
+    .intr_tx_o (intr_stx),
+    .ss_o      (ss_o),
+    .sclk_o    (sclk_o),
+    .sd_o      (sd_o),
+    .sd_oe     (sd_oe),
+    .sd_i      (sd_i)
+  );
+
+  //GPIO module
+  gpio GPIO (
+    .clk_i         (clk_i),
+    .rst_ni        (system_rst_ni),
+    // Below Regster interface can be changed
+    .tl_i          (xbarp_to_gpio),
+    .tl_o          (gpio_to_xbarp),
+    .cio_gpio_i    (gpio_in),
+    .cio_gpio_o    (gpio_out),
+    .cio_gpio_en_o (gpio_oe),
+    .intr_gpio_o   (intr_gpio )
+  );
+
+  // Reset manager: produces system_rst_ni from rst_ni, the debug module's
+  // ndmreset and the programming reset.
+  rstmgr reset_manager(
+    .clk_i(clk_i),
+    .rst_ni(rst_ni),
+    .ndmreset (dbg_rst),
+    .prog_rst_ni(prog_rst_ni),
+    .sys_rst_ni(system_rst_ni)
+  );
+
+  rv_plic intr_controller (
+    .clk_i(clk_i),
+    .rst_ni(system_rst_ni),
+    // Bus Interface (device)
+    .tl_i (plic_req),
+    .tl_o (plic_resp),
+    // Interrupt Sources
+    .intr_src_i (intr_vector),
+    // Interrupt notification to targets
+    .irq_o (intr_req),
+    .msip_o()
+  );
+
+  uart u_uart0(
+    .clk_i                (clk_i ),
+    .rst_ni               (system_rst_ni ),
+    // Bus Interface
+    .tl_i                 (xbar_to_uart ),
+    .tl_o                 (uart_to_xbar ),
+    // Generic IO
+    .cio_rx_i             (uart_rx ),
+    .cio_tx_o             (uart_tx ),
+    .cio_tx_en_o          (tx_en_o ),
+    // Interrupts
+    .intr_tx_watermark_o  (intr_uart0_tx_watermark ),
+    .intr_rx_watermark_o  (intr_uart0_rx_watermark ),
+    .intr_tx_empty_o      (intr_uart0_tx_empty ),
+    .intr_rx_overflow_o   (intr_uart0_rx_overflow ),
+    .intr_rx_frame_err_o  (intr_uart0_rx_frame_err ),
+    .intr_rx_break_err_o  (intr_uart0_rx_break_err ),
+    .intr_rx_timeout_o    (intr_uart0_rx_timeout ),
+    .intr_rx_parity_err_o (intr_uart0_rx_parity_err)
+  );
+
+  // Bytes received by uart_rx_prog, handed to the ICCM controller.
+  logic rx_dv_i;
+  logic [7:0] rx_byte_i;
+
+  // Runs on the inverted clock and on rst_ni (not on system_rst_ni).
+  iccm_controller u_dut(
+    .clk_i     (~clk_i),
+    .rst_ni    (rst_ni),
+    .prog_i    (prog),
+    .rx_dv_i   (rx_dv_i),
+    .rx_byte_i (rx_byte_i),
+    .we_o      (iccm_ctrl_we),
+    .addr_o    (iccm_ctrl_addr_o),
+    .wdata_o   (iccm_ctrl_data),
+    .reset_o   (prog_rst_ni)
+  );
+
+  // Shares the uart_rx pin with u_uart0.
+  uart_rx_prog u_uart_rx_prog(
+    .clk_i        (~clk_i),
+    .rst_ni       (rst_ni),
+    .i_Rx_Serial  (uart_rx),
+    .CLKS_PER_BIT (16'd1667),
+    .o_Rx_DV      (rx_dv_i),
+    .o_Rx_Byte    (rx_byte_i)
+  );
+
+  // dummy instruction memory
+  instr_mem_top iccm_adapter(
+    .clk_i           (clk_i),
+    .rst_ni          (system_rst_ni),
+    .tl_i            (xbar_to_iccm),
+    .tl_o            (iccm_to_xbar),
+    // iccm controller interface
+    .iccm_ctrl_addr  (iccm_ctrl_addr_o),
+    .iccm_ctrl_wdata (iccm_ctrl_data),
+    .iccm_ctrl_we    (iccm_ctrl_we),
+    .prog_rst_ni     (prog_rst_ni),
+    // instruction sram interface
+    .csb             (instr_csb),
+    .addr_o          (instr_addr),
+    .wdata_o         (instr_wdata),
+    .wmask_o         (instr_wmask),
+    .we_o            (instr_we),
+    .rdata_i         (instr_rdata)
+  );
+
+  sram #(
+    .NUM_WMASKS (4),
+//  .MEMD (4096),
+    .DATA_WIDTH (32), // data width
+//  .nRPORTS (1) , // number of reading ports
+//  .nWPORTS (1), // number of write ports
+    .IZERO (0) , // binary / Initial RAM with zeros (has priority over IFILE)
+//  .BASIC_MODEL (1024),
+    .ADDR_WIDTH (11)
+  ) u_iccm ( /*`ifdef USE_POWER_PINS
+    inout vdd;
+    inout gnd;
+  `endif*/
+    .clk0   (~clk_i), // clock
+    .csb0   (instr_csb), // active low chip select
+    .web0   (instr_we), // active low write control
+    .wmask0 (instr_wmask), // write mask
+    .addr0  (instr_addr[10:0]),
+    .din0   (instr_wdata),
+    .dout0  (instr_rdata),
+    .clk1   (1'b0),
+    .csb1   (1'b1),
+    .addr1  ('0),
+    .dout1  ()
+  );
+
+  // dummy data memory
+  data_mem_top dccm_adapter(
+    .clk_i   (clk_i),
+    .rst_ni  (system_rst_ni),
+    // tl-ul interface
+    .tl_d_i  (xbar_to_dccm),
+    .tl_d_o  (dccm_to_xbar),
+    // sram interface
+    .csb     (data_csb),
+    .addr_o  (data_addr),
+    .wdata_o (data_wdata),
+    .wmask_o (data_wmask),
+    .we_o    (data_we),
+    .rdata_i (data_rdata)
+  );
+
+  sram #(
+    .NUM_WMASKS (4),
+//  .MEMD (4096),
+    .DATA_WIDTH (32), // data width
+//  .nRPORTS (1) , // number of reading ports
+//  .nWPORTS (1), // number of write ports
+    .IZERO (0) , // binary / Initial RAM with zeros (has priority over IFILE)
+//  .BASIC_MODEL (1024),
+    .ADDR_WIDTH (11)
+  ) u_dccm ( /*`ifdef USE_POWER_PINS
+    inout vdd;
+    inout gnd;
+  `endif*/
+    .clk0   (~clk_i), // clock
+    .csb0   (data_csb), // active low chip select
+    .web0   (data_we), // active low write control
+    .wmask0 (data_wmask), // write mask
+    .addr0  (data_addr[10:0]),
+    .din0   (data_wdata),
+    .dout0  (data_rdata),
+    .clk1   (1'b0),
+    .csb1   (1'b1),
+    .addr1  ('0),
+    .dout1  ()
+  );
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..1e83243
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,1592 @@
+ `define RVFI
+/**
+ * Top-level module of the brq RISC-V core
+ */
+module brq_core #(
+ parameter bit PMPEnable = 1'b0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 0,
+ parameter int unsigned MHPMCounterNum = 0,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit RV32E = 1'b0,
+ parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast,
+ parameter brq_pkg::rv32b_e RV32B = brq_pkg::RV32BNone,
+ parameter brq_pkg::regfile_e RegFile = brq_pkg::RegFileFF,
+ parameter brq_pkg::rvfloat_e RVF = brq_pkg::RV32FSingle, // for floating point
+ parameter int unsigned FloatingPoint = 1'b1,
+ parameter bit BranchTargetALU = 1'b0,
+ parameter bit WritebackStage = 1'b1,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter bit BranchPredictor = 1'b0,
+ parameter bit DbgTriggerEn = 1'b0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit Securebrq = 1'b0,
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+ input logic test_en_i, // enable all clock gates for testing
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+ // Instruction memory interface
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic instr_err_i,
+ // Data memory interface
+ output logic data_req_o,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ output logic data_we_o,
+ output logic [3:0] data_be_o,
+ output logic [31:0] data_addr_o,
+ output logic [31:0] data_wdata_o,
+ input logic [31:0] data_rdata_i,
+ input logic data_err_i,
+ // Interrupt inputs
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+ input logic irq_nm_i, // non-maskeable interrupt
+ // Debug Interface
+ input logic debug_req_i,
+ // RISC-V Formal Interface
+ // Does not comply with the coding standards of _i/_o suffixes, but follows
+ // the convention of RISC-V Formal Interface Specification.
+`ifdef RVFI
+ output logic rvfi_valid,
+ output logic [63:0] rvfi_order,
+ output logic [31:0] rvfi_insn,
+ output logic rvfi_trap,
+ output logic rvfi_halt,
+ output logic rvfi_intr,
+ output logic [ 1:0] rvfi_mode,
+ output logic [ 1:0] rvfi_ixl,
+ output logic [ 4:0] rvfi_rs1_addr,
+ output logic [ 4:0] rvfi_rs2_addr,
+ output logic [ 4:0] rvfi_rs3_addr,
+ output logic [31:0] rvfi_rs1_rdata,
+ output logic [31:0] rvfi_rs2_rdata,
+ output logic [31:0] rvfi_rs3_rdata,
+ output logic [ 4:0] rvfi_rd_addr,
+ output logic [31:0] rvfi_rd_wdata,
+ output logic [31:0] rvfi_pc_rdata,
+ output logic [31:0] rvfi_pc_wdata,
+ output logic [31:0] rvfi_mem_addr,
+ output logic [ 3:0] rvfi_mem_rmask,
+ output logic [ 3:0] rvfi_mem_wmask,
+ output logic [31:0] rvfi_mem_rdata,
+ output logic [31:0] rvfi_mem_wdata,
+ // CPU Control Signals
+ input logic fetch_enable_i,
+ output logic alert_minor_o,
+ output logic alert_major_o,
+ output logic core_sleep_o
+ import brq_pkg::*;
+ // floating point
+ localparam int unsigned W = 32;
+ logic fp_flush;
+ logic in_ready_c2fpu; // ready - from core to FPU
+ logic in_valid_c2fpu; // valid - from FPU to core
+ logic out_ready_fpu2c; // ready - from FPU to core
+ logic out_valid_fpu2c; // valid - from core to FPU
+ logic valid_id_fpu; // select which valid signal will go to dec
+ logic fp_rm_dynamic;
+ logic fp_alu_op_mod;
+ logic [4:0] fp_rf_raddr_a;
+ logic [4:0] fp_rf_raddr_b;
+ logic [4:0] fp_rf_raddr_c;
+ logic [W-1:0] fp_rf_rdata_a;
+ logic [W-1:0] fp_rf_rdata_b;
+ logic [W-1:0] fp_rf_rdata_c;
+ logic fp_rf_wen_id;
+ logic is_fp_instr;
+ logic [2:0][W-1:0] fp_operands; // three operands in fpu
+ logic fp_busy;
+ logic fpu_busy_idu;
+ logic [W-1:0] fp_result;
+ logic [ 31:0] data_wb;
+ logic [4:0] fp_rf_waddr_id;
+ logic [4:0] fp_rf_waddr_wb;
+ logic fp_rf_we;
+ logic fp_rf_wen_wb;
+ logic use_fp_rs1;
+ logic use_fp_rs2;
+ logic use_fp_rd;
+ logic fp_rf_write_wb;
+ logic [31:0] rf_int_fp_lsu;
+ logic fp_swap_oprnds;
+ logic fpu_is_busy;
+ logic fp_load;
+ logic [31:0] fp_rf_wdata_wb;
+ fpnew_pkg::status_t fp_status;
+ fpnew_pkg::operation_e fp_operation;
+ fpnew_pkg::roundmode_e fp_rounding_mode;
+ fpnew_pkg::roundmode_e fp_frm_csr;
+ fpnew_pkg::roundmode_e fp_frm_fpnew;
+ fpnew_pkg::operation_e fp_alu_operator;
+ fpnew_pkg::fp_format_e fp_src_fmt;
+ fpnew_pkg::fp_format_e fp_dst_fmt;
+ // brq
+ localparam int unsigned PMP_NUM_CHAN = 2;
+ localparam bit DataIndTiming = Securebrq;
+ localparam bit DummyInstructions = Securebrq;
+ localparam bit PCIncrCheck = Securebrq;
+ localparam bit ShadowCSR = Securebrq;
+ // Speculative branch option, trades-off performance against timing.
+ // Setting this to 1 eases branch target critical paths significantly but reduces performance
+ // by ~3% (based on CoreMark/MHz score).
+ // Set by default in the max PMP config which has the tightest budget for branch target timing.
+ localparam bit SpecBranch = PMPEnable & (PMPNumRegions == 16);
+ localparam bit RegFileECC = Securebrq;
+ localparam int unsigned RegFileDataWidth = RegFileECC ? 32 + 7 : 32;
+ // IF/ID signals
+ logic dummy_instr_id;
+ logic instr_valid_id;
+ logic instr_new_id;
+ logic [31:0] instr_rdata_id; // Instruction sampled inside IF stage
+ logic [31:0] instr_rdata_alu_id; // Instruction sampled inside IF stage (replicated to
+ // ease fan-out)
+ logic [15:0] instr_rdata_c_id; // Compressed instruction sampled inside IF stage
+ logic instr_is_compressed_id;
+ logic instr_perf_count_id;
+ logic instr_bp_taken_id;
+ logic instr_fetch_err; // Bus error on instr fetch
+ logic instr_fetch_err_plus2; // Instruction error is misaligned
+ logic illegal_c_insn_id; // Illegal compressed instruction sent to ID stage
+ logic [31:0] pc_if; // Program counter in IF stage
+ logic [31:0] pc_id; // Program counter in ID stage
+ logic [31:0] pc_wb; // Program counter in WB stage
+ logic [33:0] imd_val_d_ex[2]; // Intermediate register for multicycle Ops
+ logic [33:0] imd_val_q_ex[2]; // Intermediate register for multicycle Ops
+ logic [1:0] imd_val_we_ex;
+ logic data_ind_timing;
+ logic dummy_instr_en;
+ logic [2:0] dummy_instr_mask;
+ logic dummy_instr_seed_en;
+ logic [31:0] dummy_instr_seed;
+ logic icache_enable;
+ logic icache_inval;
+ logic pc_mismatch_alert;
+ logic csr_shadow_err;
+ logic instr_first_cycle_id;
+ logic instr_valid_clear;
+ logic pc_set;
+ logic pc_set_spec;
+ logic nt_branch_mispredict;
+ pc_sel_e pc_mux_id; // Mux selector for next PC
+ exc_pc_sel_e exc_pc_mux_id; // Mux selector for exception PC
+ exc_cause_e exc_cause; // Exception cause
+ logic lsu_load_err;
+ logic lsu_store_err;
+ // LSU signals
+ logic lsu_addr_incr_req;
+ logic [31:0] lsu_addr_last;
+ // Jump and branch target and decision (EX->IF)
+ logic [31:0] branch_target_ex;
+ logic branch_decision;
+ // Core busy signals
+ logic ctrl_busy;
+ logic if_busy;
+ logic lsu_busy;
+ logic core_busy_d, core_busy_q;
+ // Register File
+ logic [4:0] rf_raddr_a;
+ logic [31:0] rf_rdata_a;
+ logic [4:0] rf_raddr_b;
+ logic [31:0] rf_rdata_b;
+ logic rf_ren_a;
+ logic rf_ren_b;
+ logic [4:0] rf_waddr_wb;
+ logic [31:0] rf_wdata_wb;
+ // Writeback register write data that can be used on the forwarding path (doesn't factor in memory
+ // read data as this is too late for the forwarding path)
+ logic [31:0] rf_wdata_fwd_wb;
+ logic [31:0] rf_wdata_lsu;
+ logic rf_we_wb;
+ logic rf_we_lsu;
+ logic [4:0] rf_waddr_id;
+ logic [31:0] rf_wdata_id;
+ logic rf_we_id;
+ logic rf_rd_a_wb_match;
+ logic rf_rd_b_wb_match;
+ // ALU Control
+ alu_op_e alu_operator_ex;
+ logic [31:0] alu_operand_a_ex;
+ logic [31:0] alu_operand_b_ex;
+ logic [31:0] bt_a_operand;
+ logic [31:0] bt_b_operand;
+ logic [31:0] alu_adder_result_ex; // Used to forward computed address to LSU
+ logic [31:0] result_ex;
+ // Multiplier Control
+ logic mult_en_ex;
+ logic div_en_ex;
+ logic mult_sel_ex;
+ logic div_sel_ex;
+ md_op_e multdiv_operator_ex;
+ logic [1:0] multdiv_signed_mode_ex;
+ logic [31:0] multdiv_operand_a_ex;
+ logic [31:0] multdiv_operand_b_ex;
+ logic multdiv_ready_id;
+ // CSR control
+ logic csr_access;
+ csr_op_e csr_op;
+ logic csr_op_en;
+ csr_num_e csr_addr;
+ logic [31:0] csr_rdata;
+ logic [31:0] csr_wdata;
+ logic illegal_csr_insn_id; // CSR access to non-existent register,
+ // with wrong priviledge level,
+ // or missing write permissions
+ // Data Memory Control
+ logic lsu_we;
+ logic [1:0] lsu_type;
+ logic lsu_sign_ext;
+ logic lsu_req;
+ logic [31:0] lsu_wdata;
+ logic lsu_req_done;
+ // stall control
+ logic id_in_ready;
+ logic ex_valid;
+ logic lsu_resp_valid;
+ logic lsu_resp_err;
+ // Signals between instruction core interface and pipe (if and id stages)
+ logic instr_req_int; // Id stage asserts a req to instruction core interface
+ // Writeback stage
+ logic en_wb;
+ wb_instr_type_e instr_type_wb;
+ logic ready_wb;
+ logic rf_write_wb;
+ logic outstanding_load_wb;
+ logic outstanding_store_wb;
+ // Interrupts
+ logic irq_pending;
+ logic nmi_mode;
+ irqs_t irqs;
+ logic csr_mstatus_mie;
+ logic [31:0] csr_mepc, csr_depc;
+ // PMP signals
+ logic [33:0] csr_pmp_addr [PMPNumRegions];
+ pmp_cfg_t csr_pmp_cfg [PMPNumRegions];
+ logic pmp_req_err [PMP_NUM_CHAN];
+ logic instr_req_out;
+ logic data_req_out;
+ logic csr_save_if;
+ logic csr_save_id;
+ logic csr_save_wb;
+ logic csr_restore_mret_id;
+ logic csr_restore_dret_id;
+ logic csr_save_cause;
+ logic csr_mtvec_init;
+ logic [31:0] csr_mtvec;
+ logic [31:0] csr_mtval;
+ logic csr_mstatus_tw;
+ priv_lvl_e priv_mode_id;
+ priv_lvl_e priv_mode_if;
+ priv_lvl_e priv_mode_lsu;
+ // debug mode and dcsr configuration
+ logic debug_mode;
+ dbg_cause_e debug_cause;
+ logic debug_csr_save;
+ logic debug_single_step;
+ logic debug_ebreakm;
+ logic debug_ebreaku;
+ logic trigger_match;
+ // signals relating to instruction movements between pipeline stages
+ // used by performance counters and RVFI
+ logic instr_id_done;
+ logic instr_done_wb;
+ logic perf_instr_ret_wb;
+ logic perf_instr_ret_compressed_wb;
+ logic perf_iside_wait;
+ logic perf_dside_wait;
+ logic perf_mul_wait;
+ logic perf_div_wait;
+ logic perf_jump;
+ logic perf_branch;
+ logic perf_tbranch;
+ logic perf_load;
+ logic perf_store;
+ // for RVFI
+ logic illegal_insn_id, unused_illegal_insn_id; // ID stage sees an illegal instruction
+ // RISC-V Formal Interface signals
+`ifdef RVFI
+ logic rvfi_instr_new_wb;
+ logic rvfi_intr_d;
+ logic rvfi_intr_q;
+ logic rvfi_set_trap_pc_d;
+ logic rvfi_set_trap_pc_q;
+ logic [31:0] rvfi_insn_id;
+ logic [4:0] rvfi_rs1_addr_d;
+ logic [4:0] rvfi_rs1_addr_q;
+ logic [4:0] rvfi_rs2_addr_d;
+ logic [4:0] rvfi_rs2_addr_q;
+ logic [4:0] rvfi_rs3_addr_d;
+ logic [31:0] rvfi_rs1_data_d;
+ logic [31:0] rvfi_rs1_data_q;
+ logic [31:0] rvfi_rs2_data_d;
+ logic [31:0] rvfi_rs2_data_q;
+ logic [31:0] rvfi_rs3_data_d;
+ logic [4:0] rvfi_rd_addr_wb;
+ logic [4:0] rvfi_rd_addr_q;
+ logic [4:0] rvfi_rd_addr_d;
+ logic [31:0] rvfi_rd_wdata_wb;
+ logic [31:0] rvfi_rd_wdata_d;
+ logic [31:0] rvfi_rd_wdata_q;
+ logic rvfi_rd_we_wb;
+ logic [3:0] rvfi_mem_mask_int;
+ logic [31:0] rvfi_mem_rdata_d;
+ logic [31:0] rvfi_mem_rdata_q;
+ logic [31:0] rvfi_mem_wdata_d;
+ logic [31:0] rvfi_mem_wdata_q;
+ logic [31:0] rvfi_mem_addr_d;
+ logic [31:0] rvfi_mem_addr_q;
+ //////////////////////
+ // Clock management //
+ //////////////////////
+ logic clk;
+ logic clock_en;
+ // Before going to sleep, wait for I- and D-side
+ // interfaces to finish ongoing operations.
+ assign core_busy_d = ctrl_busy | if_busy | lsu_busy | fp_busy;
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ core_busy_q <= 1'b0;
+ end else begin
+ core_busy_q <= core_busy_d;
+ end
+ end
+ // capture fetch_enable_i in fetch_enable_q, once for ever
+ logic fetch_enable_q;
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fetch_enable_q <= 1'b0;
+ end else if (fetch_enable_i) begin
+ fetch_enable_q <= 1'b1;
+ end
+ end
+ assign clock_en = fetch_enable_q & (core_busy_q | debug_req_i | irq_pending | irq_nm_i);
+ assign core_sleep_o = ~clock_en;
+ // main clock gate of the core
+ // generates all clocks except the one for the debug unit which is
+ // independent
+ prim_clock_gating core_clock_gate_i (
+ .clk_i ( clk_i ),
+ .en_i ( clock_en ),
+ .test_en_i ( test_en_i ),
+ .clk_o ( clk )
+ );
+ //////////////
+ // IF stage //
+ //////////////
+ brq_ifu #(
+ .DmHaltAddr ( DmHaltAddr ),
+ .DmExceptionAddr ( DmExceptionAddr ),
+ .DummyInstructions ( DummyInstructions ),
+ .ICache ( ICache ),
+ .ICacheECC ( ICacheECC ),
+ .PCIncrCheck ( PCIncrCheck ),
+ .BranchPredictor ( BranchPredictor )
+ ) if_stage_i (
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ .boot_addr_i ( boot_addr_i ),
+ .req_i ( instr_req_int ), // instruction request control
+ // instruction cache interface
+ .instr_req_o ( instr_req_out ),
+ .instr_addr_o ( instr_addr_o ),
+ .instr_gnt_i ( instr_gnt_i ),
+ .instr_rvalid_i ( instr_rvalid_i ),
+ .instr_rdata_i ( instr_rdata_i ),
+ .instr_err_i ( instr_err_i ),
+ .instr_pmp_err_i ( pmp_req_err[PMP_I] ),
+ // outputs to ID stage
+ .instr_valid_id_o ( instr_valid_id ),
+ .instr_new_id_o ( instr_new_id ),
+ .instr_rdata_id_o ( instr_rdata_id ),
+ .instr_rdata_alu_id_o ( instr_rdata_alu_id ),
+ .instr_rdata_c_id_o ( instr_rdata_c_id ),
+ .instr_is_compressed_id_o ( instr_is_compressed_id ),
+ .instr_bp_taken_o ( instr_bp_taken_id ),
+ .instr_fetch_err_o ( instr_fetch_err ),
+ .instr_fetch_err_plus2_o ( instr_fetch_err_plus2 ),
+ .illegal_c_insn_id_o ( illegal_c_insn_id ),
+ .dummy_instr_id_o ( dummy_instr_id ),
+ .pc_if_o ( pc_if ),
+ .pc_id_o ( pc_id ),
+ // control signals
+ .instr_valid_clear_i ( instr_valid_clear ),
+ .pc_set_i ( pc_set ),
+ .pc_set_spec_i ( pc_set_spec ),
+ .pc_mux_i ( pc_mux_id ),
+ .nt_branch_mispredict_i ( nt_branch_mispredict ),
+ .exc_pc_mux_i ( exc_pc_mux_id ),
+ .exc_cause ( exc_cause ),
+ .dummy_instr_en_i ( dummy_instr_en ),
+ .dummy_instr_mask_i ( dummy_instr_mask ),
+ .dummy_instr_seed_en_i ( dummy_instr_seed_en ),
+ .dummy_instr_seed_i ( dummy_instr_seed ),
+ .icache_enable_i ( icache_enable ),
+ .icache_inval_i ( icache_inval ),
+ // branch targets
+ .branch_target_ex_i ( branch_target_ex ),
+ // CSRs
+ .csr_mepc_i ( csr_mepc ), // exception return address
+ .csr_depc_i ( csr_depc ), // debug return address
+ .csr_mtvec_i ( csr_mtvec ), // trap-vector base address
+ .csr_mtvec_init_o ( csr_mtvec_init ),
+ // pipeline stalls
+ .id_in_ready_i ( id_in_ready ),
+ .pc_mismatch_alert_o ( pc_mismatch_alert ),
+ .if_busy_o ( if_busy )
+ );
+ // Core is waiting for the ISide when ID/EX stage is ready for a new instruction but none are
+ // available
+ assign perf_iside_wait = id_in_ready & ~instr_valid_id;
+ // Qualify the instruction request with PMP error
+ assign instr_req_o = instr_req_out & ~pmp_req_err[PMP_I];
+ //////////////
+ // ID stage //
+ //////////////
+ brq_idu #( // ID stage instance: its outputs steer the IF, EX, LSU, WB, CSR and FPU logic connected in this file
+ .RV32E ( RV32E ),
+ .RV32M ( RV32M ),
+ .RV32B ( RV32B ),
+ .BranchTargetALU ( BranchTargetALU ),
+ .DataIndTiming ( DataIndTiming ),
+ .SpecBranch ( SpecBranch ),
+ .WritebackStage ( WritebackStage ),
+ .BranchPredictor ( BranchPredictor )
+ ) id_stage_i (
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ // Processor Enable
+ .ctrl_busy_o ( ctrl_busy ),
+ .illegal_insn_o ( illegal_insn_id ),
+ // from/to IF-ID pipeline register
+ .instr_valid_i ( instr_valid_id ),
+ .instr_rdata_i ( instr_rdata_id ),
+ .instr_rdata_alu_i ( instr_rdata_alu_id ),
+ .instr_rdata_c_i ( instr_rdata_c_id ),
+ .instr_is_compressed_i ( instr_is_compressed_id ),
+ .instr_bp_taken_i ( instr_bp_taken_id ),
+ // Jumps and branches
+ .branch_decision_i ( branch_decision ),
+ // IF and ID control signals
+ .instr_first_cycle_id_o ( instr_first_cycle_id ),
+ .instr_valid_clear_o ( instr_valid_clear ),
+ .id_in_ready_o ( id_in_ready ),
+ .instr_req_o ( instr_req_int ),
+ .pc_set_o ( pc_set ),
+ .pc_set_spec_o ( pc_set_spec ),
+ .pc_mux_o ( pc_mux_id ),
+ .nt_branch_mispredict_o ( nt_branch_mispredict ),
+ .exc_pc_mux_o ( exc_pc_mux_id ),
+ .exc_cause_o ( exc_cause ),
+ .icache_inval_o ( icache_inval ),
+ .instr_fetch_err_i ( instr_fetch_err ),
+ .instr_fetch_err_plus2_i ( instr_fetch_err_plus2 ),
+ .illegal_c_insn_i ( illegal_c_insn_id ),
+ .pc_id_i ( pc_id ),
+ // Stalls
+ .ex_valid_i ( valid_id_fpu ), // changed by zeeshan from ex_valid to valid_id_fpu:
+ // valid_id_fpu is out_valid_fpu2c for FP instructions
+ // and ex_valid otherwise (ready selection)
+ .lsu_resp_valid_i ( lsu_resp_valid ),
+ .alu_operator_ex_o ( alu_operator_ex ), // ALU operator and operands go to the EX block
+ .alu_operand_a_ex_o ( alu_operand_a_ex ),
+ .alu_operand_b_ex_o ( alu_operand_b_ex ),
+ .imd_val_q_ex_o ( imd_val_q_ex ), // intermediate value register: q to EX, d/we back from EX
+ .imd_val_d_ex_i ( imd_val_d_ex ),
+ .imd_val_we_ex_i ( imd_val_we_ex ),
+ .bt_a_operand_o ( bt_a_operand ), // branch target ALU operands
+ .bt_b_operand_o ( bt_b_operand ),
+ .mult_en_ex_o ( mult_en_ex ), // multiplier/divider control and operands to the EX block
+ .div_en_ex_o ( div_en_ex ),
+ .mult_sel_ex_o ( mult_sel_ex ),
+ .div_sel_ex_o ( div_sel_ex ),
+ .multdiv_operator_ex_o ( multdiv_operator_ex ),
+ .multdiv_signed_mode_ex_o ( multdiv_signed_mode_ex ),
+ .multdiv_operand_a_ex_o ( multdiv_operand_a_ex ),
+ .multdiv_operand_b_ex_o ( multdiv_operand_b_ex ),
+ .multdiv_ready_id_o ( multdiv_ready_id ),
+ // CSR ID/EX
+ .csr_access_o ( csr_access ),
+ .csr_op_o ( csr_op ),
+ .csr_op_en_o ( csr_op_en ),
+ .csr_save_if_o ( csr_save_if ), // control signal to save PC
+ .csr_save_id_o ( csr_save_id ), // control signal to save PC
+ .csr_save_wb_o ( csr_save_wb ), // control signal to save PC
+ .csr_restore_mret_id_o ( csr_restore_mret_id ), // restore mstatus upon MRET
+ .csr_restore_dret_id_o ( csr_restore_dret_id ), // restore state upon DRET
+ .csr_save_cause_o ( csr_save_cause ),
+ .csr_mtval_o ( csr_mtval ),
+ .priv_mode_i ( priv_mode_id ),
+ .csr_mstatus_tw_i ( csr_mstatus_tw ),
+ .illegal_csr_insn_i ( illegal_csr_insn_id ),
+ .data_ind_timing_i ( data_ind_timing ),
+ // LSU
+ .lsu_req_o ( lsu_req ), // to load store unit
+ .lsu_we_o ( lsu_we ), // to load store unit
+ .lsu_type_o ( lsu_type ), // to load store unit
+ .lsu_sign_ext_o ( lsu_sign_ext ), // to load store unit
+ .lsu_wdata_o ( lsu_wdata ), // to load store unit
+ .lsu_req_done_i ( lsu_req_done ), // from load store unit
+ .lsu_addr_incr_req_i ( lsu_addr_incr_req ),
+ .lsu_addr_last_i ( lsu_addr_last ),
+ .lsu_load_err_i ( lsu_load_err ),
+ .lsu_store_err_i ( lsu_store_err ),
+ // Interrupt Signals
+ .csr_mstatus_mie_i ( csr_mstatus_mie ),
+ .irq_pending_i ( irq_pending ),
+ .irqs_i ( irqs ),
+ .irq_nm_i ( irq_nm_i ),
+ .nmi_mode_o ( nmi_mode ),
+ // Debug Signal
+ .debug_mode_o ( debug_mode ),
+ .debug_cause_o ( debug_cause ),
+ .debug_csr_save_o ( debug_csr_save ),
+ .debug_req_i ( debug_req_i ),
+ .debug_single_step_i ( debug_single_step ),
+ .debug_ebreakm_i ( debug_ebreakm ),
+ .debug_ebreaku_i ( debug_ebreaku ),
+ .trigger_match_i ( trigger_match ),
+ // write data to commit in the register file
+ .result_ex_i ( data_wb ), // changed by zeeshan from result_ex to data_wb (FPU result
+ // for FP instructions, else result_ex) for FCVT, FMV.X.W ins
+ .csr_rdata_i ( csr_rdata ),
+ .rf_raddr_a_o ( rf_raddr_a ),
+ .rf_rdata_a_i ( rf_rdata_a ),
+ .rf_raddr_b_o ( rf_raddr_b ),
+ .rf_rdata_b_i ( rf_int_fp_lsu ), // FP regfile port b when is_fp_instr & use_fp_rs2, else integer rs2 data
+ .rf_ren_a_o ( rf_ren_a ),
+ .rf_ren_b_o ( rf_ren_b ),
+ .rf_waddr_id_o ( rf_waddr_id ),
+ .rf_wdata_id_o ( rf_wdata_id ),
+ .rf_we_id_o ( rf_we_id ),
+ .rf_rd_a_wb_match_o ( rf_rd_a_wb_match ),
+ .rf_rd_b_wb_match_o ( rf_rd_b_wb_match ),
+ .rf_waddr_wb_i ( rf_waddr_wb ),
+ .rf_wdata_fwd_wb_i ( rf_wdata_fwd_wb ),
+ .rf_write_wb_i ( rf_write_wb ),
+ .en_wb_o ( en_wb ),
+ .instr_type_wb_o ( instr_type_wb ),
+ .instr_perf_count_id_o ( instr_perf_count_id ),
+ .ready_wb_i ( ready_wb ),
+ .outstanding_load_wb_i ( outstanding_load_wb ),
+ .outstanding_store_wb_i ( outstanding_store_wb ),
+ // Performance Counters
+ .perf_jump_o ( perf_jump ),
+ .perf_branch_o ( perf_branch ),
+ .perf_tbranch_o ( perf_tbranch ),
+ .perf_dside_wait_o ( perf_dside_wait ),
+ .perf_mul_wait_o ( perf_mul_wait ),
+ .perf_div_wait_o ( perf_div_wait ),
+ .instr_id_done_o ( instr_id_done ),
+ // Floating point extensions IO
+ .fp_rounding_mode_o ( fp_rounding_mode ), // defines the rounding mode
+ .fp_rf_rdata_a_i ( fp_rf_rdata_a ), // read data from the three FP register file read ports
+ .fp_rf_rdata_b_i ( fp_rf_rdata_b ),
+ .fp_rf_rdata_c_i ( fp_rf_rdata_c ),
+ .fp_rf_raddr_a_o ( fp_rf_raddr_a ),
+ .fp_rf_raddr_b_o ( fp_rf_raddr_b ),
+ .fp_rf_raddr_c_o ( fp_rf_raddr_c ),
+ .fp_rf_waddr_o ( fp_rf_waddr_id ), // FP write address/enable are passed on to the WB stage
+ .fp_rf_we_o ( fp_rf_wen_id ),
+ .fp_alu_operator_o ( fp_alu_operator ), // operation, modifier and formats go to the fpnew_top instance
+ .fp_alu_op_mod_o ( fp_alu_op_mod ),
+ .fp_src_fmt_o ( fp_src_fmt ),
+ .fp_dst_fmt_o ( fp_dst_fmt ),
+ .fp_rm_dynamic_o ( fp_rm_dynamic ), // selects the CSR frm value instead of fp_rounding_mode
+ .fp_flush_o ( fp_flush ),
+ .is_fp_instr_o ( is_fp_instr ),
+ .use_fp_rs1_o ( use_fp_rs1 ),
+ .use_fp_rs2_o ( use_fp_rs2 ),
+ .use_fp_rs3_o ( use_fp_rs3 ),
+ .use_fp_rd_o ( use_fp_rd ),
+ .fpu_busy_i ( fpu_busy_idu ), // fp_busy gated with ~out_valid_fpu2c
+ .fp_rf_write_wb_i ( fp_rf_write_wb ),
+ .fp_rf_wdata_fwd_wb_i ( fp_rf_wdata_wb ),
+ .fp_operands_o ( fp_operands ),
+ .fp_load_o ( fp_load )
+ );
+ // for RVFI only: illegal_insn_id is read by the RVFI trap flop; alias it to an unused_* net (presumably to keep lint quiet when RVFI is off)
+ assign unused_illegal_insn_id = illegal_insn_id;
+ brq_exu #( // EX block instance: takes operators/operands from the ID stage, returns results and branch info
+ .RV32M ( RV32M ),
+ .RV32B ( RV32B ),
+ .BranchTargetALU ( BranchTargetALU )
+ ) ex_block_i (
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ // ALU signal from ID stage
+ .alu_operator_i ( alu_operator_ex ),
+ .alu_operand_a_i ( alu_operand_a_ex ),
+ .alu_operand_b_i ( alu_operand_b_ex ),
+ .alu_instr_first_cycle_i ( instr_first_cycle_id ),
+ // Branch target ALU signal from ID stage
+ .bt_a_operand_i ( bt_a_operand ),
+ .bt_b_operand_i ( bt_b_operand ),
+ // Multiplier/Divider signal from ID stage
+ .multdiv_operator_i ( multdiv_operator_ex ),
+ .mult_en_i ( mult_en_ex ),
+ .div_en_i ( div_en_ex ),
+ .mult_sel_i ( mult_sel_ex ),
+ .div_sel_i ( div_sel_ex ),
+ .multdiv_signed_mode_i ( multdiv_signed_mode_ex ),
+ .multdiv_operand_a_i ( multdiv_operand_a_ex ),
+ .multdiv_operand_b_i ( multdiv_operand_b_ex ),
+ .multdiv_ready_id_i ( multdiv_ready_id ),
+ .data_ind_timing_i ( data_ind_timing ),
+ // Intermediate value register
+ .imd_val_we_o ( imd_val_we_ex ),
+ .imd_val_d_o ( imd_val_d_ex ),
+ .imd_val_q_i ( imd_val_q_ex ),
+ // Outputs
+ .alu_adder_result_ex_o ( alu_adder_result_ex ), // to LSU
+ .result_ex_o ( result_ex ), // to ID (via the data_wb mux, which picks fp_result for FP instructions)
+ .branch_target_o ( branch_target_ex ), // to IF
+ .branch_decision_o ( branch_decision ), // to ID
+ .ex_valid_o ( ex_valid ) // reaches the ID stage through valid_id_fpu for non-FP instructions
+ );
+ /////////////////////
+ // Load/store unit //
+ /////////////////////
+ // The data request leaves the core only when the PMP raises no error on the data channel.
+ assign data_req_o = ~pmp_req_err[PMP_D] & data_req_out;
+ // Either a load error or a store error counts as an LSU response error (input to the WB stage).
+ assign lsu_resp_err = lsu_store_err | lsu_load_err;
+ brq_lsu load_store_unit_i ( // load/store unit: sits between the ID/EX request signals and the core's data_* ports
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ // data interface
+ .data_req_o ( data_req_out ), // raw request; masked with the PMP data error to form data_req_o
+ .data_gnt_i ( data_gnt_i ),
+ .data_rvalid_i ( data_rvalid_i ),
+ .data_err_i ( data_err_i ),
+ .data_pmp_err_i ( pmp_req_err[PMP_D] ),
+ .data_addr_o ( data_addr_o ),
+ .data_we_o ( data_we_o ),
+ .data_be_o ( data_be_o ),
+ .data_wdata_o ( data_wdata_o ),
+ .data_rdata_i ( data_rdata_i ),
+ // signals to/from ID/EX stage
+ .lsu_we_i ( lsu_we ),
+ .lsu_type_i ( lsu_type ),
+ .lsu_wdata_i ( lsu_wdata ),
+ .lsu_sign_ext_i ( lsu_sign_ext ),
+ .lsu_rdata_o ( rf_wdata_lsu ), // load data and its valid flag go to the WB stage
+ .lsu_rdata_valid_o ( rf_we_lsu ),
+ .lsu_req_i ( lsu_req ),
+ .lsu_req_done_o ( lsu_req_done ),
+ .adder_result_ex_i ( alu_adder_result_ex ), // ALU adder result from the EX block
+ .addr_incr_req_o ( lsu_addr_incr_req ),
+ .addr_last_o ( lsu_addr_last ),
+ .lsu_resp_valid_o ( lsu_resp_valid ),
+ // exception signals
+ .load_err_o ( lsu_load_err ),
+ .store_err_o ( lsu_store_err ),
+ .busy_o ( lsu_busy ),
+ .perf_load_o ( perf_load ), // performance counter events consumed by cs_registers_i
+ .perf_store_o ( perf_store )
+ );
+ brq_wbu #( // writeback stage instance: merges ID-stage and LSU write data into the register file write ports
+ .WritebackStage ( WritebackStage )
+ ) wb_stage_i (
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ .en_wb_i ( en_wb ),
+ .instr_type_wb_i ( instr_type_wb ),
+ .pc_id_i ( pc_id ),
+ .instr_is_compressed_id_i ( instr_is_compressed_id ),
+ .instr_perf_count_id_i ( instr_perf_count_id ),
+ .ready_wb_o ( ready_wb ),
+ .rf_write_wb_o ( rf_write_wb ),
+ .outstanding_load_wb_o ( outstanding_load_wb ),
+ .outstanding_store_wb_o ( outstanding_store_wb ),
+ .pc_wb_o ( pc_wb ),
+ .perf_instr_ret_wb_o ( perf_instr_ret_wb ), // retired-instruction events for the performance counters
+ .perf_instr_ret_compressed_wb_o ( perf_instr_ret_compressed_wb ),
+ .rf_waddr_id_i ( rf_waddr_id ), // integer write request from the ID stage
+ .rf_wdata_id_i ( rf_wdata_id ),
+ .rf_we_id_i ( rf_we_id ),
+ .rf_wdata_lsu_i ( rf_wdata_lsu ), // load data and valid from the LSU
+ .rf_we_lsu_i ( rf_we_lsu ),
+ .rf_wdata_fwd_wb_o ( rf_wdata_fwd_wb ), // forwarding data back to the ID stage
+ .rf_waddr_wb_o ( rf_waddr_wb ), // integer register file write port
+ .rf_wdata_wb_o ( rf_wdata_wb ),
+ .rf_we_wb_o ( rf_we_wb ),
+ .lsu_resp_valid_i ( lsu_resp_valid ),
+ .lsu_resp_err_i ( lsu_resp_err ),
+ .instr_done_wb_o ( instr_done_wb ),
+ // floating point
+ .fp_rf_write_wb_o ( fp_rf_write_wb ),
+ .fp_rf_wen_wb_o ( fp_rf_wen_wb ), // FP register file write port (enable, address, data)
+ .fp_rf_waddr_wb_o ( fp_rf_waddr_wb ),
+ .fp_rf_wen_id_i ( fp_rf_wen_id ),
+ .fp_rf_waddr_id_i ( fp_rf_waddr_id ),
+ .fp_rf_wdata_wb_o ( fp_rf_wdata_wb ),
+ .fp_load_i ( fp_load )
+ );
+ ///////////////////////
+ // Register file ECC //
+ ///////////////////////
+ logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc; // write data as stored in the register file
+ logic [RegFileDataWidth-1:0] rf_rdata_a_ecc; // raw read data from register file port a
+ logic [RegFileDataWidth-1:0] rf_rdata_b_ecc; // raw read data from register file port b
+ logic rf_ecc_err_comb; // combined ECC error, feeds alert_major_o
+ if (RegFileECC) begin : gen_regfile_ecc
+ logic [1:0] rf_ecc_err_a, rf_ecc_err_b;
+ logic rf_ecc_err_a_id, rf_ecc_err_b_id;
+ // ECC checkbit generation for register file wdata
+ prim_secded_39_32_enc regfile_ecc_enc (
+ .in (rf_wdata_wb),
+ .out (rf_wdata_wb_ecc)
+ );
+ // ECC checking on register file rdata
+ prim_secded_39_32_dec regfile_ecc_dec_a (
+ .in (rf_rdata_a_ecc),
+ .d_o (), // decoded data and syndrome are left unconnected; only err_o is used
+ .syndrome_o (),
+ .err_o (rf_ecc_err_a)
+ );
+ prim_secded_39_32_dec regfile_ecc_dec_b (
+ .in (rf_rdata_b_ecc),
+ .d_o (),
+ .syndrome_o (),
+ .err_o (rf_ecc_err_b)
+ );
+ // Assign read outputs - no error correction, just trigger an alert
+ assign rf_rdata_a = rf_rdata_a_ecc[31:0];
+ assign rf_rdata_b = rf_rdata_b_ecc[31:0];
+ // Calculate errors - qualify with WB forwarding to avoid xprop into the alert signal
+ assign rf_ecc_err_a_id = |rf_ecc_err_a & rf_ren_a & ~rf_rd_a_wb_match;
+ assign rf_ecc_err_b_id = |rf_ecc_err_b & rf_ren_b & ~rf_rd_b_wb_match;
+ // Combined error
+ assign rf_ecc_err_comb = instr_valid_id & (rf_ecc_err_a_id | rf_ecc_err_b_id);
+ end else begin : gen_no_regfile_ecc
+ logic unused_rf_ren_a, unused_rf_ren_b;
+ logic unused_rf_rd_a_wb_match, unused_rf_rd_b_wb_match;
+ assign unused_rf_ren_a = rf_ren_a;
+ assign unused_rf_ren_b = rf_ren_b;
+ assign unused_rf_rd_a_wb_match = rf_rd_a_wb_match;
+ assign unused_rf_rd_b_wb_match = rf_rd_b_wb_match;
+ assign rf_wdata_wb_ecc = rf_wdata_wb; // no ECC: write and read data pass straight through
+ assign rf_rdata_a = rf_rdata_a_ecc;
+ assign rf_rdata_b = rf_rdata_b_ecc;
+ assign rf_ecc_err_comb = 1'b0; // no ECC: the error is tied off
+ end
+ // Operand b for the ID stage: FP register file port b for FP instructions that use an FP rs2,
+ // otherwise the integer register file port b.
+ assign rf_int_fp_lsu = (use_fp_rs2 & is_fp_instr) ? fp_rf_rdata_b : rf_rdata_b;
+ if (RegFile == RegFileFF) begin : gen_regfile_ff // the RegFile parameter picks one of three implementations with identical port wiring
+ brq_register_file_ff #(
+ .RV32E ( RV32E ),
+ .DataWidth ( RegFileDataWidth ),
+ .DummyInstructions ( DummyInstructions )
+ ) register_file_i (
+ .clk_i ( clk_i ), // note: clk_i, whereas the pipeline stage instances in this file use clk
+ .rst_ni ( rst_ni ),
+ .test_en_i ( test_en_i ),
+ .dummy_instr_id_i ( dummy_instr_id ),
+ .raddr_a_i ( rf_raddr_a ),
+ .rdata_a_o ( rf_rdata_a_ecc ),
+ .raddr_b_i ( rf_raddr_b ),
+ .rdata_b_o ( rf_rdata_b_ecc ),
+ .waddr_a_i ( rf_waddr_wb ),
+ .wdata_a_i ( rf_wdata_wb_ecc ),
+ .we_a_i ( rf_we_wb )
+ );
+ end else if (RegFile == RegFileFPGA) begin : gen_regfile_fpga
+ brq_register_file_fpga #(
+ .RV32E ( RV32E ),
+ .DataWidth ( RegFileDataWidth ),
+ .DummyInstructions ( DummyInstructions )
+ ) register_file_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .test_en_i ( test_en_i ),
+ .dummy_instr_id_i ( dummy_instr_id ),
+ .raddr_a_i ( rf_raddr_a ),
+ .rdata_a_o ( rf_rdata_a_ecc ),
+ .raddr_b_i ( rf_raddr_b ),
+ .rdata_b_o ( rf_rdata_b_ecc ),
+ .waddr_a_i ( rf_waddr_wb ),
+ .wdata_a_i ( rf_wdata_wb_ecc ),
+ .we_a_i ( rf_we_wb )
+ );
+ end else if (RegFile == RegFileLatch) begin : gen_regfile_latch
+ brq_register_file_latch #(
+ .RV32E ( RV32E ),
+ .DataWidth ( RegFileDataWidth ),
+ .DummyInstructions ( DummyInstructions )
+ ) register_file_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .test_en_i ( test_en_i ),
+ .dummy_instr_id_i ( dummy_instr_id ),
+ .raddr_a_i ( rf_raddr_a ),
+ .rdata_a_o ( rf_rdata_a_ecc ),
+ .raddr_b_i ( rf_raddr_b ),
+ .rdata_b_o ( rf_rdata_b_ecc ),
+ .waddr_a_i ( rf_waddr_wb ),
+ .wdata_a_i ( rf_wdata_wb_ecc ),
+ .we_a_i ( rf_we_wb )
+ );
+ end // no final else: any other RegFile value instantiates no register file
+ // Floating-point register file, present only when FloatingPoint is set. Three read ports
+ // (a, b, c) are addressed by and feed the ID stage; the single write port is driven by the
+ // writeback stage outputs (fp_rf_waddr_wb / fp_rf_wdata_wb / fp_rf_wen_wb).
+ if (FloatingPoint) begin : gen_fp_regfile
+ brq_fp_register_file_ff #(
+ .RVF ( RVF ),
+ .DataWidth ( W )
+ ) fp_register_file (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .raddr_a_i ( fp_rf_raddr_a ),
+ .rdata_a_o ( fp_rf_rdata_a ),
+ .raddr_b_i ( fp_rf_raddr_b ),
+ .rdata_b_o ( fp_rf_rdata_b ),
+ .raddr_c_i ( fp_rf_raddr_c ),
+ .rdata_c_o ( fp_rf_rdata_c ),
+ .waddr_a_i ( fp_rf_waddr_wb ),
+ .wdata_a_i ( fp_rf_wdata_wb ),
+ .we_a_i ( fp_rf_wen_wb )
+ );
+ end
+ ///////////////////
+ // Alert outputs //
+ ///////////////////
+ // Minor alert (core still recoverable): currently never raised.
+ // TODO add I$ ECC errors here
+ assign alert_minor_o = 1'b0;
+ // Major alert (core unrecoverable): CSR shadow error, PC mismatch, or register file ECC error.
+ assign alert_major_o = (csr_shadow_err | pc_mismatch_alert | rf_ecc_err_comb);
+ // Explicit INC_ASSERT block to avoid unused signal lint warnings where asserts are not included
+ `ifdef INC_ASSERT
+ // Signals used for assertions only
+ logic outstanding_load_resp;
+ logic outstanding_store_resp;
+ logic outstanding_load_id;
+ logic outstanding_store_id;
+ assign outstanding_load_id = id_stage_i.instr_executing & id_stage_i.lsu_req_dec & // hierarchical reference into the ID stage
+ ~id_stage_i.lsu_we;
+ assign outstanding_store_id = id_stage_i.instr_executing & id_stage_i.lsu_req_dec &
+ id_stage_i.lsu_we;
+ if (WritebackStage) begin : gen_wb_stage
+ // When the writeback stage is present a load/store could be in ID or WB. A Load/store in ID can
+ // see a response before it moves to WB when it is unaligned otherwise we should only see
+ // a response when load/store is in WB.
+ assign outstanding_load_resp = outstanding_load_wb |
+ (outstanding_load_id & load_store_unit_i.split_misaligned_access);
+ assign outstanding_store_resp = outstanding_store_wb |
+ (outstanding_store_id & load_store_unit_i.split_misaligned_access);
+ // When writing back the result of a load, the load must have made it to writeback
+ end else begin : gen_no_wb_stage
+ // Without writeback stage only look into whether load or store is in ID to determine if
+ // a response is expected.
+ assign outstanding_load_resp = outstanding_load_id;
+ assign outstanding_store_resp = outstanding_store_id;
+ end
+ `endif
+ ////////////////////////
+ // RF (Register File) //
+ ////////////////////////
+`ifdef RVFI
+ // RVFI view of the register write: address from the WB stage; data is the WB write data when
+ // rf_we_wb is set and the LSU load data otherwise; write enable is either of the two enables.
+ assign rvfi_rd_addr_wb = rf_waddr_wb;
+ assign rvfi_rd_wdata_wb = rf_we_wb ? rf_wdata_wb : rf_wdata_lsu;
+ assign rvfi_rd_we_wb = rf_we_wb | rf_we_lsu;
+`endif
+ /////////////////////////////////////////
+ // CSRs (Control and Status Registers) //
+ /////////////////////////////////////////
+ assign csr_wdata = alu_operand_a_ex; // CSR write data is ALU operand A
+ assign csr_addr = csr_num_e'(csr_access ? alu_operand_b_ex[11:0] : 12'b0); // low 12 bits of ALU operand B, forced to 0 when there is no CSR access
+ brq_cs_registers #( // CSR block instance: CSR access port, interrupt and debug state, PMP config, performance counters, FP rounding/status
+ .DbgTriggerEn ( DbgTriggerEn ),
+ .DbgHwBreakNum ( DbgHwBreakNum ),
+ .DataIndTiming ( DataIndTiming ),
+ .DummyInstructions ( DummyInstructions ),
+ .ShadowCSR ( ShadowCSR ),
+ .ICache ( ICache ),
+ .MHPMCounterNum ( MHPMCounterNum ),
+ .MHPMCounterWidth ( MHPMCounterWidth ),
+ .PMPEnable ( PMPEnable ),
+ .PMPGranularity ( PMPGranularity ),
+ .PMPNumRegions ( PMPNumRegions ),
+ .RV32E ( RV32E ),
+ .RV32M ( RV32M )
+ ) cs_registers_i (
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ // Hart ID from outside
+ .hart_id_i ( hart_id_i ),
+ .priv_mode_id_o ( priv_mode_id ),
+ .priv_mode_if_o ( priv_mode_if ), // privilege level used for the PMP instruction channel
+ .priv_mode_lsu_o ( priv_mode_lsu ), // privilege level used for the PMP data channel
+ // mtvec
+ .csr_mtvec_o ( csr_mtvec ),
+ .csr_mtvec_init_i ( csr_mtvec_init ),
+ .boot_addr_i ( boot_addr_i ),
+ // Interface to CSRs ( SRAM like )
+ .csr_access_i ( csr_access ),
+ .csr_addr_i ( csr_addr ),
+ .csr_wdata_i ( csr_wdata ),
+ .csr_op_i ( csr_op ),
+ .csr_op_en_i ( csr_op_en ),
+ .csr_rdata_o ( csr_rdata ),
+ // Interrupt related control signals
+ .irq_software_i ( irq_software_i ),
+ .irq_timer_i ( irq_timer_i ),
+ .irq_external_i ( irq_external_i ),
+ .irq_fast_i ( irq_fast_i ),
+ .nmi_mode_i ( nmi_mode ),
+ .irq_pending_o ( irq_pending ),
+ .irqs_o ( irqs ),
+ .csr_mstatus_mie_o ( csr_mstatus_mie ),
+ .csr_mstatus_tw_o ( csr_mstatus_tw ),
+ .csr_mepc_o ( csr_mepc ),
+ // PMP
+ .csr_pmp_cfg_o ( csr_pmp_cfg ),
+ .csr_pmp_addr_o ( csr_pmp_addr ),
+ // debug
+ .csr_depc_o ( csr_depc ),
+ .debug_mode_i ( debug_mode ),
+ .debug_cause_i ( debug_cause ),
+ .debug_csr_save_i ( debug_csr_save ),
+ .debug_single_step_o ( debug_single_step ),
+ .debug_ebreakm_o ( debug_ebreakm ),
+ .debug_ebreaku_o ( debug_ebreaku ),
+ .trigger_match_o ( trigger_match ),
+ .pc_if_i ( pc_if ),
+ .pc_id_i ( pc_id ),
+ .pc_wb_i ( pc_wb ),
+ .data_ind_timing_o ( data_ind_timing ),
+ .dummy_instr_en_o ( dummy_instr_en ), // dummy instruction controls for the IF stage
+ .dummy_instr_mask_o ( dummy_instr_mask ),
+ .dummy_instr_seed_en_o ( dummy_instr_seed_en ),
+ .dummy_instr_seed_o ( dummy_instr_seed ),
+ .icache_enable_o ( icache_enable ),
+ .csr_shadow_err_o ( csr_shadow_err ), // one of the terms of alert_major_o
+ .csr_save_if_i ( csr_save_if ),
+ .csr_save_id_i ( csr_save_id ),
+ .csr_save_wb_i ( csr_save_wb ),
+ .csr_restore_mret_i ( csr_restore_mret_id ),
+ .csr_restore_dret_i ( csr_restore_dret_id ),
+ .csr_save_cause_i ( csr_save_cause ),
+ .csr_mcause_i ( exc_cause ),
+ .csr_mtval_i ( csr_mtval ),
+ .illegal_csr_insn_o ( illegal_csr_insn_id ),
+ // performance counter related signals
+ .instr_ret_i ( perf_instr_ret_wb ),
+ .instr_ret_compressed_i ( perf_instr_ret_compressed_wb ),
+ .iside_wait_i ( perf_iside_wait ),
+ .jump_i ( perf_jump ),
+ .branch_i ( perf_branch ),
+ .branch_taken_i ( perf_tbranch ),
+ .mem_load_i ( perf_load ),
+ .mem_store_i ( perf_store ),
+ .dside_wait_i ( perf_dside_wait ),
+ .mul_wait_i ( perf_mul_wait ),
+ .div_wait_i ( perf_div_wait ),
+ // floating point
+ .fp_rm_dynamic_i ( fp_rm_dynamic ),
+ .fp_frm_o ( fp_frm_csr ), // CSR rounding mode, selected for the FPU when fp_rm_dynamic is set
+ .fp_status_i ( fp_status ), // status flags from the fpnew_top instance
+ .is_fp_instr_i ( is_fp_instr )
+ );
+ assign fp_frm_fpnew = fp_rm_dynamic ? fp_frm_csr : fp_rounding_mode; // FPU rounding mode: CSR frm when dynamic, else the mode output by the ID stage
+ assign in_ready_c2fpu = id_in_ready; //multdiv_ready_id;  -- core-to-FPU output-ready follows id_in_ready
+ assign in_valid_c2fpu = (instr_valid_id & is_fp_instr); // FPU input is valid for a valid FP instruction in ID
+ // assign ready_id_fpu = id_in_ready; // (is_fp_instr) ? out_ready_fpu2c : id_in_ready;
+ assign valid_id_fpu = (is_fp_instr) ? out_valid_fpu2c : ex_valid; // ex_valid_i of the ID stage: FPU result valid for FP instructions, else EX block valid
+// FPU instance (fpnew_top configured with the RV32F feature set and DEFAULT_NOREGS implementation)
+ fpnew_top #(
+ .Features ( fpnew_pkg::RV32F ),
+ .Implementation ( fpnew_pkg::DEFAULT_NOREGS ),
+ .TagType ( logic )
+ ) i_fpnew_top (
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ .operands_i ( fp_operands ), // operands, operation and formats come from the ID stage
+ .rnd_mode_i ( fp_frm_fpnew ),
+ .op_i ( fp_alu_operator ),
+ .op_mod_i ( fp_alu_op_mod ),
+ .src_fmt_i ( fp_src_fmt ),
+ .dst_fmt_i ( fp_dst_fmt ),
+ .int_fmt_i ( fpnew_pkg::INT32 ),
+ .vectorial_op_i ( 1'b0 ),
+ .tag_i ( '1 ), // tag is a constant; tag_o is left unconnected
+ .in_valid_i ( in_valid_c2fpu ),
+ .in_ready_o ( out_ready_fpu2c ), // NOTE(review): only a commented-out assign references this net in the code shown - confirm it is intentionally unused
+ .flush_i ( fp_flush ),
+ .result_o ( fp_result ), // muxed into data_wb for FP instructions
+ .status_o ( fp_status ), // to cs_registers_i
+ .tag_o ( ),
+ .out_valid_o ( out_valid_fpu2c ),
+ .out_ready_i ( in_ready_c2fpu ),
+ .busy_o ( fp_busy )
+ );
+ assign fpu_busy_idu = fp_busy & (~out_valid_fpu2c); // busy flag for the ID stage: FPU busy and no result valid yet
+ assign data_wb = is_fp_instr ? fp_result : result_ex; // result_ex_i of the ID stage: FPU result for FP instructions, else EX block result
+ // These assertions are in top-level as instr_valid_id required as the enable term (NOTE(review): no assertions follow this comment in the code shown)
+ if (PMPEnable) begin : g_pmp // PMP checker present: one instruction channel (PMP_I) and one data channel (PMP_D)
+ logic [33:0] pmp_req_addr [PMP_NUM_CHAN];
+ pmp_req_e pmp_req_type [PMP_NUM_CHAN];
+ priv_lvl_e pmp_priv_lvl [PMP_NUM_CHAN];
+ assign pmp_req_addr[PMP_I] = {2'b00,instr_addr_o[31:0]}; // 32-bit address zero-extended to 34 bits
+ assign pmp_req_type[PMP_I] = PMP_ACC_EXEC;
+ assign pmp_priv_lvl[PMP_I] = priv_mode_if;
+ assign pmp_req_addr[PMP_D] = {2'b00,data_addr_o[31:0]};
+ assign pmp_req_type[PMP_D] = data_we_o ? PMP_ACC_WRITE : PMP_ACC_READ; // write enable selects write vs read access type
+ assign pmp_priv_lvl[PMP_D] = priv_mode_lsu;
+ brq_pmp #(
+ .PMPGranularity ( PMPGranularity ),
+ .PMPNumChan ( PMP_NUM_CHAN ),
+ .PMPNumRegions ( PMPNumRegions )
+ ) pmp_i (
+ .clk_i ( clk ),
+ .rst_ni ( rst_ni ),
+ // Interface to CSRs
+ .csr_pmp_cfg_i ( csr_pmp_cfg ),
+ .csr_pmp_addr_i ( csr_pmp_addr ),
+ .priv_mode_i ( pmp_priv_lvl ),
+ // Access checking channels
+ .pmp_req_addr_i ( pmp_req_addr ),
+ .pmp_req_type_i ( pmp_req_type ),
+ .pmp_req_err_o ( pmp_req_err ) // per-channel error; gates instr_req_o and data_req_o
+ );
+ end else begin : g_no_pmp
+ // Unused signal tieoff
+ priv_lvl_e unused_priv_lvl_if, unused_priv_lvl_ls;
+ logic [33:0] unused_csr_pmp_addr [PMPNumRegions];
+ pmp_cfg_t unused_csr_pmp_cfg [PMPNumRegions];
+ assign unused_priv_lvl_if = priv_mode_if;
+ assign unused_priv_lvl_ls = priv_mode_lsu;
+ assign unused_csr_pmp_addr = csr_pmp_addr;
+ assign unused_csr_pmp_cfg = csr_pmp_cfg;
+ // Output tieoff
+ assign pmp_req_err[PMP_I] = 1'b0; // without PMP no request is ever flagged
+ assign pmp_req_err[PMP_D] = 1'b0;
+ end
+`ifdef RVFI
+ // When writeback stage is present RVFI information is emitted when instruction is finished in
+ // third stage but some information must be captured whilst the instruction is in the second
+ // stage. Without writeback stage RVFI information is all emitted when instruction retires in
+ // second stage. RVFI outputs are all straight from flops. So 2 stage pipeline requires a single
+ // set of flops (instr_info => RVFI_out), 3 stage pipeline requires two sets (instr_info => wb
+ // => RVFI_out)
+ localparam int RVFI_STAGES = WritebackStage ? 2 : 1;
+ logic rvfi_stage_valid [RVFI_STAGES]; // one entry per RVFI flop stage for every field below
+ logic [63:0] rvfi_stage_order [RVFI_STAGES];
+ logic [31:0] rvfi_stage_insn [RVFI_STAGES];
+ logic rvfi_stage_trap [RVFI_STAGES];
+ logic rvfi_stage_halt [RVFI_STAGES];
+ logic rvfi_stage_intr [RVFI_STAGES];
+ logic [ 1:0] rvfi_stage_mode [RVFI_STAGES];
+ logic [ 1:0] rvfi_stage_ixl [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rs1_addr [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rs2_addr [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rs3_addr [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rs1_rdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rs2_rdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rs3_rdata [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rd_addr [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rd_wdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_pc_rdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_pc_wdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_mem_addr [RVFI_STAGES];
+ logic [ 3:0] rvfi_stage_mem_rmask [RVFI_STAGES];
+ logic [ 3:0] rvfi_stage_mem_wmask [RVFI_STAGES];
+ logic [31:0] rvfi_stage_mem_rdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_mem_wdata [RVFI_STAGES];
+ logic rvfi_stage_valid_d [RVFI_STAGES]; // next-state value of rvfi_stage_valid
+ assign rvfi_valid = rvfi_stage_valid [RVFI_STAGES-1]; // every RVFI output is taken from the last stage
+ assign rvfi_order = rvfi_stage_order [RVFI_STAGES-1];
+ assign rvfi_insn = rvfi_stage_insn [RVFI_STAGES-1];
+ assign rvfi_trap = rvfi_stage_trap [RVFI_STAGES-1];
+ assign rvfi_halt = rvfi_stage_halt [RVFI_STAGES-1];
+ assign rvfi_intr = rvfi_stage_intr [RVFI_STAGES-1];
+ assign rvfi_mode = rvfi_stage_mode [RVFI_STAGES-1];
+ assign rvfi_ixl = rvfi_stage_ixl [RVFI_STAGES-1];
+ assign rvfi_rs1_addr = rvfi_stage_rs1_addr [RVFI_STAGES-1];
+ assign rvfi_rs2_addr = rvfi_stage_rs2_addr [RVFI_STAGES-1];
+ assign rvfi_rs3_addr = rvfi_stage_rs3_addr [RVFI_STAGES-1];
+ assign rvfi_rs1_rdata = rvfi_stage_rs1_rdata[RVFI_STAGES-1];
+ assign rvfi_rs2_rdata = rvfi_stage_rs2_rdata[RVFI_STAGES-1];
+ assign rvfi_rs3_rdata = rvfi_stage_rs3_rdata[RVFI_STAGES-1];
+ assign rvfi_rd_addr = rvfi_stage_rd_addr [RVFI_STAGES-1];
+ assign rvfi_rd_wdata = rvfi_stage_rd_wdata [RVFI_STAGES-1];
+ assign rvfi_pc_rdata = rvfi_stage_pc_rdata [RVFI_STAGES-1];
+ assign rvfi_pc_wdata = rvfi_stage_pc_wdata [RVFI_STAGES-1];
+ assign rvfi_mem_addr = rvfi_stage_mem_addr [RVFI_STAGES-1];
+ assign rvfi_mem_rmask = rvfi_stage_mem_rmask[RVFI_STAGES-1];
+ assign rvfi_mem_wmask = rvfi_stage_mem_wmask[RVFI_STAGES-1];
+ assign rvfi_mem_rdata = rvfi_stage_mem_rdata[RVFI_STAGES-1];
+ assign rvfi_mem_wdata = rvfi_stage_mem_wdata[RVFI_STAGES-1];
+ if (WritebackStage) begin : gen_rvfi_wb_stage
+ logic unused_instr_new_id; // instr_new_id is not otherwise used in this branch
+ assign unused_instr_new_id = instr_new_id;
+ // With writeback stage first RVFI stage buffers instruction information captured in ID/EX
+ // awaiting instruction retirement and RF Write data/Mem read data whilst instruction is in WB
+ // So first stage becomes valid when instruction leaves ID/EX stage and remains valid until
+ // instruction leaves WB
+ assign rvfi_stage_valid_d[0] = (instr_id_done & ~dummy_instr_id) |
+ (rvfi_stage_valid[0] & ~instr_done_wb);
+ // Second stage is output stage so simple valid cycle after instruction leaves WB (and so has
+ // retired)
+ assign rvfi_stage_valid_d[1] = instr_done_wb;
+ // Signal new instruction in WB cycle after instruction leaves ID/EX (to enter WB)
+ logic rvfi_instr_new_wb_q;
+ assign rvfi_instr_new_wb = rvfi_instr_new_wb_q;
+ always_ff @(posedge clk or negedge rst_ni) begin // one-cycle delayed copy of instr_id_done
+ if (~rst_ni) begin
+ rvfi_instr_new_wb_q <= 0;
+ end else begin
+ rvfi_instr_new_wb_q <= instr_id_done;
+ end
+ end
+ end else begin : gen_rvfi_no_wb_stage
+ // Without writeback stage first RVFI stage is output stage so simply valid the cycle after
+ // instruction leaves ID/EX (and so has retired)
+ assign rvfi_stage_valid_d[0] = instr_id_done & ~dummy_instr_id; // dummy instructions are not reported
+ // Without writeback stage signal new instr_new_wb when instruction enters ID/EX to correctly
+ // setup register write signals
+ assign rvfi_instr_new_wb = instr_new_id;
+ end
+ for (genvar i = 0;i < RVFI_STAGES; i = i + 1) begin : g_rvfi_stages // one set of RVFI flops per stage
+ always_ff @(posedge clk or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_stage_halt[i] <= '0;
+ rvfi_stage_trap[i] <= '0;
+ rvfi_stage_intr[i] <= '0;
+ rvfi_stage_order[i] <= '0;
+ rvfi_stage_insn[i] <= '0;
+ rvfi_stage_mode[i] <= {PRIV_LVL_M};
+ rvfi_stage_ixl[i] <= CSR_MISA_MXL;
+ rvfi_stage_rs1_addr[i] <= '0;
+ rvfi_stage_rs2_addr[i] <= '0;
+ rvfi_stage_rs3_addr[i] <= '0;
+ rvfi_stage_pc_rdata[i] <= '0;
+ rvfi_stage_pc_wdata[i] <= '0;
+ rvfi_stage_mem_rmask[i] <= '0;
+ rvfi_stage_mem_wmask[i] <= '0;
+ rvfi_stage_valid[i] <= '0;
+ rvfi_stage_rs1_rdata[i] <= '0;
+ rvfi_stage_rs2_rdata[i] <= '0;
+ rvfi_stage_rs3_rdata[i] <= '0;
+ rvfi_stage_rd_wdata[i] <= '0;
+ rvfi_stage_rd_addr[i] <= '0;
+ rvfi_stage_mem_rdata[i] <= '0;
+ rvfi_stage_mem_wdata[i] <= '0;
+ rvfi_stage_mem_addr[i] <= '0;
+ end else begin
+ rvfi_stage_valid[i] <= rvfi_stage_valid_d[i]; // valid updates every cycle; the other fields only on the enables below
+ if (i == 0) begin // first stage: capture from ID/EX when the instruction is done there
+ if(instr_id_done) begin
+ rvfi_stage_halt[i] <= '0;
+ rvfi_stage_trap[i] <= illegal_insn_id;
+ rvfi_stage_intr[i] <= rvfi_intr_d;
+ rvfi_stage_order[i] <= rvfi_stage_order[i] + 64'(rvfi_stage_valid_d[i]); // order advances by 1 only when the stage becomes/stays valid
+ rvfi_stage_insn[i] <= rvfi_insn_id;
+ rvfi_stage_mode[i] <= {priv_mode_id};
+ rvfi_stage_ixl[i] <= CSR_MISA_MXL;
+ rvfi_stage_rs1_addr[i] <= rvfi_rs1_addr_d;
+ rvfi_stage_rs2_addr[i] <= rvfi_rs2_addr_d;
+ rvfi_stage_rs3_addr[i] <= rvfi_rs3_addr_d;
+ rvfi_stage_pc_rdata[i] <= pc_id;
+ rvfi_stage_pc_wdata[i] <= pc_set ? branch_target_ex : pc_if; // next PC: branch target when the PC is being set, else the IF-stage PC
+ rvfi_stage_mem_rmask[i] <= rvfi_mem_mask_int;
+ rvfi_stage_mem_wmask[i] <= data_we_o ? rvfi_mem_mask_int : 4'b0000; // write mask only when the data interface is writing
+ rvfi_stage_rs1_rdata[i] <= rvfi_rs1_data_d;
+ rvfi_stage_rs2_rdata[i] <= rvfi_rs2_data_d;
+ rvfi_stage_rs3_rdata[i] <= rvfi_rs3_data_d;
+ rvfi_stage_rd_addr[i] <= rvfi_rd_addr_d;
+ rvfi_stage_rd_wdata[i] <= rvfi_rd_wdata_d;
+ rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d;
+ rvfi_stage_mem_wdata[i] <= rvfi_mem_wdata_d;
+ rvfi_stage_mem_addr[i] <= rvfi_mem_addr_d;
+ end
+ end else begin // later stage: shift from stage i-1 when the instruction is done in WB
+ if(instr_done_wb) begin
+ rvfi_stage_halt[i] <= rvfi_stage_halt[i-1];
+ rvfi_stage_trap[i] <= rvfi_stage_trap[i-1];
+ rvfi_stage_intr[i] <= rvfi_stage_intr[i-1];
+ rvfi_stage_order[i] <= rvfi_stage_order[i-1];
+ rvfi_stage_insn[i] <= rvfi_stage_insn[i-1];
+ rvfi_stage_mode[i] <= rvfi_stage_mode[i-1];
+ rvfi_stage_ixl[i] <= rvfi_stage_ixl[i-1];
+ rvfi_stage_rs1_addr[i] <= rvfi_stage_rs1_addr[i-1];
+ rvfi_stage_rs2_addr[i] <= rvfi_stage_rs2_addr[i-1];
+ rvfi_stage_rs3_addr[i] <= rvfi_stage_rs3_addr[i-1];
+ rvfi_stage_pc_rdata[i] <= rvfi_stage_pc_rdata[i-1];
+ rvfi_stage_pc_wdata[i] <= rvfi_stage_pc_wdata[i-1];
+ rvfi_stage_mem_rmask[i] <= rvfi_stage_mem_rmask[i-1];
+ rvfi_stage_mem_wmask[i] <= rvfi_stage_mem_wmask[i-1];
+ rvfi_stage_rs1_rdata[i] <= rvfi_stage_rs1_rdata[i-1];
+ rvfi_stage_rs2_rdata[i] <= rvfi_stage_rs2_rdata[i-1];
+ rvfi_stage_rs3_rdata[i] <= rvfi_stage_rs3_rdata[i-1];
+ rvfi_stage_mem_wdata[i] <= rvfi_stage_mem_wdata[i-1];
+ rvfi_stage_mem_addr[i] <= rvfi_stage_mem_addr[i-1];
+ // For 2 RVFI_STAGES/Writeback Stage ignore first stage flops for rd_addr, rd_wdata and
+ // mem_rdata. For RF write addr/data actual write happens in writeback so capture
+ // address/data there. For mem_rdata that is only available from the writeback stage.
+ // Previous stage flops still exist in RTL as they are used by the non writeback config
+ rvfi_stage_rd_addr[i] <= rvfi_rd_addr_d;
+ rvfi_stage_rd_wdata[i] <= rvfi_rd_wdata_d;
+ rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d;
+ end
+ end
+ end
+ end
+ end
+ // Memory address and write data are sampled in the first cycle of the instruction (adder result
+ // and LSU write data); in every later cycle the previously registered values are held.
+ always_comb begin
+   // Default: hold
+   rvfi_mem_addr_d = rvfi_mem_addr_q;
+   rvfi_mem_wdata_d = rvfi_mem_wdata_q;
+   if (instr_first_cycle_id) begin
+     rvfi_mem_addr_d = alu_adder_result_ex;
+     rvfi_mem_wdata_d = lsu_wdata;
+   end
+ end
+ // Memory read data: take the LSU read data while the LSU response is valid, otherwise hold.
+ always_comb begin
+   rvfi_mem_rdata_d = rvfi_mem_rdata_q;
+   if (lsu_resp_valid) begin
+     rvfi_mem_rdata_d = rf_wdata_lsu;
+   end
+ end
+ always_ff @(posedge clk or negedge rst_ni) begin // holding registers for the RVFI memory address / read data / write data
+ if (!rst_ni) begin
+ rvfi_mem_addr_q <= '0;
+ rvfi_mem_rdata_q <= '0;
+ rvfi_mem_wdata_q <= '0;
+ end else begin
+ rvfi_mem_addr_q <= rvfi_mem_addr_d;
+ rvfi_mem_rdata_q <= rvfi_mem_rdata_d;
+ rvfi_mem_wdata_q <= rvfi_mem_wdata_d;
+ end
+ end
+ // Byte enable based on data type (lsu_type selects a 4-, 2- or 1-byte mask)
+ always_comb begin
+ unique case (lsu_type)
+ 2'b00: rvfi_mem_mask_int = 4'b1111; // all four bytes
+ 2'b01: rvfi_mem_mask_int = 4'b0011; // lower two bytes
+ 2'b10: rvfi_mem_mask_int = 4'b0001; // lowest byte
+ default: rvfi_mem_mask_int = 4'b0000; // any other encoding: no bytes
+ endcase
+ end
+ // Instruction word reported on RVFI: the full 32-bit instruction word by default; for a
+ // compressed instruction, the 16-bit compressed encoding zero-extended to 32 bits.
+ always_comb begin
+   rvfi_insn_id = instr_rdata_id;
+   if (instr_is_compressed_id) begin
+     rvfi_insn_id = {16'b0, instr_rdata_c_id};
+   end
+ end
+ // Source operand capture for RVFI.
+ // First instruction cycle: rs1/rs2 address and data are sampled (zeroed when the matching read
+ // enable is low) and rs3 is cleared.
+ // Later cycles: rs1/rs2 hold their registered values and rs3 is sampled from read port a.
+ always_comb begin
+   // Defaults describe the "not first cycle" case
+   rvfi_rs1_addr_d = rvfi_rs1_addr_q;
+   rvfi_rs1_data_d = rvfi_rs1_data_q;
+   rvfi_rs2_addr_d = rvfi_rs2_addr_q;
+   rvfi_rs2_data_d = rvfi_rs2_data_q;
+   rvfi_rs3_addr_d = rf_raddr_a;
+   rvfi_rs3_data_d = multdiv_operand_a_ex;
+   if (instr_first_cycle_id) begin
+     rvfi_rs1_addr_d = rf_ren_a ? rf_raddr_a : '0;
+     rvfi_rs1_data_d = rf_ren_a ? multdiv_operand_a_ex : '0;
+     rvfi_rs2_addr_d = rf_ren_b ? rf_raddr_b : '0;
+     rvfi_rs2_data_d = rf_ren_b ? multdiv_operand_b_ex : '0;
+     rvfi_rs3_addr_d = '0;
+     rvfi_rs3_data_d = '0;
+   end
+ end
+ always_ff @(posedge clk or negedge rst_ni) begin // holding registers for the RVFI rs1/rs2 address and data (rs3 has no register)
+ if (!rst_ni) begin
+ rvfi_rs1_data_q <= '0;
+ rvfi_rs1_addr_q <= '0;
+ rvfi_rs2_data_q <= '0;
+ rvfi_rs2_addr_q <= '0;
+ end else begin
+ rvfi_rs1_data_q <= rvfi_rs1_data_d;
+ rvfi_rs1_addr_q <= rvfi_rs1_addr_d;
+ rvfi_rs2_data_q <= rvfi_rs2_data_d;
+ rvfi_rs2_addr_q <= rvfi_rs2_addr_d;
+ end
+ end
+ // Destination register capture for RVFI, in priority order:
+ //  1. a register file write is happening: record its address and data (data forced to zero when
+ //     the target is x0, as required by the RVFI specification);
+ //  2. no write but a new instruction in Writeback (or ID/EX when there is no writeback stage):
+ //     report address and data as zero, as required by the RVFI specification;
+ //  3. otherwise: keep the previously registered values.
+ always_comb begin
+   rvfi_rd_addr_d = rvfi_rd_addr_q;
+   rvfi_rd_wdata_d = rvfi_rd_wdata_q;
+   if (rvfi_rd_we_wb) begin
+     rvfi_rd_addr_d = rvfi_rd_addr_wb;
+     rvfi_rd_wdata_d = rvfi_rd_wdata_wb;
+     if (rvfi_rd_addr_wb == 5'b0) begin
+       rvfi_rd_wdata_d = '0;
+     end
+   end else if (rvfi_instr_new_wb) begin
+     rvfi_rd_addr_d = '0;
+     rvfi_rd_wdata_d = '0;
+   end
+ end
+ // RD write register is refreshed only once per cycle and
+ // then it is kept stable for the cycle.
+ always_ff @(posedge clk or negedge rst_ni) begin // registers the destination address/data chosen by the combinational logic
+ if (!rst_ni) begin
+ rvfi_rd_addr_q <= '0;
+ rvfi_rd_wdata_q <= '0;
+ end else begin
+ rvfi_rd_addr_q <= rvfi_rd_addr_d;
+ rvfi_rd_wdata_q <= rvfi_rd_wdata_d;
+ end
+ end
+ // rvfi_intr must be set for first instruction that is part of a trap handler.
+ // On the first cycle of a new instruction see if a trap PC was set by the previous instruction,
+ // otherwise maintain value.
+ assign rvfi_intr_d = instr_first_cycle_id ? rvfi_set_trap_pc_q : rvfi_intr_q;
+ always_comb begin // set/clear logic for the "trap PC was set" flag; the set condition has priority
+ rvfi_set_trap_pc_d = rvfi_set_trap_pc_q;
+ if (pc_set && pc_mux_id == PC_EXC &&
+ (exc_pc_mux_id == EXC_PC_EXC || exc_pc_mux_id == EXC_PC_IRQ)) begin
+ // PC is set to enter a trap handler
+ rvfi_set_trap_pc_d = 1'b1;
+ end else if (rvfi_set_trap_pc_q && instr_id_done) begin
+ // first instruction has been executed after PC is set to trap handler
+ rvfi_set_trap_pc_d = 1'b0;
+ end
+ end
+ always_ff @(posedge clk or negedge rst_ni) begin // registers for the trap-PC flag and the intr value
+ if (!rst_ni) begin
+ rvfi_set_trap_pc_q <= 1'b0;
+ rvfi_intr_q <= 1'b0;
+ end else begin
+ rvfi_set_trap_pc_q <= rvfi_set_trap_pc_d;
+ rvfi_intr_q <= rvfi_intr_d;
+ end
+ end
+`else
+ // Non-RVFI build: the RVFI logic is the reader of instr_new_id and instr_done_wb in this
+ // section, so alias them to unused_* nets here.
+ logic unused_instr_new_id, unused_instr_done_wb;
+ assign unused_instr_new_id = instr_new_id;
+ assign unused_instr_done_wb = instr_done_wb;
+`endif
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..65b428a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,215 @@
+//`include "/home/usman/Documents/ibex/rtl/"
+//`include "/home/merl/Documents/ibex/rtl/"
+// Top-level wrapper around brq_core. It instantiates the core (u_core) and
+// connects the core's request/grant style instruction and data memory ports
+// to the TileLink-UL ports tl_i_* / tl_d_* through two tlul_host_adapter
+// instances. All parameters are forwarded unchanged to brq_core.
+module brq_core_top #(
+ parameter bit PMPEnable = 1'b0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 0,
+ parameter int unsigned MHPMCounterNum = 0,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit RV32E = 1'b0,
+ parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast,
+ parameter brq_pkg::rv32b_e RV32B = brq_pkg::RV32BNone,
+ parameter brq_pkg::regfile_e RegFile = brq_pkg::RegFileFF,
+ parameter bit BranchTargetALU = 1'b0,
+ parameter bit WritebackStage = 1'b1,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter bit BranchPredictor = 1'b0,
+ parameter bit DbgTriggerEn = 1'b0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit Securebrq = 1'b0,
+ parameter int unsigned DmHaltAddr = 0,
+ parameter int unsigned DmExceptionAddr = 0
+) (
+ input clk_i,
+ input rst_ni,
+ // instruction memory interface
+ input tlul_pkg::tl_d2h_t tl_i_i,
+ output tlul_pkg::tl_h2d_t tl_i_o,
+ // data memory interface
+ input tlul_pkg::tl_d2h_t tl_d_i,
+ output tlul_pkg::tl_h2d_t tl_d_o,
+ input logic test_en_i, // enable all clk_i gates for testing
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+ // Interrupt inputs
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+ input logic irq_nm_i, // non-maskeable interrupt
+ // Debug Interface
+ input logic debug_req_i,
+ // CPU Control Signals
+ input logic fetch_enable_i,
+ output logic alert_minor_o,
+ output logic alert_major_o,
+ output logic core_sleep_o
+);
+import brq_pkg::*;
+ //logic rst_ni;
+ //assign rst_ni = rst_ni;
+ // Instruction interface (internal): core <-> instruction-side adapter
+ logic instr_req;
+ logic instr_gnt;
+ logic instr_rvalid;
+ logic [31:0] instr_addr;
+ logic [31:0] instr_rdata;
+ logic instr_err;
+ // Data interface (internal): core <-> data-side adapter
+ logic data_req;
+ logic data_gnt;
+ logic data_rvalid;
+ logic data_we;
+ logic [3:0] data_be;
+ logic [31:0] data_addr;
+ logic [31:0] data_wdata;
+ logic [31:0] data_rdata;
+ logic data_err;
+// Core instance. Every wrapper parameter is passed straight through. The
+// memory ports connect to the internal instr_*/data_* nets that feed the
+// TL-UL adapters. When RVFI is defined the formal-interface outputs are
+// explicitly left unconnected.
+brq_core #(
+ .PMPEnable (PMPEnable),
+ .PMPGranularity (PMPGranularity),
+ .PMPNumRegions (PMPNumRegions),
+ .MHPMCounterNum (MHPMCounterNum),
+ .MHPMCounterWidth (MHPMCounterWidth),
+ .RV32E (RV32E),
+ .RV32M (RV32M),
+ .RV32B (RV32B),
+ .RegFile (RegFile),
+ .BranchTargetALU (BranchTargetALU),
+ .WritebackStage (WritebackStage),
+ .ICache (ICache),
+ .ICacheECC (ICacheECC),
+ .BranchPredictor (BranchPredictor),
+ .DbgTriggerEn (DbgTriggerEn),
+ .DbgHwBreakNum (DbgHwBreakNum),
+ .Securebrq (Securebrq),
+ .DmHaltAddr (DmHaltAddr),
+ .DmExceptionAddr (DmExceptionAddr)
+) u_core (
+ // clk_i and rst_ni
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+ .test_en_i (test_en_i), // enable all clk_i gates for testing
+ .hart_id_i (hart_id_i),
+ .boot_addr_i(boot_addr_i),
+ // Instruction memory interface
+ .instr_req_o (instr_req),
+ .instr_gnt_i (instr_gnt),
+ .instr_rvalid_i (instr_rvalid),
+ .instr_addr_o (instr_addr),
+ .instr_rdata_i (instr_rdata),
+ .instr_err_i (instr_err),
+ // Data memory interface
+ .data_req_o (data_req),
+ .data_gnt_i (data_gnt),
+ .data_rvalid_i (data_rvalid),
+ .data_we_o (data_we),
+ .data_be_o (data_be),
+ .data_addr_o (data_addr),
+ .data_wdata_o (data_wdata),
+ .data_rdata_i (data_rdata),
+ .data_err_i (data_err),
+ // Interrupt inputs
+ .irq_software_i (irq_software_i),
+ .irq_timer_i (irq_timer_i),
+ .irq_external_i (irq_external_i),
+ .irq_fast_i (irq_fast_i),
+ .irq_nm_i (irq_nm_i), // non-maskeable interrupt
+ // Debug Interface
+ .debug_req_i (debug_req_i),
+ // RISC-V Formal Interface
+ // Does not comply with the coding standards of _i/_o suffixes, but follows
+ // the convention of RISC-V Formal Interface Specification.
+`ifdef RVFI
+ .rvfi_valid (),
+ .rvfi_order (),
+ .rvfi_insn (),
+ .rvfi_trap (),
+ .rvfi_halt (),
+ .rvfi_intr (),
+ .rvfi_mode (),
+ .rvfi_ixl (),
+ .rvfi_rs1_addr (),
+ .rvfi_rs2_addr (),
+ .rvfi_rs3_addr (),
+ .rvfi_rs1_rdata (),
+ .rvfi_rs2_rdata (),
+ .rvfi_rs3_rdata (),
+ .rvfi_rd_addr (),
+ .rvfi_rd_wdata (),
+ .rvfi_pc_rdata (),
+ .rvfi_pc_wdata (),
+ .rvfi_mem_addr (),
+ .rvfi_mem_rmask (),
+ .rvfi_mem_wmask (),
+ .rvfi_mem_rdata (),
+ .rvfi_mem_wdata (),
+`endif
+ // CPU Control Signals
+ .fetch_enable_i (fetch_enable_i),
+ .alert_minor_o (alert_minor_o),
+ .alert_major_o (alert_major_o),
+ .core_sleep_o (core_sleep_o)
+);
+// Instruction-side adapter: connects the core's instruction fetch port to the
+// tl_i_o / tl_i_i TileLink-UL ports. The instruction side is read-only, so the
+// write enable and write data are tied to zero and all byte enables are set.
+tlul_host_adapter #(
+ .MAX_REQS(2)
+) intr_interface (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .req_i (instr_req),
+ .gnt_o (instr_gnt),
+ .addr_i (instr_addr),
+ .we_i (1'b0),
+ .wdata_i (32'b0),
+ .be_i (4'hF),
+ .valid_o (instr_rvalid),
+ .rdata_o (instr_rdata),
+ .err_o (instr_err),
+ .tl_h_c_a (tl_i_o),
+ .tl_h_c_d (tl_i_i)
+);
+// Data-side adapter: connects the core's load/store port (request, write
+// enable, byte enables, address, write data) to the tl_d_o / tl_d_i
+// TileLink-UL ports and returns valid/read-data/error to the core.
+tlul_host_adapter #(
+ .MAX_REQS (2)
+) data_interface (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .req_i (data_req),
+ .gnt_o (data_gnt),
+ .addr_i (data_addr),
+ .we_i (data_we),
+ .wdata_i (data_wdata),
+ .be_i (data_be),
+ .valid_o (data_rvalid),
+ .rdata_o (data_rdata),
+ .err_o (data_err),
+ .tl_h_c_a (tl_d_o),
+ .tl_h_c_d (tl_d_i)
+);
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..3fedb28
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,82 @@
+// Loadable up-counter that is always presented as a 64-bit value.
+//
+// Parameter:
+//   CounterWidth  - number of counter bits that are actually implemented as
+//                   flops; bits above it read as zero on counter_val_o.
+// Ports:
+//   counter_inc_i - increment by one this cycle (lower priority than a write)
+//   counter_we_i  - load counter_val_i into bits [31:0]
+//   counterh_we_i - load counter_val_i into bits [63:32]; takes precedence
+//                   over counter_we_i when both are asserted
+//   counter_val_o - current counter value, zero-extended to 64 bits
+module brq_counter #(
+ parameter int CounterWidth = 32
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ input logic counter_inc_i,
+ input logic counterh_we_i,
+ input logic counter_we_i,
+ input logic [31:0] counter_val_i,
+ output logic [63:0] counter_val_o
+);
+ logic [63:0] counter;
+ logic [CounterWidth-1:0] counter_upd;
+ logic [63:0] counter_load;
+ logic we;
+ logic [CounterWidth-1:0] counter_d;
+ // Update
+ always_comb begin
+ // Write
+ we = counter_we_i | counterh_we_i;
+ // Default: a low-word write keeps the current high word.
+ counter_load[63:32] = counter[63:32];
+ counter_load[31:0] = counter_val_i;
+ // A high-word write keeps the current low word instead.
+ if (counterh_we_i) begin
+ counter_load[63:32] = counter_val_i;
+ counter_load[31:0] = counter[31:0];
+ end
+ // Increment
+ counter_upd = counter[CounterWidth-1:0] + {{CounterWidth-1{1'b0}},1'b1};
+ // Next value logic: write beats increment, increment beats hold
+ if (we) begin
+ counter_d = counter_load[CounterWidth-1:0];
+ end else if (counter_inc_i)begin
+ counter_d = counter_upd[CounterWidth-1:0];
+ end else begin
+ counter_d = counter[CounterWidth-1:0];
+ end
+ end
+ // The two counter_q declarations and the two COUNTER_FLOP_RST definitions are
+ // mutually exclusive alternatives; without the conditional they collide.
+ // NOTE(review): FPGA_XILINX is the guard used by upstream Ibex - confirm it
+ // matches this project's build defines.
+`ifdef FPGA_XILINX
+ // Set DSP pragma for supported xilinx FPGAs
+ localparam int DspPragma = CounterWidth < 49 ? "yes" : "no";
+ (* use_dsp = DspPragma *) logic [CounterWidth-1:0] counter_q;
+ // DSP output register requires synchronous reset.
+ `define COUNTER_FLOP_RST posedge clk_i
+`else
+ logic [CounterWidth-1:0] counter_q;
+ `define COUNTER_FLOP_RST posedge clk_i or negedge rst_ni
+`endif
+ // Counter flop
+ always_ff @(`COUNTER_FLOP_RST) begin
+ if (!rst_ni) begin
+ counter_q <= '0;
+ end else begin
+ counter_q <= counter_d;
+ end
+ end
+ // Expand the implemented bits to the fixed 64-bit view.
+ if (CounterWidth < 64) begin : g_counter_narrow
+ logic [63:CounterWidth] unused_counter_load;
+ assign counter[CounterWidth-1:0] = counter_q;
+ assign counter[63:CounterWidth] = '0;
+ assign unused_counter_load = counter_load[63:CounterWidth];
+ end else begin : g_counter_full
+ assign counter = counter_q;
+ end
+ assign counter_val_o = counter;
+endmodule
+// Keep helper defines file-local.
+`undef COUNTER_FLOP_RST
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..6b33279
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,1510 @@
+/**
+ * Control and Status Registers
+ *
+ * Control and Status Registers (CSRs) following the RISC-V Privileged
+ * Specification, draft version 1.11
+ */
+module brq_cs_registers #(
+ parameter bit DbgTriggerEn = 0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit DataIndTiming = 1'b0,
+ parameter bit DummyInstructions = 1'b0,
+ parameter bit ShadowCSR = 1'b0,
+ parameter bit ICache = 1'b0,
+ parameter int unsigned MHPMCounterNum = 10,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit PMPEnable = 0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 4,
+ parameter bit RV32E = 0,
+ parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast,
+ parameter brq_pkg::rvfloat_e RVF = brq_pkg::RV64FDouble // for floating point
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+ // Hart ID
+ input logic [31:0] hart_id_i,
+ // Privilege mode
+ output brq_pkg::priv_lvl_e priv_mode_id_o,
+ output brq_pkg::priv_lvl_e priv_mode_if_o,
+ output brq_pkg::priv_lvl_e priv_mode_lsu_o,
+ output logic csr_mstatus_tw_o,
+ // mtvec
+ output logic [31:0] csr_mtvec_o,
+ input logic csr_mtvec_init_i,
+ input logic [31:0] boot_addr_i,
+ // Interface to registers (SRAM like)
+ input logic csr_access_i,
+ input brq_pkg::csr_num_e csr_addr_i,
+ input logic [31:0] csr_wdata_i,
+ input brq_pkg::csr_op_e csr_op_i,
+ input csr_op_en_i,
+ output logic [31:0] csr_rdata_o,
+ // interrupts
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+ input logic nmi_mode_i,
+ output logic irq_pending_o, // interrupt request pending
+ output brq_pkg::irqs_t irqs_o, // interrupt requests qualified with mie
+ output logic csr_mstatus_mie_o,
+ output logic [31:0] csr_mepc_o,
+ // PMP
+ output brq_pkg::pmp_cfg_t csr_pmp_cfg_o [PMPNumRegions],
+ output logic [33:0] csr_pmp_addr_o [PMPNumRegions],
+ // debug
+ input logic debug_mode_i,
+ input brq_pkg::dbg_cause_e debug_cause_i,
+ input logic debug_csr_save_i,
+ output logic [31:0] csr_depc_o,
+ output logic debug_single_step_o,
+ output logic debug_ebreakm_o,
+ output logic debug_ebreaku_o,
+ output logic trigger_match_o,
+ input logic [31:0] pc_if_i,
+ input logic [31:0] pc_id_i,
+ input logic [31:0] pc_wb_i,
+ // CPU control bits
+ output logic data_ind_timing_o,
+ output logic dummy_instr_en_o,
+ output logic [2:0] dummy_instr_mask_o,
+ output logic dummy_instr_seed_en_o,
+ output logic [31:0] dummy_instr_seed_o,
+ output logic icache_enable_o,
+ output logic csr_shadow_err_o,
+ // Exception save/restore
+ input logic csr_save_if_i,
+ input logic csr_save_id_i,
+ input logic csr_save_wb_i,
+ input logic csr_restore_mret_i,
+ input logic csr_restore_dret_i,
+ input logic csr_save_cause_i,
+ input brq_pkg::exc_cause_e csr_mcause_i,
+ input logic [31:0] csr_mtval_i,
+ output logic illegal_csr_insn_o, // access to non-existent CSR,
+ // with wrong priviledge level, or
+ // missing write permissions
+ // Performance Counters
+ input logic instr_ret_i, // instr retired in ID/EX stage
+ input logic instr_ret_compressed_i, // compressed instr retired
+ input logic iside_wait_i, // core waiting for the iside
+ input logic jump_i, // jump instr seen (j, jr, jal, jalr)
+ input logic branch_i, // branch instr seen (bf, bnf)
+ input logic branch_taken_i, // branch was taken
+ input logic mem_load_i, // load from memory in this cycle
+ input logic mem_store_i, // store to memory in this cycle
+ input logic dside_wait_i, // core waiting for the dside
+ input logic mul_wait_i, // core waiting for multiply
+ input logic div_wait_i, // core waiting for divide
+ // floating point
+ input logic fp_rm_dynamic_i,
+ output fpnew_pkg::roundmode_e fp_frm_o,
+ input fpnew_pkg::status_t fp_status_i,
+ input logic is_fp_instr_i
+ import brq_pkg::*;
+ import fpnew_pkg::roundmode_e;
+ // Derived configuration constants.
+ // RV32MEnabled is 1 unless the multiplier option is RV32MNone.
+ localparam int unsigned RV32MEnabled = (RV32M == RV32MNone) ? 0 : 1;
+ // Stored PMP address width: 32 bits for granularity 0, otherwise
+ // 33 - PMPGranularity bits.
+ localparam int unsigned PMPAddrWidth = (PMPGranularity > 0) ? 33 - PMPGranularity : 32;
+ // One-hot style flags derived from the RVF parameter; they drive the F and D
+ // bits of misa below.
+ localparam int unsigned SinglePrecision = (RVF == RV32FSingle) ? 1 : 0;
+ localparam int unsigned DoublePrecision = (RVF == RV64FDouble) ? 1 : 0;
+ // misa
+ // Constant read value of the misa CSR. Each term places one extension flag
+ // at its letter position (A = bit 0 ... X = bit 23); the top two bits carry
+ // CSR_MISA_MXL.
+ localparam logic [31:0] MISA_VALUE =
+ (0 << 0) // A - Atomic Instructions extension
+ | (1 << 2) // C - Compressed extension
+ | (DoublePrecision << 3) // D - Double precision floating-point extension
+ | (32'(RV32E) << 4) // E - RV32E base ISA
+ | (SinglePrecision << 5) // F - Single precision floating-point extension
+ | (32'(!RV32E) << 8) // I - RV32I/64I/128I base ISA
+ | (RV32MEnabled << 12) // M - Integer Multiply/Divide extension
+ | (0 << 13) // N - User level interrupts supported
+ | (0 << 18) // S - Supervisor mode implemented
+ | (1 << 20) // U - User mode implemented
+ | (0 << 23) // X - Non-standard extensions present
+ | (32'(CSR_MISA_MXL) << 30); // M-XLEN
+ // Implemented mstatus fields. Only these bits are stored; the read logic
+ // places each one at its CSR_MSTATUS_*_BIT position.
+ typedef struct packed {
+ logic mie;
+ logic mpie;
+ priv_lvl_e mpp;
+ logic mprv;
+ logic tw;
+ } status_t;
+ // Subset of mstatus (mpie, mpp) that is saved in the mstack register.
+ typedef struct packed {
+ logic mpie;
+ priv_lvl_e mpp;
+ } status_stk_t;
+ // Field layout of the debug control and status register (dcsr), listed from
+ // most- to least-significant field. The zero0/zero1/zero2 fields are forced
+ // to zero by the write logic.
+ typedef struct packed {
+ x_debug_ver_e xdebugver;
+ logic [11:0] zero2;
+ logic ebreakm;
+ logic zero1;
+ logic ebreaks;
+ logic ebreaku;
+ logic stepie;
+ logic stopcount;
+ logic stoptime;
+ dbg_cause_e cause;
+ logic zero0;
+ logic mprven;
+ logic nmip;
+ logic step;
+ priv_lvl_e prv;
+ } dcsr_t;
+ // CPU control register fields
+ // (read back zero-extended to 32 bits through CSR_CPUCTRL)
+ typedef struct packed {
+ logic [2:0] dummy_instr_mask;
+ logic dummy_instr_en;
+ logic data_ind_timing;
+ logic icache_enable;
+ } cpu_ctrl_t;
+ // Interrupt and exception control signals
+ logic [31:0] exception_pc;
+ // CSRs
+ fpnew_pkg::status_t fflags_q, fflags_d, fflag_wdata;
+ logic fflags_en;
+ logic frm_en;
+ roundmode_e frm_q, frm_d;
+ priv_lvl_e priv_lvl_q, priv_lvl_d;
+ status_t mstatus_q, mstatus_d;
+ logic mstatus_err;
+ logic mstatus_en;
+ irqs_t mie_q, mie_d;
+ logic mie_en;
+ logic [31:0] mscratch_q;
+ logic mscratch_en;
+ logic [31:0] mepc_q, mepc_d;
+ logic mepc_en;
+ logic [5:0] mcause_q, mcause_d;
+ logic mcause_en;
+ logic [31:0] mtval_q, mtval_d;
+ logic mtval_en;
+ logic [31:0] mtvec_q, mtvec_d;
+ logic mtvec_err;
+ logic mtvec_en;
+ irqs_t mip;
+ dcsr_t dcsr_q, dcsr_d;
+ logic dcsr_en;
+ logic [31:0] depc_q, depc_d;
+ logic depc_en;
+ logic [31:0] dscratch0_q;
+ logic [31:0] dscratch1_q;
+ logic dscratch0_en, dscratch1_en;
+ // CSRs for recoverable NMIs
+ // NOTE: these CSRs are nonstandard; see riscv-isa-manual issue #261 (the link is missing from this copy - confirm)
+ status_stk_t mstack_q, mstack_d;
+ logic mstack_en;
+ logic [31:0] mstack_epc_q, mstack_epc_d;
+ logic [5:0] mstack_cause_q, mstack_cause_d;
+ // PMP Signals
+ logic [31:0] pmp_addr_rdata [PMP_MAX_REGIONS];
+ logic [PMP_CFG_W-1:0] pmp_cfg_rdata [PMP_MAX_REGIONS];
+ logic pmp_csr_err;
+ // Hardware performance monitor signals
+ logic [31:0] mcountinhibit;
+ // Only have mcountinhibit flops for counters that actually exist
+ logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q;
+ logic mcountinhibit_we;
+ // mhpmcounter flops are elaborated below providing only the precise number that is required based
+ // on MHPMCounterNum/MHPMCounterWidth. This signal connects to the Q output of these flops
+ // where they exist and is otherwise 0.
+ logic [63:0] mhpmcounter [32];
+ logic [31:0] mhpmcounter_we;
+ logic [31:0] mhpmcounterh_we;
+ logic [31:0] mhpmcounter_incr;
+ logic [31:0] mhpmevent [32];
+ logic [4:0] mhpmcounter_idx;
+ logic unused_mhpmcounter_we_1;
+ logic unused_mhpmcounterh_we_1;
+ logic unused_mhpmcounter_incr_1;
+ // Debug / trigger registers
+ logic [31:0] tselect_rdata;
+ logic [31:0] tmatch_control_rdata;
+ logic [31:0] tmatch_value_rdata;
+ // CPU control bits
+ cpu_ctrl_t cpuctrl_q, cpuctrl_d, cpuctrl_wdata;
+ logic cpuctrl_we;
+ logic cpuctrl_err;
+ // CSR update logic
+ logic [31:0] csr_wdata_int;
+ logic [31:0] csr_rdata_int;
+ logic csr_we_int;
+ logic csr_wreq;
+ // Access violation signals
+ logic illegal_csr;
+ logic illegal_csr_priv;
+ logic illegal_csr_write;
+ logic [7:0] unused_boot_addr;
+ logic [2:0] unused_csr_addr;
+ assign unused_boot_addr = boot_addr_i[7:0];
+ /////////////
+ // CSR reg //
+ /////////////
+ logic illegal_dyn_mod;
+ logic illegal_csr_dyn_mod;
+ logic [$bits(csr_num_e)-1:0] csr_addr;
+ assign csr_addr = {csr_addr_i};
+ assign unused_csr_addr = csr_addr[7:5];
+ assign mhpmcounter_idx = csr_addr[4:0];
+ assign illegal_csr_dyn_mod = illegal_dyn_mod & fp_rm_dynamic_i;
+ // See RISC-V Privileged Specification, version 1.11, Section 2.1
+ assign illegal_csr_priv = (csr_addr[9:8] > {priv_lvl_q});
+ assign illegal_csr_write = (csr_addr[11:10] == 2'b11) && csr_wreq;
+ assign illegal_csr_insn_o = (csr_access_i & (illegal_csr | illegal_csr_write | illegal_csr_priv)) | illegal_csr_dyn_mod;
+ // mip CSR is purely combinational - must be able to re-enable the clock upon WFI
+ assign mip.irq_software = irq_software_i;
+ assign mip.irq_timer = irq_timer_i;
+ assign mip.irq_external = irq_external_i;
+ assign mip.irq_fast = irq_fast_i;
+ // Floating point
+ // Classify the rounding mode held in frm_q: encodings 3'b000..3'b100 are
+ // accepted, 3'b101..3'b111 raise illegal_dyn_mod. illegal_dyn_mod is later
+ // qualified with fp_rm_dynamic_i to flag an illegal instruction.
+ // frm_q is also forwarded unchanged on fp_frm_o.
+ always_comb begin
+ unique case (frm_q)
+ 3'b000,
+ 3'b001,
+ 3'b010,
+ 3'b011,
+ 3'b100: illegal_dyn_mod = 1'b0;
+ 3'b101,
+ 3'b110,
+ 3'b111: illegal_dyn_mod = 1'b1;
+ endcase
+ fp_frm_o = frm_q;
+ end
+ // read logic
+ always_comb begin
+ csr_rdata_int = '0;
+ illegal_csr = 1'b0;
+ unique case (csr_addr_i)
+ // fcsr: floating-point control and status register (frm+fflags)
+ CSR_FCSR: csr_rdata_int = {24'b0 , frm_q, fflags_q};
+ // fflags: floating-point accrued exception
+ CSR_FFLAG: csr_rdata_int = {27'b0 , fflags_q};
+ // frm: floating-point dynamic rounding mode
+ CSR_FRM: begin
+ csr_rdata_int = {29'b0 , frm_q};
+ end
+ // mhartid: unique hardware thread id
+ CSR_MHARTID: csr_rdata_int = hart_id_i;
+ // mstatus: always M-mode, contains IE bit
+ CSR_MSTATUS: begin
+ // Assemble the 32-bit mstatus view from the stored status_t fields; all
+ // bits that are not implemented read as zero.
+ csr_rdata_int = '0;
+ csr_rdata_int[CSR_MSTATUS_MIE_BIT] = mstatus_q.mie;
+ csr_rdata_int[CSR_MSTATUS_MPIE_BIT] = mstatus_q.mpie;
+ csr_rdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW] = mstatus_q.mpp;
+ csr_rdata_int[CSR_MSTATUS_MPRV_BIT] = mstatus_q.mprv;
+ // TW is written from csr_wdata_int[CSR_MSTATUS_TW_BIT], so it must read
+ // back from the stored tw field.
+ csr_rdata_int[CSR_MSTATUS_TW_BIT] = mstatus_q.tw;
+ end
+ // misa
+ CSR_MISA: csr_rdata_int = MISA_VALUE;
+ // interrupt enable
+ CSR_MIE: begin
+ csr_rdata_int = '0;
+ csr_rdata_int[CSR_MSIX_BIT] = mie_q.irq_software;
+ csr_rdata_int[CSR_MTIX_BIT] = mie_q.irq_timer;
+ csr_rdata_int[CSR_MEIX_BIT] = mie_q.irq_external;
+ csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mie_q.irq_fast;
+ end
+ CSR_MSCRATCH: csr_rdata_int = mscratch_q;
+ // mtvec: trap-vector base address
+ CSR_MTVEC: csr_rdata_int = mtvec_q;
+ // mepc: exception program counter
+ CSR_MEPC: csr_rdata_int = mepc_q;
+ // mcause: exception cause
+ CSR_MCAUSE: csr_rdata_int = {mcause_q[5], 26'b0, mcause_q[4:0]};
+ // mtval: trap value
+ CSR_MTVAL: csr_rdata_int = mtval_q;
+ // mip: interrupt pending
+ CSR_MIP: begin
+ csr_rdata_int = '0;
+ csr_rdata_int[CSR_MSIX_BIT] = mip.irq_software;
+ csr_rdata_int[CSR_MTIX_BIT] = mip.irq_timer;
+ csr_rdata_int[CSR_MEIX_BIT] = mip.irq_external;
+ csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mip.irq_fast;
+ end
+ // PMP registers
+ CSR_PMPCFG0: csr_rdata_int = {pmp_cfg_rdata[3], pmp_cfg_rdata[2],
+ pmp_cfg_rdata[1], pmp_cfg_rdata[0]};
+ CSR_PMPCFG1: csr_rdata_int = {pmp_cfg_rdata[7], pmp_cfg_rdata[6],
+ pmp_cfg_rdata[5], pmp_cfg_rdata[4]};
+ CSR_PMPCFG2: csr_rdata_int = {pmp_cfg_rdata[11], pmp_cfg_rdata[10],
+ pmp_cfg_rdata[9], pmp_cfg_rdata[8]};
+ CSR_PMPCFG3: csr_rdata_int = {pmp_cfg_rdata[15], pmp_cfg_rdata[14],
+ pmp_cfg_rdata[13], pmp_cfg_rdata[12]};
+ CSR_PMPADDR0: csr_rdata_int = pmp_addr_rdata[0];
+ CSR_PMPADDR1: csr_rdata_int = pmp_addr_rdata[1];
+ CSR_PMPADDR2: csr_rdata_int = pmp_addr_rdata[2];
+ CSR_PMPADDR3: csr_rdata_int = pmp_addr_rdata[3];
+ CSR_PMPADDR4: csr_rdata_int = pmp_addr_rdata[4];
+ CSR_PMPADDR5: csr_rdata_int = pmp_addr_rdata[5];
+ CSR_PMPADDR6: csr_rdata_int = pmp_addr_rdata[6];
+ CSR_PMPADDR7: csr_rdata_int = pmp_addr_rdata[7];
+ CSR_PMPADDR8: csr_rdata_int = pmp_addr_rdata[8];
+ CSR_PMPADDR9: csr_rdata_int = pmp_addr_rdata[9];
+ CSR_PMPADDR10: csr_rdata_int = pmp_addr_rdata[10];
+ CSR_PMPADDR11: csr_rdata_int = pmp_addr_rdata[11];
+ CSR_PMPADDR12: csr_rdata_int = pmp_addr_rdata[12];
+ CSR_PMPADDR13: csr_rdata_int = pmp_addr_rdata[13];
+ CSR_PMPADDR14: csr_rdata_int = pmp_addr_rdata[14];
+ CSR_PMPADDR15: csr_rdata_int = pmp_addr_rdata[15];
+ CSR_DCSR: begin
+ csr_rdata_int = dcsr_q;
+ illegal_csr = ~debug_mode_i;
+ end
+ CSR_DPC: begin
+ csr_rdata_int = depc_q;
+ illegal_csr = ~debug_mode_i;
+ end
+ CSR_DSCRATCH0: begin
+ csr_rdata_int = dscratch0_q;
+ illegal_csr = ~debug_mode_i;
+ end
+ CSR_DSCRATCH1: begin
+ csr_rdata_int = dscratch1_q;
+ illegal_csr = ~debug_mode_i;
+ end
+ // machine counter/timers
+ CSR_MCOUNTINHIBIT: csr_rdata_int = mcountinhibit;
+ // NOTE(review): the case labels below were missing (only the bodies and
+ // their orphaned 'end's remained). They are restored following the
+ // CSR_* naming used in this file - confirm against brq_pkg::csr_num_e.
+ // Event selectors, indexed by the low five address bits.
+ CSR_MHPMEVENT3,
+ CSR_MHPMEVENT4, CSR_MHPMEVENT5, CSR_MHPMEVENT6, CSR_MHPMEVENT7,
+ CSR_MHPMEVENT8, CSR_MHPMEVENT9, CSR_MHPMEVENT10, CSR_MHPMEVENT11,
+ CSR_MHPMEVENT12, CSR_MHPMEVENT13, CSR_MHPMEVENT14, CSR_MHPMEVENT15,
+ CSR_MHPMEVENT16, CSR_MHPMEVENT17, CSR_MHPMEVENT18, CSR_MHPMEVENT19,
+ CSR_MHPMEVENT20, CSR_MHPMEVENT21, CSR_MHPMEVENT22, CSR_MHPMEVENT23,
+ CSR_MHPMEVENT24, CSR_MHPMEVENT25, CSR_MHPMEVENT26, CSR_MHPMEVENT27,
+ CSR_MHPMEVENT28, CSR_MHPMEVENT29, CSR_MHPMEVENT30, CSR_MHPMEVENT31: begin
+ csr_rdata_int = mhpmevent[mhpmcounter_idx];
+ end
+ // Lower 32 bits of the selected counter.
+ CSR_MCYCLE,
+ CSR_MINSTRET,
+ CSR_MHPMCOUNTER3,
+ CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7,
+ CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11,
+ CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15,
+ CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19,
+ CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23,
+ CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27,
+ CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin
+ csr_rdata_int = mhpmcounter[mhpmcounter_idx][31:0];
+ end
+ // Upper 32 bits of the selected counter.
+ CSR_MCYCLEH,
+ CSR_MINSTRETH,
+ CSR_MHPMCOUNTER3H,
+ CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H,
+ CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H,
+ CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H,
+ CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H,
+ CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H,
+ CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H,
+ CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin
+ csr_rdata_int = mhpmcounter[mhpmcounter_idx][63:32];
+ end
+ // Debug triggers
+ CSR_TSELECT: begin
+ csr_rdata_int = tselect_rdata;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_TDATA1: begin
+ csr_rdata_int = tmatch_control_rdata;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_TDATA2: begin
+ csr_rdata_int = tmatch_value_rdata;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_TDATA3: begin
+ csr_rdata_int = '0;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ // NOTE(review): the two case labels were missing; restored as the
+ // trigger context registers that follow TDATA3 in upstream Ibex -
+ // confirm the names exist in brq_pkg::csr_num_e.
+ // Both read as zero and are illegal when triggers are not enabled.
+ CSR_MCONTEXT: begin
+ csr_rdata_int = '0;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_SCONTEXT: begin
+ csr_rdata_int = '0;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ // Custom CSR for controlling CPU features
+ CSR_CPUCTRL: begin
+ csr_rdata_int = {{32-$bits(cpu_ctrl_t){1'b0}},cpuctrl_q};
+ end
+ // Custom CSR for LFSR re-seeding (cannot be read)
+ // NOTE(review): case label restored from upstream Ibex naming - confirm
+ // CSR_SECURESEED exists in brq_pkg::csr_num_e.
+ CSR_SECURESEED: begin
+ csr_rdata_int = '0;
+ end
+ default: begin
+ illegal_csr = 1'b1;
+ end
+ endcase
+ end
+ // write logic
+ always_comb begin
+ exception_pc = pc_id_i;
+ // Floating point
+ fflags_d = fflags_q;
+ fflags_en = 1'b0;
+ frm_d = frm_q;
+ frm_en = 1'b0;
+ priv_lvl_d = priv_lvl_q;
+ mstatus_en = 1'b0;
+ mstatus_d = mstatus_q;
+ mie_en = 1'b0;
+ mscratch_en = 1'b0;
+ mepc_en = 1'b0;
+ mepc_d = {csr_wdata_int[31:1], 1'b0};
+ mcause_en = 1'b0;
+ mcause_d = {csr_wdata_int[31], csr_wdata_int[4:0]};
+ mtval_en = 1'b0;
+ mtval_d = csr_wdata_int;
+ mtvec_en = csr_mtvec_init_i;
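+ // mtvec.MODE (bits [1:0]) is forced to 2'b00 and BASE keeps bits [31:2],
+ // i.e. the stored value is 4-byte aligned.
+ // NOTE(review): earlier wording said "MODE set to vectored / 256-byte
+ // aligned", which does not match the expressions below - confirm intent.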
+ mtvec_d = csr_mtvec_init_i ? {boot_addr_i[31:2], 2'b00} :
+ {csr_wdata_int[31:2], 2'b00};
+ dcsr_en = 1'b0;
+ dcsr_d = dcsr_q;
+ depc_d = {csr_wdata_int[31:1], 1'b0};
+ depc_en = 1'b0;
+ dscratch0_en = 1'b0;
+ dscratch1_en = 1'b0;
+ mstack_en = 1'b0;
+ mstack_d.mpie = mstatus_q.mpie;
+ mstack_d.mpp = mstatus_q.mpp;
+ mstack_epc_d = mepc_q;
+ mstack_cause_d = mcause_q;
+ mcountinhibit_we = 1'b0;
+ mhpmcounter_we = '0;
+ mhpmcounterh_we = '0;
+ cpuctrl_we = 1'b0;
+ if (csr_we_int) begin
+ unique case (csr_addr_i)
+ // fcsr: floating-point control and status register (frm + fflags)
+ CSR_FCSR: begin
+ fflags_en = 1'b1;
+ frm_en = 1'b1;
+ fflags_d = csr_wdata_int[4:0];
+ frm_d = roundmode_e'(csr_wdata_int[7:5]);
+ end
+ CSR_FFLAG : begin
+ fflags_en = 1'b1;
+ fflags_d = fpnew_pkg::status_t'(csr_wdata_int[4:0]);
+ end
+ CSR_FRM: begin
+ frm_en = 1'b1;
+ frm_d = roundmode_e'(csr_wdata_int[2:0]);
+ end
+ CSR_MSTATUS: begin
+ mstatus_en = 1'b1;
+ mstatus_d = '{
+ mie: csr_wdata_int[CSR_MSTATUS_MIE_BIT],
+ mpie: csr_wdata_int[CSR_MSTATUS_MPIE_BIT],
+ mpp: priv_lvl_e'(csr_wdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW]),
+ mprv: csr_wdata_int[CSR_MSTATUS_MPRV_BIT],
+ tw: csr_wdata_int[CSR_MSTATUS_TW_BIT]
+ };
+ // Convert illegal values to M-mode
+ if ((mstatus_d.mpp != PRIV_LVL_M) && (mstatus_d.mpp != PRIV_LVL_U)) begin
+ mstatus_d.mpp = PRIV_LVL_M;
+ end
+ end
+ // interrupt enable
+ CSR_MIE: mie_en = 1'b1;
+ CSR_MSCRATCH: mscratch_en = 1'b1;
+ // mepc: exception program counter
+ CSR_MEPC: mepc_en = 1'b1;
+ // mcause
+ CSR_MCAUSE: mcause_en = 1'b1;
+ // mtval: trap value
+ CSR_MTVAL: mtval_en = 1'b1;
+ // mtvec
+ CSR_MTVEC: mtvec_en = 1'b1;
+ CSR_DCSR: begin
+ dcsr_d = csr_wdata_int;
+ dcsr_d.xdebugver = XDEBUGVER_STD;
+ // Change to PRIV_LVL_M if software writes an unsupported value
+ if ((dcsr_d.prv != PRIV_LVL_M) && (dcsr_d.prv != PRIV_LVL_U)) begin
+ dcsr_d.prv = PRIV_LVL_M;
+ end
+ // Read-only for SW
+ dcsr_d.cause = dcsr_q.cause;
+ // currently not supported:
+ dcsr_d.nmip = 1'b0;
+ dcsr_d.mprven = 1'b0;
+ dcsr_d.stopcount = 1'b0;
+ dcsr_d.stoptime = 1'b0;
+ // forced to be zero
+ dcsr_d.zero0 = 1'b0;
+ dcsr_d.zero1 = 1'b0;
+ dcsr_d.zero2 = 12'h0;
+ dcsr_en = 1'b1;
+ end
+ // dpc: debug program counter
+ CSR_DPC: depc_en = 1'b1;
+ CSR_DSCRATCH0: dscratch0_en = 1'b1;
+ CSR_DSCRATCH1: dscratch1_en = 1'b1;
+ // machine counter/timers
+ CSR_MCOUNTINHIBIT: mcountinhibit_we = 1'b1;
+ // NOTE(review): the case labels below were missing (only the bodies and
+ // their orphaned 'end's remained). They are restored following the
+ // CSR_* naming used in this file - confirm against brq_pkg::csr_num_e.
+ // Write enable for the lower word of the addressed counter.
+ CSR_MCYCLE,
+ CSR_MINSTRET,
+ CSR_MHPMCOUNTER3,
+ CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7,
+ CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11,
+ CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15,
+ CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19,
+ CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23,
+ CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27,
+ CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin
+ mhpmcounter_we[mhpmcounter_idx] = 1'b1;
+ end
+ // Write enable for the upper word of the addressed counter.
+ CSR_MCYCLEH,
+ CSR_MINSTRETH,
+ CSR_MHPMCOUNTER3H,
+ CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H,
+ CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H,
+ CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H,
+ CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H,
+ CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H,
+ CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H,
+ CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin
+ mhpmcounterh_we[mhpmcounter_idx] = 1'b1;
+ end
+ CSR_CPUCTRL: cpuctrl_we = 1'b1;
+ default:;
+ endcase
+ end
+ // exception controller gets priority over other writes
+ unique case (1'b1)
+ csr_save_cause_i: begin
+ unique case (1'b1)
+ csr_save_if_i: begin
+ exception_pc = pc_if_i;
+ end
+ csr_save_id_i: begin
+ exception_pc = pc_id_i;
+ end
+ csr_save_wb_i: begin
+ exception_pc = pc_wb_i;
+ end
+ default:;
+ endcase
+ // Any exception, including debug mode, causes a switch to M-mode
+ priv_lvl_d = PRIV_LVL_M;
+ if (debug_csr_save_i) begin
+ // all interrupts are masked
+ // do not update cause, epc, tval, epc and status
+ dcsr_d.prv = priv_lvl_q;
+ dcsr_d.cause = debug_cause_i;
+ dcsr_en = 1'b1;
+ depc_d = exception_pc;
+ depc_en = 1'b1;
+ end else if (!debug_mode_i) begin
+ // In debug mode, "exceptions do not update any registers. That
+ // includes cause, epc, tval, dpc and mstatus." [Debug Spec v0.13.2, p.39]
+ mtval_en = 1'b1;
+ mtval_d = csr_mtval_i;
+ mstatus_en = 1'b1;
+ mstatus_d.mie = 1'b0; // disable interrupts
+ // save current status
+ mstatus_d.mpie = mstatus_q.mie;
+ mstatus_d.mpp = priv_lvl_q;
+ mepc_en = 1'b1;
+ mepc_d = exception_pc;
+ mcause_en = 1'b1;
+ mcause_d = {csr_mcause_i};
+ // save previous status for recoverable NMI
+ mstack_en = 1'b1;
+ end
+ end // csr_save_cause_i
+ csr_restore_dret_i: begin // DRET
+ priv_lvl_d = dcsr_q.prv;
+ end // csr_restore_dret_i
+ csr_restore_mret_i: begin // MRET
+ priv_lvl_d = mstatus_q.mpp;
+ mstatus_en = 1'b1;
+ mstatus_d.mie = mstatus_q.mpie; // re-enable interrupts
+ if (nmi_mode_i) begin
+ // when returning from an NMI restore state from mstack CSR
+ mstatus_d.mpie = mstack_q.mpie;
+ mstatus_d.mpp = mstack_q.mpp;
+ mepc_en = 1'b1;
+ mepc_d = mstack_epc_q;
+ mcause_en = 1'b1;
+ mcause_d = mstack_cause_q;
+ end else begin
+ // otherwise just set mstatus.MPIE/MPP
+ // See RISC-V Privileged Specification, version 1.11, Section
+ mstatus_d.mpie = 1'b1;
+ mstatus_d.mpp = PRIV_LVL_U;
+ end
+ end // csr_restore_mret_i
+ default:;
+ endcase
+ end
+ // Update current priv level
+ // Register holding the current privilege level: reset to PRIV_LVL_M while
+ // rst_ni is low (asynchronous, active-low), otherwise loaded from priv_lvl_d
+ // on the rising edge of clk_i.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ priv_lvl_q <= PRIV_LVL_M;
+ end else begin
+ priv_lvl_q <= priv_lvl_d;
+ end
+ end
+ // Send current priv level to the decoder
+ assign priv_mode_id_o = priv_lvl_q;
+ // New instruction fetches need to account for updates to priv_lvl_q this cycle
+ // (hence the next-state value priv_lvl_d, not the registered one)
+ assign priv_mode_if_o = priv_lvl_d;
+ // Load/store instructions must factor in MPRV for PMP checking
+ // (with mprv set the LSU sees mstatus.mpp instead of the current level)
+ assign priv_mode_lsu_o = mstatus_q.mprv ? mstatus_q.mpp : priv_lvl_q;
+ // CSR operation logic
+ // Derive the value to be written from the requested operation. SET and CLEAR
+ // are read-modify-write against the current read data; WRITE and READ both
+ // pass the operand through unchanged.
+ always_comb begin
+ unique case (csr_op_i)
+ CSR_OP_READ,
+ CSR_OP_WRITE: csr_wdata_int = csr_wdata_i;
+ CSR_OP_CLEAR: csr_wdata_int = csr_rdata_o & ~csr_wdata_i;
+ CSR_OP_SET: csr_wdata_int = csr_rdata_o | csr_wdata_i;
+ endcase
+ end
+ // A write request is any enabled operation that modifies the CSR
+ // (WRITE, SET or CLEAR); CSR_OP_READ never requests a write.
+ assign csr_wreq = csr_op_en_i &
+ (csr_op_i inside {CSR_OP_WRITE,
+ CSR_OP_SET,
+ CSR_OP_CLEAR});
+ // only write CSRs during one clock cycle
+ assign csr_we_int = csr_wreq & ~illegal_csr_insn_o;
+ assign csr_rdata_o = csr_rdata_int;
+ // directly output some registers
+ assign csr_mepc_o = mepc_q;
+ assign csr_depc_o = depc_q;
+ assign csr_mtvec_o = mtvec_q;
+ assign csr_mstatus_mie_o = mstatus_q.mie;
+ // Export the stored mstatus.TW bit (the right-hand side was missing).
+ assign csr_mstatus_tw_o = mstatus_q.tw;
+ assign debug_single_step_o = dcsr_q.step;
+ assign debug_ebreakm_o = dcsr_q.ebreakm;
+ assign debug_ebreaku_o = dcsr_q.ebreaku;
+ // Qualify incoming interrupt requests in mip CSR with mie CSR for controller and to re-enable
+ // clock upon WFI (must be purely combinational).
+ assign irqs_o = mip & mie_q;
+ assign irq_pending_o = |irqs_o;
+ ////////////////////////
+ // CSR instantiations //
+ ////////////////////////
+ // mstatus storage. Reset state: interrupts disabled (mie = 0), mpie = 1,
+ // mpp = PRIV_LVL_U, mprv = 0, tw = 0.
+ localparam status_t MSTATUS_RST_VAL = '{mie: 1'b0,
+ mpie: 1'b1,
+ mpp: PRIV_LVL_U,
+ mprv: 1'b0,
+ tw: 1'b0};
+ // The shadow copy is controlled by the ShadowCSR parameter; its error
+ // output is collected on mstatus_err.
+ brq_csr #(
+ .Width ($bits(status_t)),
+ .ShadowCopy (ShadowCSR),
+ .ResetValue ({MSTATUS_RST_VAL})
+ ) u_mstatus_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({mstatus_d}),
+ .wr_en_i (mstatus_en),
+ .rd_data_o (mstatus_q),
+ .rd_error_o (mstatus_err)
+ );
+ // fflags storage (5 bits, reset to zero, no shadow copy).
+ // Write data is fp_status_i while is_fp_instr_i is high, otherwise the
+ // CSR write value fflags_d; the register is written when either a CSR write
+ // (fflags_en) or is_fp_instr_i is active, so is_fp_instr_i wins if both
+ // happen in the same cycle.
+ // NOTE(review): with is_fp_instr_i high the stored flags are replaced by
+ // fp_status_i rather than OR-ed with it - confirm this matches the intended
+ // "accrued exception" behaviour of fflags.
+ assign fflag_wdata = is_fp_instr_i ? fp_status_i : fflags_d;
+ brq_csr #(
+ .Width (5),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) fflags_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (fflag_wdata),
+ .wr_en_i (fflags_en | is_fp_instr_i),
+ .rd_data_o (fflags_q),
+ .rd_error_o ()
+ );
+ // FRM
+ brq_csr #(
+ .Width (3),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) frm_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (frm_d),
+ .wr_en_i (frm_en),
+ .rd_data_o (frm_q),
+ .rd_error_o ()
+ );
+ // MEPC
+ brq_csr #(
+ .Width (32),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_mepc_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mepc_d),
+ .wr_en_i (mepc_en),
+ .rd_data_o (mepc_q),
+ .rd_error_o ()
+ );
+ // MIE
+ assign mie_d.irq_software = csr_wdata_int[CSR_MSIX_BIT];
+ assign mie_d.irq_timer = csr_wdata_int[CSR_MTIX_BIT];
+ assign mie_d.irq_external = csr_wdata_int[CSR_MEIX_BIT];
+ assign mie_d.irq_fast = csr_wdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW];
+ brq_csr #(
+ .Width ($bits(irqs_t)),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_mie_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({mie_d}),
+ .wr_en_i (mie_en),
+ .rd_data_o (mie_q),
+ .rd_error_o ()
+ );
+ brq_csr #(
+ .Width (32),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_mscratch_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (csr_wdata_int),
+ .wr_en_i (mscratch_en),
+ .rd_data_o (mscratch_q),
+ .rd_error_o ()
+ );
+ brq_csr #(
+ .Width (6),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_mcause_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mcause_d),
+ .wr_en_i (mcause_en),
+ .rd_data_o (mcause_q),
+ .rd_error_o ()
+ );
+ // MTVAL
+ brq_csr #(
+ .Width (32),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_mtval_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mtval_d),
+ .wr_en_i (mtval_en),
+ .rd_data_o (mtval_q),
+ .rd_error_o ()
+ );
+ // MTVEC
+ // Trap-vector base register (32 bits). The shadow copy is controlled by
+ // ShadowCSR and its error output is collected on mtvec_err. It is written
+ // from mtvec_d on a CSR write or when csr_mtvec_init_i is asserted.
+ // NOTE(review): the reset value 32'd1 has bit 0 set, whereas every value
+ // written through mtvec_d has bits [1:0] forced to 2'b00 - confirm which
+ // MODE encoding the fetch stage expects.
+ brq_csr #(
+ .Width (32),
+ .ShadowCopy (ShadowCSR),
+ .ResetValue (32'd1)
+ ) u_mtvec_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mtvec_d),
+ .wr_en_i (mtvec_en),
+ .rd_data_o (mtvec_q),
+ .rd_error_o (mtvec_err)
+ );
+ // DCSR
+ localparam dcsr_t DCSR_RESET_VAL = '{
+ xdebugver: XDEBUGVER_STD,
+ cause: DBG_CAUSE_NONE, // 3'h0
+ prv: PRIV_LVL_M,
+ default: '0
+ };
+ brq_csr #(
+ .Width ($bits(dcsr_t)),
+ .ShadowCopy (1'b0),
+ .ResetValue ({DCSR_RESET_VAL})
+ ) u_dcsr_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({dcsr_d}),
+ .wr_en_i (dcsr_en),
+ .rd_data_o (dcsr_q),
+ .rd_error_o ()
+ );
+ // DEPC
+ brq_csr #(
+ .Width (32),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_depc_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (depc_d),
+ .wr_en_i (depc_en),
+ .rd_data_o (depc_q),
+ .rd_error_o ()
+ );
+ brq_csr #(
+ .Width (32),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_dscratch0_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (csr_wdata_int),
+ .wr_en_i (dscratch0_en),
+ .rd_data_o (dscratch0_q),
+ .rd_error_o ()
+ );
+ brq_csr #(
+ .Width (32),
+ .ShadowCopy (1'b0),
+ .ResetValue ('0)
+ ) u_dscratch1_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (csr_wdata_int),
+ .wr_en_i (dscratch1_en),
+ .rd_data_o (dscratch1_q),
+ .rd_error_o ()
+ );
+ // Reset value of the stacked status fields: mpie set, mpp = PRIV_LVL_U
+ localparam status_stk_t MSTACK_RESET_VAL = '{
+   mpp:  PRIV_LVL_U,
+   mpie: 1'b1
+ };
+ // The three mstack registers below (status fields, epc, cause) share the single
+ // write enable mstack_en, so they are always updated together.
+ // Stacked status fields
+ brq_csr #(
+   .ResetValue ({MSTACK_RESET_VAL}),
+   .ShadowCopy (1'b0),
+   .Width      ($bits(status_stk_t))
+ ) u_mstack_csr (
+   .rst_ni     (rst_ni),
+   .clk_i      (clk_i),
+   .wr_en_i    (mstack_en),
+   .wr_data_i  ({mstack_d}),
+   .rd_data_o  (mstack_q),
+   .rd_error_o ()
+ );
+ // Stacked exception PC (32 bit)
+ brq_csr #(
+   .ResetValue ('0),
+   .ShadowCopy (1'b0),
+   .Width      (32)
+ ) u_mstack_epc_csr (
+   .rst_ni     (rst_ni),
+   .clk_i      (clk_i),
+   .wr_en_i    (mstack_en),
+   .wr_data_i  (mstack_epc_d),
+   .rd_data_o  (mstack_epc_q),
+   .rd_error_o ()
+ );
+ // Stacked cause (6 bit)
+ brq_csr #(
+   .ResetValue ('0),
+   .ShadowCopy (1'b0),
+   .Width      (6)
+ ) u_mstack_cause_csr (
+   .rst_ni     (rst_ni),
+   .clk_i      (clk_i),
+   .wr_en_i    (mstack_en),
+   .wr_data_i  (mstack_cause_d),
+   .rd_data_o  (mstack_cause_q),
+   .rd_error_o ()
+ );
+ // -----------------
+ // PMP registers
+ // -----------------
+ // With PMPEnable set, one cfg register and one addr register are instantiated per
+ // implemented region (PMPNumRegions). Otherwise all PMP read data, the PMP outputs and
+ // the PMP shadow error flag are tied to zero.
+ if (PMPEnable) begin : g_pmp_registers
+   pmp_cfg_t                 pmp_cfg       [PMPNumRegions];
+   pmp_cfg_t                 pmp_cfg_wdata [PMPNumRegions];
+   logic [PMPAddrWidth-1:0]  pmp_addr      [PMPNumRegions];
+   logic [PMPNumRegions-1:0] pmp_cfg_we;
+   logic [PMPNumRegions-1:0] pmp_cfg_err;
+   logic [PMPNumRegions-1:0] pmp_addr_we;
+   logic [PMPNumRegions-1:0] pmp_addr_err;
+   // Expanded / qualified register read data
+   for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_exp_rd_data
+     if (i < PMPNumRegions) begin : g_implemented_regions
+       // Add in zero padding for reserved fields
+       assign pmp_cfg_rdata[i] = {pmp_cfg[i].lock, 2'b00, pmp_cfg[i].mode,
+                                  pmp_cfg[i].exec, pmp_cfg[i].write, pmp_cfg[i].read};
+       // Address field read data depends on the current programmed mode and the granularity
+       // See RISC-V Privileged Specification, version 1.11, Section 3.6.1
+       if (PMPGranularity == 0) begin : g_pmp_g0
+         // If G == 0, read data is unmodified
+         assign pmp_addr_rdata[i] = pmp_addr[i];
+       end else if (PMPGranularity == 1) begin : g_pmp_g1
+         // If G == 1, bit [G-1] reads as zero in TOR or OFF mode
+         always_comb begin
+           pmp_addr_rdata[i] = pmp_addr[i];
+           if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin
+             pmp_addr_rdata[i][PMPGranularity-1:0] = '0;
+           end
+         end
+       end else begin : g_pmp_g2
+         // For G >= 2, bits are masked to one or zero depending on the mode
+         always_comb begin
+           // In NAPOT mode, bits [G-2:0] must read as one
+           pmp_addr_rdata[i] = {pmp_addr[i], {PMPGranularity-1{1'b1}}};
+           if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin
+             // In TOR or OFF mode, bits [G-1:0] must read as zero
+             pmp_addr_rdata[i][PMPGranularity-1:0] = '0;
+           end
+         end
+       end
+     end else begin : g_other_regions
+       // Non-implemented regions read as zero
+       assign pmp_cfg_rdata[i]  = '0;
+       assign pmp_addr_rdata[i] = '0;
+     end
+   end
+   // Write data calculation
+   for (genvar i = 0; i < PMPNumRegions; i++) begin : g_pmp_csrs
+     // -------------------------
+     // Instantiate cfg registers
+     // -------------------------
+     // A cfg field is writable only while its own lock bit is clear. Four cfg fields
+     // share one CSR, hence the i >> 2 in the address compare.
+     assign pmp_cfg_we[i] = csr_we_int & ~pmp_cfg[i].lock &
+                            (csr_addr == (CSR_OFF_PMP_CFG + (i[11:0] >> 2)));
+     // Select the correct WDATA (each CSR contains 4 CFG fields, each with 2 RES bits)
+     assign pmp_cfg_wdata[i].lock = csr_wdata_int[(i%4)*PMP_CFG_W+7];
+     // NA4 mode is not selectable when G > 0, mode is treated as OFF
+     always_comb begin
+       unique case (csr_wdata_int[(i%4)*PMP_CFG_W+3+:2])
+         2'b00   : pmp_cfg_wdata[i].mode = PMP_MODE_OFF;
+         2'b01   : pmp_cfg_wdata[i].mode = PMP_MODE_TOR;
+         // Fix: the else operand of this ternary was missing, leaving the statement
+         // unterminated. With G > 0 an NA4 request falls back to OFF.
+         2'b10   : pmp_cfg_wdata[i].mode = (PMPGranularity == 0) ? PMP_MODE_NA4 :
+                                                                   PMP_MODE_OFF;
+         2'b11   : pmp_cfg_wdata[i].mode = PMP_MODE_NAPOT;
+         default : pmp_cfg_wdata[i].mode = PMP_MODE_OFF;
+       endcase
+     end
+     assign pmp_cfg_wdata[i].exec  = csr_wdata_int[(i%4)*PMP_CFG_W+2];
+     // W = 1, R = 0 is a reserved combination. For now, we force W to 0 if R == 0
+     assign pmp_cfg_wdata[i].write = &csr_wdata_int[(i%4)*PMP_CFG_W+:2];
+     assign pmp_cfg_wdata[i].read  = csr_wdata_int[(i%4)*PMP_CFG_W];
+     brq_csr #(
+       .Width      ($bits(pmp_cfg_t)),
+       .ShadowCopy (ShadowCSR),
+       .ResetValue ('0)
+     ) u_pmp_cfg_csr (
+       .clk_i      (clk_i),
+       .rst_ni     (rst_ni),
+       .wr_data_i  ({pmp_cfg_wdata[i]}),
+       .wr_en_i    (pmp_cfg_we[i]),
+       .rd_data_o  (pmp_cfg[i]),
+       .rd_error_o (pmp_cfg_err[i])
+     );
+     // --------------------------
+     // Instantiate addr registers
+     // --------------------------
+     if (i < PMPNumRegions - 1) begin : g_lower
+       // Besides its own lock bit, an addr register is also write-protected while the
+       // next region is locked and in TOR mode.
+       assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg[i].lock &
+                               (~pmp_cfg[i+1].lock | (pmp_cfg[i+1].mode != PMP_MODE_TOR)) &
+                               (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0]));
+     end else begin : g_upper
+       // The last region has no successor, so only its own lock bit applies
+       assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg[i].lock &
+                               (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0]));
+     end
+     brq_csr #(
+       .Width      (PMPAddrWidth),
+       .ShadowCopy (ShadowCSR),
+       .ResetValue ('0)
+     ) u_pmp_addr_csr (
+       .clk_i      (clk_i),
+       .rst_ni     (rst_ni),
+       .wr_data_i  (csr_wdata_int[31-:PMPAddrWidth]),
+       .wr_en_i    (pmp_addr_we[i]),
+       .rd_data_o  (pmp_addr[i]),
+       .rd_error_o (pmp_addr_err[i])
+     );
+     assign csr_pmp_cfg_o[i]  = pmp_cfg[i];
+     assign csr_pmp_addr_o[i] = {pmp_addr_rdata[i], 2'b00};
+   end
+   // Any mismatch in a shadowed PMP cfg or addr register raises the PMP error flag
+   assign pmp_csr_err = (|pmp_cfg_err) | (|pmp_addr_err);
+ end else begin : g_no_pmp_tieoffs
+   // Generate tieoffs when PMP is not configured
+   for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_rdata
+     assign pmp_addr_rdata[i] = '0;
+     assign pmp_cfg_rdata[i]  = '0;
+   end
+   for (genvar i = 0; i < PMPNumRegions; i++) begin : g_outputs
+     assign csr_pmp_cfg_o[i]  = pmp_cfg_t'(1'b0);
+     assign csr_pmp_addr_o[i] = '0;
+   end
+   assign pmp_csr_err = 1'b0;
+ end
+ //////////////////////////
+ // Performance monitor //
+ //////////////////////////
+ // Next value of mcountinhibit: hold the current value unless the CSR is being written
+ always_comb begin : mcountinhibit_update
+   mcountinhibit_d = mcountinhibit_q;
+   if (mcountinhibit_we == 1'b1) begin
+     // bit 1 must always be 0
+     mcountinhibit_d = {csr_wdata_int[MHPMCounterNum+2:2], 1'b0, csr_wdata_int[0]};
+   end
+ end
+ // Hardwired mapping from core events to the per-counter increment bits
+ always_comb begin : gen_mhpmcounter_incr
+   // Default all 32 increment bits to inactive first; this prevents latch inference
+   // for the entries that are not assigned below.
+   for (int unsigned idx = 0; idx < 32; idx++) begin : gen_mhpmcounter_incr_inactive
+     mhpmcounter_incr[idx] = 1'b0;
+   end
+   // When adding or altering performance counter meanings and default
+   // mappings please update dv/verilator/pcount/cpp/
+   // appropriately.
+   //
+   // active counters
+   mhpmcounter_incr[0]  = 1'b1;                   // mcycle
+   mhpmcounter_incr[1]  = 1'b0;                   // reserved
+   mhpmcounter_incr[2]  = instr_ret_i;            // minstret
+   mhpmcounter_incr[3]  = dside_wait_i;           // cycles waiting for data memory
+   mhpmcounter_incr[4]  = iside_wait_i;           // cycles waiting for instr fetches
+   mhpmcounter_incr[5]  = mem_load_i;             // num of loads
+   mhpmcounter_incr[6]  = mem_store_i;            // num of stores
+   mhpmcounter_incr[7]  = jump_i;                 // num of jumps (unconditional)
+   mhpmcounter_incr[8]  = branch_i;               // num of branches (conditional)
+   mhpmcounter_incr[9]  = branch_taken_i;         // num of taken branches (conditional)
+   mhpmcounter_incr[10] = instr_ret_compressed_i; // num of compressed instr
+   mhpmcounter_incr[11] = mul_wait_i;             // cycles waiting for multiply
+   mhpmcounter_incr[12] = div_wait_i;             // cycles waiting for divide
+ end
+ // Hardwired event selectors: entry n has only bit n set; an all-zero entry means
+ // "no event".
+ always_comb begin : gen_mhpmevent
+   // Start with every selector active (one-hot on its own index)
+   for (int sel = 0; sel < 32; sel++) begin : gen_mhpmevent_active
+     mhpmevent[sel]      = '0;
+     mhpmevent[sel][sel] = 1'b1;
+   end
+   // Then clear the reserved entry 1 ...
+   mhpmevent[1] = '0; // not existing, reserved
+   // ... and every entry beyond the MHPMCounterNum implemented counters
+   for (int unsigned sel = 3 + MHPMCounterNum; sel < 32; sel++) begin : gen_mhpmevent_inactive
+     mhpmevent[sel] = '0;
+   end
+ end
+ // mcycle: 64-bit counter at index 0, counts unless inhibited by mcountinhibit[0]
+ brq_counter #(
+   .CounterWidth(64)
+ ) mcycle_counter_i (
+   .rst_ni        (rst_ni),
+   .clk_i         (clk_i),
+   .counter_inc_i (mhpmcounter_incr[0] & ~mcountinhibit[0]),
+   .counter_we_i  (mhpmcounter_we[0]),
+   .counterh_we_i (mhpmcounterh_we[0]),
+   .counter_val_i (csr_wdata_int),
+   .counter_val_o (mhpmcounter[0])
+ );
+ // minstret: 64-bit counter at index 2, counts unless inhibited by mcountinhibit[2]
+ brq_counter #(
+   .CounterWidth(64)
+ ) minstret_counter_i (
+   .rst_ni        (rst_ni),
+   .clk_i         (clk_i),
+   .counter_inc_i (mhpmcounter_incr[2] & ~mcountinhibit[2]),
+   .counter_we_i  (mhpmcounter_we[2]),
+   .counterh_we_i (mhpmcounterh_we[2]),
+   .counter_val_i (csr_wdata_int),
+   .counter_val_o (mhpmcounter[2])
+ );
+ // Index 1 is reserved: it reads as zero and its control bits are only consumed so
+ // that they do not show up as unused signals.
+ assign unused_mhpmcounter_incr_1 = mhpmcounter_incr[1];
+ assign unused_mhpmcounterh_we_1  = mhpmcounterh_we[1];
+ assign unused_mhpmcounter_we_1   = mhpmcounter_we[1];
+ assign mhpmcounter[1]            = '0;
+ // Counters 3..31: the first MHPMCounterNum of them are implemented with
+ // MHPMCounterWidth bits, the remaining ones read as zero.
+ for (genvar cnt = 0; cnt < 29; cnt++) begin : gen_cntrs
+   if (cnt < MHPMCounterNum) begin : gen_imp
+     brq_counter #(
+       .CounterWidth(MHPMCounterWidth)
+     ) mcounters_variable_i (
+       .rst_ni        (rst_ni),
+       .clk_i         (clk_i),
+       .counter_inc_i (mhpmcounter_incr[cnt+3] & ~mcountinhibit[cnt+3]),
+       .counter_we_i  (mhpmcounter_we[cnt+3]),
+       .counterh_we_i (mhpmcounterh_we[cnt+3]),
+       .counter_val_i (csr_wdata_int),
+       .counter_val_o (mhpmcounter[cnt+3])
+     );
+   end else begin : gen_unimp
+     assign mhpmcounter[cnt+3] = '0;
+   end
+ end
+ // Expand the stored mcountinhibit_q to the full inhibit vector. When fewer than 29
+ // optional counters are implemented, the inhibit bits of the missing ones are forced
+ // to one.
+ if (MHPMCounterNum < 29) begin : g_mcountinhibit_reduced
+   logic [29-MHPMCounterNum-1:0] unused_mhphcounter_we;
+   logic [29-MHPMCounterNum-1:0] unused_mhphcounterh_we;
+   logic [29-MHPMCounterNum-1:0] unused_mhphcounter_incr;
+   // Lint tieoffs for unused bits
+   assign unused_mhphcounter_incr = mhpmcounter_incr[31:MHPMCounterNum+3];
+   assign unused_mhphcounterh_we  = mhpmcounterh_we[31:MHPMCounterNum+3];
+   assign unused_mhphcounter_we   = mhpmcounter_we[31:MHPMCounterNum+3];
+   assign mcountinhibit = {{29-MHPMCounterNum{1'b1}}, mcountinhibit_q};
+ end else begin : g_mcountinhibit_full
+   assign mcountinhibit = mcountinhibit_q;
+ end
+ // mcountinhibit state register: cleared by the asynchronous active-low reset,
+ // otherwise loaded with mcountinhibit_d on every clock edge
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     mcountinhibit_q <= '0;
+   end else begin
+     mcountinhibit_q <= mcountinhibit_d;
+   end
+ end
+ /////////////////////////////
+ // Debug trigger registers //
+ /////////////////////////////
+ // With DbgTriggerEn set, DbgHwBreakNum instruction-address triggers are provided.
+ // They can only be written while debug_mode_i is asserted. Otherwise all trigger
+ // read data and the match output are tied to zero.
+ if (DbgTriggerEn) begin : gen_trigger_regs
+   // Width of the trigger index (at least one bit)
+   localparam int unsigned DbgHwNumLen = DbgHwBreakNum > 1 ? $clog2(DbgHwBreakNum) : 1;
+   // Number of zero bits needed to pad tselect_q up to 32 bits of read data
+   localparam int unsigned TSelectRdataPadlen = DbgHwNumLen >= 32 ? 0 : (32 - DbgHwNumLen);
+   // Trigger select register
+   logic [DbgHwNumLen-1:0]   tselect_d, tselect_q;
+   logic                     tselect_we;
+   // Per-trigger execute-match enable
+   logic                     tmatch_control_d;
+   logic [DbgHwBreakNum-1:0] tmatch_control_q;
+   logic [DbgHwBreakNum-1:0] tmatch_control_we;
+   // Per-trigger match address
+   logic [31:0]              tmatch_value_d;
+   logic [31:0]              tmatch_value_q[DbgHwBreakNum];
+   logic [DbgHwBreakNum-1:0] tmatch_value_we;
+   // Trigger comparison result
+   logic [DbgHwBreakNum-1:0] trigger_match;
+   // Write decode: tselect is written directly; tdata1/tdata2 writes are steered to
+   // the trigger currently chosen by tselect_q.
+   assign tselect_we = (csr_addr_i == CSR_TSELECT) & debug_mode_i & csr_we_int;
+   for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_we
+     assign tmatch_control_we[i] = (csr_addr_i == CSR_TDATA1) & debug_mode_i & csr_we_int &
+                                   (i[DbgHwNumLen-1:0] == tselect_q);
+     assign tmatch_value_we[i]   = (csr_addr_i == CSR_TDATA2) & debug_mode_i & csr_we_int &
+                                   (i[DbgHwNumLen-1:0] == tselect_q);
+   end
+   // Debug interface tests the available number of triggers by writing and reading the trigger
+   // select register. Only allow changes to the register if it is within the supported region.
+   assign tselect_d = (csr_wdata_int < DbgHwBreakNum) ? csr_wdata_int[DbgHwNumLen-1:0] :
+                                                        DbgHwBreakNum-1;
+   // tmatch_control is enabled when the execute bit is set
+   assign tmatch_control_d = csr_wdata_int[2];
+   assign tmatch_value_d   = csr_wdata_int[31:0];
+   // Storage for tselect
+   brq_csr #(
+     .ResetValue ('0),
+     .ShadowCopy (1'b0),
+     .Width      (DbgHwNumLen)
+   ) u_tselect_csr (
+     .rst_ni     (rst_ni),
+     .clk_i      (clk_i),
+     .wr_en_i    (tselect_we),
+     .wr_data_i  (tselect_d),
+     .rd_data_o  (tselect_q),
+     .rd_error_o ()
+   );
+   // Storage for the per-trigger control bit and match address
+   for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_reg
+     brq_csr #(
+       .ResetValue ('0),
+       .ShadowCopy (1'b0),
+       .Width      (1)
+     ) u_tmatch_control_csr (
+       .rst_ni     (rst_ni),
+       .clk_i      (clk_i),
+       .wr_en_i    (tmatch_control_we[i]),
+       .wr_data_i  (tmatch_control_d),
+       .rd_data_o  (tmatch_control_q[i]),
+       .rd_error_o ()
+     );
+     brq_csr #(
+       .ResetValue ('0),
+       .ShadowCopy (1'b0),
+       .Width      (32)
+     ) u_tmatch_value_csr (
+       .rst_ni     (rst_ni),
+       .clk_i      (clk_i),
+       .wr_en_i    (tmatch_value_we[i]),
+       .wr_data_i  (tmatch_value_d),
+       .rd_data_o  (tmatch_value_q[i]),
+       .rd_error_o ()
+     );
+   end
+   // Read data
+   // TSELECT - number of supported triggers defined by parameter DbgHwBreakNum
+   assign tselect_rdata = {{TSelectRdataPadlen{1'b0}}, tselect_q};
+   // Control word of the selected trigger (written through CSR_TDATA1) - only simple
+   // address matching is supported, so every field except execute is a constant
+   assign tmatch_control_rdata = {4'h2,                        // type    : address/data match
+                                  1'b1,                        // dmode   : access from D mode only
+                                  6'h00,                       // maskmax : exact match only
+                                  1'b0,                        // hit     : not supported
+                                  1'b0,                        // select  : address match only
+                                  1'b0,                        // timing  : match before execution
+                                  2'b00,                       // sizelo  : match any access
+                                  4'h1,                        // action  : enter debug mode
+                                  1'b0,                        // chain   : not supported
+                                  4'h0,                        // match   : simple match
+                                  1'b1,                        // m       : match in m-mode
+                                  1'b0,                        // 0       : zero
+                                  1'b0,                        // s       : not supported
+                                  1'b1,                        // u       : match in u-mode
+                                  tmatch_control_q[tselect_q], // execute : match instruction address
+                                  1'b0,                        // store   : not supported
+                                  1'b0};                       // load    : not supported
+   // Match address of the selected trigger (written through CSR_TDATA2)
+   assign tmatch_value_rdata = tmatch_value_q[tselect_q];
+   // Breakpoint matching
+   // We match against the next address, as the breakpoint must be taken before execution
+   for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_trigger_match
+     assign trigger_match[i] = tmatch_control_q[i] & (pc_if_i[31:0] == tmatch_value_q[i]);
+   end
+   assign trigger_match_o = |trigger_match;
+ end else begin : gen_no_trigger_regs
+   // No triggers configured: everything reads as zero and nothing ever matches
+   assign tselect_rdata        = 'b0;
+   assign tmatch_control_rdata = 'b0;
+   assign tmatch_value_rdata   = 'b0;
+   assign trigger_match_o      = 'b0;
+ end
+ //////////////////////////
+ // CPU control register //
+ //////////////////////////
+ // Reinterpret the low $bits(cpu_ctrl_t) bits of the CSR write data as a cpu_ctrl_t
+ assign cpuctrl_wdata = cpu_ctrl_t'(csr_wdata_int[$bits(cpu_ctrl_t)-1:0]);
+ // data_ind_timing bit: writable only when DataIndTiming is configured
+ if (DataIndTiming) begin : gen_dit
+   assign cpuctrl_d.data_ind_timing = cpuctrl_wdata.data_ind_timing;
+ end else begin : gen_no_dit
+   // Not configured: the field always reads as zero, and the written bit is consumed
+   // by a dummy signal so that it is not reported as unused.
+   logic unused_dit;
+   assign cpuctrl_d.data_ind_timing = 1'b0;
+   assign unused_dit                = cpuctrl_wdata.data_ind_timing;
+ end
+ assign data_ind_timing_o = cpuctrl_q.data_ind_timing;
+ // Dummy instruction control: enable, mask and seed are only implemented when
+ // DummyInstructions is configured
+ if (DummyInstructions) begin : gen_dummy
+   assign cpuctrl_d.dummy_instr_mask = cpuctrl_wdata.dummy_instr_mask;
+   assign cpuctrl_d.dummy_instr_en   = cpuctrl_wdata.dummy_instr_en;
+   // A CSR write to CSR_SECURESEED forwards the write data as the new seed
+   assign dummy_instr_seed_o    = csr_wdata_int;
+   assign dummy_instr_seed_en_o = csr_we_int && (csr_addr == CSR_SECURESEED);
+ end else begin : gen_no_dummy
+   // Not configured: consume the written bits with dummy signals ...
+   logic       unused_dummy_en;
+   logic [2:0] unused_dummy_mask;
+   assign unused_dummy_mask = cpuctrl_wdata.dummy_instr_mask;
+   assign unused_dummy_en   = cpuctrl_wdata.dummy_instr_en;
+   // ... make the fields always read as zero, and never signal a seed update
+   assign cpuctrl_d.dummy_instr_mask = 3'b000;
+   assign cpuctrl_d.dummy_instr_en   = 1'b0;
+   assign dummy_instr_seed_o         = '0;
+   assign dummy_instr_seed_en_o      = 1'b0;
+ end
+ assign dummy_instr_mask_o = cpuctrl_q.dummy_instr_mask;
+ assign dummy_instr_en_o   = cpuctrl_q.dummy_instr_en;
+ // icache_enable bit: writable only when ICache is configured
+ if (ICache) begin : gen_icache_enable
+   assign cpuctrl_d.icache_enable = cpuctrl_wdata.icache_enable;
+ end else begin : gen_no_icache
+   // Not configured: the icen field always reads as zero, and the written bit is
+   // consumed by a dummy signal so that it is not reported as unused.
+   logic unused_icen;
+   assign cpuctrl_d.icache_enable = 1'b0;
+   assign unused_icen             = cpuctrl_wdata.icache_enable;
+ end
+ assign icache_enable_o = cpuctrl_q.icache_enable;
+ // CPU control register storage. A shadow copy is kept when ShadowCSR is set, in
+ // which case cpuctrl_err reports a mismatch.
+ brq_csr #(
+   .ResetValue ('0),
+   .ShadowCopy (ShadowCSR),
+   .Width      ($bits(cpu_ctrl_t))
+ ) u_cpuctrl_csr (
+   .rst_ni     (rst_ni),
+   .clk_i      (clk_i),
+   .wr_en_i    (cpuctrl_we),
+   .wr_data_i  ({cpuctrl_d}),
+   .rd_data_o  (cpuctrl_q),
+   .rd_error_o (cpuctrl_err)
+ );
+ // Combined shadow-register error: OR of the mstatus, mtvec, PMP and cpuctrl error flags
+ assign csr_shadow_err_o = mstatus_err | mtvec_err | pmp_csr_err | cpuctrl_err;
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..b1c4477
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,53 @@
+/**
+ * Control / status register primitive
+ */
+module brq_csr #(
+  // Register width in bits
+  parameter int unsigned    Width      = 32,
+  // When set, an inverted shadow copy is stored and compared against the register
+  parameter bit             ShadowCopy = 1'b0,
+  // Value loaded while rst_ni is low
+  parameter bit [Width-1:0] ResetValue = '0
+) (
+  input  logic             clk_i,
+  input  logic             rst_ni,     // asynchronous reset, active low
+  input  logic [Width-1:0] wr_data_i,  // value stored when wr_en_i is high
+  input  logic             wr_en_i,    // write enable
+  output logic [Width-1:0] rd_data_o,  // current register value
+  output logic             rd_error_o  // register/shadow mismatch (0 without shadow)
+);
+  // Fix: the closing ");" of the port list above and the "endmodule" at the end of
+  // this module were missing.
+  logic [Width-1:0] rdata_q;
+  // Main register: holds its value unless written
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rdata_q <= ResetValue;
+    end else if (wr_en_i) begin
+      rdata_q <= wr_data_i;
+    end
+  end
+  assign rd_data_o = rdata_q;
+  if (ShadowCopy) begin : gen_shadow
+    // The shadow register stores the bitwise inverse of every value written to the
+    // main register, under the same reset and write enable.
+    logic [Width-1:0] shadow_q;
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        shadow_q <= ~ResetValue;
+      end else if (wr_en_i) begin
+        shadow_q <= ~wr_data_i;
+      end
+    end
+    // Error whenever the main register and the re-inverted shadow disagree
+    assign rd_error_o = rdata_q != ~shadow_q;
+  end else begin : gen_no_shadow
+    assign rd_error_o = 1'b0;
+  end
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..883cf7a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,194 @@
+/**
+ * Execution stage
+ *
+ * Execution block: Hosts ALU and MUL/DIV unit
+ */
+module brq_exu #(
+  parameter brq_pkg::rv32m_e RV32M           = brq_pkg::RV32MFast,
+  parameter brq_pkg::rv32b_e RV32B           = brq_pkg::RV32BNone,
+  parameter bit              BranchTargetALU = 0
+) (
+  input  logic                  clk_i,
+  input  logic                  rst_ni,
+  // ALU
+  input  brq_pkg::alu_op_e      alu_operator_i,
+  input  logic [31:0]           alu_operand_a_i,
+  input  logic [31:0]           alu_operand_b_i,
+  input  logic                  alu_instr_first_cycle_i,
+  // Branch Target ALU
+  // All of these signals are unused when BranchTargetALU == 0
+  input  logic [31:0]           bt_a_operand_i,
+  input  logic [31:0]           bt_b_operand_i,
+  // Multiplier/Divider
+  input  brq_pkg::md_op_e       multdiv_operator_i,
+  input  logic                  mult_en_i,             // dynamic enable signal, for FSM control
+  input  logic                  div_en_i,              // dynamic enable signal, for FSM control
+  input  logic                  mult_sel_i,            // static decoder output, for data muxes
+  input  logic                  div_sel_i,             // static decoder output, for data muxes
+  input  logic  [1:0]           multdiv_signed_mode_i,
+  input  logic [31:0]           multdiv_operand_a_i,
+  input  logic [31:0]           multdiv_operand_b_i,
+  input  logic                  multdiv_ready_id_i,
+  input  logic                  data_ind_timing_i,
+  // intermediate val reg
+  output logic [1:0]            imd_val_we_o,
+  output logic [33:0]           imd_val_d_o[2],
+  input  logic [33:0]           imd_val_q_i[2],
+  // Outputs
+  output logic [31:0]           alu_adder_result_ex_o, // to LSU
+  output logic [31:0]           result_ex_o,
+  output logic [31:0]           branch_target_o,       // to IF
+  output logic                  branch_decision_o,     // to ID
+  output logic                  ex_valid_o             // EX has valid output
+);
+  // Fix: the closing ");" of the port list above and the "endmodule" at the end of
+  // this module were missing.
+  import brq_pkg::*;
+  logic [31:0] alu_result, multdiv_result;
+  logic [32:0] multdiv_alu_operand_b, multdiv_alu_operand_a;
+  logic [33:0] alu_adder_result_ext;
+  logic        alu_cmp_result, alu_is_equal_result;
+  logic        multdiv_valid;
+  logic        multdiv_sel;
+  logic [31:0] alu_imd_val_q[2];
+  logic [31:0] alu_imd_val_d[2];
+  logic [ 1:0] alu_imd_val_we;
+  logic [33:0] multdiv_imd_val_d[2];
+  logic [ 1:0] multdiv_imd_val_we;
+  /*
+    The multdiv_i output is never selected if RV32M=RV32MNone
+    At synthesis time, all the combinational and sequential logic
+    from the multdiv_i module are eliminated
+  */
+  if (RV32M != RV32MNone) begin : gen_multdiv_m
+    assign multdiv_sel = mult_sel_i | div_sel_i;
+  end else begin : gen_multdiv_no_m
+    assign multdiv_sel = 1'b0;
+  end
+  // Intermediate Value Register Mux
+  // The ALU only uses 32 bits of each 34-bit intermediate register, so its values are
+  // zero-extended on the way out and truncated on the way in.
+  assign imd_val_d_o[0] = multdiv_sel ? multdiv_imd_val_d[0] : {2'b0, alu_imd_val_d[0]};
+  assign imd_val_d_o[1] = multdiv_sel ? multdiv_imd_val_d[1] : {2'b0, alu_imd_val_d[1]};
+  assign imd_val_we_o   = multdiv_sel ? multdiv_imd_val_we : alu_imd_val_we;
+  assign alu_imd_val_q  = '{imd_val_q_i[0][31:0], imd_val_q_i[1][31:0]};
+  assign result_ex_o    = multdiv_sel ? multdiv_result : alu_result;
+  // branch handling
+  assign branch_decision_o = alu_cmp_result;
+  if (BranchTargetALU) begin : g_branch_target_alu
+    // Dedicated adder for the branch target; the carry out is discarded
+    logic [32:0] bt_alu_result;
+    logic        unused_bt_carry;
+    assign bt_alu_result   = bt_a_operand_i + bt_b_operand_i;
+    assign unused_bt_carry = bt_alu_result[32];
+    assign branch_target_o = bt_alu_result[31:0];
+  end else begin : g_no_branch_target_alu
+    // Unused bt_operand signals cause lint errors, this avoids them
+    logic [31:0] unused_bt_a_operand, unused_bt_b_operand;
+    assign unused_bt_a_operand = bt_a_operand_i;
+    assign unused_bt_b_operand = bt_b_operand_i;
+    // Without a dedicated adder the branch target is the ALU adder result
+    assign branch_target_o = alu_adder_result_ex_o;
+  end
+  /////////
+  // ALU //
+  /////////
+  brq_exu_alu #(
+    .RV32B(RV32B)
+  ) alu_i (
+    .operator_i          ( alu_operator_i          ),
+    .operand_a_i         ( alu_operand_a_i         ),
+    .operand_b_i         ( alu_operand_b_i         ),
+    .instr_first_cycle_i ( alu_instr_first_cycle_i ),
+    .imd_val_q_i         ( alu_imd_val_q           ),
+    .imd_val_we_o        ( alu_imd_val_we          ),
+    .imd_val_d_o         ( alu_imd_val_d           ),
+    .multdiv_operand_a_i ( multdiv_alu_operand_a   ),
+    .multdiv_operand_b_i ( multdiv_alu_operand_b   ),
+    .multdiv_sel_i       ( multdiv_sel             ),
+    .adder_result_o      ( alu_adder_result_ex_o   ),
+    .adder_result_ext_o  ( alu_adder_result_ext    ),
+    .result_o            ( alu_result              ),
+    .comparison_result_o ( alu_cmp_result          ),
+    .is_equal_result_o   ( alu_is_equal_result     )
+  );
+  ////////////////
+  // Multiplier //
+  ////////////////
+  // Both multiplier/divider variants take their adder results from the ALU and drive
+  // the ALU operands through multdiv_alu_operand_a/b.
+  if (RV32M == RV32MSlow) begin : gen_multdiv_slow
+    brq_exu_multdiv_slow multdiv_i (
+      .clk_i              ( clk_i                 ),
+      .rst_ni             ( rst_ni                ),
+      .mult_en_i          ( mult_en_i             ),
+      .div_en_i           ( div_en_i              ),
+      .mult_sel_i         ( mult_sel_i            ),
+      .div_sel_i          ( div_sel_i             ),
+      .operator_i         ( multdiv_operator_i    ),
+      .signed_mode_i      ( multdiv_signed_mode_i ),
+      .op_a_i             ( multdiv_operand_a_i   ),
+      .op_b_i             ( multdiv_operand_b_i   ),
+      .alu_adder_ext_i    ( alu_adder_result_ext  ),
+      .alu_adder_i        ( alu_adder_result_ex_o ),
+      .equal_to_zero_i    ( alu_is_equal_result   ),
+      .data_ind_timing_i  ( data_ind_timing_i     ),
+      .valid_o            ( multdiv_valid         ),
+      .alu_operand_a_o    ( multdiv_alu_operand_a ),
+      .alu_operand_b_o    ( multdiv_alu_operand_b ),
+      .imd_val_q_i        ( imd_val_q_i           ),
+      .imd_val_d_o        ( multdiv_imd_val_d     ),
+      .imd_val_we_o       ( multdiv_imd_val_we    ),
+      .multdiv_ready_id_i ( multdiv_ready_id_i    ),
+      .multdiv_result_o   ( multdiv_result        )
+    );
+  end else if (RV32M == RV32MFast || RV32M == RV32MSingleCycle) begin : gen_multdiv_fast
+    brq_exu_multdiv_fast # (
+      .RV32M ( RV32M )
+    ) multdiv_i (
+      .clk_i              ( clk_i                 ),
+      .rst_ni             ( rst_ni                ),
+      .mult_en_i          ( mult_en_i             ),
+      .div_en_i           ( div_en_i              ),
+      .mult_sel_i         ( mult_sel_i            ),
+      .div_sel_i          ( div_sel_i             ),
+      .operator_i         ( multdiv_operator_i    ),
+      .signed_mode_i      ( multdiv_signed_mode_i ),
+      .op_a_i             ( multdiv_operand_a_i   ),
+      .op_b_i             ( multdiv_operand_b_i   ),
+      .alu_operand_a_o    ( multdiv_alu_operand_a ),
+      .alu_operand_b_o    ( multdiv_alu_operand_b ),
+      .alu_adder_ext_i    ( alu_adder_result_ext  ),
+      .alu_adder_i        ( alu_adder_result_ex_o ),
+      .equal_to_zero_i    ( alu_is_equal_result   ),
+      .data_ind_timing_i  ( data_ind_timing_i     ),
+      .imd_val_q_i        ( imd_val_q_i           ),
+      .imd_val_d_o        ( multdiv_imd_val_d     ),
+      .imd_val_we_o       ( multdiv_imd_val_we    ),
+      .multdiv_ready_id_i ( multdiv_ready_id_i    ),
+      .valid_o            ( multdiv_valid         ),
+      .multdiv_result_o   ( multdiv_result        )
+    );
+  end
+  // Multiplier/divider may require multiple cycles. The ALU output is valid in the same cycle
+  // unless the intermediate result register is being written (which indicates this isn't the
+  // final cycle of ALU operation).
+  assign ex_valid_o = multdiv_sel ? multdiv_valid : ~(|alu_imd_val_we);
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..99fe60a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,1267 @@
+/**
+ * Arithmetic logic unit
+ */
+module brq_exu_alu #(
+  parameter brq_pkg::rv32b_e RV32B = brq_pkg::RV32BNone
+) (
+  input  brq_pkg::alu_op_e operator_i,
+  input  logic [31:0]      operand_a_i,
+  input  logic [31:0]      operand_b_i,
+  input  logic             instr_first_cycle_i,
+  // Adder operands supplied by the multiplier/divider; used when multdiv_sel_i is set
+  input  logic [32:0]      multdiv_operand_a_i,
+  input  logic [32:0]      multdiv_operand_b_i,
+  input  logic             multdiv_sel_i,
+  // Intermediate value registers (stored outside this module)
+  input  logic [31:0]      imd_val_q_i[2],
+  output logic [31:0]      imd_val_d_o[2],
+  output logic [1:0]       imd_val_we_o,
+  output logic [31:0]      adder_result_o,
+  output logic [33:0]      adder_result_ext_o,
+  output logic [31:0]      result_o,
+  output logic             comparison_result_o,
+  output logic             is_equal_result_o
+);
+  // Fix: the closing ");" of the port list above was missing.
+  import brq_pkg::*;
+ logic [32:0] operand_b_neg;
+ logic [31:0] operand_a_rev;
+ // operand_a_rev is operand_a_i with its bit order mirrored (bit k <- bit 31-k);
+ // it is used for left shifts and bit counting
+ for (genvar k = 0; k < 32; k++) begin : gen_rev_operand_a
+   assign operand_a_rev[k] = operand_a_i[31-k];
+ end
+ ///////////
+ // Adder //
+ ///////////
+ logic        adder_op_b_negate;
+ logic [32:0] adder_in_a, adder_in_b;
+ logic [31:0] adder_result;
+ // Select the operations that need A - B from the adder. For these, operand B is
+ // fed in inverted, and the appended LSBs of both adder inputs supply the +1.
+ //
+ // Fix: the "Adder OPs" and "Comparator OPs" groups had lost their case labels, so
+ // only MAX/MAXU negated operand B. The comparison logic further down relies on
+ // adder_result being A - B for EQ/NE/SLT/SLTU/MIN/MINU as well.
+ // NOTE(review): ALU_SUB, ALU_GE, ALU_GEU, ALU_LT and ALU_LTU are not referenced
+ // elsewhere in this part of the file - confirm they exist in brq_pkg::alu_op_e.
+ always_comb begin
+   adder_op_b_negate = 1'b0;
+   unique case (operator_i)
+     // Adder OPs
+     ALU_SUB,
+     // Comparator OPs
+     ALU_EQ,   ALU_NE,
+     ALU_GE,   ALU_GEU,
+     ALU_LT,   ALU_LTU,
+     ALU_SLT,  ALU_SLTU,
+     // MinMax OPs (RV32B Ops)
+     ALU_MIN,  ALU_MINU,
+     ALU_MAX,  ALU_MAXU: adder_op_b_negate = 1'b1;
+     default:;
+   endcase
+ end
+ // Operand A: a constant 1 is appended below the LSB of operand_a_i, unless the
+ // multiplier/divider supplies the operand itself
+ assign adder_in_a = multdiv_sel_i ? multdiv_operand_a_i : {operand_a_i, 1'b1};
+ // Operand B, inverted form: {~operand_b_i, 1'b1}
+ assign operand_b_neg = ~{operand_b_i, 1'b0};
+ // Operand B selection: the multiplier/divider operand, the inverted operand, or
+ // the plain operand with a 0 appended below the LSB
+ always_comb begin
+   unique case (1'b1)
+     multdiv_sel_i:     adder_in_b = multdiv_operand_b_i;
+     adder_op_b_negate: adder_in_b = operand_b_neg;
+     default:           adder_in_b = {operand_b_i, 1'b0};
+   endcase
+ end
+ // 33-bit + 33-bit addition with a 34-bit result; bits [32:1] form the 32-bit result
+ // (bit 0 only exists to carry the appended LSBs)
+ assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
+ assign adder_result       = adder_result_ext_o[32:1];
+ assign adder_result_o     = adder_result;
+ ////////////////
+ // Comparison //
+ ////////////////
+ logic is_equal;
+ logic is_greater_equal; // handles both signed and unsigned forms
+ logic cmp_signed;
+ // cmp_signed selects the signed interpretation of the operands for the
+ // greater-or-equal evaluation below.
+ //
+ // Fix: only ALU_MAX was listed, so the other signed operations (e.g. ALU_SLT and
+ // ALU_MIN, both used by the comparison result mux in this file) were compared as
+ // unsigned.
+ // NOTE(review): ALU_GE and ALU_LT are not referenced elsewhere in this part of the
+ // file - confirm they exist in brq_pkg::alu_op_e.
+ always_comb begin
+   unique case (operator_i)
+     ALU_GE,
+     ALU_LT,
+     ALU_SLT,
+     // RV32B only
+     ALU_MIN,
+     ALU_MAX: cmp_signed = 1'b1;
+     default: cmp_signed = 1'b0;
+   endcase
+ end
+ // Equality: the adder result is all zeros
+ assign is_equal          = (adder_result == 32'b0);
+ assign is_equal_result_o = is_equal;
+ // Greater-or-equal: when both operands have the same MSB, the MSB of the adder
+ // result decides; when the MSBs differ, the operand MSBs and the signedness decide.
+ //
+ //   a[31] b[31] | unsigned             | signed
+ //   ------------+----------------------+---------------------
+ //     1     1   | adder_result[31]==0  | adder_result[31]==0
+ //     0     0   | adder_result[31]==0  | adder_result[31]==0
+ //     1     0   | 1                    | 0
+ //     0     1   | 0                    | 1
+ always_comb begin
+   if (operand_a_i[31] == operand_b_i[31]) begin
+     is_greater_equal = (adder_result[31] == 1'b0);
+   end else begin
+     is_greater_equal = operand_a_i[31] ^ (cmp_signed);
+   end
+ end
+ // generate comparison result
+ //
+ // Fix: the ALU_GE/ALU_GEU and ALU_LT/ALU_LTU labels were missing, so those
+ // operators fell into the default branch and returned is_equal.
+ // NOTE(review): these four labels are not referenced elsewhere in this part of the
+ // file - confirm they exist in brq_pkg::alu_op_e.
+ logic cmp_result;
+ always_comb begin
+   unique case (operator_i)
+     ALU_EQ:             cmp_result =  is_equal;
+     ALU_NE:             cmp_result = ~is_equal;
+     ALU_GE,   ALU_GEU,
+     ALU_MAX,  ALU_MAXU: cmp_result = is_greater_equal; // RV32B only
+     ALU_LT,   ALU_LTU,
+     ALU_MIN,  ALU_MINU, //RV32B only
+     ALU_SLT,  ALU_SLTU: cmp_result = ~is_greater_equal;
+     default:            cmp_result = is_equal;
+   endcase
+ end
+ assign comparison_result_o = cmp_result;
+ ///////////
+ // Shift //
+ ///////////
+ // The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for
+ // arithmetic shifts and one-shift support.
+ // Rotations and funnel shifts are implemented as multi-cycle instructions.
+ // The shifter is also used for single-bit instructions and bit-field place as detailed below.
+ //
+ // Standard Shifts
+ // ===============
+ // For standard shift instructions, the direction of the shift is to the right by default. For
+ // left shifts, the shift_left signal is set. If so, the operand is initially reversed,
+ // shifted to the right by the specified amount and reversed back again. For arithmetic- and
+ // one-shifts the 33rd bit of the shifter operand is set accordingly.
+ //
+ // Multicycle Shifts
+ // =================
+ //
+ // Rotation
+ // --------
+ // For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and
+ // rs2 respectively.
+ //
+ // Rotation pseudocode:
+ // shift_amt = rs2 & 31;
+ // multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt));
+ // ^-- cycle 0 -----^ ^-- cycle 1 --------------^
+ //
+ // Funnel Shifts
+ // -------------
+ // For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
+ // second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or
+ // its complement is determined by bit [5] of shift_amt.
+ //
+ // Funnel shift Pseudocode: (fsl)
+ // shift_amt = rs2 & 63;
+ // shift_amt_compl = 32 - shift_amt[4:0]
+ // if (shift_amt >=33):
+ // multicycle_result = (rs1 >> shift_amt_compl[4:0]) | (rs3 << shift_amt[4:0]);
+ // ^-- cycle 0 ----------------^ ^-- cycle 1 ------------^
+ // else if (shift_amt <= 31 && shift_amt > 0):
+ // multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]);
+ // ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^
+ // For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0.
+ // these cases need to be handled separately outside the shifting structure:
+ // else if (shift_amt == 32):
+ // multicycle_result = rs3
+ // else if (shift_amt == 0):
+ // multicycle_result = rs1.
+ //
+ // Single-Bit Instructions
+ // =======================
+ // Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i.
+ // The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the
+ // shifter structure. This is done by left-shifting the operand 32'h1 by the required amount.
+ // The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left.
+ // Further processing is taken care of by a separate structure.
+ //
+ // For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply
+ // shifting operand_a_i to the right by the required amount and returning bit [0] of the result.
+ //
+ // Bit-Field Place
+ // ===============
+ // The shifter structure is shared to compute bfp_mask << bfp_off.
+ // Shifter control
+ logic        shift_left;
+ logic        shift_ones;
+ logic        shift_arith;
+ logic        shift_funnel;
+ logic        shift_sbmode;
+ logic [5:0]  shift_amt;
+ logic [5:0]  shift_amt_compl; // complementary shift amount (32 - shift_amt)
+ // Shifter data
+ logic [31:0] shift_operand;
+ logic [32:0] shift_result_ext;
+ logic        unused_shift_result_ext;
+ logic [31:0] shift_result;
+ logic [31:0] shift_result_rev;
+ // zbf (bit-field place)
+ logic        bfp_op;
+ logic [4:0]  bfp_len;
+ logic [4:0]  bfp_off;
+ logic [31:0] bfp_mask;
+ logic [31:0] bfp_mask_rev;
+ logic [31:0] bfp_result;
+ // bfp shares the shifter structure to compute bfp_mask << bfp_off.
+ // All bfp logic is tied off when RV32B is RV32BNone.
+ assign bfp_op = (RV32B != RV32BNone) ? (operator_i == ALU_BFP) : 1'b0;
+ // Control fields taken from operand_b_i: length in [27:24], offset in [20:16]
+ assign bfp_off = operand_b_i[20:16];
+ assign bfp_len = {~|operand_b_i[27:24], operand_b_i[27:24]}; // len = 0 encodes for len = 16
+ // Mask with the low bfp_len bits set
+ assign bfp_mask = (RV32B != RV32BNone) ? ~(32'hffff_ffff << bfp_len) : '0;
+ // Bit-mirrored mask, fed to the right shifter so that the result is a left shift
+ for (genvar m = 0; m < 32; m++) begin : gen_rev_bfp_mask
+   assign bfp_mask_rev[m] = bfp_mask[31-m];
+ end
+ // Keep operand_a_i outside the shifted mask and insert the masked, shifted field
+ assign bfp_result = (RV32B != RV32BNone) ?
+     (operand_a_i & ~shift_result) | ((bfp_mask & operand_b_i) << bfp_off) : '0;
+ // shift_amt[5] is the word swap bit and is only considered for FSL/FSR.
+ // When set, the operations of the first and second cycle are exchanged.
+ assign shift_amt[5]    = operand_b_i[5] & shift_funnel;
+ assign shift_amt_compl = 32 - operand_b_i[4:0];
+ // Shift amount actually applied this cycle:
+ //   * bfp: the offset field of the control word
+ //   * otherwise operand_b_i[4:0] is used in the first cycle and its complement in
+ //     the second; the word swap bit of a funnel shift exchanges the two. Put
+ //     differently: the plain amount is used exactly when "first cycle" and
+ //     "word swap" differ.
+ always_comb begin
+   if (bfp_op) begin
+     shift_amt[4:0] = bfp_off;
+   end else begin
+     shift_amt[4:0] = (instr_first_cycle_i ^ (operand_b_i[5] && shift_funnel)) ?
+         operand_b_i[4:0] : shift_amt_compl[4:0];
+   end
+ end
+ // single-bit mode: the shifter generates a one-hot bit mask for sbset/sbclr/sbinv
+ assign shift_sbmode = (RV32B != RV32BNone) ?
+     (operator_i == ALU_SBSET) | (operator_i == ALU_SBCLR) | (operator_i == ALU_SBINV) : 1'b0;
+ // left shift if this is:
+ // * a standard left shift (slo, sll)
+ // * a rol in the first cycle
+ // * a ror in the second cycle
+ // * fsl: without word-swap bit: first cycle, else: second cycle
+ // * fsr: without word-swap bit: second cycle, else: first cycle
+ // * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext)
+ // * bfp: bfp_mask << bfp_off
+ //
+ // Fix: ALU_SLO was missing from the case below although the list above names slo as
+ // a left shift, so slo was executed as a right shift.
+ always_comb begin
+   unique case (operator_i)
+     ALU_SLL: shift_left = 1'b1;
+     ALU_SLO,
+     ALU_BFP: shift_left = (RV32B != RV32BNone) ? 1'b1 : 1'b0;
+     ALU_ROL: shift_left = (RV32B != RV32BNone) ? instr_first_cycle_i : 0;
+     ALU_ROR: shift_left = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 0;
+     ALU_FSL: shift_left = (RV32B != RV32BNone) ?
+       (shift_amt[5] ? ~instr_first_cycle_i : instr_first_cycle_i) : 1'b0;
+     ALU_FSR: shift_left = (RV32B != RV32BNone) ?
+       (shift_amt[5] ? instr_first_cycle_i : ~instr_first_cycle_i) : 1'b0;
+     default: shift_left = 1'b0;
+   endcase
+   // Single-bit mask generation always shifts left, whatever the case above chose
+   if (shift_sbmode) begin
+     shift_left = 1'b1;
+   end
+ end
+ // Arithmetic right shift: the operand's sign bit is shifted in
+ assign shift_arith = (operator_i == ALU_SRA);
+ // One-shifts (slo/sro): ones are shifted in
+ assign shift_ones =
+     (RV32B != RV32BNone) ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0;
+ // Funnel shifts (fsl/fsr)
+ assign shift_funnel =
+     (RV32B != RV32BNone) ? (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0;
+ // shifter structure.
+ // A single right shifter serves both directions: left shifts are computed as a right shift of
+ // the bit-reversed operand, and the result is bit-reversed again at the end.
+ always_comb begin
+ // select shifter input
+ // for bfp, sbmode and shift_left the corresponding bit-reversed input is chosen.
+ if (RV32B == RV32BNone) begin
+ shift_operand = shift_left ? operand_a_rev : operand_a_i;
+ end else begin
+ unique case (1'b1)
+ bfp_op: shift_operand = bfp_mask_rev;
+ // single-bit mode: a lone one in the MSB, which is the bit-reversed form of 32'h1
+ shift_sbmode: shift_operand = 32'h8000_0000;
+ default: shift_operand = shift_left ? operand_a_rev : operand_a_i;
+ endcase
+ end
+ // 33-bit arithmetic right shift. The extra MSB is 1 for shift-ones operations, or for an
+ // arithmetic shift of an operand whose bit 31 is set, so that ones are shifted in.
+ shift_result_ext =
+ $unsigned($signed({shift_ones | (shift_arith & shift_operand[31]), shift_operand}) >>>
+ shift_amt[4:0]);
+ shift_result = shift_result_ext[31:0];
+ // bit 32 only exists to feed the fill value; it is read here so it is not flagged as unused
+ unused_shift_result_ext = shift_result_ext[32];
+ for (int unsigned i=0; i<32; i++) begin
+ shift_result_rev[i] = shift_result[31-i];
+ end
+ // undo the input bit reversal for left shifts
+ shift_result = shift_left ? shift_result_rev : shift_result;
+ end
+ ///////////////////
+ // Bitwise Logic //
+ ///////////////////
+ logic bwlogic_or;
+ logic bwlogic_and;
+ logic [31:0] bwlogic_operand_b;
+ logic [31:0] bwlogic_or_result;
+ logic [31:0] bwlogic_and_result;
+ logic [31:0] bwlogic_xor_result;
+ logic [31:0] bwlogic_result;
+ logic bwlogic_op_b_negate;
+ // Operand-B negation select for the bitwise logic unit.
+ // xnor, orn and andn all operate on the inverted operand B; cmix needs the inverted operand
+ // in every cycle except the first. Without the xnor/orn labels those operations would
+ // silently compute plain xor/or.
+ always_comb begin
+ unique case (operator_i)
+ // Logic-with-negate OPs (RV32B Ops)
+ ALU_XNOR,
+ ALU_ORN,
+ ALU_ANDN: bwlogic_op_b_negate = (RV32B != RV32BNone) ? 1'b1 : 1'b0;
+ ALU_CMIX: bwlogic_op_b_negate = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 1'b0;
+ default: bwlogic_op_b_negate = 1'b0;
+ endcase
+ end
+ assign bwlogic_operand_b = bwlogic_op_b_negate ? operand_b_neg[32:1] : operand_b_i;
+ assign bwlogic_or_result = operand_a_i | bwlogic_operand_b;
+ assign bwlogic_and_result = operand_a_i & bwlogic_operand_b;
+ assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b;
+ assign bwlogic_or = (operator_i == ALU_OR) | (operator_i == ALU_ORN);
+ assign bwlogic_and = (operator_i == ALU_AND) | (operator_i == ALU_ANDN);
+ always_comb begin
+ unique case (1'b1)
+ bwlogic_or: bwlogic_result = bwlogic_or_result;
+ bwlogic_and: bwlogic_result = bwlogic_and_result;
+ default: bwlogic_result = bwlogic_xor_result;
+ endcase
+ end
+ logic [5:0] bitcnt_result;
+ logic [31:0] minmax_result;
+ logic [31:0] pack_result;
+ logic [31:0] sext_result;
+ logic [31:0] singlebit_result;
+ logic [31:0] rev_result;
+ logic [31:0] shuffle_result;
+ logic [31:0] butterfly_result;
+ logic [31:0] invbutterfly_result;
+ logic [31:0] clmul_result;
+ logic [31:0] multicycle_result;
+ if (RV32B != RV32BNone) begin : g_alu_rvb
+ /////////////////
+ // Bitcounting //
+ /////////////////
+ // The bit-counter structure computes the number of set bits in its operand. Partial results
+ // (from left to right) are needed to compute the control masks for computation of bext/bdep
+ // by the butterfly network, if implemented.
+ // For pcnt, clz and ctz, only the end result is used.
+ logic zbe_op;
+ logic bitcnt_ctz;
+ logic bitcnt_clz;
+ logic bitcnt_cz;
+ logic [31:0] bitcnt_bits;
+ logic [31:0] bitcnt_mask_op;
+ logic [31:0] bitcnt_bit_mask;
+ logic [ 5:0] bitcnt_partial [32];
+ logic [31:0] bitcnt_partial_lsb_d;
+ logic [31:0] bitcnt_partial_msb_d;
+ assign bitcnt_ctz = operator_i == ALU_CTZ;
+ assign bitcnt_clz = operator_i == ALU_CLZ;
+ assign bitcnt_cz = bitcnt_ctz | bitcnt_clz;
+ assign bitcnt_result = bitcnt_partial[31];
+ // Bit-mask generation for clz and ctz:
+ // The bit mask is generated by spreading the lowest-order set bit in the operand to all
+ // higher order bits. The resulting mask is inverted to cover the lowest order zeros. In order
+ // to create the bit mask for leading zeros, the input operand needs to be reversed.
+ assign bitcnt_mask_op = bitcnt_clz ? operand_a_rev : operand_a_i;
+ // Smear every set bit of bitcnt_mask_op towards the MSB using shift distances 1, 2, 4, 8
+ // and 16, then invert, so that ones remain only below the lowest-order set bit.
+ always_comb begin
+ bitcnt_bit_mask = bitcnt_mask_op;
+ for (int unsigned dist = 1; dist < 32; dist = dist << 1) begin
+ bitcnt_bit_mask = bitcnt_bit_mask | (bitcnt_bit_mask << dist);
+ end
+ bitcnt_bit_mask = ~bitcnt_bit_mask;
+ end
+ assign zbe_op = (operator_i == ALU_BEXT) | (operator_i == ALU_BDEP);
+ always_comb begin
+ case(1'b1)
+ zbe_op: bitcnt_bits = operand_b_i;
+ bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz
+ default: bitcnt_bits = operand_a_i; // pcnt
+ endcase
+ end
+ // The parallel prefix counter is of the structure of a Brent-Kung Adder. In the first
+ // log2(width) stages, the sum of the n preceding bit lines is computed for the bit lines at
+ // positions 2**n-1 (power-of-two positions) where n denotes the current stage.
+ // In stage n=log2(width), the count for position width-1 (the MSB) is finished.
+ // For the intermediate values, an inverse adder tree then computes the bit counts for the bit
+ // lines at positions
+ // m = 2**(n-1) + i*2**(n-2), where i = [1 ... width / 2**(n-1)-1] and n = [log2(width) ... 2].
+ // Thus, at every subsequent stage the result of two previously unconnected sub-trees is
+ // summed, starting at the node summing bits [width/2-1 : 0] and [3*width/4-1: width/2]
+ // and moving to iteratively sum up all the sub-trees.
+ // The inverse adder tree thus features log2(width) - 1 stages; the first of these stages is a
+ // single addition at position 3*width/4 - 1. It does not interfere with the last
+ // stage of the primary adder tree. These stages can thus be folded together, resulting in a
+ // total of 2*log2(width)-2 stages.
+ // For more details refer to R. Brent, H. T. Kung, "A Regular Layout for Parallel Adders",
+ // (1982).
+ // For a bitline at position p, only bits
+ // bitcnt_partial[max(i, such that p % log2(i) == 0)-1 : 0] are needed for generation of the
+ // butterfly network control signals. The adders in the intermediate value adder tree thus need
+ // not be full 5-bit adders. We leave the optimization to the synthesis tools.
+ //
+ // Consider the following 8-bit example for illustration.
+ //
+ // let bitcnt_bits = 8'babcdefgh.
+ //
+ // a b c d e f g h
+ // | /: | /: | /: | /:
+ // |/ : |/ : |/ : |/ :
+ // stage 1: + : + : + : + :
+ // | : /: : | : /: :
+ // |,--+ : : |,--+ : :
+ // stage 2: + : : : + : : :
+ // | : | : /: : : :
+ // |,-----,--+ : : : : ^-primary adder tree
+ // stage 3: + : + : : : : : -------------------------
+ // : | /| /| /| /| /| : ,-intermediate adder tree
+ // : |/ |/ |/ |/ |/ : :
+ // stage 4 : + + + + + : :
+ // : : : : : : : :
+ // bitcnt_partial[i] 7 6 5 4 3 2 1 0
+ // Parallel prefix bit counter over bitcnt_bits.
+ // Once all stages have run, bitcnt_partial[i] holds the number of set bits in
+ // bitcnt_bits[i:0]; bitcnt_partial[31] is therefore the total count.
+ // The stages depend on each other's results, so their order must not be changed.
+ always_comb begin
+ bitcnt_partial = '{default: '0};
+ // stage 1: pairwise sums at the odd positions
+ for (int unsigned i=1; i<32; i+=2) begin
+ bitcnt_partial[i] = {5'h0, bitcnt_bits[i]} + {5'h0, bitcnt_bits[i-1]};
+ end
+ // stage 2: sums over groups of 4 bits
+ for (int unsigned i=3; i<32; i+=4) begin
+ bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
+ end
+ // stage 3: sums over groups of 8 bits
+ for (int unsigned i=7; i<32; i+=8) begin
+ bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
+ end
+ // stage 4: sums over groups of 16 bits
+ for (int unsigned i=15; i <32; i+=16) begin
+ bitcnt_partial[i] = bitcnt_partial[i-8] + bitcnt_partial[i];
+ end
+ // stage 5: total over all 32 bits
+ bitcnt_partial[31] = bitcnt_partial[15] + bitcnt_partial[31];
+ // ^- primary adder tree
+ // -------------------------------
+ // ,-intermediate value adder tree
+ bitcnt_partial[23] = bitcnt_partial[15] + bitcnt_partial[23];
+ // stage 6: complete the prefix sums at positions 11, 19 and 27
+ for (int unsigned i=11; i<32; i+=8) begin
+ bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
+ end
+ // stage 7: complete the prefix sums at positions 5, 9, 13, ..., 29
+ for (int unsigned i=5; i<32; i+=4) begin
+ bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
+ end
+ // stage 8: even positions add their own bit to the finished count of the position below
+ bitcnt_partial[0] = {5'h0, bitcnt_bits[0]};
+ for (int unsigned i=2; i<32; i+=2) begin
+ bitcnt_partial[i] = bitcnt_partial[i-1] + {5'h0, bitcnt_bits[i]};
+ end
+ end
+ ///////////////
+ // Min / Max //
+ ///////////////
+ assign minmax_result = cmp_result ? operand_a_i : operand_b_i;
+ //////////
+ // Pack //
+ //////////
+ logic packu;
+ logic packh;
+ assign packh = (operator_i == ALU_PACKH);
+ assign packu = (operator_i == ALU_PACKU);
+ // Concatenate matching slices of operand B (upper part) and operand A (lower part):
+ // packu joins the upper halfwords, packh joins the low bytes zero-extended to 32 bits, and
+ // every other operator joins the lower halfwords.
+ always_comb begin
+ if (packu) begin
+ pack_result = {operand_b_i[31:16], operand_a_i[31:16]};
+ end else if (packh) begin
+ pack_result = {16'h0, operand_b_i[7:0], operand_a_i[7:0]};
+ end else begin
+ pack_result = {operand_b_i[15:0], operand_a_i[15:0]};
+ end
+ end
+ //////////
+ // Sext //
+ //////////
+ assign sext_result = (operator_i == ALU_SEXTB) ?
+ { {24{operand_a_i[7]}}, operand_a_i[7:0]} : { {16{operand_a_i[15]}}, operand_a_i[15:0]};
+ /////////////////////////////
+ // Single-bit Instructions //
+ /////////////////////////////
+ // Single-bit result select: combines operand A with the shifter output.
+ // NOTE(review): for sbset/sbclr/sbinv shift_result is presumably a one-hot mask (the shifter
+ // is fed 32'h8000_0000 in single-bit mode) -- confirm against the shifter.
+ always_comb begin
+ unique case (operator_i)
+ ALU_SBSET: singlebit_result = operand_a_i | shift_result; // set the masked bit(s)
+ ALU_SBCLR: singlebit_result = operand_a_i & ~shift_result; // clear the masked bit(s)
+ ALU_SBINV: singlebit_result = operand_a_i ^ shift_result; // toggle the masked bit(s)
+ default: singlebit_result = {31'h0, shift_result[0]}; // ALU_SBEXT
+ endcase
+ end
+ ////////////////////////////////////
+ // General Reverse and Or-combine //
+ ////////////////////////////////////
+ // Only a subset of the General reverse and or-combine instructions are implemented in the
+ // balanced version of the B extension. Currently rev, rev8 and orc.b are supported in the
+ // base extension.
+ logic [4:0] zbp_shift_amt;
+ logic gorc_op;
+ assign gorc_op = (operator_i == ALU_GORC);
+ assign zbp_shift_amt[2:0] = (RV32B == RV32BFull) ? shift_amt[2:0] : {3{&shift_amt[2:0]}};
+ assign zbp_shift_amt[4:3] = (RV32B == RV32BFull) ? shift_amt[4:3] : {2{&shift_amt[4:3]}};
+ // General reverse / or-combine network over operand_a_i.
+ // Each stage enabled by zbp_shift_amt swaps neighbouring groups of 1, 2, 4, 8 or 16 bits.
+ // When gorc_op is set, the swapped value is OR-ed with the stage input instead of
+ // replacing it.
+ always_comb begin
+ rev_result = operand_a_i;
+ // stage 0: swap adjacent bits
+ if (zbp_shift_amt[0]) begin
+ rev_result = (gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h5555_5555) << 1) |
+ ((rev_result & 32'haaaa_aaaa) >> 1);
+ end
+ // stage 1: swap adjacent bit pairs
+ if (zbp_shift_amt[1]) begin
+ rev_result = (gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h3333_3333) << 2) |
+ ((rev_result & 32'hcccc_cccc) >> 2);
+ end
+ // stage 2: swap adjacent nibbles
+ if (zbp_shift_amt[2]) begin
+ rev_result = (gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h0f0f_0f0f) << 4) |
+ ((rev_result & 32'hf0f0_f0f0) >> 4);
+ end
+ // stage 3: swap adjacent bytes; the or-combine term is only kept for RV32BFull
+ if (zbp_shift_amt[3]) begin
+ rev_result = (gorc_op & (RV32B == RV32BFull) ? rev_result : 32'h0) |
+ ((rev_result & 32'h00ff_00ff) << 8) |
+ ((rev_result & 32'hff00_ff00) >> 8);
+ end
+ // stage 4: swap the two halfwords; the or-combine term is only kept for RV32BFull
+ if (zbp_shift_amt[4]) begin
+ rev_result = (gorc_op & (RV32B == RV32BFull) ? rev_result : 32'h0) |
+ ((rev_result & 32'h0000_ffff) << 16) |
+ ((rev_result & 32'hffff_0000) >> 16);
+ end
+ end
+ logic crc_hmode;
+ logic crc_bmode;
+ logic [31:0] clmul_result_rev;
+ if (RV32B == RV32BFull) begin : gen_alu_rvb_full
+ /////////////////////////
+ // Shuffle / Unshuffle //
+ /////////////////////////
+ localparam logic [31:0] SHUFFLE_MASK_L [4] =
+ '{32'h00ff_0000, 32'h0f00_0f00, 32'h3030_3030, 32'h4444_4444};
+ localparam logic [31:0] SHUFFLE_MASK_R [4] =
+ '{32'h0000_ff00, 32'h00f0_00f0, 32'h0c0c_0c0c, 32'h2222_2222};
+ localparam logic [31:0] FLIP_MASK_L [4] =
+ '{32'h2200_1100, 32'h0044_0000, 32'h4411_0000, 32'h1100_0000};
+ localparam logic [31:0] FLIP_MASK_R [4] =
+ '{32'h0088_0044, 32'h0000_2200, 32'h0000_8822, 32'h0000_0088};
+ logic [31:0] SHUFFLE_MASK_NOT [4];
+ // Bits that stay in place in shuffle stage i: everything not covered by the left/right move
+ // masks. Every shuffle stage reads this array, so it must be driven here.
+ for(genvar i = 0; i < 4; i++) begin : gen_shuffle_mask_not
+ assign SHUFFLE_MASK_NOT[i] = ~(SHUFFLE_MASK_L[i] | SHUFFLE_MASK_R[i]);
+ end
+ logic shuffle_flip;
+ assign shuffle_flip = operator_i == ALU_UNSHFL;
+ logic [3:0] shuffle_mode;
+ always_comb begin
+ shuffle_result = operand_a_i;
+ if (shuffle_flip) begin
+ shuffle_mode[3] = shift_amt[0];
+ shuffle_mode[2] = shift_amt[1];
+ shuffle_mode[1] = shift_amt[2];
+ shuffle_mode[0] = shift_amt[3];
+ end else begin
+ shuffle_mode = shift_amt[3:0];
+ end
+ if (shuffle_flip) begin
+ shuffle_result = (shuffle_result & 32'h8822_4411) |
+ ((shuffle_result << 6) & FLIP_MASK_L[0]) |
+ ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
+ ((shuffle_result << 9) & FLIP_MASK_L[1]) |
+ ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
+ ((shuffle_result << 15) & FLIP_MASK_L[2]) |
+ ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
+ ((shuffle_result << 21) & FLIP_MASK_L[3]) |
+ ((shuffle_result >> 21) & FLIP_MASK_R[3]);
+ end
+ if (shuffle_mode[3]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[0]) |
+ (((shuffle_result << 8) & SHUFFLE_MASK_L[0]) |
+ ((shuffle_result >> 8) & SHUFFLE_MASK_R[0]));
+ end
+ if (shuffle_mode[2]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[1]) |
+ (((shuffle_result << 4) & SHUFFLE_MASK_L[1]) |
+ ((shuffle_result >> 4) & SHUFFLE_MASK_R[1]));
+ end
+ if (shuffle_mode[1]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[2]) |
+ (((shuffle_result << 2) & SHUFFLE_MASK_L[2]) |
+ ((shuffle_result >> 2) & SHUFFLE_MASK_R[2]));
+ end
+ if (shuffle_mode[0]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[3]) |
+ (((shuffle_result << 1) & SHUFFLE_MASK_L[3]) |
+ ((shuffle_result >> 1) & SHUFFLE_MASK_R[3]));
+ end
+ if (shuffle_flip) begin
+ shuffle_result = (shuffle_result & 32'h8822_4411) |
+ ((shuffle_result << 6) & FLIP_MASK_L[0]) |
+ ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
+ ((shuffle_result << 9) & FLIP_MASK_L[1]) |
+ ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
+ ((shuffle_result << 15) & FLIP_MASK_L[2]) |
+ ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
+ ((shuffle_result << 21) & FLIP_MASK_L[3]) |
+ ((shuffle_result >> 21) & FLIP_MASK_R[3]);
+ end
+ end
+ ///////////////
+ // Butterfly //
+ ///////////////
+ // The butterfly / inverse butterfly network executing bext/bdep (zbe) instructions.
+ // For bdep, the control bits mask of a local left region is generated by
+ // the inverse of a n-bit left rotate and complement upon wrap (LROTC) operation by the number
+ // of ones in the deposit bitmask to the right of the segment. n hereby denotes the width
+ // of the according segment. The bitmask for a pertaining local right region is equal to the
+ // corresponding local left region. Bext uses an analogue inverse process.
+ // Consider the following 8-bit example. For details, see Hilewitz et al. "Fast Bit Gather,
+ // Bit Scatter and Bit Permutation Instructions for Commodity Microprocessors", (2008).
+ //
+ // The bext/bdep instructions are completed in 2 cycles. In the first cycle, the control
+ // bitmask is prepared by executing the parallel prefix bit count. In the second cycle,
+ // the bit swapping is executed according to the control masks.
+ // 8-bit example: (Hilewitz et al.)
+ // Consider the instruction bdep operand_a_i deposit_mask
+ // Let operand_a_i = 8'babcd_efgh
+ // deposit_mask = 8'b1010_1101
+ //
+ // control bitmask for stage 1:
+ // - number of ones in the right half of the deposit bitmask: 3
+ // - width of the segment: 4
+ // - control bitmask = ~LROTC(4'b0, 3)[3:0] = 4'b1000
+ //
+ // control bitmask: c3 c2 c1 c0 c3 c2 c1 c0
+ // 1 0 0 0 1 0 0 0
+ // <- L -----> <- R ----->
+ // operand_a_i a b c d e f g h
+ // :\ | | | /: | | |
+ // : +|---|--|-+ : | | |
+ // :/ | | | \: | | |
+ // stage 1 e b c d a f g h
+ // <L-> <R-> <L-> <R->
+ // control bitmask: c3 c2 c3 c2 c1 c0 c1 c0
+ // 1 1 1 1 1 0 1 0
+ // :\ :\ /: /: :\ | /: |
+ // : +:-+-:+ : : +|-+ : |
+ // :/ :/ \: \: :/ | \: |
+ // stage 2 c d e b g f a h
+ // L R L R L R L R
+ // control bitmask: c3 c3 c2 c2 c1 c1 c0 c0
+ // 1 1 0 0 1 1 0 0
+ // :\/: | | :\/: | |
+ // : : | | : : | |
+ // :/\: | | :/\: | |
+ // stage 3 d c e b f g a h
+ // & deposit bitmask: 1 0 1 0 1 1 0 1
+ // result: d 0 e 0 f g 0 h
+ logic [ 5:0] bitcnt_partial_q [32];
+ // first cycle
+ // Store partial bitcnts
+ for (genvar i=0; i<32; i++) begin : gen_bitcnt_reg_in_lsb
+ assign bitcnt_partial_lsb_d[i] = bitcnt_partial[i][0];
+ end
+ for (genvar i=0; i<16; i++) begin : gen_bitcnt_reg_in_b1
+ assign bitcnt_partial_msb_d[i] = bitcnt_partial[2*i+1][1];
+ end
+ for (genvar i=0; i<8; i++) begin : gen_bitcnt_reg_in_b2
+ assign bitcnt_partial_msb_d[16+i] = bitcnt_partial[4*i+3][2];
+ end
+ for (genvar i=0; i<4; i++) begin : gen_bitcnt_reg_in_b3
+ assign bitcnt_partial_msb_d[24+i] = bitcnt_partial[8*i+7][3];
+ end
+ for (genvar i=0; i<2; i++) begin : gen_bitcnt_reg_in_b4
+ assign bitcnt_partial_msb_d[28+i] = bitcnt_partial[16*i+15][4];
+ end
+ assign bitcnt_partial_msb_d[30] = bitcnt_partial[31][5];
+ assign bitcnt_partial_msb_d[31] = 1'b0; // unused
+ // Second cycle
+ // Load partial bitcnts
+ always_comb begin
+ bitcnt_partial_q = '{default: '0};
+ for (int unsigned i=0; i<32; i++) begin : gen_bitcnt_reg_out_lsb
+ bitcnt_partial_q[i][0] = imd_val_q_i[0][i];
+ end
+ for (int unsigned i=0; i<16; i++) begin : gen_bitcnt_reg_out_b1
+ bitcnt_partial_q[2*i+1][1] = imd_val_q_i[1][i];
+ end
+ for (int unsigned i=0; i<8; i++) begin : gen_bitcnt_reg_out_b2
+ bitcnt_partial_q[4*i+3][2] = imd_val_q_i[1][16+i];
+ end
+ for (int unsigned i=0; i<4; i++) begin : gen_bitcnt_reg_out_b3
+ bitcnt_partial_q[8*i+7][3] = imd_val_q_i[1][24+i];
+ end
+ for (int unsigned i=0; i<2; i++) begin : gen_bitcnt_reg_out_b4
+ bitcnt_partial_q[16*i+15][4] = imd_val_q_i[1][28+i];
+ end
+ bitcnt_partial_q[31][5] = imd_val_q_i[1][30];
+ end
+ logic [31:0] butterfly_mask_l[5];
+ logic [31:0] butterfly_mask_r[5];
+ logic [31:0] butterfly_mask_not[5];
+ logic [31:0] lrotc_stage [5]; // left rotate and complement upon wrap
+ // number of bits in local r = 32 / 2**(stage + 1) = 16/2**stage
+ `define _N(stg) (16 >> stg)
+ // bext / bdep control bit generation
+ for (genvar stg=0; stg<5; stg++) begin : gen_butterfly_ctrl_stage
+ // number of segs: 2** stg
+ for (genvar seg=0; seg<2**stg; seg++) begin : gen_butterfly_ctrl
+ assign lrotc_stage[stg][2*`_N(stg)*(seg+1)-1 : 2*`_N(stg)*seg] =
+ {{`_N(stg){1'b0}},{`_N(stg){1'b1}}} <<
+ bitcnt_partial_q[`_N(stg)*(2*seg+1)-1][$clog2(`_N(stg)):0];
+ assign butterfly_mask_l[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]
+ = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
+ assign butterfly_mask_r[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)]
+ = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
+ assign butterfly_mask_l[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] = '0;
+ assign butterfly_mask_r[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] = '0;
+ end
+ end
+ `undef _N
+ for (genvar stg=0; stg<5; stg++) begin : gen_butterfly_not
+ assign butterfly_mask_not[stg] =
+ ~(butterfly_mask_l[stg] | butterfly_mask_r[stg]);
+ end
+ // Butterfly network: five conditional swap stages over operand_a_i with distances
+ // 16, 8, 4, 2 and 1. In each stage the bits selected by butterfly_mask_l move right, the bits
+ // selected by butterfly_mask_r move left, and the bits in butterfly_mask_not stay in place.
+ // The final step masks the permuted value with operand_b_i.
+ always_comb begin
+ butterfly_result = operand_a_i;
+ butterfly_result = butterfly_result & butterfly_mask_not[0] |
+ ((butterfly_result & butterfly_mask_l[0]) >> 16)|
+ ((butterfly_result & butterfly_mask_r[0]) << 16);
+ butterfly_result = butterfly_result & butterfly_mask_not[1] |
+ ((butterfly_result & butterfly_mask_l[1]) >> 8)|
+ ((butterfly_result & butterfly_mask_r[1]) << 8);
+ butterfly_result = butterfly_result & butterfly_mask_not[2] |
+ ((butterfly_result & butterfly_mask_l[2]) >> 4)|
+ ((butterfly_result & butterfly_mask_r[2]) << 4);
+ butterfly_result = butterfly_result & butterfly_mask_not[3] |
+ ((butterfly_result & butterfly_mask_l[3]) >> 2)|
+ ((butterfly_result & butterfly_mask_r[3]) << 2);
+ butterfly_result = butterfly_result & butterfly_mask_not[4] |
+ ((butterfly_result & butterfly_mask_l[4]) >> 1)|
+ ((butterfly_result & butterfly_mask_r[4]) << 1);
+ butterfly_result = butterfly_result & operand_b_i;
+ end
+ // Inverse butterfly network: operand_a_i is first masked with operand_b_i, then the same
+ // swap stages as the butterfly network are applied in the opposite order, with distances
+ // 1, 2, 4, 8 and 16.
+ always_comb begin
+ invbutterfly_result = operand_a_i & operand_b_i;
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[4] |
+ ((invbutterfly_result & butterfly_mask_l[4]) >> 1)|
+ ((invbutterfly_result & butterfly_mask_r[4]) << 1);
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[3] |
+ ((invbutterfly_result & butterfly_mask_l[3]) >> 2)|
+ ((invbutterfly_result & butterfly_mask_r[3]) << 2);
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[2] |
+ ((invbutterfly_result & butterfly_mask_l[2]) >> 4)|
+ ((invbutterfly_result & butterfly_mask_r[2]) << 4);
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[1] |
+ ((invbutterfly_result & butterfly_mask_l[1]) >> 8)|
+ ((invbutterfly_result & butterfly_mask_r[1]) << 8);
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[0] |
+ ((invbutterfly_result & butterfly_mask_l[0]) >> 16)|
+ ((invbutterfly_result & butterfly_mask_r[0]) << 16);
+ end
+ ///////////////////////////////////////////////////
+ // Carry-less Multiply + Cyclic Redundancy Check //
+ ///////////////////////////////////////////////////
+ // Carry-less multiplication can be understood as multiplication based on
+ // the addition interpreted as the bit-wise xor operation.
+ //
+ // Example: 1101 X 1011 = 1111111:
+ //
+ // 1011 X 1101
+ // -----------
+ // 1101
+ // xor 1101
+ // ---------
+ // 10111
+ // xor 0000
+ // ----------
+ // 010111
+ // xor 1101
+ // -----------
+ // 1111111
+ //
+ // Architectural details:
+ // A 32 x 32-bit array
+ // [ operand_b[i] ? (operand_a << i) : '0 for i in 0 ... 31 ]
+ // is generated. The entries of the array are pairwise 'xor-ed'
+ // together in a 5-stage binary tree.
+ //
+ //
+ // Cyclic Redundancy Check:
+ //
+ // CRC-32 (CRC-32/ISO-HDLC) and CRC-32C (CRC-32/ISCSI) are directly implemented. For
+ // documentation of the crc configuration (crc-polynomials, initialization, reflection, etc.)
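+ // see the CRC RevEng catalogue of parametrised CRC algorithms.
+ // A useful guide to crc arithmetic and algorithms is Ross N. Williams,
+ // "A Painless Guide to CRC Error Detection Algorithms".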
+ //
+ // The CRC operation solves the following equation using binary polynomial arithmetic:
+ //
+ // rev(rd)(x) = rev(rs1)(x) * x**n mod {1, P}(x)
+ //
+ // where P denotes lower 32 bits of the corresponding CRC polynomial, rev(a) the bit reversal
+ // of a, n = 8,16, or 32 for .b, .h, .w -variants. {a, b} denotes bit concatenation.
+ //
+ // Using Barrett reduction, one can show that
+ //
+ // M(x) mod P(x) = R(x) =
+ // (M(x) * x**n) & {deg(P(x)'{1'b1}}) ^ (M(x) x**-(deg(P(x) - n)) cx mu(x) cx P(x),
+ //
+ // Where mu(x) = polydiv(x**64, {1,P}) & 0xffffffff. Here, 'cx' refers to carry-less
+ // multiplication. Substituting rev(rd)(x) for R(x) and rev(rs1)(x) for M(x) and solving for
+ // rd(x) with P(x) a crc32 polynomial (deg(P(x)) = 32), we get
+ //
+ // rd = rev( (rev(rs1) << n) ^ ((rev(rs1) >> (32-n)) cx mu cx P)
+ // = (rs1 >> n) ^ rev(rev( (rs1 << (32-n)) cx rev(mu)) cx P)
+ // ^-- cycle 0--------------------^
+ // ^- cycle 1 -------------------------------------------^
+ //
+ // In the last step we used the fact that carry-less multiplication is bit-order agnostic:
+ // rev(a cx b) = rev(a) cx rev(b).
+ logic clmul_rmode;
+ logic clmul_hmode;
+ logic [31:0] clmul_op_a;
+ logic [31:0] clmul_op_b;
+ logic [31:0] operand_b_rev;
+ logic [31:0] clmul_and_stage[32];
+ logic [31:0] clmul_xor_stage1[16];
+ logic [31:0] clmul_xor_stage2[8];
+ logic [31:0] clmul_xor_stage3[4];
+ logic [31:0] clmul_xor_stage4[2];
+ logic [31:0] clmul_result_raw;
+ for (genvar i=0; i<32; i++) begin: gen_rev_operand_b
+ assign operand_b_rev[i] = operand_b_i[31-i];
+ end
+ assign clmul_rmode = operator_i == ALU_CLMULR;
+ assign clmul_hmode = operator_i == ALU_CLMULH;
+ // CRC
+ localparam logic [31:0] CRC32_POLYNOMIAL = 32'h04c1_1db7;
+ localparam logic [31:0] CRC32_MU_REV = 32'hf701_1641;
+ localparam logic [31:0] CRC32C_POLYNOMIAL = 32'h1edc_6f41;
+ localparam logic [31:0] CRC32C_MU_REV = 32'hdea7_13f1;
+ logic crc_op;
+ logic crc_cpoly;
+ logic [31:0] crc_operand;
+ logic [31:0] crc_poly;
+ logic [31:0] crc_mu_rev;
+ assign crc_op = (operator_i == ALU_CRC32C_W) | (operator_i == ALU_CRC32_W) |
+ (operator_i == ALU_CRC32C_H) | (operator_i == ALU_CRC32_H) |
+ (operator_i == ALU_CRC32C_B) | (operator_i == ALU_CRC32_B);
+ assign crc_cpoly = (operator_i == ALU_CRC32C_W) |
+ (operator_i == ALU_CRC32C_H) |
+ (operator_i == ALU_CRC32C_B);
+ assign crc_hmode = (operator_i == ALU_CRC32_H) | (operator_i == ALU_CRC32C_H);
+ assign crc_bmode = (operator_i == ALU_CRC32_B) | (operator_i == ALU_CRC32C_B);
+ assign crc_poly = crc_cpoly ? CRC32C_POLYNOMIAL : CRC32_POLYNOMIAL;
+ assign crc_mu_rev = crc_cpoly ? CRC32C_MU_REV : CRC32_MU_REV;
+ always_comb begin
+ unique case(1'b1)
+ crc_bmode: crc_operand = {operand_a_i[7:0], 24'h0};
+ crc_hmode: crc_operand = {operand_a_i[15:0], 16'h0};
+ default: crc_operand = operand_a_i;
+ endcase
+ end
+ // Select clmul input
+ // CRC operations: in the first cycle crc_operand is multiplied by crc_mu_rev; otherwise the
+ // stored intermediate value imd_val_q_i[0] is multiplied by crc_poly.
+ // clmulr / clmulh feed the multiplier with the bit-reversed operands; every other operator
+ // uses the operands unchanged.
+ always_comb begin
+ if (crc_op) begin
+ clmul_op_a = instr_first_cycle_i ? crc_operand : imd_val_q_i[0];
+ clmul_op_b = instr_first_cycle_i ? crc_mu_rev : crc_poly;
+ end else begin
+ clmul_op_a = clmul_rmode | clmul_hmode ? operand_a_rev : operand_a_i;
+ clmul_op_b = clmul_rmode | clmul_hmode ? operand_b_rev : operand_b_i;
+ end
+ end
+ for (genvar i=0; i<32; i++) begin : gen_clmul_and_op
+ assign clmul_and_stage[i] = clmul_op_b[i] ? clmul_op_a << i : '0;
+ end
+ for (genvar i=0; i<16; i++) begin : gen_clmul_xor_op_l1
+ assign clmul_xor_stage1[i] = clmul_and_stage[2*i] ^ clmul_and_stage[2*i+1];
+ end
+ for (genvar i=0; i<8; i++) begin : gen_clmul_xor_op_l2
+ assign clmul_xor_stage2[i] = clmul_xor_stage1[2*i] ^ clmul_xor_stage1[2*i+1];
+ end
+ for (genvar i=0; i<4; i++) begin : gen_clmul_xor_op_l3
+ assign clmul_xor_stage3[i] = clmul_xor_stage2[2*i] ^ clmul_xor_stage2[2*i+1];
+ end
+ for (genvar i=0; i<2; i++) begin : gen_clmul_xor_op_l4
+ assign clmul_xor_stage4[i] = clmul_xor_stage3[2*i] ^ clmul_xor_stage3[2*i+1];
+ end
+ assign clmul_result_raw = clmul_xor_stage4[0] ^ clmul_xor_stage4[1];
+ for (genvar i=0; i<32; i++) begin : gen_rev_clmul_result
+ assign clmul_result_rev[i] = clmul_result_raw[31-i];
+ end
+ // clmulr_result = rev(clmul(rev(a), rev(b)))
+ // clmulh_result = clmulr_result >> 1
+ always_comb begin
+ case(1'b1)
+ clmul_rmode: clmul_result = clmul_result_rev;
+ clmul_hmode: clmul_result = {1'b0, clmul_result_rev[31:1]};
+ default: clmul_result = clmul_result_raw;
+ endcase
+ end
+ end else begin : gen_alu_rvb_notfull
+ logic [31:0] unused_imd_val_q_1;
+ assign unused_imd_val_q_1 = imd_val_q_i[1];
+ assign shuffle_result = '0;
+ assign butterfly_result = '0;
+ assign invbutterfly_result = '0;
+ assign clmul_result = '0;
+ // support signals
+ assign bitcnt_partial_lsb_d = '0;
+ assign bitcnt_partial_msb_d = '0;
+ assign clmul_result_rev = '0;
+ assign crc_bmode = '0;
+ assign crc_hmode = '0;
+ end
+ //////////////////////////////////////
+ // Multicycle Bitmanip Instructions //
+ //////////////////////////////////////
+ // Ternary instructions + Shift Rotations + Bit extract/deposit + CRC
+ // For ternary instructions (zbt), operand_a_i is tied to rs1 in the first cycle and rs3 in the
+ // second cycle. operand_b_i is always tied to rs2.
+ // Result and intermediate-value register control for the two-cycle bitmanip instructions.
+ // For every operator this block drives:
+ // * multicycle_result - the value forwarded to the result mux
+ // * imd_val_d_o - the data offered to the intermediate value registers
+ // * imd_val_we_o - the write enables, asserted only while instr_first_cycle_i is set
+ // Every case label below is required. The rotate case tests shift_amt[5], which can only be
+ // set for fsl/fsr. The crc case distinguishes byte, halfword and word mode. bext/bdep need
+ // their own label for the butterfly results.
+ always_comb begin
+ unique case (operator_i)
+ ALU_CMOV: begin
+ multicycle_result = (operand_b_i == 32'h0) ? operand_a_i : imd_val_q_i[0];
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end
+ ALU_CMIX: begin
+ multicycle_result = imd_val_q_i[0] | bwlogic_and_result;
+ imd_val_d_o = '{bwlogic_and_result, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end
+ ALU_FSR, ALU_FSL,
+ ALU_ROL, ALU_ROR: begin
+ // A zero shift amount yields no partial result to combine: pick one input directly.
+ if (shift_amt[4:0] == 5'h0) begin
+ multicycle_result = shift_amt[5] ? operand_a_i : imd_val_q_i[0];
+ end else begin
+ multicycle_result = imd_val_q_i[0] | shift_result;
+ end
+ imd_val_d_o = '{shift_result, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end
+ ALU_CRC32_W, ALU_CRC32C_W,
+ ALU_CRC32_H, ALU_CRC32C_H,
+ ALU_CRC32_B, ALU_CRC32C_B: begin
+ if (RV32B == RV32BFull) begin
+ unique case(1'b1)
+ crc_bmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 8);
+ crc_hmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 16);
+ default: multicycle_result = clmul_result_rev;
+ endcase
+ imd_val_d_o = '{clmul_result_rev, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end else begin
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ imd_val_we_o = 2'b00;
+ multicycle_result = '0;
+ end
+ end
+ ALU_BEXT, ALU_BDEP: begin
+ if (RV32B == RV32BFull) begin
+ multicycle_result = (operator_i == ALU_BDEP) ? butterfly_result : invbutterfly_result;
+ // both intermediate registers are written: the partial bit counts need 64 bits
+ imd_val_d_o = '{bitcnt_partial_lsb_d, bitcnt_partial_msb_d};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b11;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end else begin
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ imd_val_we_o = 2'b00;
+ multicycle_result = '0;
+ end
+ end
+ default: begin
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ imd_val_we_o = 2'b00;
+ multicycle_result = '0;
+ end
+ endcase
+ end
+ end else begin : g_no_alu_rvb
+ logic [31:0] unused_imd_val_q[2];
+ assign unused_imd_val_q = imd_val_q_i;
+ logic [31:0] unused_butterfly_result;
+ assign unused_butterfly_result = butterfly_result;
+ logic [31:0] unused_invbutterfly_result;
+ assign unused_invbutterfly_result = invbutterfly_result;
+ // RV32B result signals
+ assign bitcnt_result = '0;
+ assign minmax_result = '0;
+ assign pack_result = '0;
+ assign sext_result = '0;
+ assign singlebit_result = '0;
+ assign rev_result = '0;
+ assign shuffle_result = '0;
+ assign butterfly_result = '0;
+ assign invbutterfly_result = '0;
+ assign clmul_result = '0;
+ assign multicycle_result = '0;
+ // RV32B support signals
+ assign imd_val_d_o = '{default: '0};
+ assign imd_val_we_o = '{default: '0};
+ end
+ ////////////////
+ // Result mux //
+ ////////////////
+ // Final result select: routes the output of the functional unit that implements
+ // operator_i to result_o. Operators without a label fall through to the default and return 0,
+ // so every operator computed by a unit above must be listed here.
+ // NOTE(review): the labels follow the alu_op_e enumeration of the upstream Ibex ALU --
+ // confirm them against brq_pkg.
+ always_comb begin
+ result_o = '0;
+ unique case (operator_i)
+ // Bitwise Logic Operations (negate: RV32B)
+ ALU_XOR, ALU_XNOR,
+ ALU_OR, ALU_ORN,
+ ALU_AND, ALU_ANDN: result_o = bwlogic_result;
+ // Adder Operations
+ ALU_ADD, ALU_SUB: result_o = adder_result;
+ // Shift Operations
+ ALU_SLL, ALU_SRL,
+ ALU_SRA,
+ // RV32B
+ ALU_SLO, ALU_SRO: result_o = shift_result;
+ // Shuffle Operations (RV32B)
+ ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;
+ // Comparison Operations
+ ALU_EQ, ALU_NE,
+ ALU_GE, ALU_GEU,
+ ALU_LT, ALU_LTU,
+ ALU_SLT, ALU_SLTU: result_o = {31'h0,cmp_result};
+ // MinMax Operations (RV32B)
+ ALU_MIN, ALU_MAX,
+ ALU_MINU, ALU_MAXU: result_o = minmax_result;
+ // Bitcount Operations (RV32B)
+ ALU_CLZ, ALU_CTZ,
+ ALU_PCNT: result_o = {26'h0, bitcnt_result};
+ // Pack Operations (RV32B)
+ ALU_PACK, ALU_PACKH,
+ ALU_PACKU: result_o = pack_result;
+ // Sign-Extend (RV32B)
+ ALU_SEXTB, ALU_SEXTH: result_o = sext_result;
+ // Ternary Bitmanip Operations (RV32B)
+ ALU_CMIX, ALU_CMOV,
+ ALU_FSL, ALU_FSR,
+ // Rotate Shift (RV32B)
+ ALU_ROL, ALU_ROR,
+ // Cyclic Redundancy Checks (RV32B)
+ ALU_CRC32_W, ALU_CRC32C_W,
+ ALU_CRC32_H, ALU_CRC32C_H,
+ ALU_CRC32_B, ALU_CRC32C_B,
+ // Bit Extract / Deposit (RV32B)
+ ALU_BEXT, ALU_BDEP: result_o = multicycle_result;
+ // Single-Bit Bitmanip Operations (RV32B)
+ ALU_SBSET, ALU_SBCLR,
+ ALU_SBINV, ALU_SBEXT: result_o = singlebit_result;
+ // General Reverse / Or-combine (RV32B)
+ ALU_GREV, ALU_GORC: result_o = rev_result;
+ // Bit Field Place (RV32B)
+ ALU_BFP: result_o = bfp_result;
+ // Carry-less Multiply Operations (RV32B)
+ ALU_CLMUL, ALU_CLMULR,
+ ALU_CLMULH: result_o = clmul_result;
+ default: ;
+ endcase
+ end
+ logic unused_shift_amt_compl;
+ assign unused_shift_amt_compl = shift_amt_compl[5];
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..3bb8217
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,514 @@
+`define OP_L 15:0
+`define OP_H 31:16
+/**
+ * Fast Multiplier and Division
+ *
+ * 16x16 kernel multiplier and Long Division
+ */
+module brq_exu_multdiv_fast #(
+ // Selects the multiplier implementation: RV32MSingleCycle instantiates the
+ // three-multiplier variant, any other value the single-multiplier variant.
+ parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast
+ ) (
+ input logic clk_i,
+ input logic rst_ni,
+ input logic mult_en_i, // dynamic enable signal, for FSM control
+ input logic div_en_i, // dynamic enable signal, for FSM control
+ input logic mult_sel_i, // static decoder output, for data muxes
+ input logic div_sel_i, // static decoder output, for data muxes
+ input brq_pkg::md_op_e operator_i,
+ // Bit 0 marks operand A as signed, bit 1 marks operand B as signed.
+ input logic [1:0] signed_mode_i,
+ input logic [31:0] op_a_i,
+ input logic [31:0] op_b_i,
+ // Results of the adder in the ALU, which adds alu_operand_a_o and alu_operand_b_o.
+ input logic [33:0] alu_adder_ext_i,
+ input logic [31:0] alu_adder_i,
+ input logic equal_to_zero_i,
+ // When set, a division by zero is not short-cut and runs the full sequence.
+ input logic data_ind_timing_i,
+ // Operands handed to the ALU adder by the divider.
+ output logic [32:0] alu_operand_a_o,
+ output logic [32:0] alu_operand_b_o,
+ // Intermediate value registers shared with the ALU (current value, next value, write enable).
+ input logic [33:0] imd_val_q_i[2],
+ output logic [33:0] imd_val_d_o[2],
+ output logic [1:0] imd_val_we_o,
+ // Results are held until the ID stage signals it is ready to accept them.
+ input logic multdiv_ready_id_i,
+ output logic [31:0] multdiv_result_o,
+ output logic valid_o
+ );
+ import brq_pkg::*;
+ // Both multiplier variants
+ logic signed [34:0] mac_res_signed;
+ logic [34:0] mac_res_ext;
+ logic [33:0] accum;
+ logic sign_a, sign_b;
+ logic mult_valid;
+ logic signed_mult;
+ // Results that become intermediate value depending on whether mul or div is being calculated
+ logic [33:0] mac_res_d, op_remainder_d;
+ // Raw output of MAC calculation
+ logic [33:0] mac_res;
+ // Divider signals
+ logic div_sign_a, div_sign_b;
+ logic is_greater_equal;
+ logic div_change_sign, rem_change_sign;
+ logic [31:0] one_shift;
+ logic [31:0] op_denominator_q;
+ logic [31:0] op_numerator_q;
+ logic [31:0] op_quotient_q;
+ logic [31:0] op_denominator_d;
+ logic [31:0] op_numerator_d;
+ logic [31:0] op_quotient_d;
+ logic [31:0] next_remainder;
+ logic [32:0] next_quotient;
+ logic [31:0] res_adder_h;
+ logic div_valid;
+ logic [ 4:0] div_counter_q, div_counter_d;
+ logic multdiv_en;
+ logic mult_hold;
+ logic div_hold;
+ logic div_by_zero_d, div_by_zero_q;
+ logic mult_en_internal;
+ logic div_en_internal;
+ // Divider FSM states, listed in the order a full division visits them.
+ // The member list was empty although every one of these names is used by the
+ // state register and the next-state logic below.
+ typedef enum logic [2:0] {
+   MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
+ } md_fsm_e;
+ md_fsm_e md_state_q, md_state_d;
+ logic unused_mult_sel_i;
+ assign unused_mult_sel_i = mult_sel_i;
+ assign mult_en_internal = mult_en_i & ~mult_hold;
+ assign div_en_internal = div_en_i & ~div_hold;
+ // Divider state registers. They reset to the idle state with all data
+ // cleared, and only take their next value while a division is enabled and
+ // not being held (div_en_internal).
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     md_state_q     <= MD_IDLE;
+     div_counter_q  <= '0;
+     op_quotient_q  <= '0;
+     op_numerator_q <= '0;
+     div_by_zero_q  <= '0;
+   end else begin
+     if (div_en_internal) begin
+       md_state_q     <= md_state_d;
+       div_counter_q  <= div_counter_d;
+       op_quotient_q  <= op_quotient_d;
+       op_numerator_q <= op_numerator_d;
+       div_by_zero_q  <= div_by_zero_d;
+     end
+   end
+ end
+ assign multdiv_en = mult_en_internal | div_en_internal;
+ // Intermediate value register shared with ALU
+ assign imd_val_d_o[0] = div_sel_i ? op_remainder_d : mac_res_d;
+ assign imd_val_we_o[0] = multdiv_en;
+ assign imd_val_d_o[1] = {2'b0, op_denominator_d};
+ assign imd_val_we_o[1] = div_en_internal;
+ assign op_denominator_q = imd_val_q_i[1][31:0];
+ logic [1:0] unused_imd_val;
+ assign unused_imd_val = imd_val_q_i[1][33:32];
+ logic unused_mac_res_ext;
+ assign unused_mac_res_ext = mac_res_ext[34];
+ assign signed_mult = (signed_mode_i != 2'b00);
+ assign multdiv_result_o = div_sel_i ? imd_val_q_i[0][31:0] : mac_res_d[31:0];
+ // The single cycle multiplier uses three 17 bit multipliers to compute MUL instructions in a
+ // single cycle and MULH instructions in two cycles.
+ if (RV32M == RV32MSingleCycle) begin : gen_mult_single_cycle
+ // Single-cycle multiplier states: MULL is the reset state and computes the
+ // low product; MULH is the extra cycle used for the high product.
+ // The member list was empty although both names are used below.
+ typedef enum logic {
+   MULL, MULH
+ } mult_fsm_e;
+ mult_fsm_e mult_state_q, mult_state_d;
+ logic signed [33:0] mult1_res, mult2_res, mult3_res;
+ logic [33:0] mult1_res_uns;
+ logic [33:32] unused_mult1_res_uns;
+ logic [15:0] mult1_op_a, mult1_op_b;
+ logic [15:0] mult2_op_a, mult2_op_b;
+ logic [15:0] mult3_op_a, mult3_op_b;
+ logic mult1_sign_a, mult1_sign_b;
+ logic mult2_sign_a, mult2_sign_b;
+ logic mult3_sign_a, mult3_sign_b;
+ logic [33:0] summand1, summand2, summand3;
+ assign mult1_res = $signed({mult1_sign_a, mult1_op_a}) * $signed({mult1_sign_b, mult1_op_b});
+ assign mult2_res = $signed({mult2_sign_a, mult2_op_a}) * $signed({mult2_sign_b, mult2_op_b});
+ assign mult3_res = $signed({mult3_sign_a, mult3_op_a}) * $signed({mult3_sign_b, mult3_op_b});
+ assign mac_res_signed = $signed(summand1) + $signed(summand2) + $signed(summand3);
+ assign mult1_res_uns = $unsigned(mult1_res);
+ assign mac_res_ext = $unsigned(mac_res_signed);
+ assign mac_res = mac_res_ext[33:0];
+ assign sign_a = signed_mode_i[0] & op_a_i[31];
+ assign sign_b = signed_mode_i[1] & op_b_i[31];
+ // The first two multipliers are only used in state 1 (MULL). We can assign them statically.
+ // al*bl
+ assign mult1_sign_a = 1'b0;
+ assign mult1_sign_b = 1'b0;
+ assign mult1_op_a = op_a_i[`OP_L];
+ assign mult1_op_b = op_b_i[`OP_L];
+ // al*bh
+ assign mult2_sign_a = 1'b0;
+ assign mult2_sign_b = sign_b;
+ assign mult2_op_a = op_a_i[`OP_L];
+ assign mult2_op_b = op_b_i[`OP_H];
+ // used in MULH
+ assign accum[17:0] = imd_val_q_i[0][33:16];
+ assign accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}};
+ // Combinational datapath control and next-state logic of the single-cycle
+ // multiplier. The assignments ahead of the case statement are the defaults
+ // (they describe the MULL cycle); each state then overrides what it needs,
+ // so the order of these statements matters.
+ always_comb begin
+ // Default values == MULL
+ // ah*bl
+ mult3_sign_a = sign_a;
+ mult3_sign_b = 1'b0;
+ mult3_op_a = op_a_i[`OP_H];
+ mult3_op_b = op_b_i[`OP_L];
+ // Upper half of al*bl is added to the two cross products.
+ summand1 = {18'h0, mult1_res_uns[`OP_H]};
+ summand2 = $unsigned(mult2_res);
+ summand3 = $unsigned(mult3_res);
+ // mac_res = A*B[47:16], mult1_res = A*B[15:0]
+ mac_res_d = {2'b0, mac_res[`OP_L], mult1_res_uns[`OP_L]};
+ mult_valid = mult_en_i;
+ mult_state_d = MULL;
+ mult_hold = 1'b0;
+ unique case (mult_state_q)
+ MULL: begin
+ if (operator_i != MD_OP_MULL) begin
+ // Any operator other than MULL needs a second cycle: keep the full
+ // intermediate sum, do not signal valid yet and move on to MULH.
+ mac_res_d = mac_res;
+ mult_valid = 1'b0;
+ mult_state_d = MULH;
+ end else begin
+ // MULL finishes in this cycle; hold until the ID stage is ready.
+ mult_hold = ~multdiv_ready_id_i;
+ end
+ end
+ MULH: begin
+ // ah*bh
+ mult3_sign_a = sign_a;
+ mult3_sign_b = sign_b;
+ mult3_op_a = op_a_i[`OP_H];
+ mult3_op_b = op_b_i[`OP_H];
+ mac_res_d = mac_res;
+ // Add ah*bh to accum, which is built from the stored intermediate value.
+ summand1 = '0;
+ summand2 = accum;
+ summand3 = mult3_res;
+ mult_state_d = MULL;
+ mult_valid = 1'b1;
+ mult_hold = ~multdiv_ready_id_i;
+ end
+ default: begin
+ mult_state_d = MULL;
+ end
+ endcase // mult_state_q
+ end
+ // Multiplier state register: resets to MULL and only advances while a
+ // multiplication is enabled and not held.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     mult_state_q <= MULL;
+   end else if (mult_en_internal) begin
+     mult_state_q <= mult_state_d;
+   end
+ end
+ assign unused_mult1_res_uns = mult1_res_uns[33:32];
+ // States must be known/valid.
+ // The fast multiplier uses one 17 bit multiplier to compute MUL instructions in 3 cycles
+ // and MULH instructions in 4 cycles.
+ end else begin : gen_mult_fast
+ logic [15:0] mult_op_a;
+ logic [15:0] mult_op_b;
+ // Fast multiplier states, one per 16x16 partial product, in the order the
+ // FSM steps through them (ALBL is the reset state).
+ // The member list was empty although all four names are used below.
+ typedef enum logic [1:0] {
+   ALBL, ALBH, AHBL, AHBH
+ } mult_fsm_e;
+ mult_fsm_e mult_state_q, mult_state_d;
+ // The 2 MSBs of mac_res_ext (mac_res_ext[34:33]) are always equal since:
+ // 1. The 2 MSBs of the multiplicands are always equal, and
+ // 2. The 16 MSBs of the addend (accum[33:18]) are always equal.
+ // Thus, it is safe to ignore mac_res_ext[34].
+ assign mac_res_signed =
+ $signed({sign_a, mult_op_a}) * $signed({sign_b, mult_op_b}) + $signed(accum);
+ assign mac_res_ext = $unsigned(mac_res_signed);
+ assign mac_res = mac_res_ext[33:0];
+ // Combinational operand selection and next-state logic of the fast
+ // multiplier. Each state feeds one 16x16 partial product plus an accumulator
+ // value into the single MAC; the result is written back through mac_res_d.
+ // The assignments ahead of the case statement are defaults that each state
+ // overrides, so the order of these statements matters.
+ always_comb begin
+ mult_op_a = op_a_i[`OP_L];
+ mult_op_b = op_b_i[`OP_L];
+ sign_a = 1'b0;
+ sign_b = 1'b0;
+ accum = imd_val_q_i[0];
+ mac_res_d = mac_res;
+ mult_state_d = mult_state_q;
+ mult_valid = 1'b0;
+ mult_hold = 1'b0;
+ unique case (mult_state_q)
+ ALBL: begin
+ // al*bl
+ mult_op_a = op_a_i[`OP_L];
+ mult_op_b = op_b_i[`OP_L];
+ sign_a = 1'b0;
+ sign_b = 1'b0;
+ // First partial product: nothing to accumulate yet.
+ accum = '0;
+ mac_res_d = mac_res;
+ mult_state_d = ALBH;
+ end
+ ALBH: begin
+ // al*bh<<16
+ mult_op_a = op_a_i[`OP_L];
+ mult_op_b = op_b_i[`OP_H];
+ sign_a = 1'b0;
+ sign_b = signed_mode_i[1] & op_b_i[31];
+ // result of AL*BL (in imd_val_q_i[0]) always unsigned with no carry
+ accum = {18'b0, imd_val_q_i[0][31:16]};
+ if (operator_i == MD_OP_MULL) begin
+ // For MULL keep the already-final low 16 bits next to the new sum.
+ mac_res_d = {2'b0, mac_res[`OP_L], imd_val_q_i[0][`OP_L]};
+ end else begin
+ mac_res_d = mac_res;
+ end
+ mult_state_d = AHBL;
+ end
+ AHBL: begin
+ // ah*bl<<16
+ mult_op_a = op_a_i[`OP_H];
+ mult_op_b = op_b_i[`OP_L];
+ sign_a = signed_mode_i[0] & op_a_i[31];
+ sign_b = 1'b0;
+ if (operator_i == MD_OP_MULL) begin
+ // MULL completes here: the result is valid in this state.
+ accum = {18'b0, imd_val_q_i[0][31:16]};
+ mac_res_d = {2'b0, mac_res[15:0], imd_val_q_i[0][15:0]};
+ mult_valid = 1'b1;
+ // Note no state transition will occur if mult_hold is set
+ mult_state_d = ALBL;
+ mult_hold = ~multdiv_ready_id_i;
+ end else begin
+ // Other operators need the ah*bh step as well.
+ accum = imd_val_q_i[0];
+ mac_res_d = mac_res;
+ mult_state_d = AHBH;
+ end
+ end
+ AHBH: begin
+ // only MD_OP_MULH here
+ // ah*bh
+ mult_op_a = op_a_i[`OP_H];
+ mult_op_b = op_b_i[`OP_H];
+ sign_a = signed_mode_i[0] & op_a_i[31];
+ sign_b = signed_mode_i[1] & op_b_i[31];
+ // Shift the stored sum right by 16; the upper bits replicate its MSB
+ // when signed_mult is set and are zero otherwise.
+ accum[17: 0] = imd_val_q_i[0][33:16];
+ accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}};
+ // result of AH*BL is not signed only if signed_mode_i == 2'b00
+ mac_res_d = mac_res;
+ mult_valid = 1'b1;
+ // Note no state transition will occur if mult_hold is set
+ mult_state_d = ALBL;
+ mult_hold = ~multdiv_ready_id_i;
+ end
+ default: begin
+ mult_state_d = ALBL;
+ end
+ endcase // mult_state_q
+ end
+ // Multiplier state register: resets to ALBL and only advances while a
+ // multiplication is enabled and not held.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     mult_state_q <= ALBL;
+   end else if (mult_en_internal) begin
+     mult_state_q <= mult_state_d;
+   end
+ end
+ // States must be known/valid.
+ end // gen_mult_fast
+ // Divider
+ assign res_adder_h = alu_adder_ext_i[32:1];
+ logic [1:0] unused_alu_adder_ext;
+ assign unused_alu_adder_ext = {alu_adder_ext_i[33],alu_adder_ext_i[0]};
+ assign next_remainder = is_greater_equal ? res_adder_h[31:0] : imd_val_q_i[0][31:0];
+ assign next_quotient = is_greater_equal ? {1'b0, op_quotient_q} | {1'b0, one_shift} :
+ {1'b0, op_quotient_q};
+ assign one_shift = {31'b0, 1'b1} << div_counter_q;
+ // The ALU adder computes alu_operand_a_o + alu_operand_b_o, i.e.
+ // Remainder - Divisor. When that difference is >= 0, is_greater_equal is 1:
+ // the next remainder is then taken from res_adder_h and the div_counter_q-th
+ // bit of the quotient is set (see next_remainder and next_quotient).
+ // With equal sign bits the sign of the difference decides; with different
+ // sign bits the remainder's own MSB decides.
+ always_comb begin
+   if (imd_val_q_i[0][31] == op_denominator_q[31]) begin
+     is_greater_equal = ~res_adder_h[31];
+   end else begin
+     is_greater_equal = imd_val_q_i[0][31];
+   end
+ end
+ assign div_sign_a = op_a_i[31] & signed_mode_i[0];
+ assign div_sign_b = op_b_i[31] & signed_mode_i[1];
+ assign div_change_sign = (div_sign_a ^ div_sign_b) & ~div_by_zero_q;
+ assign rem_change_sign = div_sign_a;
+ // Long-division FSM: next-state logic, operands for the shared ALU adder and
+ // the values written back to the intermediate registers.
+ //
+ // Sequence for a full division:
+ //   MD_IDLE -> MD_ABS_A -> MD_ABS_B -> MD_COMP (repeated) -> MD_LAST
+ //           -> MD_CHANGE_SIGN -> MD_FINISH -> MD_IDLE
+ // A division by zero jumps from MD_IDLE straight to MD_FINISH unless
+ // data_ind_timing_i is set.
+ //
+ // Fix: the MD_CHANGE_SIGN case label was missing, which left the sign
+ // correction statements outside any case item with an unmatched `end`.
+ always_comb begin
+   // Defaults; individual states override what they need.
+   div_counter_d    = div_counter_q - 5'h1;
+   op_remainder_d   = imd_val_q_i[0];
+   op_quotient_d    = op_quotient_q;
+   md_state_d       = md_state_q;
+   op_numerator_d   = op_numerator_q;
+   op_denominator_d = op_denominator_q;
+   alu_operand_a_o  = {32'h0 , 1'b1};
+   alu_operand_b_o  = {~op_b_i, 1'b1};
+   div_valid        = 1'b0;
+   div_hold         = 1'b0;
+   div_by_zero_d    = div_by_zero_q;
+   unique case (md_state_q)
+     MD_IDLE: begin
+       if (operator_i == MD_OP_DIV) begin
+         // Check if the Denominator is 0
+         // quotient for division by 0 is specified to be -1
+         // Note with data-independent time option, the full divide operation will proceed as
+         // normal and will naturally return -1
+         op_remainder_d = '1;
+         md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+         // Record that this is a div by zero to stop the sign change at the end of the
+         // division (in data_ind_timing mode).
+         div_by_zero_d  = equal_to_zero_i;
+       end else begin
+         // Check if the Denominator is 0
+         // remainder for division by 0 is specified to be the numerator (operand a)
+         // Note with data-independent time option, the full divide operation will proceed as
+         // normal and will naturally return operand a
+         op_remainder_d = {2'b0, op_a_i};
+         md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+       end
+       // 0 - B = 0 iff B == 0
+       alu_operand_a_o = {32'h0 , 1'b1};
+       alu_operand_b_o = {~op_b_i, 1'b1};
+       div_counter_d   = 5'd31;
+     end
+     MD_ABS_A: begin
+       // quotient
+       op_quotient_d   = '0;
+       // A abs value
+       op_numerator_d  = div_sign_a ? alu_adder_i : op_a_i;
+       md_state_d      = MD_ABS_B;
+       div_counter_d   = 5'd31;
+       // ABS(A) = 0 - A
+       alu_operand_a_o = {32'h0 , 1'b1};
+       alu_operand_b_o = {~op_a_i, 1'b1};
+     end
+     MD_ABS_B: begin
+       // remainder
+       op_remainder_d   = { 33'h0, op_numerator_q[31]};
+       // B abs value
+       op_denominator_d = div_sign_b ? alu_adder_i : op_b_i;
+       md_state_d       = MD_COMP;
+       div_counter_d    = 5'd31;
+       // ABS(B) = 0 - B
+       alu_operand_a_o  = {32'h0 , 1'b1};
+       alu_operand_b_o  = {~op_b_i, 1'b1};
+     end
+     MD_COMP: begin
+       // Shift the next numerator bit (indexed by the already decremented
+       // counter) in below the updated remainder.
+       op_remainder_d  = {1'b0, next_remainder[31:0], op_numerator_q[div_counter_d]};
+       op_quotient_d   = next_quotient[31:0];
+       md_state_d      = (div_counter_q == 5'd1) ? MD_LAST : MD_COMP;
+       // Division
+       alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder
+       alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's complement
+     end
+     MD_LAST: begin
+       if (operator_i == MD_OP_DIV) begin
+         // this time we save the quotient in op_remainder_d (i.e. imd_val_q_i[0]) since
+         // we do not need anymore the remainder
+         op_remainder_d = {1'b0, next_quotient};
+       end else begin
+         // this time we do not save the quotient anymore since we need only the remainder
+         op_remainder_d = {2'b0, next_remainder[31:0]};
+       end
+       // Division
+       alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder
+       alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's complement
+       md_state_d      = MD_CHANGE_SIGN;
+     end
+     MD_CHANGE_SIGN: begin
+       md_state_d = MD_FINISH;
+       // Replace the stored result by its negation (computed by the ALU) when
+       // the sign has to change; otherwise keep it as is.
+       if (operator_i == MD_OP_DIV) begin
+         op_remainder_d = (div_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0];
+       end else begin
+         op_remainder_d = (rem_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0];
+       end
+       // ABS(Quotient) = 0 - Quotient (or Remainder)
+       alu_operand_a_o = {32'h0 , 1'b1};
+       alu_operand_b_o = {~imd_val_q_i[0][31:0], 1'b1};
+     end
+     MD_FINISH: begin
+       // Hold result until ID stage is ready to accept it
+       // Note no state transition will occur if div_hold is set
+       md_state_d = MD_IDLE;
+       div_hold   = ~multdiv_ready_id_i;
+       div_valid  = 1'b1;
+     end
+     default: begin
+       md_state_d = MD_IDLE;
+     end
+   endcase // md_state_q
+ end
+ assign valid_o = mult_valid | div_valid;
+endmodule // brq_mult
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..5592cb6
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,356 @@
+/**
+ * Slow Multiplier and Division
+ *
+ * Baugh-Wooley multiplier and Long Division
+ */
+module brq_exu_multdiv_slow
+(
+ input logic clk_i,
+ input logic rst_ni,
+ input logic mult_en_i, // dynamic enable signal, for FSM control
+ input logic div_en_i, // dynamic enable signal, for FSM control
+ input logic mult_sel_i, // static decoder output, for data muxes
+ input logic div_sel_i, // static decoder output, for data muxes
+ input brq_pkg::md_op_e operator_i,
+ // Bit 0 marks operand A as signed, bit 1 marks operand B as signed.
+ input logic [1:0] signed_mode_i,
+ input logic [31:0] op_a_i,
+ input logic [31:0] op_b_i,
+ // Results of the adder in the ALU for the operands driven below.
+ input logic [33:0] alu_adder_ext_i,
+ input logic [31:0] alu_adder_i,
+ input logic equal_to_zero_i,
+ // When set, early-exit shortcuts (zero multiplier, division by zero) are disabled.
+ input logic data_ind_timing_i,
+ // Operands handed to the ALU adder.
+ output logic [32:0] alu_operand_a_o,
+ output logic [32:0] alu_operand_b_o,
+ // Intermediate value registers shared with the ALU (current value, next value, write enable).
+ input logic [33:0] imd_val_q_i[2],
+ output logic [33:0] imd_val_d_o[2],
+ output logic [1:0] imd_val_we_o,
+ // Results are held until the ID stage signals it is ready to accept them.
+ input logic multdiv_ready_id_i,
+ output logic [31:0] multdiv_result_o,
+ output logic valid_o
+);
+ import brq_pkg::*;
+ // Multiplier/divider FSM states, listed in the order a full division visits
+ // them. The member list was empty although every one of these names is used
+ // by the logic below.
+ typedef enum logic [2:0] {
+   MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
+ } md_fsm_e;
+ md_fsm_e md_state_q, md_state_d;
+ logic [32:0] accum_window_q, accum_window_d;
+ logic unused_imd_val0;
+ logic [ 1:0] unused_imd_val1;
+ logic [32:0] res_adder_l;
+ logic [32:0] res_adder_h;
+ logic [ 4:0] multdiv_count_q, multdiv_count_d;
+ logic [32:0] op_b_shift_q, op_b_shift_d;
+ logic [32:0] op_a_shift_q, op_a_shift_d;
+ logic [32:0] op_a_ext, op_b_ext;
+ logic [32:0] one_shift;
+ logic [32:0] op_a_bw_pp, op_a_bw_last_pp;
+ logic [31:0] b_0;
+ logic sign_a, sign_b;
+ logic [32:0] next_quotient;
+ logic [31:0] next_remainder;
+ logic [31:0] op_numerator_q, op_numerator_d;
+ logic is_greater_equal;
+ logic div_change_sign, rem_change_sign;
+ logic div_by_zero_d, div_by_zero_q;
+ logic multdiv_hold;
+ logic multdiv_en;
+ // (accum_window_q + op_a_shift_q)
+ assign res_adder_l = alu_adder_ext_i[32:0];
+ // (accum_window_q + op_a_shift_q)>>1
+ assign res_adder_h = alu_adder_ext_i[33:1];
+ /////////////////////
+ // ALU Operand MUX //
+ /////////////////////
+ // Intermediate value register shared with ALU
+ assign imd_val_d_o[0] = {1'b0,accum_window_d};
+ assign imd_val_we_o[0] = ~multdiv_hold;
+ assign accum_window_q = imd_val_q_i[0][32:0];
+ assign unused_imd_val0 = imd_val_q_i[0][33];
+ assign imd_val_d_o[1] = {2'b00, op_numerator_d};
+ assign imd_val_we_o[1] = multdiv_en;
+ assign op_numerator_q = imd_val_q_i[1][31:0];
+ assign unused_imd_val1 = imd_val_q_i[1][33:32];
+ // Selects the two operands handed to the ALU adder.
+ //  - Multiplication adds the current partial product to the accumulator.
+ //  - Division and remainder share one sequence: negate an operand
+ //    (0 - x, computed as {0,1} + {~x,1}) in the IDLE / ABS / CHANGE_SIGN
+ //    states, and subtract the denominator from the remainder otherwise.
+ //
+ // Fixes:
+ //  - MD_OP_DIV shares the MD_OP_REM branch again; without it a division
+ //    never drove alu_operand_b_o.
+ //  - The MD_CHANGE_SIGN case label is restored.
+ //  - alu_operand_b_o gets a default so it is assigned on every path.
+ always_comb begin
+   alu_operand_a_o = accum_window_q;
+   // Same value the disabled default branch used to drive.
+   // NOTE(review): presumably md_op_e has only the four operators handled
+   // below -- confirm against brq_pkg.
+   alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1};
+   unique case (operator_i)
+     MD_OP_MULL: begin
+       alu_operand_b_o = op_a_bw_pp;
+     end
+     MD_OP_MULH: begin
+       // The last partial product uses the inverted Baugh-Wooley term.
+       alu_operand_b_o = (md_state_q == MD_LAST) ? op_a_bw_last_pp : op_a_bw_pp;
+     end
+     MD_OP_DIV,
+     MD_OP_REM: begin
+       unique case (md_state_q)
+         MD_IDLE: begin
+           // 0 - B = 0 iff B == 0
+           alu_operand_a_o = {32'h0 , 1'b1};
+           alu_operand_b_o = {~op_b_i, 1'b1};
+         end
+         MD_ABS_A: begin
+           // ABS(A) = 0 - A
+           alu_operand_a_o = {32'h0 , 1'b1};
+           alu_operand_b_o = {~op_a_i, 1'b1};
+         end
+         MD_ABS_B: begin
+           // ABS(B) = 0 - B
+           alu_operand_a_o = {32'h0 , 1'b1};
+           alu_operand_b_o = {~op_b_i, 1'b1};
+         end
+         MD_CHANGE_SIGN: begin
+           // ABS(Quotient) = 0 - Quotient (or Remainder)
+           alu_operand_a_o = {32'h0 , 1'b1};
+           alu_operand_b_o = {~accum_window_q[31:0], 1'b1};
+         end
+         default: begin
+           // Division
+           alu_operand_a_o = {accum_window_q[31:0], 1'b1}; // it contains the remainder
+           alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1};  // -denominator two's complement
+         end
+       endcase
+     end
+   endcase
+ end
+ // Multiplier partial product calculation
+ assign b_0 = {32{op_b_shift_q[0]}};
+ assign op_a_bw_pp = { ~(op_a_shift_q[32] & op_b_shift_q[0]), (op_a_shift_q[31:0] & b_0) };
+ assign op_a_bw_last_pp = { (op_a_shift_q[32] & op_b_shift_q[0]), ~(op_a_shift_q[31:0] & b_0) };
+ // Sign extend the input operands
+ assign sign_a = op_a_i[31] & signed_mode_i[0];
+ assign sign_b = op_b_i[31] & signed_mode_i[1];
+ assign op_a_ext = {sign_a, op_a_i};
+ assign op_b_ext = {sign_b, op_b_i};
+ // Divider calculations
+ // The adder in the ALU computes Remainder - Divisor. If Remainder - Divisor >= 0,
+ // is_greater_equal is true, the next Remainder is the subtraction result and the Quotient
+ // multdiv_count_q-th bit is set to 1.
+ assign is_greater_equal = (accum_window_q[31] == op_b_shift_q[31]) ?
+ ~res_adder_h[31] : accum_window_q[31];
+ assign one_shift = {32'b0, 1'b1} << multdiv_count_q;
+ assign next_remainder = is_greater_equal ? res_adder_h[31:0] : accum_window_q[31:0];
+ assign next_quotient = is_greater_equal ? op_a_shift_q | one_shift : op_a_shift_q;
+ assign div_change_sign = (sign_a ^ sign_b) & ~div_by_zero_q;
+ assign rem_change_sign = sign_a;
+ // Next-state and datapath logic of the shift-and-add multiplier and the
+ // long divider. Nothing changes unless a multiply or divide is selected.
+ //
+ // Fixes:
+ //  - MD_COMP handles MD_OP_DIV together with MD_OP_REM again; without the
+ //    label a division never updated remainder or quotient.
+ //  - The MD_CHANGE_SIGN case label and its per-operator labels are
+ //    restored; without them the two result assignments were unlabelled.
+ //  - A duplicated `md_state_d = MD_IDLE` in MD_LAST / MD_OP_MULH is removed.
+ always_comb begin
+   // Defaults: hold every register.
+   multdiv_count_d = multdiv_count_q;
+   accum_window_d  = accum_window_q;
+   op_b_shift_d    = op_b_shift_q;
+   op_a_shift_d    = op_a_shift_q;
+   op_numerator_d  = op_numerator_q;
+   md_state_d      = md_state_q;
+   multdiv_hold    = 1'b0;
+   div_by_zero_d   = div_by_zero_q;
+   if (mult_sel_i || div_sel_i) begin
+     unique case (md_state_q)
+       MD_IDLE: begin
+         unique case (operator_i)
+           MD_OP_MULL: begin
+             op_a_shift_d   = op_a_ext << 1;
+             accum_window_d = { ~(op_a_ext[32] & op_b_i[0]),
+                                op_a_ext[31:0] & {32{op_b_i[0]}} };
+             op_b_shift_d   = op_b_ext >> 1;
+             // Proceed with multiplication by 0/1 in data-independent time mode
+             md_state_d     = (!data_ind_timing_i && ((op_b_ext >> 1) == 0)) ? MD_LAST : MD_COMP;
+           end
+           MD_OP_MULH: begin
+             op_a_shift_d   = op_a_ext;
+             accum_window_d = { 1'b1, ~(op_a_ext[32] & op_b_i[0]),
+                                op_a_ext[31:1] & {31{op_b_i[0]}} };
+             op_b_shift_d   = op_b_ext >> 1;
+             md_state_d     = MD_COMP;
+           end
+           MD_OP_DIV: begin
+             // Check if the denominator is 0
+             // quotient for division by 0 is specified to be -1
+             // Note with data-independent time option, the full divide operation will proceed as
+             // normal and will naturally return -1
+             accum_window_d = {33{1'b1}};
+             md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+             // Record that this is a div by zero to stop the sign change at the end of the
+             // division (in data_ind_timing mode).
+             div_by_zero_d  = equal_to_zero_i;
+           end
+           MD_OP_REM: begin
+             // Check if the denominator is 0
+             // remainder for division by 0 is specified to be the numerator (operand a)
+             // Note with data-independent time option, the full divide operation will proceed as
+             // normal and will naturally return operand a
+             accum_window_d = op_a_ext;
+             md_state_d     = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+           end
+           // default:;
+         endcase
+         multdiv_count_d = 5'd31;
+       end
+       MD_ABS_A: begin
+         // quotient
+         op_a_shift_d   = '0;
+         // A abs value
+         op_numerator_d = sign_a ? alu_adder_i : op_a_i;
+         md_state_d     = MD_ABS_B;
+       end
+       MD_ABS_B: begin
+         // remainder
+         accum_window_d = {32'h0,op_numerator_q[31]};
+         // B abs value
+         op_b_shift_d   = sign_b ? {1'b0,alu_adder_i} : {1'b0,op_b_i};
+         md_state_d     = MD_COMP;
+       end
+       MD_COMP: begin
+         multdiv_count_d = multdiv_count_q - 5'h1;
+         unique case (operator_i)
+           MD_OP_MULL: begin
+             accum_window_d = res_adder_l;
+             op_a_shift_d   = op_a_shift_q << 1;
+             op_b_shift_d   = op_b_shift_q >> 1;
+             // Multiplication is complete once op_b is zero, unless in data_ind_timing mode where
+             // the maximum possible shift-add operations will be completed regardless of op_b
+             md_state_d     = ((!data_ind_timing_i && (op_b_shift_d == 0)) ||
+                               (multdiv_count_q == 5'd1)) ? MD_LAST : MD_COMP;
+           end
+           MD_OP_MULH: begin
+             accum_window_d = res_adder_h;
+             op_a_shift_d   = op_a_shift_q;
+             op_b_shift_d   = op_b_shift_q >> 1;
+             md_state_d     = (multdiv_count_q == 5'd1) ? MD_LAST : MD_COMP;
+           end
+           MD_OP_DIV,
+           MD_OP_REM: begin
+             // Shift the next numerator bit (indexed by the already
+             // decremented counter) in below the updated remainder.
+             accum_window_d = {next_remainder[31:0], op_numerator_q[multdiv_count_d]};
+             op_a_shift_d   = next_quotient;
+             md_state_d     = (multdiv_count_q == 5'd1) ? MD_LAST : MD_COMP;
+           end
+           // default: ;
+         endcase
+       end
+       MD_LAST: begin
+         unique case (operator_i)
+           MD_OP_MULL: begin
+             accum_window_d = res_adder_l;
+             // Note no state transition will occur if multdiv_hold is set
+             md_state_d     = MD_IDLE;
+             multdiv_hold   = ~multdiv_ready_id_i;
+           end
+           MD_OP_MULH: begin
+             accum_window_d = res_adder_l;
+             // Note no state transition will occur if multdiv_hold is set
+             md_state_d     = MD_IDLE;
+             multdiv_hold   = ~multdiv_ready_id_i;
+           end
+           MD_OP_DIV: begin
+             // this time we save the quotient in accum_window_q since we do not need anymore the
+             // remainder
+             accum_window_d = next_quotient;
+             md_state_d     = MD_CHANGE_SIGN;
+           end
+           MD_OP_REM: begin
+             // this time we do not save the quotient anymore since we need only the remainder
+             accum_window_d = {1'b0, next_remainder[31:0]};
+             md_state_d     = MD_CHANGE_SIGN;
+           end
+           // default: ;
+         endcase
+       end
+       MD_CHANGE_SIGN: begin
+         md_state_d = MD_FINISH;
+         // Replace the stored result by its negation (computed by the ALU)
+         // when the sign has to change; otherwise keep it as is.
+         unique case (operator_i)
+           MD_OP_DIV:
+             accum_window_d = div_change_sign ? {1'b0,alu_adder_i} : accum_window_q;
+           MD_OP_REM:
+             accum_window_d = rem_change_sign ? {1'b0,alu_adder_i} : accum_window_q;
+           default: ;
+         endcase
+       end
+       MD_FINISH: begin
+         // Note no state transition will occur if multdiv_hold is set
+         md_state_d   = MD_IDLE;
+         multdiv_hold = ~multdiv_ready_id_i;
+       end
+       default: begin
+         md_state_d = MD_IDLE;
+       end
+     endcase // md_state_q
+   end // (mult_sel_i || div_sel_i)
+ end
+ //////////////////////////////////////////
+ // Multiplier / Divider state registers //
+ //////////////////////////////////////////
+ // The registers advance only while a multiply or divide is enabled and the
+ // FSM is not holding its result.
+ assign multdiv_en = ~multdiv_hold & (mult_en_i | div_en_i);
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     md_state_q      <= MD_IDLE;
+     multdiv_count_q <= 5'h0;
+     op_a_shift_q    <= 33'h0;
+     op_b_shift_q    <= 33'h0;
+     div_by_zero_q   <= 1'b0;
+   end else begin
+     if (multdiv_en) begin
+       md_state_q      <= md_state_d;
+       multdiv_count_q <= multdiv_count_d;
+       op_a_shift_q    <= op_a_shift_d;
+       op_b_shift_q    <= op_b_shift_d;
+       div_by_zero_q   <= div_by_zero_d;
+     end
+   end
+ end
+ /////////////
+ // Outputs //
+ /////////////
+ // The result is valid in MD_FINISH (division / remainder), or already in
+ // MD_LAST for the two multiply operators.
+ assign valid_o = (md_state_q == MD_FINISH) |
+                  ((md_state_q == MD_LAST) &
+                   ((operator_i == MD_OP_MULL) |
+                    (operator_i == MD_OP_MULH)));
+ // Division results come from the accumulator register; multiplication
+ // results are taken directly from the ALU adder output.
+ assign multdiv_result_o = div_en_i ? accum_window_q[31:0] : res_adder_l[31:0];
+// Fix: the module was never closed.
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..3b1b597
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,77 @@
+//`timescale 1ns/1ps
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * RISC-V floating-point register file
+ *
+ * Flip-flop based register file with NUM_WORDS entries of DataWidth bits each
+ * (32 entries, or 64 when RVF == RV64FDouble). Every entry, including index 0,
+ * is a real writable register; nothing is hard-wired to zero. It provides three
+ * combinational read ports and one write port that updates on the rising clock
+ * edge. All entries reset to zero.
+ *
+ * Use this register file when targeting FPGA synthesis or Verilator simulation.
+ */
+module brq_fp_register_file_ff #(
+ parameter brq_pkg::rvfloat_e RVF = brq_pkg::RV32FSingle,
+ parameter int unsigned DataWidth = 32
+ ) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+ //Read port R1
+ input logic [4:0] raddr_a_i,
+ output logic [DataWidth-1:0] rdata_a_o,
+ //Read port R2
+ input logic [4:0] raddr_b_i,
+ output logic [DataWidth-1:0] rdata_b_o,
+ //Read port R3
+ input logic [4:0] raddr_c_i,
+ output logic [DataWidth-1:0] rdata_c_o,
+ // Write port W1
+ input logic [4:0] waddr_a_i,
+ input logic [DataWidth-1:0] wdata_a_i,
+ input logic we_a_i
+ );
+ import brq_pkg::rvfloat_e;
+ localparam int unsigned ADDR_WIDTH = (RVF==brq_pkg::RV64FDouble) ? 6 : 5;
+ localparam int unsigned NUM_WORDS = (RVF==brq_pkg::RV64FDouble) ? 64 : 32;
+ logic [NUM_WORDS-1:0][DataWidth-1:0] rf_reg;
+ logic [NUM_WORDS-1:0][DataWidth-1:0] rf_reg_q;
+ logic [NUM_WORDS-1:0] we_a_dec;
+ // One-hot write-enable decoder.
+ // Fix: the index used to be truncated to 5 bits (5'(i)), so with
+ // NUM_WORDS == 64 every write also hit the aliased entry i+32. Comparing at
+ // ADDR_WIDTH bits selects exactly one entry.
+ // NOTE(review): the address ports are 5 bits wide, so entries 32..63 can be
+ // neither written nor read through them -- confirm whether 64 entries are
+ // really intended for RV64FDouble.
+ always_comb begin : we_a_decoder
+   for (int unsigned i = 0; i < NUM_WORDS; i++) begin
+     we_a_dec[i] = (ADDR_WIDTH'(waddr_a_i) == ADDR_WIDTH'(i)) ? we_a_i : 1'b0;
+   end
+ end
+ // One register per entry: cleared on reset, loaded when its enable is set.
+ for (genvar i = 0; i < NUM_WORDS; i++) begin : g_rf_flops
+   always_ff @(posedge clk_i or negedge rst_ni) begin
+     if (!rst_ni) begin
+       rf_reg_q[i] <= '0;
+     end else if (we_a_dec[i]) begin
+       rf_reg_q[i] <= wdata_a_i;
+     end
+   end
+ end
+ assign rf_reg[NUM_WORDS-1:0] = rf_reg_q[NUM_WORDS-1:0];
+ // Combinational read ports.
+ assign rdata_a_o = rf_reg[raddr_a_i];
+ assign rdata_b_o = rf_reg[raddr_b_i];
+ assign rdata_c_o = rf_reg[raddr_c_i];
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..df82b18
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,1077 @@
+ `define RVFI
+/**
+ * Instruction Decode Stage
+ *
+ * Decode stage of the core. It decodes the instructions, selects the execute
+ * stage operands and drives the register file read/write interfaces.
+ */
+// Module header of the ID stage: parameters, port list and package import.
+// The port list is closed with ");" before the package import so that the
+// header is a complete ANSI-style declaration.
+module brq_idu #(
+  parameter bit RV32E = 0,
+  parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast,
+  parameter brq_pkg::rv32b_e RV32B = brq_pkg::RV32BNone,
+  parameter brq_pkg::rvfloat_e RVF = brq_pkg::RV64FDouble,
+  parameter bit DataIndTiming = 1'b0,
+  parameter bit BranchTargetALU = 0,
+  parameter bit SpecBranch = 0,
+  parameter bit WritebackStage = 0,
+  parameter bit BranchPredictor = 0
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  output logic ctrl_busy_o,
+  output logic illegal_insn_o,
+  // Interface to IF stage
+  input logic instr_valid_i,
+  input logic [31:0] instr_rdata_i, // from IF-ID pipeline registers
+  input logic [31:0] instr_rdata_alu_i, // from IF-ID pipeline registers
+  input logic [15:0] instr_rdata_c_i, // from IF-ID pipeline registers
+  input logic instr_is_compressed_i,
+  input logic instr_bp_taken_i,
+  output logic instr_req_o,
+  output logic instr_first_cycle_id_o,
+  output logic instr_valid_clear_o, // kill instr in IF-ID reg
+  output logic id_in_ready_o, // ID stage is ready for next instr
+  output logic icache_inval_o,
+  // Jumps and branches
+  input logic branch_decision_i,
+  // IF and ID stage signals
+  output logic pc_set_o,
+  output logic pc_set_spec_o,
+  output brq_pkg::pc_sel_e pc_mux_o,
+  output logic nt_branch_mispredict_o,
+  output brq_pkg::exc_pc_sel_e exc_pc_mux_o,
+  output brq_pkg::exc_cause_e exc_cause_o,
+  input logic illegal_c_insn_i,
+  input logic instr_fetch_err_i,
+  input logic instr_fetch_err_plus2_i,
+  input logic [31:0] pc_id_i,
+  // Stalls
+  input logic ex_valid_i, // EX stage has valid output
+  input logic lsu_resp_valid_i, // LSU has valid output, or is done
+  // ALU
+  output brq_pkg::alu_op_e alu_operator_ex_o,
+  output logic [31:0] alu_operand_a_ex_o,
+  output logic [31:0] alu_operand_b_ex_o,
+  // Multicycle Operation Stage Register
+  input logic [1:0] imd_val_we_ex_i,
+  input logic [33:0] imd_val_d_ex_i[2],
+  output logic [33:0] imd_val_q_ex_o[2],
+  // Branch target ALU
+  output logic [31:0] bt_a_operand_o,
+  output logic [31:0] bt_b_operand_o,
+  // MUL, DIV
+  output logic mult_en_ex_o,
+  output logic div_en_ex_o,
+  output logic mult_sel_ex_o,
+  output logic div_sel_ex_o,
+  output brq_pkg::md_op_e multdiv_operator_ex_o,
+  output logic [1:0] multdiv_signed_mode_ex_o,
+  output logic [31:0] multdiv_operand_a_ex_o,
+  output logic [31:0] multdiv_operand_b_ex_o,
+  output logic multdiv_ready_id_o,
+  // CSR
+  output logic csr_access_o,
+  output brq_pkg::csr_op_e csr_op_o,
+  output logic csr_op_en_o,
+  output logic csr_save_if_o,
+  output logic csr_save_id_o,
+  output logic csr_save_wb_o,
+  output logic csr_restore_mret_id_o,
+  output logic csr_restore_dret_id_o,
+  output logic csr_save_cause_o,
+  output logic [31:0] csr_mtval_o,
+  input brq_pkg::priv_lvl_e priv_mode_i,
+  input logic csr_mstatus_tw_i,
+  input logic illegal_csr_insn_i,
+  input logic data_ind_timing_i,
+  // Interface to load store unit
+  output logic lsu_req_o,
+  output logic lsu_we_o,
+  output logic [1:0] lsu_type_o,
+  output logic lsu_sign_ext_o,
+  output logic [31:0] lsu_wdata_o,
+  input logic lsu_req_done_i, // Data req to LSU is complete and
+                              // instruction can move to writeback
+                              // (only relevant where writeback stage is
+                              // present)
+  input logic lsu_addr_incr_req_i,
+  input logic [31:0] lsu_addr_last_i,
+  // Interrupt signals
+  input logic csr_mstatus_mie_i,
+  input logic irq_pending_i,
+  input brq_pkg::irqs_t irqs_i,
+  input logic irq_nm_i,
+  output logic nmi_mode_o,
+  input logic lsu_load_err_i,
+  input logic lsu_store_err_i,
+  // Debug Signal
+  output logic debug_mode_o,
+  output brq_pkg::dbg_cause_e debug_cause_o,
+  output logic debug_csr_save_o,
+  input logic debug_req_i,
+  input logic debug_single_step_i,
+  input logic debug_ebreakm_i,
+  input logic debug_ebreaku_i,
+  input logic trigger_match_i,
+  // Write back signal
+  input logic [31:0] result_ex_i,
+  input logic [31:0] csr_rdata_i,
+  // Register file read
+  output logic [4:0] rf_raddr_a_o,
+  input logic [31:0] rf_rdata_a_i,
+  output logic [4:0] rf_raddr_b_o,
+  input logic [31:0] rf_rdata_b_i,
+  output logic rf_ren_a_o,
+  output logic rf_ren_b_o,
+  // Register file write (via writeback)
+  output logic [4:0] rf_waddr_id_o,
+  output logic [31:0] rf_wdata_id_o,
+  output logic rf_we_id_o,
+  output logic rf_rd_a_wb_match_o,
+  output logic rf_rd_b_wb_match_o,
+  // Register write information from writeback (for resolving data hazards)
+  input logic [4:0] rf_waddr_wb_i,
+  input logic [31:0] rf_wdata_fwd_wb_i,
+  input logic rf_write_wb_i,
+  output logic en_wb_o,
+  output brq_pkg::wb_instr_type_e instr_type_wb_o,
+  output logic instr_perf_count_id_o,
+  input logic ready_wb_i,
+  input logic outstanding_load_wb_i,
+  input logic outstanding_store_wb_i,
+  // Performance Counters
+  output logic perf_jump_o, // executing a jump instr
+  output logic perf_branch_o, // executing a branch instr
+  output logic perf_tbranch_o, // executing a taken branch instr
+  output logic perf_dside_wait_o, // instruction in ID/EX is awaiting memory
+                                  // access to finish before proceeding
+  output logic perf_mul_wait_o,
+  output logic perf_div_wait_o,
+  output logic instr_id_done_o,
+  // Floating point extensions IO
+  output fpnew_pkg::roundmode_e fp_rounding_mode_o, // defines the rounding mode
+  // output brq_pkg::op_b_sel_e fp_alu_op_b_mux_sel_o, // operand b selection: reg value or
+  // immediate
+  input logic [31:0] fp_rf_rdata_a_i,
+  input logic [31:0] fp_rf_rdata_b_i,
+  input logic [31:0] fp_rf_rdata_c_i,
+  output logic [4:0] fp_rf_raddr_a_o,
+  output logic [4:0] fp_rf_raddr_b_o,
+  output logic [4:0] fp_rf_raddr_c_o,
+  //output logic fp_rf_ren_a_o,
+  //output logic fp_rf_ren_b_o,
+  //output logic fp_rf_ren_c_o,
+  output logic [4:0] fp_rf_waddr_o,
+  output logic fp_rf_we_o,
+  output fpnew_pkg::operation_e fp_alu_operator_o,
+  output logic fp_alu_op_mod_o,
+  output fpnew_pkg::fp_format_e fp_src_fmt_o,
+  output fpnew_pkg::fp_format_e fp_dst_fmt_o,
+  output logic fp_rm_dynamic_o,
+  output logic fp_flush_o,
+  output logic is_fp_instr_o,
+  output logic use_fp_rs1_o,
+  output logic use_fp_rs2_o,
+  output logic use_fp_rs3_o,
+  output logic use_fp_rd_o,
+  input logic fpu_busy_i,
+  input logic fp_rf_write_wb_i,
+  input logic [31:0] fp_rf_wdata_fwd_wb_i,
+  output logic [2:0][31:0] fp_operands_o,
+  output logic fp_load_o
+);
+
+  // Make the brq_pkg enums and constants visible unqualified in the module body.
+  import brq_pkg::*;
+ // Internal signal declarations of the ID stage. No logic lives here apart from the two
+ // read-enable pass-through assigns below.
+ // Decoder/Controller, ID stage internal signals
+ logic illegal_insn_dec;
+ logic ebrk_insn;
+ logic mret_insn_dec;
+ logic dret_insn_dec;
+ logic ecall_insn_dec;
+ logic wfi_insn_dec;
+ logic wb_exception;
+ logic branch_in_dec;
+ logic branch_spec, branch_set_spec;
+ logic branch_set, branch_set_d;
+ logic branch_not_set;
+ logic branch_taken;
+ logic jump_in_dec;
+ logic jump_set_dec;
+ logic jump_set;
+ logic instr_first_cycle;
+ logic instr_executing;
+ logic instr_done;
+ logic controller_run;
+ logic stall_ld_hz;
+ logic stall_mem;
+ logic stall_multdiv;
+ logic stall_branch;
+ logic stall_jump;
+ logic stall_id;
+ logic stall_wb;
+ logic flush_id;
+ logic multicycle_done;
+ // Immediate decoding and sign extension
+ logic [31:0] imm_i_type;
+ logic [31:0] imm_s_type;
+ logic [31:0] imm_b_type;
+ logic [31:0] imm_u_type;
+ logic [31:0] imm_j_type;
+ logic [31:0] zimm_rs1_type;
+ logic [31:0] imm_a; // contains the immediate for operand a
+ logic [31:0] imm_b; // contains the immediate for operand b
+ // Register file interface
+ rf_wd_sel_e rf_wdata_sel;
+ logic rf_we_dec, rf_we_raw;
+ logic rf_ren_a, rf_ren_b;
+ // The decoder's read enables are exported unchanged.
+ assign rf_ren_a_o = rf_ren_a;
+ assign rf_ren_b_o = rf_ren_b;
+ logic [31:0] rf_rdata_a_fwd;
+ logic [31:0] rf_rdata_b_fwd;
+ // ALU Control
+ alu_op_e alu_operator;
+ op_a_sel_e alu_op_a_mux_sel, alu_op_a_mux_sel_dec;
+ op_b_sel_e alu_op_b_mux_sel, alu_op_b_mux_sel_dec;
+ logic alu_multicycle_dec;
+ logic stall_alu;
+ logic [33:0] imd_val_q[2];
+ op_a_sel_e bt_a_mux_sel;
+ imm_b_sel_e bt_b_mux_sel;
+ imm_a_sel_e imm_a_mux_sel;
+ imm_b_sel_e imm_b_mux_sel, imm_b_mux_sel_dec;
+ // Multiplier Control
+ logic mult_en_id, mult_en_dec; // use integer multiplier
+ logic div_en_id, div_en_dec; // use integer division or remainder
+ logic multdiv_en_dec;
+ md_op_e multdiv_operator;
+ logic [1:0] multdiv_signed_mode;
+ // Data Memory Control
+ logic lsu_we;
+ logic [1:0] lsu_type;
+ logic lsu_sign_ext;
+ logic lsu_req, lsu_req_dec;
+ logic data_req_allowed;
+ // CSR control
+ logic csr_pipe_flush;
+ logic [31:0] alu_operand_a;
+ logic [31:0] alu_operand_b;
+ // Floating point
+ logic fp_swap_oprnds;
+ logic [31:0] fp_rf_rdata_a_fwd;
+ logic [31:0] fp_rf_rdata_b_fwd;
+ logic [31:0] fp_rf_rdata_c_fwd;
+ // NOTE(review): temp is not referenced anywhere else in the visible module body -
+ // confirm it can be removed.
+ logic [31:0] temp;
+ // Operands handed to the FPU (see the operand swapping block).
+ logic [31:0] fpu_op_a;
+ logic [31:0] fpu_op_b;
+ logic [31:0] fpu_op_c;
+ logic mv_instr;
+ // Result written back through the integer register file write port.
+ logic [31:0] result_wb;
+ /////////////
+ // LSU Mux //
+ /////////////
+ // Misaligned loads/stores result in two aligned loads/stores, compute second address
+ // While lsu_addr_incr_req_i is set the decoder's selections are overridden so that the ALU
+ // adds IMM_B_INCR_ADDR to the forwarded operand (OP_A_FWD).
+ assign alu_op_a_mux_sel = lsu_addr_incr_req_i ? OP_A_FWD : alu_op_a_mux_sel_dec;
+ assign alu_op_b_mux_sel = lsu_addr_incr_req_i ? OP_B_IMM : alu_op_b_mux_sel_dec;
+ assign imm_b_mux_sel = lsu_addr_incr_req_i ? IMM_B_INCR_ADDR : imm_b_mux_sel_dec;
+ ///////////////////
+ // Operand MUXES //
+ ///////////////////
+ // Main ALU immediate MUX for Operand A
+ // Only IMM_A_Z selects a non-zero value (zimm_rs1_type); every other selection yields 0.
+ assign imm_a = (imm_a_mux_sel == IMM_A_Z) ? zimm_rs1_type : '0;
+  // Main ALU MUX for Operand A
+  // Selects between the (forwarded) rs1 value, the last LSU address, the PC of the
+  // instruction in ID and the operand A immediate.
+  always_comb begin : alu_operand_a_mux
+    unique case (alu_op_a_mux_sel)
+      OP_A_REG_A:  alu_operand_a = rf_rdata_a_fwd;
+      OP_A_FWD:    alu_operand_a = lsu_addr_last_i;
+      OP_A_CURRPC: alu_operand_a = pc_id_i;
+      OP_A_IMM:    alu_operand_a = imm_a;
+      // Keep a default so alu_operand_a is assigned on every path: without it any
+      // selector value outside the four items above would infer a latch.
+      default:     alu_operand_a = pc_id_i;
+    endcase
+  end
+ // Operand muxes that depend on the BranchTargetALU parameter.
+ // With the branch target ALU, bt_a_operand_o/bt_b_operand_o carry the branch/jump target
+ // operands and the main ALU immediate mux has no IMM_B_B / IMM_B_J items (they fall into the
+ // default). Without it, the bt_* outputs are tied to zero and the main ALU immediate mux also
+ // selects the branch and jump immediates.
+ if (BranchTargetALU) begin : g_btalu_muxes
+ // Branch target ALU operand A mux
+ always_comb begin : bt_operand_a_mux
+ unique case (bt_a_mux_sel)
+ OP_A_REG_A: bt_a_operand_o = rf_rdata_a_fwd;
+ OP_A_CURRPC: bt_a_operand_o = pc_id_i;
+ default: bt_a_operand_o = pc_id_i;
+ endcase
+ end
+ // Branch target ALU operand B mux
+ always_comb begin : bt_immediate_b_mux
+ unique case (bt_b_mux_sel)
+ IMM_B_I: bt_b_operand_o = imm_i_type;
+ IMM_B_B: bt_b_operand_o = imm_b_type;
+ IMM_B_J: bt_b_operand_o = imm_j_type;
+ // PC increment: 2 bytes for a compressed instruction, 4 bytes otherwise
+ IMM_B_INCR_PC: bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4;
+ default: bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4;
+ endcase
+ end
+ // Reduced main ALU immediate MUX for Operand B
+ always_comb begin : immediate_b_mux
+ unique case (imm_b_mux_sel)
+ IMM_B_I: imm_b = imm_i_type;
+ IMM_B_S: imm_b = imm_s_type;
+ IMM_B_U: imm_b = imm_u_type;
+ IMM_B_INCR_PC: imm_b = instr_is_compressed_i ? 32'h2 : 32'h4;
+ IMM_B_INCR_ADDR: imm_b = 32'h4;
+ default: imm_b = 32'h4;
+ endcase
+ end
+ end else begin : g_nobtalu
+ // The branch target mux selects are only captured here so that they have a reader.
+ op_a_sel_e unused_a_mux_sel;
+ imm_b_sel_e unused_b_mux_sel;
+ assign unused_a_mux_sel = bt_a_mux_sel;
+ assign unused_b_mux_sel = bt_b_mux_sel;
+ assign bt_a_operand_o = '0;
+ assign bt_b_operand_o = '0;
+ // Full main ALU immediate MUX for Operand B
+ always_comb begin : immediate_b_mux
+ unique case (imm_b_mux_sel)
+ IMM_B_I: imm_b = imm_i_type;
+ IMM_B_S: imm_b = imm_s_type;
+ IMM_B_B: imm_b = imm_b_type;
+ IMM_B_U: imm_b = imm_u_type;
+ IMM_B_J: imm_b = imm_j_type;
+ IMM_B_INCR_PC: imm_b = instr_is_compressed_i ? 32'h2 : 32'h4;
+ IMM_B_INCR_ADDR: imm_b = 32'h4;
+ default: imm_b = 32'h4;
+ endcase
+ end
+ end
+ // ALU MUX for Operand B
+ // Operand B is the selected immediate for OP_B_IMM, otherwise the (forwarded) rs2 value.
+ assign alu_operand_b = (alu_op_b_mux_sel == OP_B_IMM) ? imm_b : rf_rdata_b_fwd;
+ /////////////////////////////////////////
+ // Multicycle Operation Stage Register //
+ /////////////////////////////////////////
+ // Two 34-bit registers owned by the ID stage on behalf of the EX stage: each one is cleared by
+ // the asynchronous reset and loads imd_val_d_ex_i[i] only while imd_val_we_ex_i[i] is set.
+ for (genvar i=0; i<2; i++) begin : gen_intermediate_val_reg
+ always_ff @(posedge clk_i or negedge rst_ni) begin : intermediate_val_reg
+ if (!rst_ni) begin
+ imd_val_q[i] <= '0;
+ end else if (imd_val_we_ex_i[i]) begin
+ imd_val_q[i] <= imd_val_d_ex_i[i];
+ end
+ end
+ end
+ // The registered values are handed back to the EX stage unchanged.
+ assign imd_val_q_ex_o = imd_val_q;
+ /////////////
+ // Decoder //
+ /////////////
+ // Instance of the instruction decoder. Only RV32E, RV32M, RV32B and BranchTargetALU are
+ // forwarded as parameters; the RVF parameter of this module is not passed on.
+ // Several decoder outputs are wired straight to ports of this module (register file
+ // addresses, mult_sel/div_sel, CSR access/op and the floating point controls).
+ brq_idu_decoder #(
+ .RV32E ( RV32E ),
+ .RV32M ( RV32M ),
+ .RV32B ( RV32B ),
+ .BranchTargetALU ( BranchTargetALU )
+ ) decoder_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ // controller
+ .illegal_insn_o ( illegal_insn_dec ),
+ .ebrk_insn_o ( ebrk_insn ),
+ .mret_insn_o ( mret_insn_dec ),
+ .dret_insn_o ( dret_insn_dec ),
+ .ecall_insn_o ( ecall_insn_dec ),
+ .wfi_insn_o ( wfi_insn_dec ),
+ .jump_set_o ( jump_set_dec ),
+ .branch_taken_i ( branch_taken ),
+ .icache_inval_o ( icache_inval_o ),
+ // from IF-ID pipeline register
+ .instr_first_cycle_i ( instr_first_cycle ),
+ .instr_rdata_i ( instr_rdata_i ),
+ .instr_rdata_alu_i ( instr_rdata_alu_i ),
+ .illegal_c_insn_i ( illegal_c_insn_i ),
+ // immediates
+ .imm_a_mux_sel_o ( imm_a_mux_sel ),
+ .imm_b_mux_sel_o ( imm_b_mux_sel_dec ),
+ .bt_a_mux_sel_o ( bt_a_mux_sel ),
+ .bt_b_mux_sel_o ( bt_b_mux_sel ),
+ .imm_i_type_o ( imm_i_type ),
+ .imm_s_type_o ( imm_s_type ),
+ .imm_b_type_o ( imm_b_type ),
+ .imm_u_type_o ( imm_u_type ),
+ .imm_j_type_o ( imm_j_type ),
+ .zimm_rs1_type_o ( zimm_rs1_type ),
+ // register file
+ .rf_wdata_sel_o ( rf_wdata_sel ),
+ .rf_we_o ( rf_we_dec ),
+ .rf_raddr_a_o ( rf_raddr_a_o ),
+ .rf_raddr_b_o ( rf_raddr_b_o ),
+ .rf_waddr_o ( rf_waddr_id_o ),
+ .rf_ren_a_o ( rf_ren_a ),
+ .rf_ren_b_o ( rf_ren_b ),
+ // ALU
+ .alu_operator_o ( alu_operator ),
+ .alu_op_a_mux_sel_o ( alu_op_a_mux_sel_dec ),
+ .alu_op_b_mux_sel_o ( alu_op_b_mux_sel_dec ),
+ .alu_multicycle_o ( alu_multicycle_dec ),
+ // MULT & DIV
+ .mult_en_o ( mult_en_dec ),
+ .div_en_o ( div_en_dec ),
+ .mult_sel_o ( mult_sel_ex_o ),
+ .div_sel_o ( div_sel_ex_o ),
+ .multdiv_operator_o ( multdiv_operator ),
+ .multdiv_signed_mode_o ( multdiv_signed_mode ),
+ // CSRs
+ .csr_access_o ( csr_access_o ),
+ .csr_op_o ( csr_op_o ),
+ // LSU
+ .data_req_o ( lsu_req_dec ),
+ .data_we_o ( lsu_we ),
+ .data_type_o ( lsu_type ),
+ .data_sign_extension_o ( lsu_sign_ext ),
+ // jump/branches
+ .jump_in_dec_o ( jump_in_dec ),
+ .branch_in_dec_o ( branch_in_dec ),
+ // Floating point extensions IO
+ .fp_rounding_mode_o ( fp_rounding_mode_o ), // defines the rounding mode
+ .fp_rf_raddr_a_o ( fp_rf_raddr_a_o ),
+ .fp_rf_raddr_b_o ( fp_rf_raddr_b_o ),
+ .fp_rf_raddr_c_o ( fp_rf_raddr_c_o ),
+ .fp_rf_waddr_o ( fp_rf_waddr_o ),
+ .fp_rf_we_o ( fp_rf_we_o ),
+ .fp_alu_operator_o ( fp_alu_operator_o ),
+ .fp_alu_op_mod_o ( fp_alu_op_mod_o ),
+ .fp_src_fmt_o ( fp_src_fmt_o ),
+ .fp_dst_fmt_o ( fp_dst_fmt_o ),
+ .fp_rm_dynamic_o ( fp_rm_dynamic_o ),
+ .is_fp_instr_o ( is_fp_instr_o ),
+ .use_fp_rs1_o ( use_fp_rs1_o ),
+ .use_fp_rs2_o ( use_fp_rs2_o ),
+ .use_fp_rs3_o ( use_fp_rs3_o ),
+ .use_fp_rd_o ( use_fp_rd_o ),
+ .fp_swap_oprnds_o ( fp_swap_oprnds ),
+ .fp_load_o ( fp_load_o ),
+ .mv_instr_o ( mv_instr )
+ );
+// Disabled: earlier continuous-assign form of the FPU operand selection (the active version is
+// the always_comb block labelled "swapping").
+// assign fpu_op_a = use_fp_rs1_o ? fp_rf_rdata_a_fwd : rf_rdata_a_fwd;
+// assign fpu_op_b = use_fp_rs2_o ? fp_rf_rdata_b_fwd : rf_rdata_b_fwd;
+// assign fpu_op_c = fp_rf_rdata_c_fwd;
+  ///////////////////////
+  // Register File MUX //
+  ///////////////////////
+  // Suppress register write if there is an illegal CSR access or instruction is not executing
+  assign rf_we_id_o = rf_we_raw & instr_executing & ~illegal_csr_insn_i;
+
+  // Register file write data mux
+  // result_wb is the EX result, or FPU operand A for a move instruction (mv_instr).
+  always_comb begin : rf_wdata_id_mux
+    unique case (rf_wdata_sel)
+      RF_WD_EX:  rf_wdata_id_o = result_wb;
+      RF_WD_CSR: rf_wdata_id_o = csr_rdata_i;
+      // Keep a default so rf_wdata_id_o is assigned on every path (no latch for a
+      // selector value outside the two items above).
+      default:   rf_wdata_id_o = result_wb;
+    endcase
+  end
+  //////////////////////////////////
+  // CSR-related pipeline flushes //
+  //////////////////////////////////
+  // Certain CSR modifications must be followed by a pipeline flush so that the controller can
+  // react to them:
+  // - Writing or setting MSTATUS / MIE: newly enabled interrupts only become visible to the
+  //   controller in the next cycle. If the core disabled interrupts again during that cycle the
+  //   pending IRQ would never be seen, hence the flush.
+  // - Any other non-read operation on a debug CSR (DCSR, DPC, DSCRATCH0/1)
+  //   - TODO: Check if this is really needed
+  // The CSR address is taken from instruction bits [31:20].
+  always_comb begin : csr_pipeline_flushes
+    csr_pipe_flush = 1'b0;
+
+    if (csr_op_en_o == 1'b1) begin
+      if (csr_op_o == CSR_OP_WRITE || csr_op_o == CSR_OP_SET) begin
+        // Interrupt enable related CSRs
+        if (csr_num_e'(instr_rdata_i[31:20]) inside {CSR_MSTATUS, CSR_MIE}) begin
+          csr_pipe_flush = 1'b1;
+        end
+      end else if (csr_op_o != CSR_OP_READ) begin
+        // Debug CSRs
+        if (csr_num_e'(instr_rdata_i[31:20]) inside
+            {CSR_DCSR, CSR_DPC, CSR_DSCRATCH0, CSR_DSCRATCH1}) begin
+          csr_pipe_flush = 1'b1;
+        end
+      end
+    end
+  end
+ ////////////////
+ // Controller //
+ ////////////////
+ // An illegal instruction is only reported while the instruction in ID is valid; it combines
+ // the decoder's verdict with the illegal CSR access indication.
+ assign illegal_insn_o = instr_valid_i & (illegal_insn_dec | illegal_csr_insn_i);
+ // Instance of the main controller. It receives the decoded special instructions, the
+ // branch/jump set signals and the stall requests computed in this module, and drives the
+ // fetch control, exception/CSR save-restore and debug outputs of the ID stage.
+ brq_idu_controller #(
+ .WritebackStage ( WritebackStage ),
+ .BranchPredictor ( BranchPredictor )
+ ) controller_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .ctrl_busy_o ( ctrl_busy_o ),
+ // decoder related signals
+ .illegal_insn_i ( illegal_insn_o ),
+ .ecall_insn_i ( ecall_insn_dec ),
+ .mret_insn_i ( mret_insn_dec ),
+ .dret_insn_i ( dret_insn_dec ),
+ .wfi_insn_i ( wfi_insn_dec ),
+ .ebrk_insn_i ( ebrk_insn ),
+ .csr_pipe_flush_i ( csr_pipe_flush ),
+ // from IF-ID pipeline
+ .instr_valid_i ( instr_valid_i ),
+ .instr_i ( instr_rdata_i ),
+ .instr_compressed_i ( instr_rdata_c_i ),
+ .instr_is_compressed_i ( instr_is_compressed_i ),
+ .instr_bp_taken_i ( instr_bp_taken_i ),
+ .instr_fetch_err_i ( instr_fetch_err_i ),
+ .instr_fetch_err_plus2_i ( instr_fetch_err_plus2_i ),
+ .pc_id_i ( pc_id_i ),
+ // to IF-ID pipeline
+ .instr_valid_clear_o ( instr_valid_clear_o ),
+ .id_in_ready_o ( id_in_ready_o ),
+ .controller_run_o ( controller_run ),
+ // to prefetcher
+ .instr_req_o ( instr_req_o ),
+ .pc_set_o ( pc_set_o ),
+ .pc_set_spec_o ( pc_set_spec_o ),
+ .pc_mux_o ( pc_mux_o ),
+ .nt_branch_mispredict_o ( nt_branch_mispredict_o ),
+ .exc_pc_mux_o ( exc_pc_mux_o ),
+ .exc_cause_o ( exc_cause_o ),
+ // LSU
+ .lsu_addr_last_i ( lsu_addr_last_i ),
+ .load_err_i ( lsu_load_err_i ),
+ .store_err_i ( lsu_store_err_i ),
+ .wb_exception_o ( wb_exception ),
+ // jump/branch control
+ .branch_set_i ( branch_set ),
+ .branch_set_spec_i ( branch_set_spec ),
+ .branch_not_set_i ( branch_not_set ),
+ .jump_set_i ( jump_set ),
+ // interrupt signals
+ .csr_mstatus_mie_i ( csr_mstatus_mie_i ),
+ .irq_pending_i ( irq_pending_i ),
+ .irqs_i ( irqs_i ),
+ .irq_nm_i ( irq_nm_i ),
+ .nmi_mode_o ( nmi_mode_o ),
+ // CSR Controller Signals
+ .csr_save_if_o ( csr_save_if_o ),
+ .csr_save_id_o ( csr_save_id_o ),
+ .csr_save_wb_o ( csr_save_wb_o ),
+ .csr_restore_mret_id_o ( csr_restore_mret_id_o ),
+ .csr_restore_dret_id_o ( csr_restore_dret_id_o ),
+ .csr_save_cause_o ( csr_save_cause_o ),
+ .csr_mtval_o ( csr_mtval_o ),
+ .priv_mode_i ( priv_mode_i ),
+ .csr_mstatus_tw_i ( csr_mstatus_tw_i ),
+ // Debug Signal
+ .debug_mode_o ( debug_mode_o ),
+ .debug_cause_o ( debug_cause_o ),
+ .debug_csr_save_o ( debug_csr_save_o ),
+ .debug_req_i ( debug_req_i ),
+ .debug_single_step_i ( debug_single_step_i ),
+ .debug_ebreakm_i ( debug_ebreakm_i ),
+ .debug_ebreaku_i ( debug_ebreaku_i ),
+ .trigger_match_i ( trigger_match_i ),
+ // Stall and flush handshake with the ID/EX logic of this module
+ .stall_id_i ( stall_id ),
+ .stall_wb_i ( stall_wb ),
+ .flush_id_o ( flush_id ),
+ .ready_wb_i ( ready_wb_i ),
+ // Performance Counters
+ .perf_jump_o ( perf_jump_o ),
+ .perf_tbranch_o ( perf_tbranch_o ),
+ .fpu_busy_i ( fpu_busy_i )
+ );
+ // Outputs towards the FPU, LSU, CSR block and EX stage.
+ // The FPU is flushed whenever the controller flushes the ID stage.
+ assign fp_flush_o = flush_id;
+ assign multdiv_en_dec = mult_en_dec | div_en_dec;
+ // Requests and enables are only raised while the instruction is actually executing.
+ assign lsu_req = instr_executing ? data_req_allowed & lsu_req_dec : 1'b0;
+ assign mult_en_id = instr_executing ? mult_en_dec : 1'b0;
+ assign div_en_id = instr_executing ? div_en_dec : 1'b0;
+ assign lsu_req_o = lsu_req;
+ assign lsu_we_o = lsu_we;
+ assign lsu_type_o = lsu_type;
+ assign lsu_sign_ext_o = lsu_sign_ext;
+ // Store data is FPU operand B: the FP rs2 value when use_fp_rs2_o is set, the integer rs2
+ // value otherwise.
+ assign lsu_wdata_o = fpu_op_b; //rf_rdata_b_fwd;
+ // csr_op_en_o is set when CSR access should actually happen.
+ // csr_access_o is set when CSR access instruction is present and is used to compute whether a CSR
+ // access is illegal. A combinational loop would be created if csr_op_en_o was used alone (as
+ // asserting it for an illegal csr access would result in a flush that would need to deassert it).
+ assign csr_op_en_o = csr_access_o & instr_executing & instr_id_done_o;
+ assign alu_operator_ex_o = alu_operator;
+ assign alu_operand_a_ex_o = alu_operand_a;
+ assign alu_operand_b_ex_o = alu_operand_b;
+ assign mult_en_ex_o = mult_en_id;
+ assign div_en_ex_o = div_en_id;
+ assign multdiv_operator_ex_o = multdiv_operator;
+ assign multdiv_signed_mode_ex_o = multdiv_signed_mode;
+ // The multiplier/divider always works on the (forwarded) integer register values.
+ assign multdiv_operand_a_ex_o = rf_rdata_a_fwd;
+ assign multdiv_operand_b_ex_o = rf_rdata_b_fwd;
+ ////////////////////////
+ // Branch set control //
+ ////////////////////////
+ // Generates branch_set / branch_set_spec for the controller from branch_set_d / branch_spec,
+ // either directly or through a one-cycle register, depending on the BranchTargetALU and
+ // DataIndTiming parameters.
+ if (BranchTargetALU && !DataIndTiming) begin : g_branch_set_direct
+ // Branch set fed straight to controller with branch target ALU
+ // (condition pass/fail used same cycle as generated instruction request)
+ assign branch_set = branch_set_d;
+ assign branch_set_spec = branch_spec;
+ end else begin : g_branch_set_flop
+ // Branch set flopped without branch target ALU, or in fixed time execution mode
+ // (condition pass/fail used next cycle where branch target is calculated)
+ logic branch_set_q;
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ branch_set_q <= 1'b0;
+ end else begin
+ branch_set_q <= branch_set_d;
+ end
+ end
+ // Branches always take two cycles in fixed time execution mode, with or without the branch
+ // target ALU (to avoid a path from the branch decision into the branch target ALU operand
+ // muxing).
+ // The unregistered value is still used when the branch target ALU is present and the runtime
+ // data_ind_timing_i input is clear.
+ assign branch_set = (BranchTargetALU && !data_ind_timing_i) ? branch_set_d : branch_set_q;
+ // Use the speculative branch signal when BTALU is enabled
+ assign branch_set_spec = (BranchTargetALU && !data_ind_timing_i) ? branch_spec : branch_set_q;
+ end
+ // Branch condition is calculated in the first cycle and flopped for use in the second cycle
+ // (only used in fixed time execution mode to determine branch destination).
+ if (DataIndTiming) begin : g_sec_branch_taken
+ logic branch_taken_q;
+ // Registers branch_decision_i every cycle; cleared by the asynchronous reset.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ branch_taken_q <= 1'b0;
+ end else begin
+ branch_taken_q <= branch_decision_i;
+ end
+ end
+ // With data_ind_timing_i clear the branch is always reported as taken; otherwise the
+ // registered decision is used.
+ assign branch_taken = ~data_ind_timing_i | branch_taken_q;
+ end else begin : g_nosec_branch_taken
+ // Signal unused without fixed time execution mode - only taken branches will trigger branch_set
+ assign branch_taken = 1'b1;
+ end
+ // Holding branch_set/jump_set high for more than one cycle should not cause a functional issue.
+ // However it could generate needless prefetch buffer flushes and instruction fetches. The ID/EX
+ // design ensures that this never happens for non-predicted branches.
+ ///////////////
+ // ID-EX FSM //
+ ///////////////
+ // Two-state FSM: FIRST_CYCLE is the reset state, MULTI_CYCLE is entered by instructions that
+ // need more than one cycle. id_fsm_d is computed by the combinational block that follows.
+ typedef enum logic { FIRST_CYCLE, MULTI_CYCLE } id_fsm_e;
+ id_fsm_e id_fsm_q, id_fsm_d;
+ // State register with asynchronous active-low reset
+ always_ff @(posedge clk_i or negedge rst_ni) begin : id_pipeline_reg
+ if (!rst_ni) begin
+ id_fsm_q <= FIRST_CYCLE;
+ end else begin
+ id_fsm_q <= id_fsm_d;
+ end
+ end
+  // ID/EX stage can be in two states, FIRST_CYCLE and MULTI_CYCLE. An instruction enters
+  // MULTI_CYCLE if it requires multiple cycles to complete regardless of stalls and other
+  // considerations. An instruction may be held in FIRST_CYCLE if it's unable to begin executing
+  // (this is controlled by instr_executing).
+  //
+  // Besides the next state this block produces the per-instruction stall requests
+  // (stall_multdiv/jump/branch/alu), the raw register write enable and the branch/jump set
+  // signals. Every output gets a default value first so that no path leaves it unassigned.
+  always_comb begin
+    id_fsm_d       = id_fsm_q;
+    rf_we_raw      = rf_we_dec;
+    stall_multdiv  = 1'b0;
+    stall_jump     = 1'b0;
+    stall_branch   = 1'b0;
+    stall_alu      = 1'b0;
+    branch_set_d   = 1'b0;
+    branch_spec    = 1'b0;
+    branch_not_set = 1'b0;
+    jump_set       = 1'b0;
+    perf_branch_o  = 1'b0;
+
+    if (instr_executing) begin
+      unique case (id_fsm_q)
+        FIRST_CYCLE: begin
+          unique case (1'b1)
+            lsu_req_dec: begin
+              if (!WritebackStage) begin
+                // LSU operation
+                id_fsm_d = MULTI_CYCLE;
+              end else begin
+                // With a writeback stage only stay in ID while the request is not yet done
+                if (~lsu_req_done_i) begin
+                  id_fsm_d = MULTI_CYCLE;
+                end
+              end
+            end
+            multdiv_en_dec: begin
+              // MUL or DIV operation
+              if (~ex_valid_i) begin
+                // When single-cycle multiply is configured mul can finish in the first cycle so
+                // only enter MULTI_CYCLE state if a result isn't immediately available
+                id_fsm_d      = MULTI_CYCLE;
+                rf_we_raw     = 1'b0;
+                stall_multdiv = 1'b1;
+              end
+            end
+            branch_in_dec: begin
+              // cond branch operation
+              // All branches take two cycles in fixed time execution mode, regardless of branch
+              // condition.
+              // Both arms of the selection are spelled out: a second cycle is needed in fixed
+              // time mode, or for a taken branch when there is no branch target ALU. This is
+              // the same condition that raises stall_branch below.
+              id_fsm_d = (data_ind_timing_i || (!BranchTargetALU && branch_decision_i)) ?
+                         MULTI_CYCLE : FIRST_CYCLE;
+              stall_branch = (~BranchTargetALU & branch_decision_i) | data_ind_timing_i;
+              branch_set_d = branch_decision_i | data_ind_timing_i;
+              if (BranchPredictor) begin
+                branch_not_set = ~branch_decision_i;
+              end
+              // Speculative branch (excludes branch_decision_i)
+              branch_spec   = SpecBranch ? 1'b1 : branch_decision_i;
+              perf_branch_o = 1'b1;
+            end
+            jump_in_dec: begin
+              // uncond branch operation
+              // BTALU means jumps only need one cycle
+              id_fsm_d   = BranchTargetALU ? FIRST_CYCLE : MULTI_CYCLE;
+              stall_jump = ~BranchTargetALU;
+              jump_set   = jump_set_dec;
+            end
+            alu_multicycle_dec: begin
+              stall_alu = 1'b1;
+              id_fsm_d  = MULTI_CYCLE;
+              rf_we_raw = 1'b0;
+            end
+            default: begin
+              id_fsm_d = FIRST_CYCLE;
+            end
+          endcase
+        end
+        MULTI_CYCLE: begin
+          // A multiply/divide result may only be written once EX reports it valid
+          if (multdiv_en_dec) begin
+            rf_we_raw = rf_we_dec & ex_valid_i;
+          end
+          if (multicycle_done & ready_wb_i) begin
+            id_fsm_d = FIRST_CYCLE;
+          end else begin
+            stall_multdiv = multdiv_en_dec;
+            stall_branch  = branch_in_dec;
+            stall_jump    = jump_in_dec;
+          end
+        end
+        // Fall back to FIRST_CYCLE for any state value outside the enumeration
+        default: begin
+          id_fsm_d = FIRST_CYCLE;
+        end
+      endcase
+    end
+  end
+ // Note for the two-stage configuration ready_wb_i is always set
+ assign multdiv_ready_id_o = ready_wb_i;
+ // Stall ID/EX stage for reason that relates to instruction in ID/EX
+ assign stall_id = stall_ld_hz | stall_mem | stall_multdiv | stall_jump | stall_branch |
+ stall_alu;
+ // The instruction completes in ID when it is executing and is neither stalled nor flushed.
+ assign instr_done = ~stall_id & ~flush_id & instr_executing;
+ // Signal instruction in ID is in its first cycle. It can remain in its
+ // first cycle if it is stalled.
+ assign instr_first_cycle = instr_valid_i & (id_fsm_q == FIRST_CYCLE);
+ // Used by RVFI to know when to capture register read data
+ // Used by ALU to access RS3 if ternary instruction.
+ assign instr_first_cycle_id_o = instr_first_cycle;
+  // Memory stall, load hazard and forwarding logic. The implementation depends on whether the
+  // core is built with a writeback stage (gen_stall_mem) or without one (gen_no_stall_mem).
+  if (WritebackStage) begin : gen_stall_mem
+    // Register read address matches write address in WB
+    logic rf_rd_a_wb_match;
+    logic rf_rd_b_wb_match;
+    logic fp_rf_rd_a_wb_match;
+    logic fp_rf_rd_b_wb_match;
+    logic fp_rf_rd_c_wb_match;
+    // Hazard between registers being read and written
+    logic rf_rd_a_hz;
+    logic rf_rd_b_hz;
+    logic rf_rd_c_hz;
+
+    logic outstanding_memory_access;
+    logic instr_kill;
+
+    assign multicycle_done = lsu_req_dec ? ~stall_mem : ex_valid_i;
+
+    // Is a memory access ongoing that isn't finishing this cycle
+    assign outstanding_memory_access = (outstanding_load_wb_i | outstanding_store_wb_i) &
+                                       ~lsu_resp_valid_i;
+
+    // Can start a new memory access if any previous one has finished or is finishing
+    assign data_req_allowed = ~outstanding_memory_access;
+
+    // Instruction won't execute because:
+    // - There is a pending exception in writeback
+    //   The instruction in ID/EX will be flushed and the core will jump to an exception handler
+    // - The controller isn't running instructions
+    //   This either happens in preparation for a flush and jump to an exception handler e.g. in
+    //   response to an IRQ or debug request or whilst the core is sleeping or resetting/fetching
+    //   first instruction in which case any valid instruction in ID/EX should be ignored.
+    // - There was an error on instruction fetch
+    assign instr_kill = instr_fetch_err_i |
+                        wb_exception      |
+                        ~controller_run;
+
+    // With writeback stage instructions must be prevented from executing if there is:
+    // - A load hazard
+    // - A pending memory access
+    //   If it receives an error response this results in a precise exception from WB so ID/EX
+    //   instruction must not execute until error response is known).
+    // - A load/store error
+    //   This will cause a precise exception for the instruction in WB so ID/EX instruction must not
+    //   execute
+    assign instr_executing = instr_valid_i              &
+                             ~instr_kill                &
+                             ~stall_ld_hz               &
+                             ~outstanding_memory_access;
+
+    // Stall for reasons related to memory:
+    // * There is an outstanding memory access that won't resolve this cycle (need to wait to allow
+    //   precise exceptions)
+    // * There is a load/store request not being granted or which is unaligned and waiting to issue
+    //   a second request (needs to stay in ID for the address calculation)
+    assign stall_mem = instr_valid_i &
+                       (outstanding_memory_access | (lsu_req_dec & ~lsu_req_done_i));
+
+    // If we stall a load in ID for any reason, it must not make an LSU request
+    // (otherwise we might issue two requests for the same instruction)
+
+    // Integer register matches ignore the zero register.
+    assign rf_rd_a_wb_match = (rf_waddr_wb_i == rf_raddr_a_o) & |rf_raddr_a_o;
+    assign rf_rd_b_wb_match = (rf_waddr_wb_i == rf_raddr_b_o) & |rf_raddr_b_o;
+    // Floating point matches are made on the floating point read addresses, i.e. on the
+    // addresses the fp_rf_rdata_*_i values belong to, and for every register number: f0 is an
+    // ordinary register, so the zero-register exemption does not apply.
+    assign fp_rf_rd_a_wb_match = (rf_waddr_wb_i == fp_rf_raddr_a_o);
+    assign fp_rf_rd_b_wb_match = (rf_waddr_wb_i == fp_rf_raddr_b_o);
+    assign fp_rf_rd_c_wb_match = (rf_waddr_wb_i == fp_rf_raddr_c_o);
+
+    assign rf_rd_a_wb_match_o = rf_rd_a_wb_match;
+    assign rf_rd_b_wb_match_o = rf_rd_b_wb_match;
+
+    // If instruction is reading register that load will be writing stall in
+    // ID until load is complete. No need to stall when reading the integer zero register.
+    // The rs3 hazard uses the rs3 address match (it used the rs2 match before, so a load into
+    // the rs3 register of a three-operand instruction was not detected).
+    // NOTE(review): the writeback address is shared by both register files and this block cannot
+    // tell which file an outstanding load targets, so a match on the register number is treated
+    // as a hazard for either file - confirm against the writeback stage.
+    assign rf_rd_a_hz = (rf_rd_a_wb_match & rf_ren_a) | (fp_rf_rd_a_wb_match & use_fp_rs1_o);
+    assign rf_rd_b_hz = (rf_rd_b_wb_match & rf_ren_b) | (fp_rf_rd_b_wb_match & use_fp_rs2_o);
+    assign rf_rd_c_hz = fp_rf_rd_c_wb_match & use_fp_rs3_o;
+
+    // If instruction is read register that writeback is writing forward writeback data to read
+    // data. Note this doesn't factor in load data as it arrives too late, such hazards are
+    // resolved via a stall (see above).
+    assign rf_rdata_a_fwd = rf_rd_a_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_a_i;
+    assign rf_rdata_b_fwd = rf_rd_b_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_b_i;
+
+    // forwarding for floating point unit
+    assign fp_rf_rdata_a_fwd = fp_rf_rd_a_wb_match & fp_rf_write_wb_i ? fp_rf_wdata_fwd_wb_i :
+                                                                          fp_rf_rdata_a_i;
+    assign fp_rf_rdata_b_fwd = fp_rf_rd_b_wb_match & fp_rf_write_wb_i ? fp_rf_wdata_fwd_wb_i :
+                                                                          fp_rf_rdata_b_i;
+    assign fp_rf_rdata_c_fwd = fp_rf_rd_c_wb_match & fp_rf_write_wb_i ? fp_rf_wdata_fwd_wb_i :
+                                                                          fp_rf_rdata_c_i;
+
+    assign stall_ld_hz = outstanding_load_wb_i & (rf_rd_a_hz | rf_rd_b_hz | rf_rd_c_hz);
+
+    // Instruction class passed to writeback: anything that is not an LSU request is "other",
+    // LSU requests are stores when lsu_we is set and loads otherwise.
+    assign instr_type_wb_o = ~lsu_req_dec ? WB_INSTR_OTHER :
+                             lsu_we       ? WB_INSTR_STORE :
+                                            WB_INSTR_LOAD;
+
+    assign instr_id_done_o = en_wb_o & ready_wb_i;
+
+    // Stall ID/EX as instruction in ID/EX cannot proceed to writeback yet
+    assign stall_wb = en_wb_o & ~ready_wb_i;
+
+    assign perf_dside_wait_o = instr_valid_i & ~instr_kill &
+                               (outstanding_memory_access | stall_ld_hz);
+  end else begin : gen_no_stall_mem
+
+    assign multicycle_done = lsu_req_dec ? lsu_resp_valid_i : ex_valid_i;
+
+    assign data_req_allowed = instr_first_cycle;
+
+    // Without Writeback Stage always stall the first cycle of a load/store.
+    // Then stall until it is complete
+    assign stall_mem = instr_valid_i & (lsu_req_dec & (~lsu_resp_valid_i | instr_first_cycle));
+
+    // No load hazards without Writeback Stage
+    assign stall_ld_hz = 1'b0;
+
+    // Without writeback stage any valid instruction that hasn't seen an error will execute
+    assign instr_executing = instr_valid_i & ~instr_fetch_err_i & controller_run;
+
+    // No data forwarding without writeback stage so always take source register data direct from
+    // register file
+    assign rf_rdata_a_fwd = rf_rdata_a_i;
+    assign rf_rdata_b_fwd = rf_rdata_b_i;
+
+    assign fp_rf_rdata_a_fwd = fp_rf_rdata_a_i;
+    assign fp_rf_rdata_b_fwd = fp_rf_rdata_b_i;
+    assign fp_rf_rdata_c_fwd = fp_rf_rdata_c_i;
+
+    assign rf_rd_a_wb_match_o = 1'b0;
+    assign rf_rd_b_wb_match_o = 1'b0;
+
+    // Unused Writeback stage only IO & wiring
+    // Assign inputs and internal wiring to unused signals to satisfy lint checks
+    // Tie-off outputs to constant values
+    logic        unused_data_req_done_ex;
+    logic [4:0]  unused_rf_waddr_wb;
+    logic        unused_rf_write_wb;
+    logic        unused_outstanding_load_wb;
+    logic        unused_outstanding_store_wb;
+    logic        unused_wb_exception;
+    logic [31:0] unused_rf_wdata_fwd_wb;
+
+    assign unused_data_req_done_ex     = lsu_req_done_i;
+    assign unused_rf_waddr_wb          = rf_waddr_wb_i;
+    assign unused_rf_write_wb          = rf_write_wb_i;
+    assign unused_outstanding_load_wb  = outstanding_load_wb_i;
+    assign unused_outstanding_store_wb = outstanding_store_wb_i;
+    assign unused_wb_exception         = wb_exception;
+    assign unused_rf_wdata_fwd_wb      = rf_wdata_fwd_wb_i;
+
+    assign instr_type_wb_o = WB_INSTR_OTHER;
+    assign stall_wb        = 1'b0;
+
+    assign perf_dside_wait_o = instr_executing & lsu_req_dec & ~lsu_resp_valid_i;
+
+    assign instr_id_done_o = instr_done;
+  end
+  /* FPU operand selection and operand swap */
+  // Operands A and B come from the floating point register file when the decoder flags the
+  // source as a floating point register, from the integer register file otherwise.
+  // Operand C is the third floating point source unless the decoder requests a swap, in which
+  // case operand A is presented in the third slot as well.
+  always_comb begin : swapping
+    fpu_op_a = use_fp_rs1_o ? fp_rf_rdata_a_fwd : rf_rdata_a_fwd;
+    fpu_op_b = use_fp_rs2_o ? fp_rf_rdata_b_fwd : rf_rdata_b_fwd;
+
+    // Default to the rs3 value, overridden when swapping
+    fpu_op_c = fp_rf_rdata_c_fwd;
+    if (fp_swap_oprnds) begin
+      fpu_op_c = fpu_op_a;
+    end
+
+    // Slot 0 carries operand A, slot 1 operand B and slot 2 operand C
+    fp_operands_o[0] = fpu_op_a;
+    fp_operands_o[1] = fpu_op_b;
+    fp_operands_o[2] = fpu_op_c;
+  end
+  // Value written back through the integer register file: FPU operand A for a move
+  // instruction, the EX stage result otherwise.
+  assign result_wb = mv_instr ? fpu_op_a : result_ex_i;
+
+  // Signal which instructions to count as retired in minstret, all traps along with ebrk and
+  // ecall instructions are not counted.
+  assign instr_perf_count_id_o = ~ebrk_insn & ~ecall_insn_dec & ~illegal_insn_dec &
+                                 ~illegal_csr_insn_i & ~instr_fetch_err_i;
+
+  // An instruction is ready to move to the writeback stage (or retire if there is no writeback
+  // stage)
+  assign en_wb_o = instr_done;
+
+  // Multiplier/divider wait cycles for the performance counters
+  assign perf_mul_wait_o = stall_multdiv & mult_en_dec;
+  assign perf_div_wait_o = stall_multdiv & div_en_dec;
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..c35f470
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,815 @@
+/**
+ * Main controller of the processor
+ */
+module brq_idu_controller #(
+    // WritebackStage : selects the exception prioritisation and epc source used when a
+    //                  separate writeback stage exists.
+    // BranchPredictor: when set, pc_set_o/pc_set_spec_o are suppressed for branches the
+    //                  predictor already took, and not-taken mispredicts are signalled.
+    parameter bit WritebackStage  = 0,
+    parameter bit BranchPredictor = 0
+  ) (
+    input  logic                  clk_i,
+    input  logic                  rst_ni,                  // asynchronous reset, active low
+    output logic                  ctrl_busy_o,             // core is busy processing instrs
+    // decoder related signals
+    input  logic                  illegal_insn_i,          // decoder has an invalid instr
+    input  logic                  ecall_insn_i,            // decoder has ECALL instr
+    input  logic                  mret_insn_i,             // decoder has MRET instr
+    input  logic                  dret_insn_i,             // decoder has DRET instr
+    input  logic                  wfi_insn_i,              // decoder has WFI instr
+    input  logic                  ebrk_insn_i,             // decoder has EBREAK instr
+    input  logic                  csr_pipe_flush_i,        // do CSR-related pipeline flush
+    // instr from IF-ID pipeline stage
+    input  logic                  instr_valid_i,           // instr is valid
+    input  logic [31:0]           instr_i,                 // uncompressed instr data for mtval
+    input  logic [15:0]           instr_compressed_i,      // instr compressed data for mtval
+    input  logic                  instr_is_compressed_i,   // instr is compressed
+    input  logic                  instr_bp_taken_i,        // instr was predicted taken branch
+    input  logic                  instr_fetch_err_i,       // instr has error
+    input  logic                  instr_fetch_err_plus2_i, // instr error is x32
+    input  logic [31:0]           pc_id_i,                 // instr address
+    // to IF-ID pipeline stage
+    output logic                  instr_valid_clear_o,     // kill instr in IF-ID reg
+    output logic                  id_in_ready_o,           // ID stage is ready for new instr
+    output logic                  controller_run_o,        // Controller is in standard instruction
+                                                           // run mode
+    // to prefetcher
+    output logic                  instr_req_o,             // start fetching instructions
+    output logic                  pc_set_o,                // jump to address set by pc_mux
+    output logic                  pc_set_spec_o,           // speculative branch
+    output brq_pkg::pc_sel_e      pc_mux_o,                // IF stage fetch address selector
+                                                           // (boot, normal, exception...)
+    output logic                  nt_branch_mispredict_o,  // Not-taken branch in ID/EX was
+                                                           // mispredicted (predicted taken)
+    output brq_pkg::exc_pc_sel_e  exc_pc_mux_o,            // IF stage selector for exception PC
+    output brq_pkg::exc_cause_e   exc_cause_o,             // for IF stage, CSRs
+    // LSU
+    input  logic [31:0]           lsu_addr_last_i,         // for mtval
+    input  logic                  load_err_i,              // load error (LSU exception request)
+    input  logic                  store_err_i,             // store error (LSU exception request)
+    output logic                  wb_exception_o,          // Instruction in WB taking an exception
+    // jump/branch signals
+    input  logic                  branch_set_i,            // branch set signal (branch definitely
+                                                           // taken)
+    input  logic                  branch_set_spec_i,       // speculative branch signal (branch
+                                                           // may be taken)
+    input  logic                  branch_not_set_i,        // branch is definitely not taken
+    input  logic                  jump_set_i,              // jump taken set signal
+    // interrupt signals
+    input  logic                  csr_mstatus_mie_i,       // M-mode interrupt enable bit
+    input  logic                  irq_pending_i,           // interrupt request pending
+    input  brq_pkg::irqs_t        irqs_i,                  // interrupt requests qualified with
+                                                           // mie CSR
+    input  logic                  irq_nm_i,                // non-maskable interrupt
+    output logic                  nmi_mode_o,              // core executing NMI handler
+    // debug signals
+    input  logic                  debug_req_i,             // external debug request
+    output brq_pkg::dbg_cause_e   debug_cause_o,           // cause reported on debug-mode entry
+    output logic                  debug_csr_save_o,        // asserted together with the dpc/dcsr
+                                                           // save on debug-mode entry
+    output logic                  debug_mode_o,            // core is in debug mode
+    input  logic                  debug_single_step_i,     // single step mode (dcsr.step == 1)
+    input  logic                  debug_ebreakm_i,         // EBREAK in M-mode enters debug mode
+    input  logic                  debug_ebreaku_i,         // EBREAK in U-mode enters debug mode
+    input  logic                  trigger_match_i,         // trigger match (hardware breakpoint)
+    output logic                  csr_save_if_o,           // save the PC of the IF stage
+    output logic                  csr_save_id_o,           // save the PC of the ID stage
+    output logic                  csr_save_wb_o,           // save the PC of the writeback stage
+    output logic                  csr_restore_mret_id_o,   // MRET is being executed
+    output logic                  csr_restore_dret_id_o,   // DRET is being executed
+    output logic                  csr_save_cause_o,        // save the exception/debug cause
+    output logic [31:0]           csr_mtval_o,             // value for mtval
+    input  brq_pkg::priv_lvl_e    priv_mode_i,             // current privilege level
+    input  logic                  csr_mstatus_tw_i,        // mstatus.TW: trap WFI to M-mode
+    // stall & flush signals
+    input  logic                  stall_id_i,              // instr needs more cycles to finish
+    input  logic                  stall_wb_i,              // writeback cannot accept instr yet
+    output logic                  flush_id_o,              // flush the ID stage
+    input  logic                  ready_wb_i,              // writeback ready for a new instr
+    // performance monitors
+    output logic                  perf_jump_o,             // we are executing a jump
+                                                           // instruction (j, jr, jal, jalr)
+    output logic                  perf_tbranch_o,          // we are executing a taken branch
+                                                           // instruction
+    input  logic                  fpu_busy_i               // FPU busy; stalls the ID stage
+);
+  import brq_pkg::*;
+  // FSM state encoding
+  // The members are exactly the states handled by the controller FSM's case statement.
+  // NOTE(review): the enumeration order (and hence the binary encoding) is reconstructed;
+  // only the set of names is fixed by their use in this module. Confirm against the
+  // original file if a specific encoding is relied upon.
+  typedef enum logic [3:0] {
+    RESET, BOOT_SET, WAIT_SLEEP, SLEEP, FIRST_FETCH, DECODE, FLUSH,
+    IRQ_TAKEN, DBG_TAKEN_IF, DBG_TAKEN_ID
+  } ctrl_fsm_e;
+  // Current / next FSM state
+  ctrl_fsm_e ctrl_fsm_cs, ctrl_fsm_ns;
+  // Registered flags (_q) and their next values (_d)
+  logic nmi_mode_q, nmi_mode_d;
+  logic debug_mode_q, debug_mode_d;
+  logic load_err_q, load_err_d;
+  logic store_err_q, store_err_d;
+  logic exc_req_q, exc_req_d;
+  logic illegal_insn_q, illegal_insn_d;
+  // Of the various exception/fault signals, which one takes priority in FLUSH and hence controls
+  // what happens next (setting exc_cause, csr_mtval etc)
+  logic instr_fetch_err_prio;
+  logic illegal_insn_prio;
+  logic ecall_insn_prio;
+  logic ebrk_insn_prio;
+  logic store_err_prio;
+  logic load_err_prio;
+  // Pipeline control
+  logic stall;
+  logic halt_if;
+  logic retain_id;
+  logic flush_id;
+  // Exception / special request detection
+  logic illegal_dret;
+  logic illegal_umode;
+  logic exc_req_lsu;
+  logic special_req_all;
+  logic special_req_branch;
+  // Debug and interrupt entry
+  logic enter_debug_mode;
+  logic ebreak_into_debug;
+  logic handle_irq;
+  logic [3:0] mfip_id;
+  logic unused_irq_timer;
+  // Decoder outputs qualified with instr_valid_i
+  logic ecall_insn;
+  logic mret_insn;
+  logic dret_insn;
+  logic wfi_insn;
+  logic ebrk_insn;
+  logic csr_pipe_flush;
+  logic instr_fetch_err;
+`ifndef SYNTHESIS
+  // synopsys translate_off
+  // Simulation-only diagnostic. It runs on the falling clock edge so that it is evaluated
+  // late in the cycle and does not print messages for glitches.
+  always_ff @(negedge clk_i) begin
+    // print warning in case of decoding errors
+    if ((ctrl_fsm_cs == DECODE) && instr_valid_i && !instr_fetch_err_i && illegal_insn_d) begin
+      $display("%t: Illegal instruction (hart %0x) at PC 0x%h: 0x%h", $time, brq_core.hart_id_i,
+               brq_idu.pc_id_i, brq_idu.instr_rdata_i);
+    end
+  end
+  // synopsys translate_on
+`endif
+  ////////////////
+  // Exceptions //
+  ////////////////
+  assign store_err_d = store_err_i;
+  assign load_err_d  = load_err_i;
+  // The decoder outputs do not account for instr_valid_i, so each one is gated with it here.
+  assign instr_fetch_err = instr_valid_i & instr_fetch_err_i;
+  assign csr_pipe_flush  = instr_valid_i & csr_pipe_flush_i;
+  assign ebrk_insn       = instr_valid_i & ebrk_insn_i;
+  assign wfi_insn        = instr_valid_i & wfi_insn_i;
+  assign dret_insn       = instr_valid_i & dret_insn_i;
+  assign mret_insn       = instr_valid_i & mret_insn_i;
+  assign ecall_insn      = instr_valid_i & ecall_insn_i;
+  // "Executing DRET outside of Debug Mode causes an illegal instruction exception."
+  // [Debug Spec v0.13.2, p.41]
+  assign illegal_dret = ~debug_mode_q & dret_insn;
+  // Instructions restricted to M-Mode: MRET always, and WFI when mstatus.TW is set
+  // (TW means trap WFI to M-Mode).
+  assign illegal_umode = ((wfi_insn & csr_mstatus_tw_i) | mret_insn) &
+                         (priv_mode_i != PRIV_LVL_M);
+  // Registered in illegal_insn_q to help timing: this breaks the path from
+  // brq_cs_registers/illegal_csr_insn_o to pc_set_o. It is forced low while the controller
+  // is in FLUSH so that it does not stay set after the illegal instruction was handled.
+  // Every term is qualified by instr_valid_i.
+  assign illegal_insn_d = (ctrl_fsm_cs != FLUSH) &
+                          (illegal_dret | illegal_umode | illegal_insn_i);
+  // Exception requests, registered in exc_req_q. Forced low while the controller is in
+  // FLUSH so that exc_req_q is not still set in the following cycle for a request that
+  // has just been handled.
+  // Every term is qualified by instr_valid_i.
+  assign exc_req_d = (ctrl_fsm_cs != FLUSH) &
+                     (instr_fetch_err | illegal_insn_d | ebrk_insn | ecall_insn);
+  // LSU exception requests
+  assign exc_req_lsu = load_err_i | store_err_i;
+  // Special requests: special instructions, pipeline flushes, exceptions...
+  // special_req_all has a branch-only companion (special_req_branch) to avoid creating a
+  // path from data_err_i -> instr_req_o and to help timing. For a branch the controller
+  // must set pc_set_o only if there is no special request. Using the generic signal there
+  // would factor into pc_set_o a variety of signals that can never raise a special request
+  // during a branch instruction; special_req_branch only contains the reasons that are
+  // relevant to a branch.
+  // Generic special request, applies to all instructions. Every term is qualified by
+  // instr_valid_i except exc_req_lsu, which can come from the Writeback stage with no
+  // instr_valid_i from the ID stage.
+  assign special_req_all = exc_req_lsu | exc_req_d | csr_pipe_flush |
+                           wfi_insn | dret_insn | mret_insn;
+  // Special request that can specifically occur during branch instructions.
+  // Every term is qualified by instr_valid_i.
+  assign special_req_branch = (ctrl_fsm_cs != FLUSH) & instr_fetch_err;
+  // Exception/fault prioritisation is taken from Table 3.7 of Privileged Spec v1.11.
+  // At most one *_prio signal is set; the first asserted source in each chain wins.
+  if (WritebackStage) begin : g_wb_exceptions
+    always_comb begin
+      {instr_fetch_err_prio, illegal_insn_prio, ecall_insn_prio,
+       ebrk_insn_prio, store_err_prio, load_err_prio} = '0;
+      // With the writeback stage, store/load errors belong to the instruction in writeback
+      // while all other exceptions/faults belong to the instruction in ID/EX. The faults
+      // from writeback must win because that instruction is architecturally ordered before
+      // the one in ID/EX.
+      if      (store_err_q)     store_err_prio       = 1'b1;
+      else if (load_err_q)      load_err_prio        = 1'b1;
+      else if (instr_fetch_err) instr_fetch_err_prio = 1'b1;
+      else if (illegal_insn_q)  illegal_insn_prio    = 1'b1;
+      else if (ecall_insn)      ecall_insn_prio      = 1'b1;
+      else if (ebrk_insn)       ebrk_insn_prio       = 1'b1;
+    end
+    // Instruction in writeback is generating an exception so instruction in ID must not execute
+    assign wb_exception_o = store_err_i | load_err_i | store_err_q | load_err_q;
+  end else begin : g_no_wb_exceptions
+    always_comb begin
+      {instr_fetch_err_prio, illegal_insn_prio, ecall_insn_prio,
+       ebrk_insn_prio, store_err_prio, load_err_prio} = '0;
+      // Without a writeback stage the store/load errors come last in the priority order.
+      if      (instr_fetch_err) instr_fetch_err_prio = 1'b1;
+      else if (illegal_insn_q)  illegal_insn_prio    = 1'b1;
+      else if (ecall_insn)      ecall_insn_prio      = 1'b1;
+      else if (ebrk_insn)       ebrk_insn_prio       = 1'b1;
+      else if (store_err_q)     store_err_prio       = 1'b1;
+      else if (load_err_q)      load_err_prio        = 1'b1;
+    end
+    assign wb_exception_o = 1'b0;
+  end
+  ////////////////
+  // Interrupts //
+  ////////////////
+  // Debug mode is entered on an external debug_req_i, on a trigger match (hardware
+  // breakpoint), or when the core is in single step mode (dcsr.step == 1). Single step is
+  // qualified with instruction valid; otherwise the core would enter debug mode right away
+  // after a flushed IF (or while an instruction is still returning from memory), before it
+  // had anything to single step. Nothing is requested while already in debug mode.
+  assign enter_debug_mode = ~debug_mode_q &
+                            (trigger_match_i | debug_req_i |
+                             (instr_valid_i & debug_single_step_i));
+  // Set when an ebreak should enter debug mode rather than jump to the exception handler:
+  // selected per privilege level, never for levels other than M and U.
+  assign ebreak_into_debug = (priv_mode_i == PRIV_LVL_M) ? debug_ebreakm_i :
+                             ((priv_mode_i == PRIV_LVL_U) ? debug_ebreaku_i : 1'b0);
+  // Interrupts including NMI are ignored,
+  // - while in debug mode [Debug Spec v0.13.2, p.39],
+  // - while in NMI mode (nested NMIs are not supported, NMI has highest priority and
+  //   cannot be interrupted by regular interrupts).
+  assign handle_irq = (irq_nm_i | (csr_mstatus_mie_i & irq_pending_i)) &
+                      ~nmi_mode_q & ~debug_mode_q;
+  // generate ID of fast interrupts, highest priority to highest ID
+  // The loop walks bits 1..14 in ascending order and the last hit overwrites earlier ones,
+  // so the highest asserted bit determines mfip_id; 0 is the fallback when none of them is set.
+  always_comb begin : gen_mfip_id
+    mfip_id = 4'd0;
+    for (int i = 1; i <= 14; i++) begin
+      if (irqs_i.irq_fast[i]) begin
+        mfip_id = i[3:0];
+      end
+    end
+  end
+  // irq_timer is not read in this module; tie it to an unused signal.
+  assign unused_irq_timer = irqs_i.irq_timer;
+  /////////////////////
+  // Core controller //
+  /////////////////////
+  // Next-state and output logic of the controller FSM. Every signal driven here first gets a
+  // default value and is then overridden by the handler of the current state.
+  always_comb begin
+    // Default values
+    instr_req_o            = 1'b1;
+    csr_save_if_o          = 1'b0;
+    csr_save_id_o          = 1'b0;
+    csr_save_wb_o          = 1'b0;
+    csr_restore_mret_id_o  = 1'b0;
+    csr_restore_dret_id_o  = 1'b0;
+    csr_save_cause_o       = 1'b0;
+    csr_mtval_o            = '0;
+    // The values of pc_mux and exc_pc_mux are only relevant if pc_set is set. Some of the states
+    // below always set pc_mux and exc_pc_mux but only set pc_set if certain conditions are met.
+    // This avoids having to factor those conditions into the pc_mux and exc_pc_mux select
+    // signals, helping timing.
+    pc_mux_o               = PC_BOOT;
+    pc_set_o               = 1'b0;
+    pc_set_spec_o          = 1'b0;
+    nt_branch_mispredict_o = 1'b0;
+    exc_pc_mux_o           = EXC_PC_IRQ;
+    exc_cause_o            = EXC_CAUSE_INSN_ADDR_MISA; // = 6'h00
+    ctrl_fsm_ns            = ctrl_fsm_cs;
+    ctrl_busy_o            = 1'b1;
+    halt_if                = 1'b0;
+    retain_id              = 1'b0;
+    flush_id               = 1'b0;
+    debug_csr_save_o       = 1'b0;
+    debug_cause_o          = DBG_CAUSE_EBREAK;
+    debug_mode_d           = debug_mode_q;
+    nmi_mode_d             = nmi_mode_q;
+    perf_tbranch_o         = 1'b0;
+    perf_jump_o            = 1'b0;
+    controller_run_o       = 1'b0;
+    unique case (ctrl_fsm_cs)
+      RESET: begin
+        instr_req_o   = 1'b0;
+        pc_mux_o      = PC_BOOT;
+        pc_set_o      = 1'b1;
+        pc_set_spec_o = 1'b1;
+        ctrl_fsm_ns   = BOOT_SET;
+      end
+      BOOT_SET: begin
+        // copy boot address to instr fetch address
+        instr_req_o   = 1'b1;
+        pc_mux_o      = PC_BOOT;
+        pc_set_o      = 1'b1;
+        pc_set_spec_o = 1'b1;
+        ctrl_fsm_ns   = FIRST_FETCH;
+      end
+      WAIT_SLEEP: begin
+        ctrl_busy_o   = 1'b0;
+        instr_req_o   = 1'b0;
+        halt_if       = 1'b1;
+        flush_id      = 1'b1;
+        ctrl_fsm_ns   = SLEEP;
+      end
+      SLEEP: begin
+        // instruction in IF stage is already valid
+        // we begin execution when an interrupt has arrived
+        instr_req_o   = 1'b0;
+        halt_if       = 1'b1;
+        flush_id      = 1'b1;
+        // normal execution flow
+        // in debug mode or single step mode we leave immediately (wfi=nop)
+        if (irq_nm_i || irq_pending_i || debug_req_i || debug_mode_q || debug_single_step_i) begin
+          ctrl_fsm_ns = FIRST_FETCH;
+        end else begin
+          // Make sure clock remains disabled.
+          ctrl_busy_o = 1'b0;
+        end
+      end
+      FIRST_FETCH: begin
+        // Stall because of IF miss
+        if (id_in_ready_o) begin
+          ctrl_fsm_ns = DECODE;
+        end
+        // handle interrupts
+        if (handle_irq) begin
+          // We are handling an interrupt. Set halt_if to tell IF not to give
+          // us any more instructions before it redirects to the handler, but
+          // don't set flush_id: we must allow this instruction to complete
+          // (since it might have outstanding loads or stores).
+          ctrl_fsm_ns = IRQ_TAKEN;
+          halt_if     = 1'b1;
+        end
+        // enter debug mode (checked last, so it overrides the interrupt request above)
+        if (enter_debug_mode) begin
+          ctrl_fsm_ns = DBG_TAKEN_IF;
+          // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the
+          // ID state is needed for correct debug mode entry
+          halt_if     = 1'b1;
+        end
+      end
+      DECODE: begin
+        // normal operating mode of the ID stage, in case of debug and interrupt requests,
+        // priorities are as follows (lower number == higher priority)
+        // 1. currently running (multicycle) instructions and exceptions caused by these
+        // 2. debug requests
+        // 3. interrupt requests
+        controller_run_o = 1'b1;
+        // Set PC mux for branch and jump here to ease timing. Value is only relevant if pc_set_o is
+        // also set. Setting the mux value here avoids factoring in special_req and instr_valid_i
+        // which helps timing.
+        pc_mux_o = PC_JUMP;
+        // Get ready for special instructions, exceptions, pipeline flushes
+        if (special_req_all) begin
+          // Halt IF but don't flush ID. This leaves a valid instruction in
+          // ID so controller can determine appropriate action in the
+          // FLUSH state.
+          retain_id = 1'b1;
+          // Wait for the writeback stage to either be ready for a new instruction or raise its own
+          // exception before going to FLUSH. If the instruction in writeback raises an exception it
+          // must take priority over any exception from an instruction in ID/EX. Only once the
+          // writeback stage is ready can we be certain that won't happen. Without a writeback
+          // stage ready_wb_i == 1 so the FSM will always go directly to FLUSH.
+          if (ready_wb_i | wb_exception_o) begin
+            ctrl_fsm_ns = FLUSH;
+          end
+        end
+        if (!special_req_branch) begin
+          if (branch_set_i || jump_set_i) begin
+            // Only set the PC if the branch predictor hasn't already done the branch for us
+            pc_set_o       = BranchPredictor ? ~instr_bp_taken_i : 1'b1;
+            perf_tbranch_o = branch_set_i;
+            perf_jump_o    = jump_set_i;
+          end
+          if (BranchPredictor) begin
+            if (instr_bp_taken_i & branch_not_set_i) begin
+              // If the instruction is a branch that was predicted to be taken but was not taken
+              // signal a mispredict.
+              nt_branch_mispredict_o = 1'b1;
+            end
+          end
+        end
+        // pc_set signal excluding branch taken condition
+        if ((branch_set_spec_i || jump_set_i) && !special_req_branch) begin
+          // Only speculatively set the PC if the branch predictor hasn't already done the branch
+          // for us
+          pc_set_spec_o = BranchPredictor ? ~instr_bp_taken_i : 1'b1;
+        end
+        // If entering debug mode or handling an IRQ the core needs to wait
+        // until the current instruction has finished executing. Stall IF
+        // during that time.
+        if ((enter_debug_mode || handle_irq) && stall) begin
+          halt_if = 1'b1;
+        end
+        if (!stall && !special_req_all) begin
+          if (enter_debug_mode) begin
+            // enter debug mode
+            ctrl_fsm_ns = DBG_TAKEN_IF;
+            // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the
+            // ID state is needed for correct debug mode entry
+            halt_if     = 1'b1;
+          end else if (handle_irq) begin
+            // handle interrupt (not in debug mode)
+            ctrl_fsm_ns = IRQ_TAKEN;
+            // We are handling an interrupt (not in debug mode). Set halt_if to
+            // tell IF not to give us any more instructions before it redirects
+            // to the handler, but don't set flush_id: we must allow this
+            // instruction to complete (since it might have outstanding loads
+            // or stores).
+            halt_if     = 1'b1;
+          end
+        end
+      end // DECODE
+      IRQ_TAKEN: begin
+        pc_mux_o     = PC_EXC;
+        exc_pc_mux_o = EXC_PC_IRQ;
+        // Only redirect if the interrupt is still to be handled in this cycle.
+        if (handle_irq) begin
+          pc_set_o         = 1'b1;
+          pc_set_spec_o    = 1'b1;
+          csr_save_if_o    = 1'b1;
+          csr_save_cause_o = 1'b1;
+          // interrupt priorities according to Privileged Spec v1.11 p.31
+          if (irq_nm_i && !nmi_mode_q) begin
+            exc_cause_o = EXC_CAUSE_IRQ_NM;
+            nmi_mode_d  = 1'b1; // enter NMI mode
+          end else if (irqs_i.irq_fast != 15'b0) begin
+            // generate exception cause ID from fast interrupt ID:
+            // - first bit distinguishes interrupts from exceptions,
+            // - second bit adds 16 to fast interrupt ID
+            // for example EXC_CAUSE_IRQ_FAST_0 = {1'b1, 5'd16}
+            exc_cause_o = exc_cause_e'({2'b11, mfip_id});
+          end else if (irqs_i.irq_external) begin
+            exc_cause_o = EXC_CAUSE_IRQ_EXTERNAL_M;
+          end else if (irqs_i.irq_software) begin
+            exc_cause_o = EXC_CAUSE_IRQ_SOFTWARE_M;
+          end else begin // irqs_i.irq_timer
+            exc_cause_o = EXC_CAUSE_IRQ_TIMER_M;
+          end
+        end
+        ctrl_fsm_ns = DECODE;
+      end
+      DBG_TAKEN_IF: begin
+        pc_mux_o     = PC_EXC;
+        exc_pc_mux_o = EXC_PC_DBD;
+        // enter debug mode and save PC in IF to dpc
+        // jump to debug exception handler in debug memory
+        if (debug_single_step_i || debug_req_i || trigger_match_i) begin
+          flush_id         = 1'b1;
+          pc_set_o         = 1'b1;
+          pc_set_spec_o    = 1'b1;
+          csr_save_if_o    = 1'b1;
+          debug_csr_save_o = 1'b1;
+          csr_save_cause_o = 1'b1;
+          // Reported cause: trigger match first, then single step, then halt request.
+          if (trigger_match_i) begin
+            debug_cause_o = DBG_CAUSE_TRIGGER;
+          end else if (debug_single_step_i) begin
+            debug_cause_o = DBG_CAUSE_STEP;
+          end else begin
+            debug_cause_o = DBG_CAUSE_HALTREQ;
+          end
+          // enter debug mode
+          debug_mode_d = 1'b1;
+        end
+        ctrl_fsm_ns = DECODE;
+      end
+      DBG_TAKEN_ID: begin
+        // enter debug mode and save PC in ID to dpc, used when encountering
+        // 1. EBREAK during debug mode
+        // 2. EBREAK with forced entry into debug mode (ebreakm or ebreaku set).
+        // regular ebreak's go through FLUSH.
+        //
+        // for 1. do not update dcsr and dpc, for 2. do so [Debug Spec v0.13.2, p.39]
+        // jump to debug exception handler in debug memory
+        flush_id      = 1'b1;
+        pc_mux_o      = PC_EXC;
+        pc_set_o      = 1'b1;
+        pc_set_spec_o = 1'b1;
+        exc_pc_mux_o  = EXC_PC_DBD;
+        // update dcsr and dpc
+        if (ebreak_into_debug && !debug_mode_q) begin // ebreak with forced entry
+          // dpc (set to the address of the EBREAK, i.e. set to PC in ID stage)
+          csr_save_cause_o = 1'b1;
+          csr_save_id_o    = 1'b1;
+          // dcsr
+          debug_csr_save_o = 1'b1;
+          debug_cause_o    = DBG_CAUSE_EBREAK;
+        end
+        // enter debug mode
+        debug_mode_d = 1'b1;
+        ctrl_fsm_ns  = DECODE;
+      end
+      FLUSH: begin
+        // flush the pipeline
+        halt_if     = 1'b1;
+        flush_id    = 1'b1;
+        ctrl_fsm_ns = DECODE;
+        // As pc_mux and exc_pc_mux can take various values in this state they aren't set early
+        // here.
+        // exceptions: set exception PC, save PC and exception cause
+        // exc_req_lsu is high for one clock cycle only (in DECODE)
+        if (exc_req_q || store_err_q || load_err_q) begin
+          pc_set_o      = 1'b1;
+          pc_set_spec_o = 1'b1;
+          pc_mux_o      = PC_EXC;
+          exc_pc_mux_o  = debug_mode_q ? EXC_PC_DBG_EXC : EXC_PC_EXC;
+          if (WritebackStage) begin : g_writeback_mepc_save
+            // With the writeback stage present whether an instruction accessing memory will cause
+            // an exception is only known when it is in writeback. So when taking such an exception
+            // epc must come from writeback.
+            csr_save_id_o = ~(store_err_q | load_err_q);
+            csr_save_wb_o = store_err_q | load_err_q;
+          end else begin : g_no_writeback_mepc_save
+            csr_save_id_o = 1'b0;
+          end
+          csr_save_cause_o = 1'b1;
+          // Exception/fault prioritisation logic will have set exactly 1 X_prio signal.
+          // Each arm must drive exc_cause_o itself: the block default is
+          // EXC_CAUSE_INSN_ADDR_MISA, which would otherwise be reported for the fault.
+          // NOTE(review): EXC_CAUSE_INSTR_ACCESS_FAULT, EXC_CAUSE_LOAD_ACCESS_FAULT,
+          // EXC_CAUSE_STORE_ACCESS_FAULT and EXC_CAUSE_ECALL_UMODE are expected to be members of
+          // brq_pkg::exc_cause_e - confirm against the package.
+          unique case (1'b1)
+            instr_fetch_err_prio: begin
+              exc_cause_o = EXC_CAUSE_INSTR_ACCESS_FAULT;
+              // mtval holds the faulting address; it is PC + 2 when instr_fetch_err_plus2_i
+              // is set.
+              csr_mtval_o = instr_fetch_err_plus2_i ? (pc_id_i + 32'd2) : pc_id_i;
+            end
+            illegal_insn_prio: begin
+              exc_cause_o = EXC_CAUSE_ILLEGAL_INSN;
+              csr_mtval_o = instr_is_compressed_i ? {16'b0, instr_compressed_i} : instr_i;
+            end
+            ecall_insn_prio: begin
+              exc_cause_o = (priv_mode_i == PRIV_LVL_M) ? EXC_CAUSE_ECALL_MMODE :
+                                                          EXC_CAUSE_ECALL_UMODE;
+            end
+            ebrk_insn_prio: begin
+              if (debug_mode_q | ebreak_into_debug) begin
+                /*
+                 * EBREAK in debug mode re-enters debug mode
+                 *
+                 * "The only exception is EBREAK. When that is executed in Debug
+                 * Mode, it halts the hart again but without updating dpc or
+                 * dcsr." [Debug Spec v0.13.2, p.39]
+                 */
+                /*
+                 * dcsr.ebreakm == 1:
+                 * "EBREAK instructions in M-mode enter Debug Mode."
+                 * [Debug Spec v0.13.2, p.42]
+                 */
+                // Undo the exception entry set up above and hand over to DBG_TAKEN_ID with
+                // the instruction still in ID.
+                pc_set_o         = 1'b0;
+                pc_set_spec_o    = 1'b0;
+                csr_save_id_o    = 1'b0;
+                csr_save_cause_o = 1'b0;
+                ctrl_fsm_ns      = DBG_TAKEN_ID;
+                flush_id         = 1'b0;
+              end else begin
+                /*
+                 * "The EBREAK instruction is used by debuggers to cause control
+                 * to be transferred back to a debugging environment. It
+                 * generates a breakpoint exception and performs no other
+                 * operation. [...] ECALL and EBREAK cause the receiving
+                 * privilege mode's epc register to be set to the address of the
+                 * ECALL or EBREAK instruction itself, not the address of the
+                 * following instruction." [Privileged Spec v1.11, p.40]
+                 */
+                exc_cause_o = EXC_CAUSE_BREAKPOINT;
+              end
+            end
+            store_err_prio: begin
+              exc_cause_o = EXC_CAUSE_STORE_ACCESS_FAULT;
+              csr_mtval_o = lsu_addr_last_i;
+            end
+            load_err_prio: begin
+              exc_cause_o = EXC_CAUSE_LOAD_ACCESS_FAULT;
+              csr_mtval_o = lsu_addr_last_i;
+            end
+            default: ;
+          endcase
+        end else begin
+          // special instructions and pipeline flushes
+          if (mret_insn) begin
+            pc_mux_o              = PC_ERET;
+            pc_set_o              = 1'b1;
+            pc_set_spec_o         = 1'b1;
+            csr_restore_mret_id_o = 1'b1;
+            if (nmi_mode_q) begin
+              nmi_mode_d = 1'b0; // exit NMI mode
+            end
+          end else if (dret_insn) begin
+            pc_mux_o              = PC_DRET;
+            pc_set_o              = 1'b1;
+            pc_set_spec_o         = 1'b1;
+            debug_mode_d          = 1'b0;
+            csr_restore_dret_id_o = 1'b1;
+          end else if (wfi_insn) begin
+            ctrl_fsm_ns = WAIT_SLEEP;
+          end else if (csr_pipe_flush && handle_irq) begin
+            // start handling IRQs when doing CSR-related pipeline flushes
+            ctrl_fsm_ns = IRQ_TAKEN;
+          end
+        end // exc_req_q
+        // Entering debug mode due to either single step or debug_req. Ensure
+        // registers are set for exception but then enter debug handler rather
+        // than exception handler [Debug Spec v0.13.2, p.44]
+        // Leave all other signals as is to ensure CSRs and PC get set as if
+        // core was entering exception handler, entry to debug mode will then
+        // see the appropriate state and setup dpc correctly.
+        // If an EBREAK instruction is causing us to enter debug mode on the
+        // same cycle as a debug_req or single step, honor the EBREAK and
+        // proceed to DBG_TAKEN_ID.
+        if (enter_debug_mode && !(ebrk_insn_prio && ebreak_into_debug)) begin
+          ctrl_fsm_ns = DBG_TAKEN_IF;
+        end
+      end // FLUSH
+      default: begin
+        // Unused state encodings: stop fetching and return to RESET.
+        instr_req_o = 1'b0;
+        ctrl_fsm_ns = RESET;
+      end
+    endcase
+  end
+  assign flush_id_o = flush_id;
+  // signal to CSR when in an NMI handler (for nested exception handling)
+  assign nmi_mode_o = nmi_mode_q;
+  // signal to CSR when in debug mode
+  assign debug_mode_o = debug_mode_q;
+  ///////////////////
+  // Stall control //
+  ///////////////////
+  // High when the current instruction cannot complete this cycle: it needs more cycles to
+  // finish (stall_id_i), the writeback stage cannot accept it yet (stall_wb_i, a constant 0
+  // when there is no writeback stage), or fpu_busy_i is asserted.
+  assign stall = fpu_busy_i | stall_wb_i | stall_id_i;
+  // Tell the IF stage that the ID stage can take the next instr: nothing is stalling,
+  // halting IF or retaining the current instr.
+  assign id_in_ready_o = ~(stall | halt_if | retain_id);
+  // Kill the instr in the IF-ID pipeline reg once it is done, or e.g. when a multicycle
+  // instr causes an exception. retain_id is another kind of stall under which the
+  // instr_valid bit must stay set (unless flush_id is set as well). It cannot be folded
+  // directly into stall because that creates a combinational loop.
+  assign instr_valid_clear_o = flush_id | (~stall & ~retain_id);
+  // update registers
+  // FSM state and flag registers. Asynchronous active-low reset puts the FSM in RESET and
+  // clears every flag; otherwise each register takes its next-state (_ns / _d) value on the
+  // rising clock edge.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : update_regs
+    if (!rst_ni) begin
+      ctrl_fsm_cs    <= RESET;
+      nmi_mode_q     <= 1'b0;
+      debug_mode_q   <= 1'b0;
+      load_err_q     <= 1'b0;
+      store_err_q    <= 1'b0;
+      exc_req_q      <= 1'b0;
+      illegal_insn_q <= 1'b0;
+    end else begin
+      ctrl_fsm_cs    <= ctrl_fsm_ns;
+      nmi_mode_q     <= nmi_mode_d;
+      debug_mode_q   <= debug_mode_d;
+      load_err_q     <= load_err_d;
+      store_err_q    <= store_err_d;
+      exc_req_q      <= exc_req_d;
+      illegal_insn_q <= illegal_insn_d;
+    end
+  end
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..3b4c452
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,1720 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Instruction decoder
+ *
+ * This module is fully combinatorial, clock and reset are used for
+ * assertions only.
+ */
+module brq_idu_decoder #(
+ parameter bit RV32E = 0,
+ parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast,
+ parameter brq_pkg::rv32b_e RV32B = brq_pkg::RV32BNone,
+ parameter brq_pkg::rvfloat_e RVF = brq_pkg::RV64FDouble,
+ parameter bit BranchTargetALU = 0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ // to/from controller
+ output logic illegal_insn_o, // illegal instr encountered
+ output logic ebrk_insn_o, // trap instr encountered
+ output logic mret_insn_o, // return from exception instr
+ // encountered
+ output logic dret_insn_o, // return from debug instr encountered
+ output logic ecall_insn_o, // syscall instr encountered
+ output logic wfi_insn_o, // wait for interrupt instr encountered
+ output logic jump_set_o, // jump taken set signal
+ input logic branch_taken_i, // registered branch decision
+ output logic icache_inval_o,
+ // from IF-ID pipeline register
+ input logic instr_first_cycle_i, // instruction read is in its first cycle
+ input logic [31:0] instr_rdata_i, // instruction read from memory/cache
+ input logic [31:0] instr_rdata_alu_i, // instruction read from memory/cache
+ // replicated to ease fan-out)
+ input logic illegal_c_insn_i, // compressed instruction decode failed
+ // immediates
+ output brq_pkg::imm_a_sel_e imm_a_mux_sel_o, // immediate selection for operand a
+ output brq_pkg::imm_b_sel_e imm_b_mux_sel_o, // immediate selection for operand b
+ output brq_pkg::op_a_sel_e bt_a_mux_sel_o, // branch target selection operand a
+ output brq_pkg::imm_b_sel_e bt_b_mux_sel_o, // branch target selection operand b
+ output logic [31:0] imm_i_type_o,
+ output logic [31:0] imm_s_type_o,
+ output logic [31:0] imm_b_type_o,
+ output logic [31:0] imm_u_type_o,
+ output logic [31:0] imm_j_type_o,
+ output logic [31:0] zimm_rs1_type_o,
+ // register file
+ output brq_pkg::rf_wd_sel_e rf_wdata_sel_o, // RF write data selection
+ output logic rf_we_o, // write enable for regfile
+ output logic [4:0] rf_raddr_a_o,
+ output logic [4:0] rf_raddr_b_o,
+ output logic [4:0] rf_waddr_o,
+ output logic rf_ren_a_o, // Instruction reads from RF addr A
+ output logic rf_ren_b_o, // Instruction reads from RF addr B
+ // ALU
+ output brq_pkg::alu_op_e alu_operator_o, // ALU operation selection
+ output brq_pkg::op_a_sel_e alu_op_a_mux_sel_o, // operand a selection: reg value, PC,
+ // immediate or zero
+ output brq_pkg::op_b_sel_e alu_op_b_mux_sel_o, // operand b selection: reg value or
+ // immediate
+ output logic alu_multicycle_o, // ternary bitmanip instruction
+ // MULT & DIV
+ output logic mult_en_o, // perform integer multiplication
+ output logic div_en_o, // perform integer division or remainder
+ output logic mult_sel_o, // as above but static, for data muxes
+ output logic div_sel_o, // as above but static, for data muxes
+ output brq_pkg::md_op_e multdiv_operator_o,
+ output logic [1:0] multdiv_signed_mode_o,
+ // CSRs
+ output logic csr_access_o, // access to CSR
+ output brq_pkg::csr_op_e csr_op_o, // operation to perform on CSR
+ // LSU
+ output logic data_req_o, // start transaction to data memory
+ output logic data_we_o, // write enable
+ output logic [1:0] data_type_o, // size of transaction: byte, half
+ // word or word
+ output logic data_sign_extension_o, // sign extension for data read from
+ // memory
+ // jump/branches
+ output logic jump_in_dec_o, // jump is being calculated in ALU
+ output logic branch_in_dec_o,
+ // Floating point extensions IO
+ output fpnew_pkg::roundmode_e fp_rounding_mode_o, // defines the rounding mode
+ output brq_pkg::op_b_sel_e fp_alu_op_b_mux_sel_o, // operand b selection: reg value or
+ // immediate
+ output logic [4:0] fp_rf_raddr_a_o,
+ output logic [4:0] fp_rf_raddr_b_o,
+ output logic [4:0] fp_rf_raddr_c_o,
+ output logic [4:0] fp_rf_waddr_o,
+ output logic fp_rf_we_o,
+ output fpnew_pkg::operation_e fp_alu_operator_o,
+ output logic fp_alu_op_mod_o,
+ output logic fp_rm_dynamic_o,
+ output fpnew_pkg::fp_format_e fp_src_fmt_o,
+ output fpnew_pkg::fp_format_e fp_dst_fmt_o,
+ output logic is_fp_instr_o,
+ output logic use_fp_rs1_o,
+ output logic use_fp_rs2_o,
+ output logic use_fp_rs3_o,
+ output logic use_fp_rd_o,
+ output logic fp_swap_oprnds_o,
+ output logic fp_load_o,
+ output logic mv_instr_o
+ import brq_pkg::*;
+ import fpnew_pkg::*;
+ logic fp_invalid_rm;
+ logic illegal_insn;
+ logic illegal_reg_rv32e;
+ logic csr_illegal;
+ logic rf_we;
+ logic [31:0] instr;
+ logic [31:0] instr_alu;
+ // Source/Destination register instruction index
+ logic [4:0] instr_rs1;
+ logic [4:0] instr_rs2;
+ logic [4:0] instr_rs3;
+ logic [4:0] instr_rd;
+ logic use_rs3_d;
+ logic use_rs3_q;
+ csr_op_e csr_op;
+ opcode_e opcode;
+ opcode_e opcode_alu;
+ // To help timing the flops containing the current instruction are replicated to reduce fan-out.
+ // instr_alu is used to determine the ALU control logic and associated operand/imm select signals
+ // as the ALU is often on the more critical timing paths. instr is used for everything else.
+ assign instr = instr_rdata_i;
+ assign instr_alu = instr_rdata_alu_i;
+ //////////////////////////////////////
+ // Register and immediate selection //
+ //////////////////////////////////////
+ // immediate extraction and sign extension
+ assign imm_i_type_o = { {20{instr[31]}}, instr[31:20] };
+ assign imm_s_type_o = { {20{instr[31]}}, instr[31:25], instr[11:7] };
+ assign imm_b_type_o = { {19{instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0 };
+ assign imm_u_type_o = { instr[31:12], 12'b0 };
+ assign imm_j_type_o = { {12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0 };
+ // immediate for CSR manipulation (zero extended)
+ assign zimm_rs1_type_o = { 27'b0, instr_rs1 }; // rs1
+ // the use of rs3 is known one cycle ahead.
+ // Registers use_rs3_d into use_rs3_q; cleared by the asynchronous, active-low reset rst_ni.
+ // use_rs3_q is the value that selects instr_rs3 on rf_raddr_a_o outside of an instruction's
+ // first cycle.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ use_rs3_q <= 1'b0;
+ end else begin
+ use_rs3_q <= use_rs3_d;
+ end
+ end
+ // source registers
+ assign instr_rs1 = instr[19:15];
+ assign instr_rs2 = instr[24:20];
+ assign instr_rs3 = instr[31:27];
+ assign rf_raddr_a_o = (use_rs3_q & ~instr_first_cycle_i) ? instr_rs3 : instr_rs1; // rs3 / rs1
+ assign rf_raddr_b_o = instr_rs2; // rs2
+ // destination register
+ assign instr_rd = instr[11:7];
+ assign rf_waddr_o = instr_rd; // rd
+ // fp source registers
+ assign fp_rf_raddr_a_o = instr_rs1;
+ assign fp_rf_raddr_b_o = instr_rs2;
+ assign fp_rf_raddr_c_o = instr_rs3;
+ // fp destination register
+ assign fp_rf_waddr_o = instr_rd;
+ // The rounding mode is taken directly from the rm field of the instruction (instr[14:12]).
+ assign fp_rounding_mode_o = roundmode_e'(instr[14:12]);
+ // rm encodings 3'b101 and 3'b110 are flagged as invalid rounding modes.
+ assign fp_invalid_rm = (instr[14:12] == 3'b101) | (instr[14:12] == 3'b110);
+ // rm == 3'b111 requests the dynamic rounding mode.
+ assign fp_rm_dynamic_o = (instr[14:12] == 3'b111);
+ // NOTE: fp_dst_fmt_o is deliberately not assigned here. It is already driven (to FP32) inside
+ // the main decoder always_comb block; an additional continuous assignment would give the
+ // variable two drivers, which SystemVerilog does not allow for a variable written by always_comb.
+ ////////////////////
+ // Register check //
+ ////////////////////
+ // Elaboration-time selection on the RV32E parameter.
+ // With RV32E set, an instruction is flagged when a register index with bit 4 set (i.e. >= 16)
+ // is actually used: as ALU operand A, as ALU operand B, or as a written destination.
+ // Without RV32E the check is tied off to 0.
+ if (RV32E) begin : gen_rv32e_reg_check_active
+ assign illegal_reg_rv32e = ((rf_raddr_a_o[4] & (alu_op_a_mux_sel_o == OP_A_REG_A)) |
+ (rf_raddr_b_o[4] & (alu_op_b_mux_sel_o == OP_B_REG_B)) |
+ (rf_waddr_o[4] & rf_we));
+ end else begin : gen_rv32e_reg_check_inactive
+ assign illegal_reg_rv32e = 1'b0;
+ end
+ ///////////////////////
+ // CSR operand check //
+ ///////////////////////
+ always_comb begin : csr_operand_check
+   // A set/clear operation whose rs1/uimm field (instr_rs1) is zero must not write the CSR:
+   //  - CSRRS/CSRRC with rs1 == x0
+   //  - CSRRSI/CSRRCI with uimm[4:0] == 0
+   // Such an access is turned into a plain read; everything else is forwarded as decoded.
+   if ((instr_rs1 == '0) && ((csr_op == CSR_OP_CLEAR) || (csr_op == CSR_OP_SET))) begin
+     csr_op_o = CSR_OP_READ;
+   end else begin
+     csr_op_o = csr_op;
+   end
+ end
+ /////////////
+ // Decoder //
+ /////////////
+ always_comb begin
+ jump_in_dec_o = 1'b0;
+ jump_set_o = 1'b0;
+ branch_in_dec_o = 1'b0;
+ icache_inval_o = 1'b0;
+ multdiv_operator_o = MD_OP_MULL;
+ multdiv_signed_mode_o = 2'b00;
+ rf_wdata_sel_o = RF_WD_EX;
+ rf_we = 1'b0;
+ rf_ren_a_o = 1'b0;
+ rf_ren_b_o = 1'b0;
+ csr_access_o = 1'b0;
+ csr_illegal = 1'b0;
+ csr_op = CSR_OP_READ;
+ data_we_o = 1'b0;
+ data_type_o = 2'b00;
+ data_sign_extension_o = 1'b0;
+ data_req_o = 1'b0;
+ illegal_insn = 1'b0;
+ ebrk_insn_o = 1'b0;
+ mret_insn_o = 1'b0;
+ dret_insn_o = 1'b0;
+ ecall_insn_o = 1'b0;
+ wfi_insn_o = 1'b0;
+ // Floating Point
+ fp_rf_we_o = 1'b0;
+ is_fp_instr_o = 1'b0;
+ use_fp_rs1_o = 1'b0;
+ use_fp_rs2_o = 1'b0;
+ use_fp_rs3_o = 1'b0;
+ use_fp_rd_o = 1'b0;
+ fp_load_o = 1'b0;
+ fp_src_fmt_o = FP32;
+ fp_dst_fmt_o = FP32;
+ fp_swap_oprnds_o = 1'b0;
+ mv_instr_o = 1'b0;
+ opcode = opcode_e'(instr[6:0]);
+ unique case (opcode)
+ ///////////
+ // Jumps //
+ ///////////
+ OPCODE_JAL: begin // Jump and Link
+   jump_in_dec_o = 1'b1;
+   // Default (not the first cycle): PC+4 is calculated and stored.
+   rf_we = 1'b1;
+   if (instr_first_cycle_i) begin
+     // First cycle: the jump target is calculated and the jump is set. PC+4 is only stored
+     // in this cycle when BranchTargetALU is configured.
+     jump_set_o = 1'b1;
+     rf_we = BranchTargetALU;
+   end
+ end
+ OPCODE_JALR: begin // Jump and Link Register
+   jump_in_dec_o = 1'b1;
+   // rs1 is read from the register file
+   rf_ren_a_o = 1'b1;
+   // Default (not the first cycle): PC+4 is calculated and stored.
+   rf_we = 1'b1;
+   if (instr_first_cycle_i) begin
+     // First cycle: the jump target is calculated and the jump is set. PC+4 is only stored
+     // in this cycle when BranchTargetALU is configured.
+     jump_set_o = 1'b1;
+     rf_we = BranchTargetALU;
+   end
+   // Any non-zero funct3 (instr[14:12]) is rejected.
+   if (instr[14:12] != 3'b000) begin
+     illegal_insn = 1'b1;
+   end
+ end
+ OPCODE_BRANCH: begin // Branch
+ branch_in_dec_o = 1'b1;
+ // Check branch condition selection
+ unique case (instr[14:12])
+ 3'b000,
+ 3'b001,
+ 3'b100,
+ 3'b101,
+ 3'b110,
+ 3'b111: illegal_insn = 1'b0;
+ default: illegal_insn = 1'b1;
+ endcase
+ rf_ren_a_o = 1'b1;
+ rf_ren_b_o = 1'b1;
+ end
+ ////////////////
+ // Load/store //
+ ////////////////
+ OPCODE_STORE: begin
+   // A store reads both register operands and issues a write request to the data memory.
+   rf_ren_a_o = 1'b1;
+   rf_ren_b_o = 1'b1;
+   data_req_o = 1'b1;
+   data_we_o = 1'b1;
+   // instr[14] must be zero for every store
+   if (instr[14]) begin
+     illegal_insn = 1'b1;
+   end
+   // store size
+   unique case (instr[13:12])
+     2'b00: data_type_o = 2'b10; // sb
+     2'b01: data_type_o = 2'b01; // sh
+     2'b10: data_type_o = 2'b00; // sw
+     default: illegal_insn = 1'b1;
+   endcase
+ end
+ OPCODE_LOAD: begin
+ rf_ren_a_o = 1'b1;
+ data_req_o = 1'b1;
+ data_type_o = 2'b00;
+ // sign/zero extension
+ data_sign_extension_o = ~instr[14];
+ // load size
+ unique case (instr[13:12])
+ 2'b00: data_type_o = 2'b10; // lb(u)
+ 2'b01: data_type_o = 2'b01; // lh(u)
+ 2'b10: begin
+ data_type_o = 2'b00; // lw
+ if (instr[14]) begin
+ illegal_insn = 1'b1; // lwu does not exist
+ end
+ end
+ default: begin
+ illegal_insn = 1'b1;
+ end
+ endcase
+ end
+ /////////
+ // ALU //
+ /////////
+ OPCODE_LUI: begin // Load Upper Immediate
+ rf_we = 1'b1;
+ end
+ OPCODE_AUIPC: begin // Add Upper Immediate to PC
+ rf_we = 1'b1;
+ end
+ OPCODE_OP_IMM: begin // Register-Immediate ALU Operations
+ rf_ren_a_o = 1'b1;
+ rf_we = 1'b1;
+ unique case (instr[14:12])
+ 3'b000,
+ 3'b010,
+ 3'b011,
+ 3'b100,
+ 3'b110,
+ 3'b111: illegal_insn = 1'b0;
+ 3'b001: begin
+ unique case (instr[31:27])
+ 5'b0_0000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // slli
+ 5'b0_0100, // sloi
+ 5'b0_1001, // sbclri
+ 5'b0_0101, // sbseti
+ 5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sbinvi
+ 5'b0_0001: if (instr[26] == 1'b0) begin
+ illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // shfl
+ end else begin
+ illegal_insn = 1'b1;
+ end
+ 5'b0_1100: begin
+ unique case(instr[26:20])
+ 7'b000_0000, // clz
+ 7'b000_0001, // ctz
+ 7'b000_0010, // pcnt
+ 7'b000_0100, // sext.b
+ 7'b000_0101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sext.h
+ 7'b001_0000, // crc32.b
+ 7'b001_0001, // crc32.h
+ 7'b001_0010, // crc32.w
+ 7'b001_1000, // crc32c.b
+ 7'b001_1001, // crc32c.h
+ 7'b001_1010: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // crc32c.w
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ default : illegal_insn = 1'b1;
+ endcase
+ end
+ 3'b101: begin
+ if (instr[26]) begin
+ illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // fsri
+ end else begin
+ unique case (instr[31:27])
+ 5'b0_0000, // srli
+ 5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // srai
+ 5'b0_0100, // sroi
+ 5'b0_1100, // rori
+ 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sbexti
+ 5'b0_1101: begin
+ if ((RV32B == RV32BFull)) begin
+ illegal_insn = 1'b0; // grevi
+ end else begin
+ unique case (instr[24:20])
+ 5'b11111, // rev
+ 5'b11000: illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // rev8
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ end
+ 5'b0_0101: begin
+   if (RV32B == RV32BFull) begin
+     illegal_insn = 1'b0; // gorci
+   end else if (instr[24:20] == 5'b00111) begin
+     illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // orc.b
+   end else begin
+     // Every other control value is only accepted with RV32BFull. Without this branch
+     // illegal_insn would keep its default of 0 and the encoding would be treated as legal
+     // (the neighbouring grevi item rejects its unsupported encodings the same way).
+     illegal_insn = 1'b1;
+   end
+ end
+ 5'b0_0001: begin
+ if (instr[26] == 1'b0) begin
+ illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // unshfl
+ end else begin
+ illegal_insn = 1'b1;
+ end
+ end
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ end
+ // default: illegal_insn = 1'b1;
+ endcase
+ end
+ OPCODE_OP: begin // Register-Register ALU operation
+ rf_ren_a_o = 1'b1;
+ rf_ren_b_o = 1'b1;
+ rf_we = 1'b1;
+ if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin
+ illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // cmix / cmov / fsl / fsr
+ end else begin
+ unique case ({instr[31:25], instr[14:12]})
+ // RV32I ALU operations
+ {7'b000_0000, 3'b000},
+ {7'b010_0000, 3'b000},
+ {7'b000_0000, 3'b010},
+ {7'b000_0000, 3'b011},
+ {7'b000_0000, 3'b100},
+ {7'b000_0000, 3'b110},
+ {7'b000_0000, 3'b111},
+ {7'b000_0000, 3'b001},
+ {7'b000_0000, 3'b101},
+ {7'b010_0000, 3'b101}: illegal_insn = 1'b0;
+ // RV32B zbb
+ {7'b010_0000, 3'b111}, // andn
+ {7'b010_0000, 3'b110}, // orn
+ {7'b010_0000, 3'b100}, // xnor
+ {7'b001_0000, 3'b001}, // slo
+ {7'b001_0000, 3'b101}, // sro
+ {7'b011_0000, 3'b001}, // rol
+ {7'b011_0000, 3'b101}, // ror
+ {7'b000_0101, 3'b100}, // min
+ {7'b000_0101, 3'b101}, // max
+ {7'b000_0101, 3'b110}, // minu
+ {7'b000_0101, 3'b111}, // maxu
+ {7'b000_0100, 3'b100}, // pack
+ {7'b010_0100, 3'b100}, // packu
+ {7'b000_0100, 3'b111}, // packh
+ // RV32B zbs
+ {7'b010_0100, 3'b001}, // sbclr
+ {7'b001_0100, 3'b001}, // sbset
+ {7'b011_0100, 3'b001}, // sbinv
+ {7'b010_0100, 3'b101}, // sbext
+ // RV32B zbf
+ {7'b010_0100, 3'b111}: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bfp
+ // RV32B zbe
+ {7'b010_0100, 3'b110}, // bdep
+ {7'b000_0100, 3'b110}, // bext
+ // RV32B zbp
+ {7'b011_0100, 3'b101}, // grev
+ {7'b001_0100, 3'b101}, // gorc
+ {7'b000_0100, 3'b001}, // shfl
+ {7'b000_0100, 3'b101}, // unshfl
+ // RV32B zbc
+ {7'b000_0101, 3'b001}, // clmul
+ {7'b000_0101, 3'b010}, // clmulr
+ {7'b000_0101, 3'b011}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // clmulh
+ // RV32M instructions
+ {7'b000_0001, 3'b000}: begin // mul
+ multdiv_operator_o = MD_OP_MULL;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b001}: begin // mulh
+ multdiv_operator_o = MD_OP_MULH;
+ multdiv_signed_mode_o = 2'b11;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b010}: begin // mulhsu
+ multdiv_operator_o = MD_OP_MULH;
+ multdiv_signed_mode_o = 2'b01;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b011}: begin // mulhu
+ multdiv_operator_o = MD_OP_MULH;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b100}: begin // div
+ multdiv_operator_o = MD_OP_DIV;
+ multdiv_signed_mode_o = 2'b11;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b101}: begin // divu
+ multdiv_operator_o = MD_OP_DIV;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b110}: begin // rem
+ multdiv_operator_o = MD_OP_REM;
+ multdiv_signed_mode_o = 2'b11;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b111}: begin // remu
+ multdiv_operator_o = MD_OP_REM;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ default: begin
+ illegal_insn = 1'b1;
+ end
+ endcase
+ end
+ end
+ /////////////
+ // Special //
+ /////////////
+ OPCODE_MISC_MEM: begin
+   // funct3 (instr[14:12]) selects between FENCE and FENCE.I; anything else is illegal.
+   unique case (instr[14:12])
+     3'b000: begin
+       // FENCE is treated as a NOP since all memory operations are already strictly ordered.
+       rf_we = 1'b0;
+     end
+     3'b001: begin
+       // FENCE.I is implemented as a jump to the next PC, this gives the required flushing
+       // behaviour (iside prefetch buffer flushed and response to any outstanding iside
+       // requests will be ignored).
+       // If present, the ICache will also be flushed.
+       jump_in_dec_o = 1'b1;
+       rf_we = 1'b0;
+       if (instr_first_cycle_i) begin
+         jump_set_o = 1'b1;
+         icache_inval_o = 1'b1;
+       end
+     end
+     default: begin
+       illegal_insn = 1'b1;
+     end
+   endcase
+ end
+ OPCODE_SYSTEM: begin
+   if (instr[14:12] == 3'b000) begin
+     // non CSR related SYSTEM instructions, identified by instr[31:20]
+     unique case (instr[31:20])
+       12'h000: // ECALL
+         // environment (system) call
+         ecall_insn_o = 1'b1;
+       12'h001: // ebreak
+         // debugger trap
+         ebrk_insn_o = 1'b1;
+       12'h302: // mret
+         mret_insn_o = 1'b1;
+       12'h7b2: // dret
+         dret_insn_o = 1'b1;
+       12'h105: // wfi
+         wfi_insn_o = 1'b1;
+       default:
+         illegal_insn = 1'b1;
+     endcase
+     // rs1 and rd must be 0
+     if (instr_rs1 != 5'b0 || instr_rd != 5'b0) begin
+       illegal_insn = 1'b1;
+     end
+   end else begin
+     // instruction to read/modify CSR: the CSR value is written back to rd
+     csr_access_o = 1'b1;
+     rf_wdata_sel_o = RF_WD_CSR;
+     rf_we = 1'b1;
+     // rs1 is only read from the register file when instr[14] is clear
+     if (~instr[14]) begin
+       rf_ren_a_o = 1'b1;
+     end
+     // instr[13:12] selects the CSR operation; 2'b00 is not a valid CSR operation
+     unique case (instr[13:12])
+       2'b01: csr_op = CSR_OP_WRITE;
+       2'b10: csr_op = CSR_OP_SET;
+       2'b11: csr_op = CSR_OP_CLEAR;
+       default: csr_illegal = 1'b1;
+     endcase
+     illegal_insn = csr_illegal;
+   end
+ end
+ //////////////////////////////////////////
+ // Floating Point Extension (F and D) //
+ //////////////////////////////////////////
+ data_req_o = 1'b1;
+ data_we_o = 1'b1;
+ data_type_o = 2'b00;
+ use_fp_rs2_o = 1'b1;
+ unique case(instr[14:12])
+ 3'b011: begin // FSD
+ illegal_insn = (RVF == RV64FDouble) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ 3'b010: begin // FSW
+ illegal_insn = (RVF == RV32FNone) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ data_req_o = 1'b1;
+ data_type_o = 2'b00;
+ fp_load_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ unique case(instr[14:12])
+ 3'b011: begin // FLD
+ illegal_insn = (RVF == RV64FDouble) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ 3'b010: begin // FLW
+ illegal_insn = (RVF == RV32FNone) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ fp_rf_we_o = 1'b1;
+ fp_src_fmt_o = FP32;
+ is_fp_instr_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rs3_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ unique case (instr[26:25])
+ 01: begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ 00: begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ OPCODE_OP_FP: begin
+ fp_src_fmt_o = FP32;
+ is_fp_instr_o = 1'b1;
+ unique case (instr[31:25])
+ 7'b0000001, // FADD.D
+ 7'b0000101: begin // FSUB.D
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ fp_swap_oprnds_o = 1'b1;
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ 7'b0001001, // FMUL.D
+ 7'b0001101:begin // FDIV.D
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ 7'b0000000, // FADD.S
+ 7'b0000100: begin // FSUB.S
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ fp_swap_oprnds_o = 1'b1;
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ 7'b0001000, // FMUL.S
+ 7'b0001100: begin // FDIV.S
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ 7'b0101101: begin
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~|instr[24:20]) begin //FSQRT.D
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b0101100: begin // FSQRT.S
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~|instr[24:20]) begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ end
+ 7'b0010001: begin // FSGNJ.D, FSGNJN.D, FSGNJX.D
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~(instr[14] | (&instr[13:12]))) begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b0010000: begin // FSGNJ.S, FSGNJN.S, FSGNJX.S
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~(instr[14] | (&instr[13:12]))) begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ end
+ 7'b0010101: begin // FMIN.D, FMAX.D
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~|instr[14:13]) begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b0010100: begin // FMIN.S, FMAX.S
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~|instr[14:13]) begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ end
+ 7'b0100000: begin // FCVT.S.D
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~(|instr[24:21] | (~instr[20]))) begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b1100000: begin // FCVT.W.S, FCVT.WU.S
+ rf_we = 1'b1; // write back in int_regfile
+ use_fp_rs1_o = 1'b1;
+ if (~|instr[24:21]) begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ end
+ 7'b0100001: begin // FCVT.D.S
+ fp_rf_we_o = 1'b1;
+ use_fp_rs1_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~|instr[24:20]) begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b1110000: begin // FMV.X.W , FCLASS.S
+ rf_we = 1'b1; // write back in int_regfile
+ unique case ({instr[24:20],instr[14:12]})
+ {5'b00000,3'b000}: begin
+ use_fp_rs1_o = 1'b1;
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ mv_instr_o = 1'b1;
+ end
+ {5'b00000,3'b001}: begin
+ use_fp_rs1_o = 1'b1;
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ default: begin
+ illegal_insn =1'b1;
+ end
+ endcase
+ end
+ 7'b1010001: begin // FEQ.D, FLT.D, FLE.D
+ rf_we = 1'b1; // write back in int_regfile
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ if (~(instr[14]) | (&instr[13:12])) begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b1010000: begin // FEQ.S, FLT.S, FLE.S
+ rf_we = 1'b1; // write back in int_regfile
+ use_fp_rs1_o = 1'b1;
+ use_fp_rs2_o = 1'b1;
+ if (~(instr[14]) | (&instr[13:12])) begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ end
+ 7'b1110001: begin // FCLASS.D
+ rf_we = 1'b1; // write back in int_regfile
+ use_fp_rs1_o = 1'b1;
+ unique case ({instr[24:20],instr[14:12]})
+ {5'b00000,3'b001}: begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ default: begin
+ illegal_insn =1'b1;
+ end
+ endcase
+ end
+ 7'b1100001: begin // // FCVT.W.D, FCVT.WU.D
+ rf_we = 1'b1; // write back in int_regfile
+ use_fp_rs1_o = 1'b1;
+ if (~|instr[24:21]) begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b1101000: begin // FCVT.S.W, FCVT.S.WU
+ fp_rf_we_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ if (~|instr[24:21]) begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ end
+ 7'b1111001: begin // FCVT.D.W, FCVT.D.WU
+ rf_we = 1'b1; // write back in int_regfile
+ use_fp_rd_o = 1'b1;
+ if (~|instr[24:21]) begin
+ illegal_insn = ((RVF == RV64FDouble) & (fp_invalid_rm)) ? 1'b0 : 1'b1;
+ fp_src_fmt_o = FP64;
+ end
+ end
+ 7'b1111000: begin // FMV.W.X
+ fp_rf_we_o = 1'b1;
+ use_fp_rd_o = 1'b1;
+ mv_instr_o = 1'b1;
+ if (~(|instr[24:20]) | (|instr[14:12])) begin
+ illegal_insn = ((RVF == RV32FNone) & (~fp_invalid_rm)) ? 1'b1 : 1'b0;
+ fp_src_fmt_o = FP32;
+ end
+ end
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ default: begin
+ illegal_insn = 1'b1;
+ end
+ endcase
+ // make sure illegal compressed instructions cause illegal instruction exceptions
+ if (illegal_c_insn_i) begin
+ illegal_insn = 1'b1;
+ end
+ // make sure illegal instructions detected in the decoder do not propagate from decoder
+ // into register file, LSU, EX, WB, CSRs, PC
+ // NOTE: instructions can also be detected to be illegal inside the CSRs (upon accesses with
+ // insufficient privileges), or when accessing non-available registers in RV32E,
+ // these cases are not handled here
+ if (illegal_insn) begin
+ rf_we = 1'b0;
+ data_req_o = 1'b0;
+ data_we_o = 1'b0;
+ jump_in_dec_o = 1'b0;
+ jump_set_o = 1'b0;
+ branch_in_dec_o = 1'b0;
+ csr_access_o = 1'b0;
+ // floating point
+ fp_rf_we_o = 1'b0;
+ end
+ end
+ /////////////////////////////
+ // Decoder for ALU control //
+ /////////////////////////////
+ always_comb begin
+ alu_operator_o = ALU_SLTU;
+ alu_op_a_mux_sel_o = OP_A_IMM;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_a_mux_sel_o = IMM_A_ZERO;
+ imm_b_mux_sel_o = IMM_B_I;
+ bt_a_mux_sel_o = OP_A_CURRPC;
+ bt_b_mux_sel_o = IMM_B_I;
+ opcode_alu = opcode_e'(instr_alu[6:0]);
+ use_rs3_d = 1'b0;
+ alu_multicycle_o = 1'b0;
+ mult_sel_o = 1'b0;
+ div_sel_o = 1'b0;
+ fp_alu_op_mod_o = 1'b0;
+ fp_alu_operator_o = FMADD;
+ fp_alu_op_b_mux_sel_o = OP_B_IMM; // op_b_sel_e, OP_B_REG_B
+ unique case (opcode_alu)
+ ///////////
+ // Jumps //
+ ///////////
+ OPCODE_JAL: begin // Jump and Link
+   // With a branch target ALU the target is formed from the current PC and the J immediate.
+   if (BranchTargetALU) begin
+     bt_a_mux_sel_o = OP_A_CURRPC;
+     bt_b_mux_sel_o = IMM_B_J;
+   end
+   // The main ALU adds an immediate to the current PC in every cycle of a JAL; only the
+   // immediate selection differs between the cycles.
+   alu_op_a_mux_sel_o = OP_A_CURRPC;
+   alu_op_b_mux_sel_o = OP_B_IMM;
+   alu_operator_o = ALU_ADD;
+   // Default: PC increment, used to calculate and store PC+4
+   imm_b_mux_sel_o = IMM_B_INCR_PC;
+   // Jumps take two cycles without the BTALU; the first one calculates the jump target
+   if (instr_first_cycle_i && !BranchTargetALU) begin
+     imm_b_mux_sel_o = IMM_B_J;
+   end
+ end
+ OPCODE_JALR: begin // Jump and Link Register
+   // With a branch target ALU the target is formed from register operand A and the I immediate.
+   if (BranchTargetALU) begin
+     bt_a_mux_sel_o = OP_A_REG_A;
+     bt_b_mux_sel_o = IMM_B_I;
+   end
+   // The main ALU always performs an addition with an immediate as operand B.
+   alu_op_b_mux_sel_o = OP_B_IMM;
+   alu_operator_o = ALU_ADD;
+   // Default: current PC plus the PC increment, used to calculate and store PC+4
+   alu_op_a_mux_sel_o = OP_A_CURRPC;
+   imm_b_mux_sel_o = IMM_B_INCR_PC;
+   // Jumps take two cycles without the BTALU; the first one calculates the jump target
+   if (instr_first_cycle_i && !BranchTargetALU) begin
+     alu_op_a_mux_sel_o = OP_A_REG_A;
+     imm_b_mux_sel_o = IMM_B_I;
+   end
+ end
+ OPCODE_BRANCH: begin // Branch
+ // Check branch condition selection
+ unique case (instr_alu[14:12])
+ 3'b000: alu_operator_o = ALU_EQ;
+ 3'b001: alu_operator_o = ALU_NE;
+ 3'b100: alu_operator_o = ALU_LT;
+ 3'b101: alu_operator_o = ALU_GE;
+ 3'b110: alu_operator_o = ALU_LTU;
+ 3'b111: alu_operator_o = ALU_GEU;
+ default: ;
+ endcase
+ if (BranchTargetALU) begin
+ bt_a_mux_sel_o = OP_A_CURRPC;
+ // Not-taken branch will jump to next instruction (used in secure mode)
+ bt_b_mux_sel_o = branch_taken_i ? IMM_B_B : IMM_B_INCR_PC;
+ end
+ // Without branch target ALU, a branch is a two-stage operation using the Main ALU in both
+ // stages
+ if (instr_first_cycle_i) begin
+ // First evaluate the branch condition
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_REG_B;
+ end else begin
+ // Then calculate jump target
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ // Not-taken branch will jump to next instruction (used in secure mode)
+ imm_b_mux_sel_o = branch_taken_i ? IMM_B_B : IMM_B_INCR_PC;
+ alu_operator_o = ALU_ADD;
+ end
+ end
+ ////////////////
+ // Load/store //
+ ////////////////
+ OPCODE_STORE: begin
+   // The ALU performs an addition with register operand A for a store.
+   alu_op_a_mux_sel_o = OP_A_REG_A;
+   alu_op_b_mux_sel_o = OP_B_REG_B;
+   alu_operator_o = ALU_ADD;
+   if (!instr_alu[14]) begin
+     // offset from immediate
+     imm_b_mux_sel_o = IMM_B_S;
+     alu_op_b_mux_sel_o = OP_B_IMM;
+   end
+ end
+ OPCODE_LOAD: begin
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ // offset from immediate
+ alu_operator_o = ALU_ADD;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_I;
+ end
+ /////////
+ // ALU //
+ /////////
+ OPCODE_LUI: begin // Load Upper Immediate
+ alu_op_a_mux_sel_o = OP_A_IMM;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_a_mux_sel_o = IMM_A_ZERO;
+ imm_b_mux_sel_o = IMM_B_U;
+ alu_operator_o = ALU_ADD;
+ end
+ OPCODE_AUIPC: begin // Add Upper Immediate to PC
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_U;
+ alu_operator_o = ALU_ADD;
+ end
+ OPCODE_OP_IMM: begin // Register-Immediate ALU Operations
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_I;
+ unique case (instr_alu[14:12])
+ 3'b000: alu_operator_o = ALU_ADD; // Add Immediate
+ 3'b010: alu_operator_o = ALU_SLT; // Set to one if Lower Than Immediate
+ 3'b011: alu_operator_o = ALU_SLTU; // Set to one if Lower Than Immediate Unsigned
+ 3'b100: alu_operator_o = ALU_XOR; // Exclusive Or with Immediate
+ 3'b110: alu_operator_o = ALU_OR; // Or with Immediate
+ 3'b111: alu_operator_o = ALU_AND; // And with Immediate
+ 3'b001: begin
+ if (RV32B != RV32BNone) begin
+ unique case (instr_alu[31:27])
+ 5'b0_0000: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+ 5'b0_0100: alu_operator_o = ALU_SLO; // Shift Left Ones by Immediate
+ 5'b0_1001: alu_operator_o = ALU_SBCLR; // Clear bit specified by immediate
+ 5'b0_0101: alu_operator_o = ALU_SBSET; // Set bit specified by immediate
+ 5'b0_1101: alu_operator_o = ALU_SBINV; // Invert bit specified by immediate.
+ // Shuffle with Immediate Control Value
+ 5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL;
+ 5'b0_1100: begin
+ unique case (instr_alu[26:20])
+ 7'b000_0000: alu_operator_o = ALU_CLZ; // clz
+ 7'b000_0001: alu_operator_o = ALU_CTZ; // ctz
+ 7'b000_0010: alu_operator_o = ALU_PCNT; // pcnt
+ 7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b
+ 7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h
+ 7'b001_0000: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32_B; // crc32.b
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_0001: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32_H; // crc32.h
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_0010: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32_W; // crc32.w
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_1000: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32C_B; // crc32c.b
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_1001: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32C_H; // crc32c.h
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_1010: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32C_W; // crc32c.w
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ default: ;
+ endcase
+ end
+ default: ;
+ endcase
+ end else begin
+ alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+ end
+ end
+ 3'b101: begin
+ if (RV32B != RV32BNone) begin
+ if (instr_alu[26] == 1'b1) begin
+ alu_operator_o = ALU_FSR;
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end else begin
+ unique case (instr_alu[31:27])
+ 5'b0_0000: alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
+ 5'b0_1000: alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate
+ 5'b0_0100: alu_operator_o = ALU_SRO; // Shift Right Ones by Immediate
+ 5'b0_1001: alu_operator_o = ALU_SBEXT; // Extract bit specified by immediate.
+ 5'b0_1100: begin
+ alu_operator_o = ALU_ROR; // Rotate Right by Immediate
+ alu_multicycle_o = 1'b1;
+ end
+ 5'b0_1101: alu_operator_o = ALU_GREV; // General Reverse with Imm Control Val
+ 5'b0_0101: alu_operator_o = ALU_GORC; // General Or-combine with Imm Control Val
+ // Unshuffle with Immediate Control Value
+ 5'b0_0001: begin
+ if (RV32B == RV32BFull) begin
+ if (instr_alu[26] == 1'b0) alu_operator_o = ALU_UNSHFL;
+ end
+ end
+ default: ;
+ endcase
+ end
+ end else begin
+ if (instr_alu[31:27] == 5'b0_0000) begin
+ alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
+ end else if (instr_alu[31:27] == 5'b0_1000) begin
+ alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate
+ end
+ end
+ end
+ // default: ;
+ endcase
+ end
+ OPCODE_OP: begin // Register-Register ALU operation
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_REG_B;
+ if (instr_alu[26]) begin
+ if (RV32B != RV32BNone) begin
+ unique case ({instr_alu[26:25], instr_alu[14:12]})
+ {2'b11, 3'b001}: begin
+ alu_operator_o = ALU_CMIX; // cmix
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ {2'b11, 3'b101}: begin
+ alu_operator_o = ALU_CMOV; // cmov
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ {2'b10, 3'b001}: begin
+ alu_operator_o = ALU_FSL; // fsl
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ {2'b10, 3'b101}: begin
+ alu_operator_o = ALU_FSR; // fsr
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ default: ;
+ endcase
+ end
+ end else begin
+ unique case ({instr_alu[31:25], instr_alu[14:12]})
+ // RV32I ALU operations
+ {7'b000_0000, 3'b000}: alu_operator_o = ALU_ADD; // Add
+ {7'b010_0000, 3'b000}: alu_operator_o = ALU_SUB; // Sub
+ {7'b000_0000, 3'b010}: alu_operator_o = ALU_SLT; // Set Lower Than
+ {7'b000_0000, 3'b011}: alu_operator_o = ALU_SLTU; // Set Lower Than Unsigned
+ {7'b000_0000, 3'b100}: alu_operator_o = ALU_XOR; // Xor
+ {7'b000_0000, 3'b110}: alu_operator_o = ALU_OR; // Or
+ {7'b000_0000, 3'b111}: alu_operator_o = ALU_AND; // And
+ {7'b000_0000, 3'b001}: alu_operator_o = ALU_SLL; // Shift Left Logical
+ {7'b000_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical
+ {7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic
+ // RV32B ALU Operations
+ {7'b001_0000, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SLO; // slo
+ {7'b001_0000, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_SRO; // sro
+ {7'b011_0000, 3'b001}: begin
+ if (RV32B != RV32BNone) begin
+ alu_operator_o = ALU_ROL; // rol
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ {7'b011_0000, 3'b101}: begin
+ if (RV32B != RV32BNone) begin
+ alu_operator_o = ALU_ROR; // ror
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ {7'b000_0101, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_MIN; // min
+ {7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX; // max
+ {7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU; // minu
+ {7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU; // maxu
+ {7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK; // pack
+ {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU; // packu
+ {7'b000_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKH; // packh
+ {7'b010_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_XNOR; // xnor
+ {7'b010_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_ORN; // orn
+ {7'b010_0000, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_ANDN; // andn
+ // RV32B zbs
+ {7'b010_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBCLR; // sbclr
+ {7'b001_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBSET; // sbset
+ {7'b011_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBINV; // sbinv
+ {7'b010_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_SBEXT; // sbext
+ // RV32B zbf
+ {7'b010_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_BFP; // bfp
+ // RV32B zbp
+ {7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV; // grev
+ {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; // gorc
+ {7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL; // shfl
+ {7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
+ // RV32B zbc
+ {7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; // clmul
+ {7'b000_0101, 3'b010}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMULR; // clmulr
+ {7'b000_0101, 3'b011}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMULH; // clmulh
+ // RV32B zbe
+ {7'b010_0100, 3'b110}: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_BDEP; // bdep
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ {7'b000_0100, 3'b110}: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_BEXT; // bext
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ // RV32M instructions, all use the same ALU operation
+ {7'b000_0001, 3'b000}: begin // mul
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b001}: begin // mulh
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b010}: begin // mulhsu
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b011}: begin // mulhu
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b100}: begin // div
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b101}: begin // divu
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b110}: begin // rem
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b111}: begin // remu
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ default: ;
+ endcase
+ end
+ end
+ /////////////
+ // Special //
+ /////////////
+ unique case (instr_alu[14:12])
+ 3'b000: begin
+ // FENCE is treated as a NOP since all memory operations are already strictly ordered.
+ alu_operator_o = ALU_ADD; // nop
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ end
+ 3'b001: begin
+ // FENCE.I will flush the IF stage, prefetch buffer and ICache if present.
+ if (BranchTargetALU) begin
+ bt_a_mux_sel_o = OP_A_CURRPC;
+ bt_b_mux_sel_o = IMM_B_INCR_PC;
+ end else begin
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_INCR_PC;
+ alu_operator_o = ALU_ADD;
+ end
+ end
+ default: ;
+ endcase
+ end
+ if (instr_alu[14:12] == 3'b000) begin
+ // non CSR related SYSTEM instructions
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ end else begin
+ // instruction to read/modify CSR
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_a_mux_sel_o = IMM_A_Z;
+ imm_b_mux_sel_o = IMM_B_I; // CSR address is encoded in I imm
+ if (instr_alu[14]) begin
+ // rs1 field is used as immediate
+ alu_op_a_mux_sel_o = OP_A_IMM;
+ end else begin
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ end
+ end
+ end
+ //////////////////////////////////////////
+ // Floating Point Extension (F and D) //
+ //////////////////////////////////////////
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_REG_B;
+ alu_operator_o = ALU_ADD;
+ unique case(instr[14:12])
+ 3'b011: begin // FSD
+ imm_b_mux_sel_o = IMM_B_S;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ end
+ 3'b010: begin // FSW
+ imm_b_mux_sel_o = IMM_B_S;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ end
+ default: ;
+ endcase
+ end
+ unique case(instr[14:12])
+ 3'b011: begin // FLD
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_operator_o = ALU_ADD;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_I;
+ end
+ 3'b010: begin // FLW
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_operator_o = ALU_ADD;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_I;
+ end
+ default: ;
+ endcase
+ end
+ unique case (instr[26:25])
+ 01: begin
+ fp_alu_operator_o = FMADD;
+ fp_alu_op_mod_o = 1'b0;
+ end
+ 00: begin
+ fp_alu_operator_o = FMADD;
+ fp_alu_op_mod_o = 1'b0;
+ end
+ default: ;
+ endcase
+ end
+ unique case (instr[26:25])
+ 01: begin
+ fp_alu_operator_o = FMADD;
+ fp_alu_op_mod_o = 1'b1;
+ end
+ 00: begin
+ fp_alu_operator_o = FMADD;
+ fp_alu_op_mod_o = 1'b1;
+ end
+ default: ;
+ endcase
+ end
+ unique case (instr[26:25])
+ 01: begin
+ fp_alu_operator_o = FNMSUB;
+ end
+ 00: begin
+ fp_alu_operator_o = FNMSUB;
+ end
+ default: ;
+ endcase
+ end
+ unique case (instr[26:25])
+ 01: begin
+ fp_alu_operator_o = FNMSUB;
+ fp_alu_op_mod_o = 1'b1;
+ end
+ 00: begin
+ fp_alu_operator_o = FNMSUB;
+ fp_alu_op_mod_o = 1'b1;
+ end
+ default: ;
+ endcase
+ end
+ OPCODE_OP_FP: begin
+ unique case (instr[31:25])
+ 7'b0000001: begin // FADD.D
+ fp_alu_operator_o = ADD;
+ end
+ 7'b0000101: begin // FSUB.D
+ fp_alu_operator_o = ADD;
+ fp_alu_op_mod_o = 1'b1;
+ end
+ 7'b0001001: begin // FMUL.D
+ fp_alu_operator_o = MUL;
+ end
+ 7'b0001101:begin // FDIV.D
+ fp_alu_operator_o = DIV;
+ end
+ 7'b0000000: begin // FADD.S
+ fp_alu_operator_o = ADD;
+ end
+ 7'b0000100: begin // FSUB.S
+ fp_alu_operator_o = ADD;
+ fp_alu_op_mod_o = 1'b1;
+ end
+ 7'b0001000: begin // FMUL.S
+ fp_alu_operator_o = MUL;
+ end
+ 7'b0001100: begin // FDIV.S
+ fp_alu_operator_o = DIV;
+ end
+ 7'b0101101: begin
+ if (~|instr[24:20]) begin // FSQRT.D
+ fp_alu_operator_o = SQRT;
+ end
+ end
+ 7'b0101100: begin // FSQRT.S
+ if (~|instr[24:20]) begin
+ fp_alu_operator_o = SQRT;
+ end
+ end
+ 7'b0010001: begin // FSGNJ.D, FSGNJN.D, FSGNJX.D
+ if (~(instr[14] | (&instr[13:12]))) begin
+ fp_alu_operator_o = SGNJ;
+ end
+ end
+ 7'b0010000: begin // FSGNJ.S, FSGNJN.S, FSGNJX.S
+ if (~(instr[14] | (&instr[13:12]))) begin
+ fp_alu_operator_o = SGNJ;
+ end
+ end
+ 7'b0010101: begin // FMIN.D, FMAX.D
+ if (~|instr[14:13]) begin
+ fp_alu_operator_o = MINMAX;
+ end
+ end
+ 7'b0010100: begin // FMIN.S, FMAX.S
+ if (~|instr[14:13]) begin
+ fp_alu_operator_o = MINMAX;
+ end
+ end
+ 7'b0100000: begin // FCVT.S.D
+ if (~(|instr[24:21] | (~instr[20]))) begin
+ fp_alu_operator_o = F2F;
+ end
+ end
+ 7'b1100000: begin // FCVT.W.S, FCVT.WU.S
+ if (~|instr[24:21]) begin
+ fp_alu_operator_o = F2I;
+ if (instr[20])
+ fp_alu_op_mod_o = 1'b1;
+ end
+ end
+ 7'b0100001: begin // FCVT.D.S
+ if (~|instr[24:20]) begin
+ fp_alu_operator_o = F2F;
+ end
+ end
+ 7'b1110000: begin // FMV.X.W , FCLASS.S
+   // The selector is {instr[24:20] (5 bits), instr[14:12] (3 bits)}. Case items are written
+   // at the full 8-bit width so the match does not depend on implicit zero-extension of a
+   // narrower concatenation.
+   unique case ({instr[24:20],instr[14:12]})
+     // {5'b00000,3'b000}: begin
+     //   fp_alu_operator_o = ADD; // to be decided YET
+     // end
+     {5'b00000,3'b001}: begin
+       fp_alu_operator_o = CLASSIFY;
+     end
+     default: ;
+   endcase
+ end
+ 7'b1010001: begin // FEQ.D, FLT.D, FLE.D
+ if ((~instr[14]) | (&instr[13:12])) begin
+ fp_alu_operator_o = CMP;
+ end
+ end
+ 7'b1010000: begin // FEQ.S, FLT.S, FLE.S
+ if ((~instr[14]) | (&instr[13:12])) begin
+ fp_alu_operator_o = CMP;
+ end
+ end
+ 7'b1110001: begin // FCLASS.D
+   // The selector is {instr[24:20] (5 bits), instr[14:12] (3 bits)}. The case item is
+   // written at the full 8-bit width so the match does not depend on implicit
+   // zero-extension of a narrower concatenation.
+   unique case ({instr[24:20],instr[14:12]})
+     {5'b00000,3'b001}: begin
+       fp_alu_operator_o = CLASSIFY;
+     end
+     default: ;
+   endcase
+ end
+ 7'b1100001: begin // FCVT.W.D, FCVT.WU.D
+ if (~|instr[24:21]) begin
+ fp_alu_operator_o = F2I;
+ // instr[20] set selects the modified (WU) variant of the conversion
+ if (instr[20])
+ fp_alu_op_mod_o = 1'b1;
+ end
+ end
+ 7'b1101000: begin // FCVT.S.W, FCVT.S.WU
+ if (~(|instr[24:21])) begin
+ fp_alu_operator_o = I2F;
+ if (instr[20])
+ fp_alu_op_mod_o = 1'b1;
+ end
+ end
+ // NOTE(review): the RISC-V unprivileged spec encodes FCVT.D.W / FCVT.D.WU with
+ // funct7 = 7'b1101001; 7'b1111001 is the FMV.D.X encoding. Confirm whether this label
+ // (and any matching label in the legality decoder) should be 7'b1101001.
+ 7'b1111001: begin // FCVT.D.W, FCVT.D.WU
+ if (~|instr[24:21]) begin
+ fp_alu_operator_o = I2F;
+ // instr[20] set selects the modified (WU) variant of the conversion
+ if (instr[20])
+ fp_alu_op_mod_o = 1'b1;
+ end
+ end
+ // 7'b1111000: begin // FMV.W.X
+ // if ((|instr[24:20]) | (|instr[14:12])) begin
+ // fp_alu_operator_o = FMADD; // to be decided
+ // end
+ // end
+ default: ;
+ endcase
+ end
+ default: ;
+ endcase
+ end
+ // Final gating of decoder outputs: the raw select / write-enable signals are masked
+ // whenever the instruction is flagged illegal.
+ // do not enable multdiv in case of illegal instruction exceptions
+ assign mult_en_o = illegal_insn ? 1'b0 : mult_sel_o;
+ assign div_en_o = illegal_insn ? 1'b0 : div_sel_o;
+ // make sure instructions accessing non-available registers in RV32E cause illegal
+ // instruction exceptions
+ assign illegal_insn_o = illegal_insn | illegal_reg_rv32e;
+ // do not propagate regfile write enable if non-available registers are accessed in RV32E
+ assign rf_we_o = rf_we & ~illegal_reg_rv32e;
+ ////////////////
+ // Assertions //
+ ////////////////
+// // Selectors must be known/valid.
+// `ASSERT(buraqRegImmAluOpKnown, (opcode == OPCODE_OP_IMM) |->
+// !$isunknown(instr[14:12]))
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..4331423
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,501 @@
+/**
+ * Instruction Fetch Stage
+ *
+ * Instruction fetch unit: Selection of the next PC, and buffering (sampling) of
+ * the read instruction.
+ */
+module brq_ifu #(
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+ parameter bit DummyInstructions = 1'b0,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter bit PCIncrCheck = 1'b0,
+ parameter bit BranchPredictor = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ input logic [31:0] boot_addr_i, // also used for mtvec
+ input logic req_i, // instruction request control
+ // instruction cache interface
+ output logic instr_req_o,
+ output logic [31:0] instr_addr_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ input logic [31:0] instr_rdata_i,
+ input logic instr_err_i,
+ input logic instr_pmp_err_i,
+ // output of ID stage
+ output logic instr_valid_id_o, // instr in IF-ID is valid
+ output logic instr_new_id_o, // instr in IF-ID is new
+ output logic [31:0] instr_rdata_id_o, // instr for ID stage
+ output logic [31:0] instr_rdata_alu_id_o, // replicated instr for ID stage
+ // to reduce fan-out
+ output logic [15:0] instr_rdata_c_id_o, // compressed instr for ID stage
+ // (mtval), meaningful only if
+ // instr_is_compressed_id_o = 1'b1
+ output logic instr_is_compressed_id_o, // compressed decoder thinks this
+ // is a compressed instr
+ output logic instr_bp_taken_o, // instruction was predicted to be
+ // a taken branch
+ output logic instr_fetch_err_o, // bus error on fetch
+ output logic instr_fetch_err_plus2_o, // bus error misaligned
+ output logic illegal_c_insn_id_o, // compressed decoder thinks this
+ // is an invalid instr
+ output logic dummy_instr_id_o, // Instruction is a dummy
+ output logic [31:0] pc_if_o,
+ output logic [31:0] pc_id_o,
+ // control signals
+ input logic instr_valid_clear_i, // clear instr valid bit in IF-ID
+ input logic pc_set_i, // set the PC to a new value
+ input logic pc_set_spec_i,
+ input brq_pkg::pc_sel_e pc_mux_i, // selector for PC multiplexer
+ input logic nt_branch_mispredict_i, // Not-taken branch in ID/EX was
+ // mispredicted (predicted taken)
+ input brq_pkg::exc_pc_sel_e exc_pc_mux_i, // selects ISR address
+ input brq_pkg::exc_cause_e exc_cause, // selects ISR address for
+ // vectorized interrupt lines
+ input logic dummy_instr_en_i,
+ input logic [2:0] dummy_instr_mask_i,
+ input logic dummy_instr_seed_en_i,
+ input logic [31:0] dummy_instr_seed_i,
+ input logic icache_enable_i,
+ input logic icache_inval_i,
+ // jump and branch target
+ input logic [31:0] branch_target_ex_i, // branch/jump target address
+ // CSRs
+ input logic [31:0] csr_mepc_i, // PC to restore after handling
+ // the interrupt/exception
+ input logic [31:0] csr_depc_i, // PC to restore after handling
+ // the debug request
+ input logic [31:0] csr_mtvec_i, // base PC to jump to on exception
+ output logic csr_mtvec_init_o, // tell CS regfile to init mtvec
+ // pipeline stall
+ input logic id_in_ready_i, // ID stage is ready for new instr
+ // misc signals
+ output logic pc_mismatch_alert_o,
+ output logic if_busy_o // IF stage is busy fetching instr
+ import brq_pkg::*;
+ logic instr_valid_id_d, instr_valid_id_q;
+ logic instr_new_id_d, instr_new_id_q;
+ // prefetch buffer related signals
+ logic prefetch_busy;
+ logic branch_req;
+ logic branch_spec;
+ logic predicted_branch;
+ logic [31:0] fetch_addr_n;
+ logic unused_fetch_addr_n0;
+ logic fetch_valid;
+ logic fetch_ready;
+ logic [31:0] fetch_rdata;
+ logic [31:0] fetch_addr;
+ logic fetch_err;
+ logic fetch_err_plus2;
+ logic if_instr_valid;
+ logic [31:0] if_instr_rdata;
+ logic [31:0] if_instr_addr;
+ logic if_instr_err;
+ logic [31:0] exc_pc;
+ logic [5:0] irq_id;
+ logic unused_irq_bit;
+ logic if_id_pipe_reg_we; // IF-ID pipeline reg write enable
+ // Dummy instruction signals
+ logic stall_dummy_instr;
+ logic [31:0] instr_out;
+ logic instr_is_compressed_out;
+ logic illegal_c_instr_out;
+ logic instr_err_out;
+ logic predict_branch_taken;
+ logic [31:0] predict_branch_pc;
+ brq_pkg::pc_sel_e pc_mux_internal;
+ // Lint tie-offs. boot_addr_i and csr_mtvec_i are both consumed as [31:2] by the PC
+ // muxes in this module, so only bits [1:0] are genuinely unused.
+ logic [1:0] unused_boot_addr;
+ logic [1:0] unused_csr_mtvec;
+ assign unused_boot_addr = boot_addr_i[1:0];
+ assign unused_csr_mtvec = csr_mtvec_i[1:0];
+ // extract interrupt ID from exception cause
+ assign irq_id = {exc_cause};
+ // No bit of irq_id feeds the exception PC mux in this module, so all bits are reduced
+ // into the single-bit sink to keep every bit "read" for lint purposes.
+ assign unused_irq_bit = ^irq_id;
+ // Exception PC selection.
+ // Exceptions and interrupts share one entry point: the mtvec value with its two low
+ // bits forced to zero. The two debug selectors use the DmHaltAddr / DmExceptionAddr
+ // module parameters. No default arm is provided; only the four listed selector values
+ // are handled.
+ always_comb begin : exc_pc_mux
+   unique case (exc_pc_mux_i)
+     EXC_PC_EXC,
+     EXC_PC_IRQ:     exc_pc = {csr_mtvec_i[31:2], 2'b00};
+     EXC_PC_DBD:     exc_pc = DmHaltAddr;
+     EXC_PC_DBG_EXC: exc_pc = DmExceptionAddr;
+   endcase
+ end
+ // The branch predictor can supply a PC that is internal to the IFU. The core's own
+ // selector wins whenever the core is setting the PC (pc_set_i), whenever no predictor is
+ // built, or whenever no taken branch is predicted; otherwise PC_BP is selected.
+ assign pc_mux_internal =
+     (!BranchPredictor || !predict_branch_taken || pc_set_i) ? pc_mux_i : PC_BP;
+ // Fetch address selection.
+ always_comb begin : fetch_addr_mux
+   // The word-aligned boot address is the result for PC_BOOT, for any selector value not
+   // listed below, and for PC_BP when the design is built without a branch predictor.
+   fetch_addr_n = {boot_addr_i[31:2], 2'b00};
+   unique case (pc_mux_internal)
+     PC_JUMP: fetch_addr_n = branch_target_ex_i;
+     PC_EXC:  fetch_addr_n = exc_pc;      // enter exception handler
+     PC_ERET: fetch_addr_n = csr_mepc_i;  // return from exception
+     PC_DRET: fetch_addr_n = csr_depc_i;
+     // BranchPredictor is an elaboration-time constant, so without a predictor this arm
+     // adds no mux input.
+     PC_BP:   if (BranchPredictor) fetch_addr_n = predict_branch_pc;
+     default: ;
+   endcase
+ end
+ // Ask the CS register file to initialise mtvec when the core sets the PC to the boot address.
+ assign csr_mtvec_init_o = pc_set_i & (pc_mux_i == PC_BOOT);
+ // Instruction supply: the ICache parameter selects, at elaboration time, between the
+ // instruction cache and the prefetch buffer. Both variants drive the same fetch_* and
+ // instr_* signals and receive the fetch address with bit 0 forced to zero.
+ if (ICache) begin : gen_ifu_icache
+   brq_ifu_icache #(
+     .ICacheECC       (ICacheECC),
+     .BranchPredictor (BranchPredictor)
+   ) icache_i (
+     .clk_i               (clk_i),
+     .rst_ni              (rst_ni),
+     // request / redirect control
+     .req_i               (req_i),
+     .branch_i            (branch_req),
+     .branch_spec_i       (branch_spec),
+     .predicted_branch_i  (predicted_branch),
+     .branch_mispredict_i (nt_branch_mispredict_i),
+     .addr_i              ({fetch_addr_n[31:1], 1'b0}),
+     // fetch side (towards the IF stage logic)
+     .ready_i             (fetch_ready),
+     .valid_o             (fetch_valid),
+     .rdata_o             (fetch_rdata),
+     .addr_o              (fetch_addr),
+     .err_o               (fetch_err),
+     .err_plus2_o         (fetch_err_plus2),
+     // instruction memory interface
+     .instr_req_o         (instr_req_o),
+     .instr_addr_o        (instr_addr_o),
+     .instr_gnt_i         (instr_gnt_i),
+     .instr_rvalid_i      (instr_rvalid_i),
+     .instr_rdata_i       (instr_rdata_i),
+     .instr_err_i         (instr_err_i),
+     .instr_pmp_err_i     (instr_pmp_err_i),
+     // cache control and status
+     .icache_enable_i     (icache_enable_i),
+     .icache_inval_i      (icache_inval_i),
+     .busy_o              (prefetch_busy)
+   );
+ end else begin : gen_ifu_prefetch_buffer
+   brq_ifu_prefetch_buffer #(
+     .BranchPredictor (BranchPredictor)
+   ) ifu_prefetch_buffer_i (
+     .clk_i               (clk_i),
+     .rst_ni              (rst_ni),
+     // request / redirect control
+     .req_i               (req_i),
+     .branch_i            (branch_req),
+     .branch_spec_i       (branch_spec),
+     .predicted_branch_i  (predicted_branch),
+     .branch_mispredict_i (nt_branch_mispredict_i),
+     .addr_i              ({fetch_addr_n[31:1], 1'b0}),
+     // fetch side (towards the IF stage logic)
+     .ready_i             (fetch_ready),
+     .valid_o             (fetch_valid),
+     .rdata_o             (fetch_rdata),
+     .addr_o              (fetch_addr),
+     .err_o               (fetch_err),
+     .err_plus2_o         (fetch_err_plus2),
+     // instruction memory interface
+     .instr_req_o         (instr_req_o),
+     .instr_addr_o        (instr_addr_o),
+     .instr_gnt_i         (instr_gnt_i),
+     .instr_rvalid_i      (instr_rvalid_i),
+     .instr_rdata_i       (instr_rdata_i),
+     .instr_err_i         (instr_err_i),
+     .instr_pmp_err_i     (instr_pmp_err_i),
+     // status
+     .busy_o              (prefetch_busy)
+   );
+   // The cache control inputs have no consumer in this variant; sink them.
+   logic unused_icen;
+   logic unused_icinv;
+   assign unused_icinv = icache_inval_i;
+   assign unused_icen  = icache_enable_i;
+ end
+ // A redirect of the fetch unit is requested when the core sets the PC or when a taken
+ // branch is predicted; the speculative variant uses pc_set_spec_i instead of pc_set_i.
+ assign branch_spec = predict_branch_taken | pc_set_spec_i;
+ assign branch_req  = predict_branch_taken | pc_set_i;
+ // Bit 0 of the selected fetch address is replaced by a constant zero before it reaches
+ // the fetch unit, so it is only sunk here.
+ assign unused_fetch_addr_n0 = fetch_addr_n[0];
+ assign if_busy_o = prefetch_busy;
+ assign pc_if_o   = if_instr_addr;
+ // Compressed instruction expander. It is placed in the IF stage because the position of
+ // the expansion does not matter functionally and doing it here eases timing closure.
+ logic        instr_is_compressed;
+ logic        illegal_c_insn;
+ logic [31:0] instr_decompressed;
+ brq_ifu_compressed_decoder ifu_compressed_decoder_i (
+   .clk_i           (clk_i),
+   .rst_ni          (rst_ni),
+   .valid_i         (fetch_valid & ~fetch_err),
+   .instr_i         (if_instr_rdata),
+   .instr_o         (instr_decompressed),
+   .is_compressed_o (instr_is_compressed),
+   .illegal_instr_o (illegal_c_insn)
+ );
+ // Optional dummy instruction insertion, selected by the DummyInstructions parameter.
+ if (DummyInstructions) begin : gen_dummy_instr
+   logic [31:0] dummy_instr_data;
+   logic        insert_dummy_instr;
+   brq_ifu_dummy_instr dummy_instr_i (
+     .clk_i                 (clk_i),
+     .rst_ni                (rst_ni),
+     .dummy_instr_en_i      (dummy_instr_en_i),
+     .dummy_instr_mask_i    (dummy_instr_mask_i),
+     .dummy_instr_seed_en_i (dummy_instr_seed_en_i),
+     .dummy_instr_seed_i    (dummy_instr_seed_i),
+     .fetch_valid_i         (fetch_valid),
+     .id_in_ready_i         (id_in_ready_i),
+     .insert_dummy_instr_o  (insert_dummy_instr),
+     .dummy_instr_data_o    (dummy_instr_data)
+   );
+   // Select the dummy instruction word when one is being inserted. A dummy is never
+   // flagged as compressed, illegal or erroneous, so those flags are masked while
+   // inserting.
+   assign instr_out               = insert_dummy_instr ? dummy_instr_data : instr_decompressed;
+   assign instr_is_compressed_out = instr_is_compressed & ~insert_dummy_instr;
+   assign illegal_c_instr_out     = illegal_c_insn & ~insert_dummy_instr;
+   assign instr_err_out           = if_instr_err & ~insert_dummy_instr;
+   // Inserting a dummy stalls the IF stage: the dummy runs between the instruction now in
+   // ID and the one currently offered by the fetch unit, and it carries the PC of that
+   // next instruction.
+   assign stall_dummy_instr = insert_dummy_instr;
+   // Dummy indication travels to the ID stage together with the IF-ID pipeline registers.
+   always_ff @(posedge clk_i or negedge rst_ni) begin
+     if (!rst_ni) begin
+       dummy_instr_id_o <= 1'b0;
+     end else if (if_id_pipe_reg_we) begin
+       dummy_instr_id_o <= insert_dummy_instr;
+     end
+   end
+ end else begin : gen_no_dummy_instr
+   // No dummy logic: pass the fetched instruction and its flags straight through.
+   assign instr_out               = instr_decompressed;
+   assign instr_is_compressed_out = instr_is_compressed;
+   assign illegal_c_instr_out     = illegal_c_insn;
+   assign instr_err_out           = if_instr_err;
+   assign stall_dummy_instr       = 1'b0;
+   assign dummy_instr_id_o        = 1'b0;
+   // Sink the dummy-instruction configuration inputs, which have no consumer here.
+   logic        unused_dummy_en;
+   logic [2:0]  unused_dummy_mask;
+   logic        unused_dummy_seed_en;
+   logic [31:0] unused_dummy_seed;
+   assign unused_dummy_en      = dummy_instr_en_i;
+   assign unused_dummy_mask    = dummy_instr_mask_i;
+   assign unused_dummy_seed_en = dummy_instr_seed_en_i;
+   assign unused_dummy_seed    = dummy_instr_seed_i;
+ end
+ // A new instruction moves into ID whenever IF has a valid instruction and ID is ready.
+ // (instr_new_id_q is only used for RVFI signalling.)
+ assign instr_new_id_d = if_instr_valid & id_in_ready_i;
+ // ID becomes valid when a new instruction is registered, unless pc_set_i squashes it in
+ // the same cycle. Once set, valid is held until instr_valid_clear_i explicitly clears it
+ // (instruction completion or exception).
+ assign instr_valid_id_d = (instr_new_id_d & ~pc_set_i) |
+                           (instr_valid_id_q & ~instr_valid_clear_i);
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     instr_new_id_q   <= 1'b0;
+     instr_valid_id_q <= 1'b0;
+   end else begin
+     instr_new_id_q   <= instr_new_id_d;
+     instr_valid_id_q <= instr_valid_id_d;
+   end
+ end
+ assign instr_new_id_o   = instr_new_id_q;
+ assign instr_valid_id_o = instr_valid_id_q;
+ // IF-ID pipeline registers. They are written only when a new instruction enters ID, so
+ // they hold their contents while the ID stage is stalled. These flops have no reset.
+ assign if_id_pipe_reg_we = instr_new_id_d;
+ always_ff @(posedge clk_i) begin
+   if (if_id_pipe_reg_we) begin
+     pc_id_o                  <= pc_if_o;
+     // The instruction word is stored twice to reduce fan-out on the ID-stage flops.
+     instr_rdata_id_o         <= instr_out;
+     instr_rdata_alu_id_o     <= instr_out;
+     instr_rdata_c_id_o       <= if_instr_rdata[15:0];
+     instr_is_compressed_id_o <= instr_is_compressed_out;
+     illegal_c_insn_id_o      <= illegal_c_instr_out;
+     instr_fetch_err_o        <= instr_err_out;
+     instr_fetch_err_plus2_o  <= fetch_err_plus2;
+   end
+ end
+ // Optional security hardening: check that the PC advances by the expected amount.
+ if (PCIncrCheck) begin : g_secure_pc
+   logic        prev_instr_seq_q, prev_instr_seq_d;
+   logic [31:0] prev_instr_addr_incr;
+   // The sequential flag is set once an instruction has entered ID and is dropped by any
+   // fetch redirect (branch_req) or by a dummy-instruction stall. It is clear after reset.
+   assign prev_instr_seq_d = (prev_instr_seq_q | instr_new_id_d) &
+                             ~(branch_req | stall_dummy_instr);
+   always_ff @(posedge clk_i or negedge rst_ni) begin
+     if (!rst_ni) begin
+       prev_instr_seq_q <= 1'b0;
+     end else begin
+       prev_instr_seq_q <= prev_instr_seq_d;
+     end
+   end
+   // Expected next address: ID-stage PC plus 2 for a compressed instruction without a
+   // fetch error, plus 4 otherwise.
+   assign prev_instr_addr_incr = pc_id_o + ((!instr_is_compressed_id_o || instr_fetch_err_o) ?
+                                            32'd4 : 32'd2);
+   // Raise the alert when the IF-stage PC differs from the expected sequential address.
+   assign pc_mismatch_alert_o = prev_instr_seq_q & (pc_if_o != prev_instr_addr_incr);
+ end else begin : g_no_secure_pc
+   assign pc_mismatch_alert_o = 1'b0;
+ end
+ // Optional static branch predictor with a one-entry skid buffer between IF and ID/EX.
+ if (BranchPredictor) begin : g_ifu_branch_predictor
+   logic        predict_branch_taken_raw;
+   logic        instr_bp_taken_q, instr_bp_taken_d;
+   logic        instr_skid_en;
+   logic        instr_skid_valid_q, instr_skid_valid_d;
+   logic        instr_skid_bp_taken_q;
+   logic [31:0] instr_skid_addr_q;
+   logic [31:0] instr_skid_data_q;
+   brq_ifu_branch_predict branch_predict_i (
+     .clk_i                  (clk_i),
+     .rst_ni                 (rst_ni),
+     .fetch_rdata_i          (fetch_rdata),
+     .fetch_pc_i             (fetch_addr),
+     .fetch_valid_i          (fetch_valid),
+     .predict_branch_taken_o (predict_branch_taken_raw),
+     .predict_branch_pc_o    (predict_branch_pc)
+   );
+   // No prediction is made while the skid buffer is occupied (its instruction has already
+   // been predicted), nor for an instruction with a fetch error.
+   assign predict_branch_taken = predict_branch_taken_raw & ~(instr_skid_valid_q | fetch_err);
+   // pc_set_i takes precedence over branch prediction.
+   assign predicted_branch = predict_branch_taken & ~pc_set_i;
+   // Skid buffer: when the IF instruction is a predicted-taken branch and ID/EX is not
+   // ready, the instruction is parked here and becomes the IF output until ID/EX accepts
+   // it. Without the buffer the ID/EX ready signal would be coupled to instr_req_o,
+   // creating a feedthrough path from data_gnt_i to instr_req_o that can form a
+   // combinational loop on some interconnects.
+   assign instr_skid_en      = predicted_branch & ~(id_in_ready_i | instr_skid_valid_q);
+   assign instr_skid_valid_d = instr_skid_en |
+                               (instr_skid_valid_q & ~(id_in_ready_i | stall_dummy_instr));
+   always_ff @(posedge clk_i or negedge rst_ni) begin
+     if (!rst_ni) begin
+       instr_skid_valid_q <= 1'b0;
+     end else begin
+       instr_skid_valid_q <= instr_skid_valid_d;
+     end
+   end
+   // Skid payload registers (no reset).
+   always_ff @(posedge clk_i) begin
+     if (instr_skid_en) begin
+       instr_skid_addr_q     <= fetch_addr;
+       instr_skid_data_q     <= fetch_rdata;
+       instr_skid_bp_taken_q <= predict_branch_taken;
+     end
+   end
+   // IF-stage outputs come from the skid buffer when it is occupied, otherwise directly
+   // from the fetch unit.
+   assign if_instr_valid = instr_skid_valid_q | fetch_valid;
+   assign if_instr_addr  = instr_skid_valid_q ? instr_skid_addr_q : fetch_addr;
+   assign if_instr_rdata = instr_skid_valid_q ? instr_skid_data_q : fetch_rdata;
+   // Instructions with a fetch error are never predicted, so a skid entry never carries
+   // an error.
+   assign if_instr_err = fetch_err & ~instr_skid_valid_q;
+   // The fetch unit is held off while ID is busy, a dummy is inserted or the skid is full.
+   assign fetch_ready = id_in_ready_i & ~(stall_dummy_instr | instr_skid_valid_q);
+   // The ID stage needs to know whether the branch was predicted taken so that it can
+   // signal mispredictions; the flag is registered alongside the IF-ID pipeline registers.
+   assign instr_bp_taken_d = instr_skid_valid_q ? instr_skid_bp_taken_q : predict_branch_taken;
+   always_ff @(posedge clk_i) begin
+     if (if_id_pipe_reg_we) begin
+       instr_bp_taken_q <= instr_bp_taken_d;
+     end
+   end
+   assign instr_bp_taken_o = instr_bp_taken_q;
+ end else begin : g_no_ifu_branch_predictor
+   // No predictor: prediction outputs are tied off and the fetch unit feeds IF directly.
+   assign predict_branch_pc    = 32'b0;
+   assign predict_branch_taken = 1'b0;
+   assign predicted_branch     = 1'b0;
+   assign instr_bp_taken_o     = 1'b0;
+   assign if_instr_valid = fetch_valid;
+   assign if_instr_addr  = fetch_addr;
+   assign if_instr_rdata = fetch_rdata;
+   assign if_instr_err   = fetch_err;
+   assign fetch_ready    = id_in_ready_i & ~stall_dummy_instr;
+ end
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..1a6fc57
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,276 @@
+/**
+ * Compressed instruction decoder
+ *
+ * Decodes RISC-V compressed instructions into their RV32 equivalent.
+ * This module is fully combinatorial, clock and reset are used for
+ * assertions only.
+ * NOTE(review): no assertions are visible in this module as shown, so clk_i and
+ * rst_ni are currently unreferenced - confirm whether assertions were dropped.
+ *
+ * Ports:
+ *   clk_i, rst_ni    - not referenced by the decode logic
+ *   valid_i          - only drives unused_valid (lint appeasement)
+ *   instr_i          - incoming instruction word; instr_i[1:0] selects the quadrant
+ *   instr_o          - expanded 32-bit instruction; equals instr_i when no expansion applies
+ *   is_compressed_o  - high whenever instr_i[1:0] != 2'b11
+ *   illegal_instr_o  - high for the compressed encodings rejected below
+ */
+module brq_ifu_compressed_decoder (
+  input  logic        clk_i,
+  input  logic        rst_ni,
+  input  logic        valid_i,
+  input  logic [31:0] instr_i,
+  output logic [31:0] instr_o,
+  output logic        is_compressed_o,
+  output logic        illegal_instr_o
+);
+  import brq_pkg::*;
+
+  // valid_i indicates if instr_i is valid and is used for assertions only.
+  // The following signal is used to avoid possible lint errors.
+  logic unused_valid;
+  assign unused_valid = valid_i;
+
+  ////////////////////////
+  // Compressed decoder //
+  ////////////////////////
+
+  always_comb begin
+    // By default, forward incoming instruction, mark it as legal.
+    // Branches that only raise illegal_instr_o therefore leave instr_o == instr_i.
+    instr_o         = instr_i;
+    illegal_instr_o = 1'b0;
+
+    // Check if incoming instruction is compressed.
+    unique case (instr_i[1:0])
+      // C0
+      2'b00: begin
+        unique case (instr_i[15:13])
+          3'b000: begin
+            // c.addi4spn -> addi rd', x2, imm
+            instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5],
+                       instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], {OPCODE_OP_IMM}};
+            // An all-zero immediate field is rejected.
+            if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
+          end
+
+          3'b010: begin
+            // c.lw -> lw rd', imm(rs1')
+            instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6],
+                       2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], {OPCODE_LOAD}};
+          end
+
+          3'b110: begin
+            // c.sw -> sw rs2', imm(rs1')
+            instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2],
+                       2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6],
+                       2'b00, {OPCODE_STORE}};
+          end
+
+          // Remaining C0 encodings are not expanded and are flagged illegal.
+          3'b001,
+          3'b011,
+          3'b100,
+          3'b101,
+          3'b111: begin
+            illegal_instr_o = 1'b1;
+          end
+
+          //default: begin
+          //  illegal_instr_o = 1'b1;
+          //end
+        endcase
+      end
+
+      // C1
+      //
+      // Register address checks for RV32E are performed in the regular instruction decoder.
+      // If this check fails, an illegal instruction exception is triggered and the controller
+      // writes the actual faulting instruction to mtval.
+      2'b01: begin
+        unique case (instr_i[15:13])
+          3'b000: begin
+            // c.addi -> addi rd, rd, nzimm
+            // c.nop
+            instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2],
+                       instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+          end
+
+          3'b001, 3'b101: begin
+            // 001: c.jal -> jal x1, imm
+            // 101: c.j   -> jal x0, imm
+            // ~instr_i[15] forms the LSB of rd: 1 (x1) for c.jal, 0 (x0) for c.j.
+            instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6],
+                       instr_i[7], instr_i[2], instr_i[11], instr_i[5:3],
+                       {9 {instr_i[12]}}, 4'b0, ~instr_i[15], {OPCODE_JAL}};
+          end
+
+          3'b010: begin
+            // c.li -> addi rd, x0, nzimm
+            // (c.li hints are translated into an addi hint)
+            instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0,
+                       3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+          end
+
+          3'b011: begin
+            // c.lui -> lui rd, imm
+            // (c.lui hints are translated into a lui hint)
+            instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], {OPCODE_LUI}};
+
+            // rd == x2 selects c.addi16sp instead and overrides the lui expansion above.
+            if (instr_i[11:7] == 5'h02) begin
+              // c.addi16sp -> addi x2, x2, nzimm
+              instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2],
+                         instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, {OPCODE_OP_IMM}};
+            end
+
+            // A zero immediate is rejected for both c.lui and c.addi16sp.
+            if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
+          end
+
+          3'b100: begin
+            unique case (instr_i[11:10])
+              2'b00,
+              2'b01: begin
+                // 00: c.srli -> srli rd, rd, shamt
+                // 01: c.srai -> srai rd, rd, shamt
+                // (c.srli/c.srai hints are translated into a srli/srai hint)
+                // instr_i[10] lands in bit 30 of instr_o and distinguishes srai from srli.
+                instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7],
+                           3'b101, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+                // shamt[5] set is rejected.
+                if (instr_i[12] == 1'b1) illegal_instr_o = 1'b1;
+              end
+
+              2'b10: begin
+                // c.andi -> andi rd, rd, imm
+                instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7],
+                           3'b111, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+              end
+
+              2'b11: begin
+                unique case ({instr_i[12], instr_i[6:5]})
+                  3'b000: begin
+                    // c.sub -> sub rd', rd', rs2'
+                    instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7],
+                               3'b000, 2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b001: begin
+                    // c.xor -> xor rd', rd', rs2'
+                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100,
+                               2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b010: begin
+                    // c.or  -> or  rd', rd', rs2'
+                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110,
+                               2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b011: begin
+                    // c.and -> and rd', rd', rs2'
+                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111,
+                               2'b01, instr_i[9:7], {OPCODE_OP}};
+                  end
+
+                  3'b100,
+                  3'b101,
+                  3'b110,
+                  3'b111: begin
+                    // 100: c.subw
+                    // 101: c.addw
+                    illegal_instr_o = 1'b1;
+                  end
+
+                  // default: begin
+                  //   illegal_instr_o = 1'b1;
+                  // end
+                endcase
+              end
+
+              // default: begin
+              //   illegal_instr_o = 1'b1;
+              // end
+            endcase
+          end
+
+          3'b110, 3'b111: begin
+            // 0: c.beqz -> beq rs1', x0, imm
+            // 1: c.bnez -> bne rs1', x0, imm
+            // instr_i[13] becomes funct3[0], selecting beq (0) or bne (1).
+            instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01,
+                       instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3],
+                       instr_i[12], {OPCODE_BRANCH}};
+          end
+
+          // default: begin
+          //   illegal_instr_o = 1'b1;
+          // end
+        endcase
+      end
+
+      // C2
+      //
+      // Register address checks for RV32E are performed in the regular instruction decoder.
+      // If this check fails, an illegal instruction exception is triggered and the controller
+      // writes the actual faulting instruction to mtval.
+      2'b10: begin
+        unique case (instr_i[15:13])
+          3'b000: begin
+            // c.slli -> slli rd, rd, shamt
+            // (c.slli hints are translated into a slli hint)
+            instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], {OPCODE_OP_IMM}};
+            if (instr_i[12] == 1'b1) illegal_instr_o = 1'b1; // reserved for custom extensions
+          end
+
+          3'b010: begin
+            // c.lwsp -> lw rd, imm(x2)
+            instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02,
+                       3'b010, instr_i[11:7], OPCODE_LOAD};
+            // rd == x0 is rejected.
+            if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+          end
+
+          3'b100: begin
+            if (instr_i[12] == 1'b0) begin
+              if (instr_i[6:2] != 5'b0) begin
+                // c.mv -> add rd/rs1, x0, rs2
+                // (c.mv hints are translated into an add hint)
+                instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], {OPCODE_OP}};
+              end else begin
+                // c.jr -> jalr x0, rd/rs1, 0
+                instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, {OPCODE_JALR}};
+                // rs1 == x0 is rejected.
+                if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+              end
+            end else begin
+              if (instr_i[6:2] != 5'b0) begin
+                // c.add -> add rd, rd, rs2
+                // (c.add hints are translated into an add hint)
+                instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP}};
+              end else begin
+                if (instr_i[11:7] == 5'b0) begin
+                  // c.ebreak -> ebreak
+                  instr_o = {32'h00_10_00_73};
+                end else begin
+                  // c.jalr -> jalr x1, rs1, 0
+                  instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, {OPCODE_JALR}};
+                end
+              end
+            end
+          end
+
+          3'b110: begin
+            // c.swsp -> sw rs2, imm(x2)
+            instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010,
+                       instr_i[11:9], 2'b00, {OPCODE_STORE}};
+          end
+
+          // Remaining C2 encodings are not expanded and are flagged illegal.
+          3'b001,
+          3'b011,
+          3'b101,
+          3'b111: begin
+            illegal_instr_o = 1'b1;
+          end
+
+          // default: begin
+          //   illegal_instr_o = 1'b1;
+          // end
+        endcase
+      end
+
+      // Incoming instruction is not compressed.
+      2'b11:;
+
+      // default: begin
+      //   illegal_instr_o = 1'b1;
+      // end
+    endcase
+  end
+
+  assign is_compressed_o = (instr_i[1:0] != 2'b11);
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..91bec36
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,141 @@
+/**
+ * Dummy instruction module
+ *
+ * Provides pseudo-randomly inserted fake instructions for secure code obfuscation
+ *
+ * An LFSR supplies the instruction type, both source register indices and a count
+ * threshold. A counter advances on every accepted instruction; when it equals the
+ * (masked) threshold a dummy instruction is requested and the counter restarts.
+ *
+ * Ports:
+ *   dummy_instr_en_i       - enables counting and insertion
+ *   dummy_instr_mask_i     - ANDed onto the upper three bits of the LFSR count threshold
+ *   dummy_instr_seed_en_i  - loads a new seed into the seed register and the LFSR
+ *   dummy_instr_seed_i     - seed value, XORed with the previously stored seed
+ *   fetch_valid_i          - a real instruction is available from fetch
+ *   id_in_ready_i          - downstream stage accepts an instruction this cycle
+ *   insert_dummy_instr_o   - request insertion of dummy_instr_data_o
+ *   dummy_instr_data_o     - encoded dummy instruction
+ */
+module brq_ifu_dummy_instr (
+  // Clock and reset
+  input  logic        clk_i,
+  input  logic        rst_ni,
+
+  // Interface to CSRs
+  input  logic        dummy_instr_en_i,
+  input  logic [2:0]  dummy_instr_mask_i,
+  input  logic        dummy_instr_seed_en_i,
+  input  logic [31:0] dummy_instr_seed_i,
+
+  // Interface to IF stage
+  input  logic        fetch_valid_i,
+  input  logic        id_in_ready_i,
+  output logic        insert_dummy_instr_o,
+  output logic [31:0] dummy_instr_data_o
+);
+
+  localparam int unsigned TIMEOUT_CNT_W = 5;
+  localparam int unsigned OP_W          = 5;
+
+  typedef enum logic [1:0] {
+    DUMMY_ADD = 2'b00,
+    DUMMY_MUL = 2'b01,
+    DUMMY_DIV = 2'b10,
+    DUMMY_AND = 2'b11
+  } dummy_instr_e;
+
+  // Layout of the LFSR output word (MSB first).
+  typedef struct packed {
+    dummy_instr_e             instr_type;
+    logic [OP_W-1:0]          op_b;
+    logic [OP_W-1:0]          op_a;
+    logic [TIMEOUT_CNT_W-1:0] cnt;
+  } lfsr_data_t;
+  localparam int unsigned LFSR_OUT_W = $bits(lfsr_data_t);
+
+  lfsr_data_t               lfsr_data;
+  logic [TIMEOUT_CNT_W-1:0] dummy_cnt_incr, dummy_cnt_threshold;
+  logic [TIMEOUT_CNT_W-1:0] dummy_cnt_d, dummy_cnt_q;
+  logic                     dummy_cnt_en;
+  logic                     lfsr_en;
+  logic [LFSR_OUT_W-1:0]    lfsr_state;
+  logic                     insert_dummy_instr;
+  logic [6:0]               dummy_set;
+  logic [2:0]               dummy_opcode;
+  logic [31:0]              dummy_instr;
+  logic [31:0]              dummy_instr_seed_q, dummy_instr_seed_d;
+
+  // Shift the LFSR every time we insert an instruction
+  assign lfsr_en = insert_dummy_instr & id_in_ready_i;
+
+  // Each seed write is folded (XOR) into the previously stored seed.
+  assign dummy_instr_seed_d = dummy_instr_seed_q ^ dummy_instr_seed_i;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      dummy_instr_seed_q <= '0;
+    end else if (dummy_instr_seed_en_i) begin
+      dummy_instr_seed_q <= dummy_instr_seed_d;
+    end
+  end
+
+  prim_lfsr #(
+    .LfsrDw     ( 32         ),
+    .StateOutDw ( LFSR_OUT_W )
+  ) lfsr_i (
+    .clk_i     ( clk_i                 ),
+    .rst_ni    ( rst_ni                ),
+    .seed_en_i ( dummy_instr_seed_en_i ),
+    .seed_i    ( dummy_instr_seed_d    ),
+    .lfsr_en_i ( lfsr_en               ),
+    .entropy_i ( '0                    ),
+    .state_o   ( lfsr_state            )
+  );
+
+  // Extract fields from LFSR
+  assign lfsr_data = lfsr_data_t'(lfsr_state);
+
+  // Set count threshold for inserting a new instruction. This is the pseudo-random value from the
+  // LFSR with a mask applied (based on CSR config data) to shorten the period if required.
+  // The mask covers the top three bits; the remaining low bits always pass through.
+  assign dummy_cnt_threshold = lfsr_data.cnt & {dummy_instr_mask_i,{TIMEOUT_CNT_W-3{1'b1}}};
+  assign dummy_cnt_incr      = dummy_cnt_q + {{TIMEOUT_CNT_W-1{1'b0}},1'b1};
+  // Clear the counter every time a new instruction is inserted
+  assign dummy_cnt_d         = insert_dummy_instr ? '0 : dummy_cnt_incr;
+  // Increment the counter for each executed instruction while dummy instructions are
+  // enabled.
+  assign dummy_cnt_en        = dummy_instr_en_i & id_in_ready_i &
+                               (fetch_valid_i | insert_dummy_instr);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      dummy_cnt_q <= '0;
+    end else if (dummy_cnt_en) begin
+      dummy_cnt_q <= dummy_cnt_d;
+    end
+  end
+
+  // Insert a dummy instruction each time the counter hits the threshold
+  assign insert_dummy_instr = dummy_instr_en_i & (dummy_cnt_q == dummy_cnt_threshold);
+
+  // Encode instruction
+  // dummy_set fills instruction bits [31:25] and dummy_opcode fills bits [14:12].
+  always_comb begin
+    unique case (lfsr_data.instr_type)
+      DUMMY_ADD : begin
+        dummy_set    = 7'b0000000;
+        dummy_opcode = 3'b000;
+      end
+      DUMMY_MUL : begin
+        dummy_set    = 7'b0000001;
+        dummy_opcode = 3'b000;
+      end
+      DUMMY_DIV : begin
+        dummy_set    = 7'b0000001;
+        dummy_opcode = 3'b100;
+      end
+      DUMMY_AND : begin
+        dummy_set    = 7'b0000000;
+        dummy_opcode = 3'b111;
+      end
+      default : begin
+        dummy_set    = 7'b0000000;
+        dummy_opcode = 3'b000;
+      end
+    endcase
+  end
+
+  //                    SET       RS2            RS1            OP           RD
+  // The destination register field is fixed to 5'h00 and the low seven bits to 7'h33.
+  assign dummy_instr = {dummy_set,lfsr_data.op_b,lfsr_data.op_a,dummy_opcode,5'h00,7'h33};
+
+  // Assign outputs
+  assign insert_dummy_instr_o = insert_dummy_instr;
+  assign dummy_instr_data_o   = dummy_instr;
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..09bb06c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,237 @@
+/**
+ * Fetch Fifo for 32 bit memory interface
+ *
+ * input port: send address and data to the FIFO
+ * clear_i clears the FIFO for the following cycle, including any new request
+ *
+ * Parameters:
+ *   NUM_REQS - number of entries reported on busy_o; the FIFO holds NUM_REQS+1 entries
+ *
+ * Ports:
+ *   clear_i          - invalidates all entries and loads in_addr_i into the address register
+ *   busy_o           - valid flags of the upper NUM_REQS entries
+ *   in_valid_i       - in_rdata_i / in_err_i carry a new fetched word this cycle
+ *   in_addr_i        - address captured on clear_i (bit 0 is ignored)
+ *   out_valid_o      - a complete instruction is presented on out_rdata_o
+ *   out_ready_i      - consumer accepts the presented instruction
+ *   out_addr_o       - address of the presented instruction
+ *   out_addr_next_o  - out_addr_o plus 2 or 4 depending on the presented instruction
+ *   out_err_o        - error flag for the presented instruction
+ *   out_err_plus2_o  - error stems from the second half of an unaligned instruction
+ */
+module brq_ifu_fifo #(
+  parameter int unsigned NUM_REQS = 2
+) (
+  input  logic                clk_i,
+  input  logic                rst_ni,
+
+  // control signals
+  input  logic                clear_i,   // clears the contents of the FIFO
+  output logic [NUM_REQS-1:0] busy_o,
+
+  // input port
+  input  logic                in_valid_i,
+  input  logic [31:0]         in_addr_i,
+  input  logic [31:0]         in_rdata_i,
+  input  logic                in_err_i,
+
+  // output port
+  output logic                out_valid_o,
+  input  logic                out_ready_i,
+  output logic [31:0]         out_addr_o,
+  output logic [31:0]         out_addr_next_o,
+  output logic [31:0]         out_rdata_o,
+  output logic                out_err_o,
+  output logic                out_err_plus2_o
+);
+
+  localparam int unsigned DEPTH = NUM_REQS+1;
+
+  // index 0 is used for output
+  logic [DEPTH-1:0] [31:0]  rdata_d,   rdata_q;
+  logic [DEPTH-1:0]         err_d,     err_q;
+  logic [DEPTH-1:0]         valid_d,   valid_q;
+  logic [DEPTH-1:0]         lowest_free_entry;
+  logic [DEPTH-1:0]         valid_pushed, valid_popped;
+  logic [DEPTH-1:0]         entry_en;
+
+  logic                     pop_fifo;
+  logic             [31:0]  rdata, rdata_unaligned;
+  logic                     err,   err_unaligned, err_plus2;
+  logic                     valid, valid_unaligned;
+
+  logic                     aligned_is_compressed, unaligned_is_compressed;
+
+  logic                     addr_incr_two;
+  logic [31:1]              instr_addr_next;
+  logic [31:1]              instr_addr_d, instr_addr_q;
+  logic                     instr_addr_en;
+  logic                     unused_addr_in;
+
+  /////////////////
+  // Output port //
+  /////////////////
+
+  // Bypass path: when entry 0 is empty the incoming word is presented directly.
+  assign rdata = valid_q[0] ? rdata_q[0] : in_rdata_i;
+  assign err   = valid_q[0] ? err_q[0]   : in_err_i;
+  assign valid = valid_q[0] | in_valid_i;
+
+  // The FIFO contains word aligned memory fetches, but the instructions contained in each entry
+  // might be half-word aligned (due to compressed instructions)
+  // e.g.
+  //              | 31               16 | 15               0 |
+  // FIFO entry 0 | Instr 1 [15:0]      | Instr 0 [15:0]     |
+  // FIFO entry 1 | Instr 2 [15:0]      | Instr 1 [31:16]    |
+  //
+  // The FIFO also has a direct bypass path, so a complete instruction might be made up of data
+  // from the FIFO and new incoming data.
+  //
+
+  // Construct the output data for an unaligned instruction
+  assign rdata_unaligned = valid_q[1] ? {rdata_q[1][15:0], rdata[31:16]} :
+                                        {in_rdata_i[15:0], rdata[31:16]};
+
+  // If entry[1] is valid, an error can come from entry[0] or entry[1], unless the
+  // instruction in entry[0] is compressed (entry[1] is a new instruction)
+  // If entry[1] is not valid, and entry[0] is, an error can come from entry[0] or the incoming
+  // data, unless the instruction in entry[0] is compressed
+  // If entry[0] is not valid, the error must come from the incoming data
+  assign err_unaligned   = valid_q[1] ? ((err_q[1] & ~unaligned_is_compressed) | err_q[0]) :
+                                        ((valid_q[0] & err_q[0]) |
+                                         (in_err_i & (~valid_q[0] | ~unaligned_is_compressed)));
+
+  // Record when an error is caused by the second half of an unaligned 32bit instruction.
+  // Only needs to be correct when unaligned and if err_unaligned is set
+  assign err_plus2       = valid_q[1] ? (err_q[1] & ~err_q[0]) :
+                                        (in_err_i & valid_q[0] & ~err_q[0]);
+
+  // An uncompressed unaligned instruction is only valid if both parts are available
+  assign valid_unaligned = valid_q[1] ? 1'b1 :
+                                        (valid_q[0] & in_valid_i);
+
+  // If there is an error, rdata is unknown
+  assign unaligned_is_compressed = (rdata[17:16] != 2'b11) & ~err;
+  assign aligned_is_compressed   = (rdata[ 1: 0] != 2'b11) & ~err;
+
+  ////////////////////////////////////////
+  // Instruction aligner (if unaligned) //
+  ////////////////////////////////////////
+
+  always_comb begin
+    if (out_addr_o[1]) begin
+      // unaligned case
+      out_rdata_o     = rdata_unaligned;
+      out_err_o       = err_unaligned;
+      out_err_plus2_o = err_plus2;
+
+      // A compressed instruction only needs the upper half of the current word;
+      // a 32-bit one also needs the lower half of the following word.
+      if (unaligned_is_compressed) begin
+        out_valid_o = valid;
+      end else begin
+        out_valid_o = valid_unaligned;
+      end
+    end else begin
+      // aligned case
+      out_rdata_o     = rdata;
+      out_err_o       = err;
+      out_err_plus2_o = 1'b0;
+      out_valid_o     = valid;
+    end
+  end
+
+  /////////////////////////
+  // Instruction address //
+  /////////////////////////
+
+  // Update the address on branches and every time an instruction is driven
+  assign instr_addr_en = clear_i | (out_ready_i & out_valid_o);
+
+  // Increment the address by two every time a compressed instruction is popped
+  assign addr_incr_two = instr_addr_q[1] ? unaligned_is_compressed :
+                                           aligned_is_compressed;
+
+  // The address is kept in halfword units ([31:1]), so adding 2'b10 advances by four bytes
+  // and adding 2'b01 advances by two bytes.
+  assign instr_addr_next = (instr_addr_q[31:1] +
+                            // Increment address by 4 or 2
+                            {29'd0,~addr_incr_two,addr_incr_two});
+
+  assign instr_addr_d = clear_i ? in_addr_i[31:1] :
+                                  instr_addr_next;
+
+  // No reset on this register: it is only written when instr_addr_en is set.
+  always_ff @(posedge clk_i) begin
+    if (instr_addr_en) begin
+      instr_addr_q <= instr_addr_d;
+    end
+  end
+
+  // Output both PC of current instruction and instruction following. PC of instruction following is
+  // required for the branch predictor. It's used to fetch the instruction following a branch that
+  // was not-taken but (mis)predicted taken.
+  assign out_addr_next_o = {instr_addr_next, 1'b0};
+  assign out_addr_o      = {instr_addr_q, 1'b0};
+
+  // The LSB of the address is unused, since all addresses are halfword aligned
+  assign unused_addr_in = in_addr_i[0];
+
+  /////////////////
+  // FIFO status //
+  /////////////////
+
+  // Indicate the fill level of fifo-entries. This is used to determine when a new request can be
+  // made on the bus. The prefetch buffer only needs to know about the upper entries which overlap
+  // with NUM_REQS.
+  assign busy_o = valid_q[DEPTH-1:DEPTH-NUM_REQS];
+
+  /////////////////////
+  // FIFO management //
+  /////////////////////
+
+  // Since an entry can contain unaligned instructions, popping an entry can leave the entry valid
+  // (an aligned compressed instruction consumes only the lower half, so the entry is kept).
+  assign pop_fifo = out_ready_i & out_valid_o & (~aligned_is_compressed | out_addr_o[1]);
+
+  for (genvar i = 0; i < (DEPTH - 1); i++) begin : g_fifo_next
+    // Calculate lowest free entry (write pointer)
+    if (i == 0) begin : g_ent0
+      assign lowest_free_entry[i] = ~valid_q[i];
+    end else begin : g_ent_others
+      assign lowest_free_entry[i] = ~valid_q[i] & valid_q[i-1];
+    end
+
+    // An entry is set when an incoming request chooses the lowest available entry
+    assign valid_pushed[i] = (in_valid_i & lowest_free_entry[i]) |
+                             valid_q[i];
+    // Popping the FIFO shifts all entries down
+    assign valid_popped[i] = pop_fifo ? valid_pushed[i+1] : valid_pushed[i];
+    // All entries are wiped out on a clear
+    assign valid_d[i] = valid_popped[i] & ~clear_i;
+
+    // data flops are enabled if there is new data to shift into it, or
+    assign entry_en[i] = (valid_pushed[i+1] & pop_fifo) |
+                         // a new request is incoming and this is the lowest free entry
+                         (in_valid_i & lowest_free_entry[i] & ~pop_fifo);
+
+    // take the next entry or the incoming data
+    assign rdata_d[i]  = valid_q[i+1] ? rdata_q[i+1] : in_rdata_i;
+    assign err_d  [i]  = valid_q[i+1] ? err_q  [i+1] : in_err_i;
+  end
+
+  // The top entry is similar but with simpler muxing
+  assign lowest_free_entry[DEPTH-1] = ~valid_q[DEPTH-1] & valid_q[DEPTH-2];
+  assign valid_pushed     [DEPTH-1] = valid_q[DEPTH-1] | (in_valid_i & lowest_free_entry[DEPTH-1]);
+  assign valid_popped     [DEPTH-1] = pop_fifo ? 1'b0 : valid_pushed[DEPTH-1];
+  assign valid_d [DEPTH-1]          = valid_popped[DEPTH-1] & ~clear_i;
+  assign entry_en[DEPTH-1]          = in_valid_i & lowest_free_entry[DEPTH-1];
+  assign rdata_d [DEPTH-1]          = in_rdata_i;
+  assign err_d   [DEPTH-1]          = in_err_i;
+
+  ////////////////////
+  // FIFO registers //
+  ////////////////////
+
+  // Only the valid flags are reset; data and error flops are qualified by valid_q.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      valid_q <= '0;
+    end else begin
+      valid_q <= valid_d;
+    end
+  end
+
+  for (genvar i = 0; i < DEPTH; i++) begin : g_fifo_regs
+    always_ff @(posedge clk_i) begin
+      if (entry_en[i]) begin
+        rdata_q[i] <= rdata_d[i];
+        err_q[i]   <= err_d[i];
+      end
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..51a998c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,1052 @@
+/**
+ * Instruction cache
+ *
+ * Provides an instruction cache along with cache management, instruction buffering and prefetching
+ */
+module brq_ifu_icache #(
+ parameter bit BranchPredictor = 1'b0,
+ // Cache arrangement parameters
+ parameter int unsigned BusWidth = 32,
+ parameter int unsigned CacheSizeBytes = 4*1024,
+ parameter bit ICacheECC = 1'b0,
+ parameter int unsigned LineSize = 64,
+ parameter int unsigned NumWays = 2,
+ // Only cache branch targets
+ parameter bit BranchCache = 1'b0
+) (
+ // Clock and reset
+ input logic clk_i,
+ input logic rst_ni,
+ // Signal that the core would like instructions
+ input logic req_i,
+ // Set the cache's address counter
+ input logic branch_i,
+ input logic branch_spec_i,
+ input logic predicted_branch_i,
+ input logic branch_mispredict_i,
+ input logic [31:0] addr_i,
+ // IF stage interface: Pass fetched instructions to the core
+ input logic ready_i,
+ output logic valid_o,
+ output logic [31:0] rdata_o,
+ output logic [31:0] addr_o,
+ output logic err_o,
+ output logic err_plus2_o,
+ // Instruction memory / interconnect interface: Fetch instruction data from memory
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ output logic [31:0] instr_addr_o,
+ input logic [BusWidth-1:0] instr_rdata_i,
+ input logic instr_err_i,
+ input logic instr_pmp_err_i,
+ input logic instr_rvalid_i,
+ // Cache status
+ input logic icache_enable_i,
+ input logic icache_inval_i,
+ output logic busy_o
+ // Local constants
+ localparam int unsigned ADDR_W = 32;
+ // Number of fill buffers (must be >= 2)
+ localparam int unsigned NUM_FB = 4;
+ // Request throttling threshold
+ localparam int unsigned FB_THRESHOLD = NUM_FB - 2;
+ // Derived parameters
+ localparam int unsigned LINE_SIZE_ECC = ICacheECC ? (LineSize + 8) : LineSize;
+ localparam int unsigned LINE_SIZE_BYTES = LineSize/8;
+ localparam int unsigned LINE_W = $clog2(LINE_SIZE_BYTES);
+ localparam int unsigned BUS_BYTES = BusWidth/8;
+ localparam int unsigned BUS_W = $clog2(BUS_BYTES);
+ localparam int unsigned LINE_BEATS = LINE_SIZE_BYTES / BUS_BYTES;
+ localparam int unsigned LINE_BEATS_W = $clog2(LINE_BEATS);
+ localparam int unsigned NUM_LINES = CacheSizeBytes / NumWays / LINE_SIZE_BYTES;
+ localparam int unsigned INDEX_W = $clog2(NUM_LINES);
+ localparam int unsigned INDEX_HI = INDEX_W + LINE_W - 1;
+ localparam int unsigned TAG_SIZE = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
+ localparam int unsigned TAG_SIZE_ECC = ICacheECC ? (TAG_SIZE + 6) : TAG_SIZE;
+ localparam int unsigned OUTPUT_BEATS = (BUS_BYTES / 2); // number of halfwords
+ // Prefetch signals
+ logic [ADDR_W-1:0] lookup_addr_aligned;
+ logic [ADDR_W-1:0] branch_mispredict_addr;
+ logic [ADDR_W-1:0] prefetch_addr_d, prefetch_addr_q;
+ logic prefetch_addr_en;
+ logic branch_or_mispredict;
+ // Cache pipeline IC0 signals
+ logic branch_suppress;
+ logic lookup_throttle;
+ logic lookup_req_ic0;
+ logic [ADDR_W-1:0] lookup_addr_ic0;
+ logic [INDEX_W-1:0] lookup_index_ic0;
+ logic fill_req_ic0;
+ logic [INDEX_W-1:0] fill_index_ic0;
+ logic [TAG_SIZE-1:0] fill_tag_ic0;
+ logic [LineSize-1:0] fill_wdata_ic0;
+ logic lookup_grant_ic0;
+ logic lookup_actual_ic0;
+ logic fill_grant_ic0;
+ logic tag_req_ic0;
+ logic [INDEX_W-1:0] tag_index_ic0;
+ logic [NumWays-1:0] tag_banks_ic0;
+ logic tag_write_ic0;
+ logic [TAG_SIZE_ECC-1:0] tag_wdata_ic0;
+ logic data_req_ic0;
+ logic [INDEX_W-1:0] data_index_ic0;
+ logic [NumWays-1:0] data_banks_ic0;
+ logic data_write_ic0;
+ logic [LINE_SIZE_ECC-1:0] data_wdata_ic0;
+ // Cache pipeline IC1 signals
+ logic [TAG_SIZE_ECC-1:0] tag_rdata_ic1 [NumWays];
+ logic [LINE_SIZE_ECC-1:0] data_rdata_ic1 [NumWays];
+ logic [LINE_SIZE_ECC-1:0] hit_data_ic1;
+ logic lookup_valid_ic1;
+ logic [ADDR_W-1:INDEX_HI+1] lookup_addr_ic1;
+ logic [NumWays-1:0] tag_match_ic1;
+ logic tag_hit_ic1;
+ logic [NumWays-1:0] tag_invalid_ic1;
+ logic [NumWays-1:0] lowest_invalid_way_ic1;
+ logic [NumWays-1:0] round_robin_way_ic1, round_robin_way_q;
+ logic [NumWays-1:0] sel_way_ic1;
+ logic ecc_err_ic1;
+ logic ecc_write_req;
+ logic [NumWays-1:0] ecc_write_ways;
+ logic [INDEX_W-1:0] ecc_write_index;
+ // Fill buffer signals
+ logic gnt_or_pmp_err, gnt_not_pmp_err;
+ logic [$clog2(NUM_FB)-1:0] fb_fill_level;
+ logic fill_cache_new;
+ logic fill_new_alloc;
+ logic fill_spec_req, fill_spec_done, fill_spec_hold;
+ logic [NUM_FB-1:0][NUM_FB-1:0] fill_older_d, fill_older_q;
+ logic [NUM_FB-1:0] fill_alloc_sel, fill_alloc;
+ logic [NUM_FB-1:0] fill_busy_d, fill_busy_q;
+ logic [NUM_FB-1:0] fill_done;
+ logic [NUM_FB-1:0] fill_in_ic1;
+ logic [NUM_FB-1:0] fill_stale_d, fill_stale_q;
+ logic [NUM_FB-1:0] fill_cache_d, fill_cache_q;
+ logic [NUM_FB-1:0] fill_hit_ic1, fill_hit_d, fill_hit_q;
+ logic [NUM_FB-1:0][LINE_BEATS_W:0] fill_ext_cnt_d, fill_ext_cnt_q;
+ logic [NUM_FB-1:0] fill_ext_hold_d, fill_ext_hold_q;
+ logic [NUM_FB-1:0] fill_ext_done_d, fill_ext_done_q;
+ logic [NUM_FB-1:0][LINE_BEATS_W:0] fill_rvd_cnt_d, fill_rvd_cnt_q;
+ logic [NUM_FB-1:0] fill_rvd_done;
+ logic [NUM_FB-1:0] fill_ram_done_d, fill_ram_done_q;
+ logic [NUM_FB-1:0] fill_out_grant;
+ logic [NUM_FB-1:0][LINE_BEATS_W:0] fill_out_cnt_d, fill_out_cnt_q;
+ logic [NUM_FB-1:0] fill_out_done;
+ logic [NUM_FB-1:0] fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req;
+ logic [NUM_FB-1:0] fill_data_sel, fill_data_reg, fill_data_hit, fill_data_rvd;
+ logic [NUM_FB-1:0][LINE_BEATS_W-1:0] fill_ext_off, fill_rvd_off;
+ logic [NUM_FB-1:0][LINE_BEATS_W:0] fill_ext_beat, fill_rvd_beat;
+ logic [NUM_FB-1:0] fill_ext_arb, fill_ram_arb, fill_out_arb;
+ logic [NUM_FB-1:0] fill_rvd_arb;
+ logic [NUM_FB-1:0] fill_entry_en;
+ logic [NUM_FB-1:0] fill_addr_en;
+ logic [NUM_FB-1:0] fill_way_en;
+ logic [NUM_FB-1:0][LINE_BEATS-1:0] fill_data_en;
+ logic [NUM_FB-1:0][LINE_BEATS-1:0] fill_err_d, fill_err_q;
+ logic [ADDR_W-1:0] fill_addr_q [NUM_FB];
+ logic [NumWays-1:0] fill_way_q [NUM_FB];
+ logic [LineSize-1:0] fill_data_d [NUM_FB];
+ logic [LineSize-1:0] fill_data_q [NUM_FB];
+ logic [ADDR_W-1:BUS_W] fill_ext_req_addr;
+ logic [ADDR_W-1:0] fill_ram_req_addr;
+ logic [NumWays-1:0] fill_ram_req_way;
+ logic [LineSize-1:0] fill_ram_req_data;
+ logic [LineSize-1:0] fill_out_data;
+ logic [LINE_BEATS-1:0] fill_out_err;
+ // External req signals
+ logic instr_req;
+ logic [ADDR_W-1:BUS_W] instr_addr;
+ // Data output signals
+ logic skid_complete_instr;
+ logic skid_ready;
+ logic output_compressed;
+ logic skid_valid_d, skid_valid_q, skid_en;
+ logic [15:0] skid_data_d, skid_data_q;
+ logic skid_err_q;
+ logic output_valid;
+ logic addr_incr_two;
+ logic output_addr_en;
+ logic [ADDR_W-1:1] output_addr_incr;
+ logic [ADDR_W-1:1] output_addr_d, output_addr_q;
+ logic [15:0] output_data_lo, output_data_hi;
+ logic data_valid, output_ready;
+ logic [LineSize-1:0] line_data;
+ logic [LINE_BEATS-1:0] line_err;
+ logic [31:0] line_data_muxed;
+ logic line_err_muxed;
+ logic [31:0] output_data;
+ logic output_err;
+ // Invalidations
+ logic start_inval, inval_done;
+ logic reset_inval_q;
+ logic inval_prog_d, inval_prog_q;
+ logic [INDEX_W-1:0] inval_index_d, inval_index_q;
+ //////////////////////////
+ // Instruction prefetch //
+ //////////////////////////
+ if (BranchPredictor) begin : g_branch_predictor
+ // Where the branch predictor is present record what address followed a predicted branch. If
+ // that branch is predicted taken but mispredicted (so not-taken) this is used to resume on
+ // the not-taken code path.
+ logic [31:0] branch_mispredict_addr_q;
+ logic branch_mispredict_addr_en;
+ assign branch_mispredict_addr_en = branch_i & predicted_branch_i;
+ always_ff @(posedge clk_i) begin
+ if (branch_mispredict_addr_en) begin
+ branch_mispredict_addr_q <= {output_addr_incr, 1'b0};
+ end
+ end
+ assign branch_mispredict_addr = branch_mispredict_addr_q;
+ end else begin : g_no_branch_predictor
+ logic unused_predicted_branch;
+ assign unused_predicted_branch = predicted_branch_i;
+ assign branch_mispredict_addr = '0;
+ end
+ assign branch_or_mispredict = branch_i | branch_mispredict_i;
+ assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:LINE_W],{LINE_W{1'b0}}};
+ // The prefetch address increments by one cache line for each granted request.
+ // This address is also updated if there is a branch that is not granted, since the target
+ // address (addr_i) is only valid for one cycle while branch_i is high.
+ // The captured branch target address is not forced to be aligned since the offset in the cache
+ // line must also be recorded for later use by the fill buffers.
+ assign prefetch_addr_d =
+ lookup_grant_ic0 ? (lookup_addr_aligned + {{ADDR_W-LINE_W-1{1'b0}},1'b1,{LINE_W{1'b0}}}) :
+ branch_i ? addr_i :
+ branch_mispredict_addr;
+ assign prefetch_addr_en = branch_or_mispredict | lookup_grant_ic0;
+ always_ff @(posedge clk_i) begin
+ if (prefetch_addr_en) begin
+ prefetch_addr_q <= prefetch_addr_d;
+ end
+ end
+ ////////////////////////
+ // Pipeline stage IC0 //
+ ////////////////////////
+ // Cache lookup
+ assign lookup_throttle = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]);
+ assign lookup_req_ic0 = req_i & ~&fill_busy_q & (branch_or_mispredict | ~lookup_throttle) &
+ ~ecc_write_req;
+ assign lookup_addr_ic0 = branch_spec_i ? addr_i :
+ branch_mispredict_i ? branch_mispredict_addr :
+ prefetch_addr_q;
+ assign lookup_index_ic0 = lookup_addr_ic0[INDEX_HI:LINE_W];
+ // Cache write
+ assign fill_req_ic0 = (|fill_ram_req);
+ assign fill_index_ic0 = fill_ram_req_addr[INDEX_HI:LINE_W];
+ assign fill_tag_ic0 = {(~inval_prog_q & ~ecc_write_req),fill_ram_req_addr[ADDR_W-1:INDEX_HI+1]};
+ assign fill_wdata_ic0 = fill_ram_req_data;
+ // Suppress a new lookup on a not-taken branch (as the address will be incorrect)
+ assign branch_suppress = branch_spec_i & ~branch_i;
+ // Arbitrated signals - lookups have highest priority
+ assign lookup_grant_ic0 = lookup_req_ic0 & ~branch_suppress;
+ assign fill_grant_ic0 = fill_req_ic0 & (~lookup_req_ic0 | branch_suppress) & ~inval_prog_q &
+ ~ecc_write_req;
+ // Qualified lookup grant to mask ram signals in IC1 if access was not made
+ assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_prog_q & ~start_inval;
+ // Tagram
+ assign tag_req_ic0 = lookup_req_ic0 | fill_req_ic0 | inval_prog_q | ecc_write_req;
+ assign tag_index_ic0 = inval_prog_q ? inval_index_q :
+ ecc_write_req ? ecc_write_index :
+ fill_grant_ic0 ? fill_index_ic0 :
+ lookup_index_ic0;
+ assign tag_banks_ic0 = ecc_write_req ? ecc_write_ways :
+ fill_grant_ic0 ? fill_ram_req_way :
+ {NumWays{1'b1}};
+ assign tag_write_ic0 = fill_grant_ic0 | inval_prog_q | ecc_write_req;
+ // Dataram
+ assign data_req_ic0 = lookup_req_ic0 | fill_req_ic0;
+ assign data_index_ic0 = tag_index_ic0;
+ assign data_banks_ic0 = tag_banks_ic0;
+ assign data_write_ic0 = tag_write_ic0;
+ // Append ECC checkbits to write data if required
+ if (ICacheECC) begin : gen_ecc_wdata
+ // Tagram ECC
+ // Reuse the same ecc encoding module for larger cache sizes by padding with zeros
+ logic [21:0] tag_ecc_input_padded;
+ logic [27:0] tag_ecc_output_padded;
+ logic [22-TAG_SIZE:0] tag_ecc_output_unused;
+ assign tag_ecc_input_padded = {{22-TAG_SIZE{1'b0}},fill_tag_ic0};
+ assign tag_ecc_output_unused = tag_ecc_output_padded[21:TAG_SIZE-1];
+ prim_secded_28_22_enc tag_ecc_enc (
+ .in (tag_ecc_input_padded),
+ .out (tag_ecc_output_padded)
+ );
+ assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[TAG_SIZE-1:0]};
+ // Dataram ECC
+ prim_secded_72_64_enc data_ecc_enc (
+ .in (fill_wdata_ic0),
+ .out (data_wdata_ic0)
+ );
+ end else begin : gen_noecc_wdata
+ assign tag_wdata_ic0 = fill_tag_ic0;
+ assign data_wdata_ic0 = fill_wdata_ic0;
+ end
+ ////////////////
+ // IC0 -> IC1 //
+ ////////////////
+ for (genvar way = 0; way < NumWays; way++) begin : gen_rams
+ // Tag RAM instantiation
+ prim_ram_1p #(
+ .Width (TAG_SIZE_ECC),
+ .Depth (NUM_LINES),
+ .DataBitsPerMask (TAG_SIZE_ECC)
+ ) tag_bank (
+ .clk_i (clk_i),
+ .req_i (tag_req_ic0 & tag_banks_ic0[way]),
+ .write_i (tag_write_ic0),
+ .wmask_i ({TAG_SIZE_ECC{1'b1}}),
+ .addr_i (tag_index_ic0),
+ .wdata_i (tag_wdata_ic0),
+ .rdata_o (tag_rdata_ic1[way])
+ );
+ // Data RAM instantiation
+ prim_ram_1p #(
+ .Width (LINE_SIZE_ECC),
+ .Depth (NUM_LINES),
+ .DataBitsPerMask (LINE_SIZE_ECC)
+ ) data_bank (
+ .clk_i (clk_i),
+ .req_i (data_req_ic0 & data_banks_ic0[way]),
+ .write_i (data_write_ic0),
+ .wmask_i ({LINE_SIZE_ECC{1'b1}}),
+ .addr_i (data_index_ic0),
+ .wdata_i (data_wdata_ic0),
+ .rdata_o (data_rdata_ic1[way])
+ );
+ end
+ // IC0 -> IC1 pipeline registers.
+ // Valid flag for the lookup that is now in IC1 (resettable so that no lookup is seen after reset)
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ lookup_valid_ic1 <= 1'b0;
+ end else begin
+ lookup_valid_ic1 <= lookup_actual_ic0;
+ end
+ end
+ // Tag portion of the lookup address and the fill buffer chosen for this lookup. Captured only
+ // when a lookup is granted; these registers have no reset.
+ always_ff @(posedge clk_i) begin
+ if (lookup_grant_ic0) begin
+ lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:INDEX_HI+1];
+ fill_in_ic1 <= fill_alloc_sel;
+ end
+ end
+ ////////////////////////
+ // Pipeline stage IC1 //
+ ////////////////////////
+ // Tag matching
+ // The stored tag is compared against {1'b1, address tag}: the top stored tag bit
+ // (bit TAG_SIZE-1) therefore acts as a valid bit that must be set for a match.
+ for (genvar way = 0; way < NumWays; way++) begin : gen_tag_match
+ assign tag_match_ic1[way] = (tag_rdata_ic1[way][TAG_SIZE-1:0] ==
+ {1'b1,lookup_addr_ic1[ADDR_W-1:INDEX_HI+1]});
+ // A way is invalid when that top tag bit is clear
+ assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_SIZE-1];
+ end
+ // Hit when any way matches
+ assign tag_hit_ic1 = |tag_match_ic1;
+ // Hit data mux
+ // OR-reduce the data RAM outputs of every way whose tag matched. The result is all zeros when
+ // no way matched.
+ always_comb begin
+   hit_data_ic1 = 'b0;
+   for (int w = 0; w < NumWays; w++) begin
+     if (tag_match_ic1[w]) begin
+       hit_data_ic1 = hit_data_ic1 | data_rdata_ic1[w];
+     end
+   end
+ end
+ // Way selection for allocations to the cache (onehot signals)
+ // 1 first invalid way
+ // 2 global round-robin (pseudorandom) way
+ // Way 0 is the lowest invalid way whenever it is invalid; higher ways qualify only if every
+ // lower way is valid.
+ assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
+ // round_robin_way_ic1 is round_robin_way_q rotated left by one position
+ assign round_robin_way_ic1[0] = round_robin_way_q[NumWays-1];
+ for (genvar way = 1; way < NumWays; way++) begin : gen_lowest_way
+ assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
+ assign round_robin_way_ic1[way] = round_robin_way_q[way-1];
+ end
+ // The round-robin pointer resets to way 0 and advances on every valid IC1 lookup
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ round_robin_way_q <= {{NumWays-1{1'b0}},1'b1};
+ end else if (lookup_valid_ic1) begin
+ round_robin_way_q <= round_robin_way_ic1;
+ end
+ end
+ // Prefer an invalid way; otherwise use the current (registered) round-robin pointer
+ assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
+ round_robin_way_q;
+ // ECC checking logic
+ // When ICacheECC is set, every way's tag and the selected hit data are checked in IC1. A
+ // detected error is reported on ecc_err_ic1 and, one cycle later, a request is raised
+ // (ecc_write_req / ecc_write_ways / ecc_write_index) to invalidate the affected ways.
+ if (ICacheECC) begin : gen_data_ecc_checking
+ logic [NumWays-1:0] tag_err_ic1;
+ logic [1:0] data_err_ic1;
+ logic ecc_correction_write_d, ecc_correction_write_q;
+ logic [NumWays-1:0] ecc_correction_ways_d, ecc_correction_ways_q;
+ logic [INDEX_W-1:0] lookup_index_ic1, ecc_correction_index_q;
+ // Tag ECC checking
+ for (genvar way = 0; way < NumWays; way++) begin : gen_tag_ecc
+ logic [1:0] tag_err_bank_ic1;
+ logic [27:0] tag_rdata_padded_ic1;
+ // Expand the tag rdata with extra padding if the tag size is less than the maximum
+ // (top 6 stored bits, then zero padding, then the TAG_SIZE tag bits - the inverse of the
+ // packing applied to the tag write data)
+ assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TAG_SIZE_ECC-1-:6],
+ {22-TAG_SIZE{1'b0}},
+ tag_rdata_ic1[way][TAG_SIZE-1:0]};
+ // NOTE(review): this is the *tag* decoder although the instance is named data_ecc_dec. The
+ // name does not clash with the data decoder below because it lives in the gen_tag_ecc scope.
+ prim_secded_28_22_dec data_ecc_dec (
+ .in (tag_rdata_padded_ic1),
+ .d_o (),
+ .syndrome_o (),
+ .err_o (tag_err_bank_ic1)
+ );
+ // Any error flag from the decoder marks this way's tag as erroneous
+ assign tag_err_ic1[way] = |tag_err_bank_ic1;
+ end
+ // Data ECC checking
+ // Note - could generate for all ways and mux after
+ prim_secded_72_64_dec data_ecc_dec (
+ .in (hit_data_ic1),
+ .d_o (),
+ .syndrome_o (),
+ .err_o (data_err_ic1)
+ );
+ // Errors are only reported for a valid IC1 lookup
+ assign ecc_err_ic1 = lookup_valid_ic1 & ((|data_err_ic1) | (|tag_err_ic1));
+ // Error correction
+ // All ways will be invalidated on a tag error to prevent X-propagation from data_err_ic1 on
+ // spurious hits. Also prevents the same line being allocated twice when there was a true
+ // hit and a spurious hit.
+ // On a data error only the way(s) that matched are selected.
+ assign ecc_correction_ways_d = {NumWays{|tag_err_ic1}} |
+ (tag_match_ic1 & {NumWays{|data_err_ic1}});
+ assign ecc_correction_write_d = ecc_err_ic1;
+ // The invalidation write request is raised the cycle after the error is detected
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ ecc_correction_write_q <= 1'b0;
+ end else begin
+ ecc_correction_write_q <= ecc_correction_write_d;
+ end
+ end
+ // The index is required in IC1 only when ECC is configured so is registered here
+ always_ff @(posedge clk_i) begin
+ if (lookup_grant_ic0) begin
+ lookup_index_ic1 <= lookup_addr_ic0[INDEX_HI-:INDEX_W];
+ end
+ end
+ // Store the ways with errors to be invalidated
+ always_ff @(posedge clk_i) begin
+ if (ecc_err_ic1) begin
+ ecc_correction_ways_q <= ecc_correction_ways_d;
+ ecc_correction_index_q <= lookup_index_ic1;
+ end
+ end
+ assign ecc_write_req = ecc_correction_write_q;
+ assign ecc_write_ways = ecc_correction_ways_q;
+ assign ecc_write_index = ecc_correction_index_q;
+ end else begin : gen_no_data_ecc
+ // Without ECC no error is ever reported and no invalidation write is requested
+ assign ecc_err_ic1 = 1'b0;
+ assign ecc_write_req = 1'b0;
+ assign ecc_write_ways = '0;
+ assign ecc_write_index = '0;
+ end
+ ///////////////////////////////
+ // Cache allocation decision //
+ ///////////////////////////////
+ // Decide whether a newly allocated fill buffer is allowed to write its line into the cache.
+ // Both variants block allocation while an invalidation is starting (start_inval) or in
+ // progress (inval_prog_q). start_inval covers an invalidation requested by icache_inval_i as
+ // well as the one triggered on the first cycle out of reset, so lines are never marked for
+ // caching while the tag RAMs still hold unknown contents.
+ if (BranchCache) begin : gen_caching_logic
+   // Cache branch target + a number of subsequent lines
+   localparam int unsigned CACHE_AHEAD = 2;
+   localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
+   logic cache_cnt_dec;
+   logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;
+   // Count down by one for each granted lookup until the counter reaches zero
+   assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);
+   // A branch reloads the counter with the number of lines to cache ahead
+   assign cache_cnt_d = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :
+                                   (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});
+   always_ff @(posedge clk_i or negedge rst_ni) begin
+     if (!rst_ni) begin
+       cache_cnt_q <= '0;
+     end else begin
+       cache_cnt_q <= cache_cnt_d;
+     end
+   end
+   // Use start_inval (not just icache_inval_i) so that the reset-triggered invalidation also
+   // blocks allocation, matching the gen_cache_all variant below.
+   assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i &
+                           ~start_inval & ~inval_prog_q;
+ end else begin : gen_cache_all
+   // Cache all missing fetches
+   assign fill_cache_new = icache_enable_i & ~start_inval & ~inval_prog_q;
+ end
+ //////////////////////////
+ // Fill buffer tracking //
+ //////////////////////////
+ // Count the fill buffers that are busy and have not gone stale
+ always_comb begin
+   fb_fill_level = '0;
+   for (int fb_idx = 0; fb_idx < NUM_FB; fb_idx++) begin
+     if (fill_busy_q[fb_idx] & ~fill_stale_q[fb_idx]) begin
+       fb_fill_level = fb_fill_level + {{$clog2(NUM_FB)-1{1'b0}},1'b1};
+     end
+   end
+ end
+ // PMP errors might not / don't need to be granted (since the external request is masked)
+ // gnt_or_pmp_err : the request is finished with (really granted, or terminated by a PMP error)
+ // gnt_not_pmp_err: the request was really granted, so a data response is expected
+ assign gnt_or_pmp_err = instr_gnt_i | instr_pmp_err_i;
+ assign gnt_not_pmp_err = instr_gnt_i & ~instr_pmp_err_i;
+ // Allocate a new buffer for every granted lookup
+ assign fill_new_alloc = lookup_grant_ic0;
+ // Track whether a speculative external request was made from IC0, and whether it was granted
+ // Speculative requests are only made for branches, or if the cache is disabled
+ // (and only while no fill buffer is itself requesting the bus)
+ assign fill_spec_req = (~icache_enable_i | branch_or_mispredict) & ~|fill_ext_req;
+ // Speculative request accepted this cycle
+ assign fill_spec_done = fill_spec_req & gnt_not_pmp_err;
+ // Speculative request still waiting for a grant, so it must be held
+ assign fill_spec_hold = fill_spec_req & ~gnt_or_pmp_err;
+ // Per-fill-buffer logic. Each of the NUM_FB fill buffers tracks one granted lookup from
+ // allocation until its external requests, its data output to the IF stage and (optionally) its
+ // cache allocation have all completed.
+ // The beat counters below are LINE_BEATS_W+1 bits wide; the top bit being set means that a
+ // whole line's worth of beats has been counted.
+ for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs
+ /////////////////////////////
+ // Fill buffer allocations //
+ /////////////////////////////
+ // Allocate the lowest available buffer
+ // (fill_alloc_sel is one-hot: this buffer is free and all lower-numbered buffers are busy)
+ if (fb == 0) begin : gen_fb_zero
+ assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
+ end else begin : gen_fb_rest
+ assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
+ end
+ assign fill_alloc[fb] = fill_alloc_sel[fb] & fill_new_alloc;
+ // Busy from allocation until all actions are done
+ assign fill_busy_d[fb] = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);
+ // Track which other fill buffers are older than this one (for age-based arbitration)
+ // TODO sparsify
+ // On allocation every currently busy buffer is older; buffers drop out as they complete.
+ assign fill_older_d[fb] = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;
+ // A fill buffer can release once all its actions are completed
+ // all data written to the cache (unless hit or error)
+ assign fill_done[fb] = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
+ (|fill_err_q[fb])) &
+ // all data output unless stale due to intervening branch
+ (fill_out_done[fb] | fill_stale_q[fb] | branch_or_mispredict) &
+ // all external requests completed
+ fill_rvd_done[fb];
+ /////////////////////////////////
+ // Fill buffer status tracking //
+ /////////////////////////////////
+ // Track staleness (requests become stale when a branch intervenes)
+ assign fill_stale_d[fb] = fill_busy_q[fb] & (branch_or_mispredict | fill_stale_q[fb]);
+ // Track whether or not this request should allocate to the cache
+ // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
+ assign fill_cache_d[fb] = (fill_alloc[fb] & fill_cache_new) |
+ (fill_cache_q[fb] & fill_busy_q[fb] &
+ icache_enable_i & ~icache_inval_i);
+ // Record whether the request hit in the cache
+ // (a hit is only taken for the buffer that owns the IC1 lookup, and never on an ECC error)
+ assign fill_hit_ic1[fb] = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1 & ~ecc_err_ic1;
+ assign fill_hit_d[fb] = fill_hit_ic1[fb] | (fill_hit_q[fb] & fill_busy_q[fb]);
+ ///////////////////////////////////////////
+ // Fill buffer external request tracking //
+ ///////////////////////////////////////////
+ // Make an external request
+ assign fill_ext_req[fb] = fill_busy_q[fb] & ~fill_ext_done_d[fb];
+ // Count the number of completed external requests (each line requires LINE_BEATS requests)
+ // Don't count fake PMP error grants here since they will never receive an rvalid response
+ // On allocation the count starts at 1 if the speculative IC0 request was already granted.
+ assign fill_ext_cnt_d[fb] = fill_alloc[fb] ?
+ {{LINE_BEATS_W{1'b0}},fill_spec_done} :
+ (fill_ext_cnt_q[fb] + {{LINE_BEATS_W{1'b0}},
+ fill_ext_arb[fb] & gnt_not_pmp_err});
+ // External request must be held until granted
+ assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
+ (fill_ext_arb[fb] & ~gnt_or_pmp_err);
+ // External requests are completed when the counter is filled or when the request is cancelled
+ assign fill_ext_done_d[fb] = (fill_ext_cnt_q[fb][LINE_BEATS_W] |
+ // external requests are considered complete if the request hit
+ fill_hit_ic1[fb] | fill_hit_q[fb] |
+ // external requests will stop once any PMP error is received
+ fill_err_q[fb][fill_ext_off[fb]] |
+ // cancel if the line won't be cached and, it is stale
+ (~fill_cache_q[fb] & (branch_or_mispredict | fill_stale_q[fb] |
+ // or we're already at the end of the line
+ fill_ext_beat[fb][LINE_BEATS_W]))) &
+ // can't cancel while we are waiting for a grant on the bus
+ ~fill_ext_hold_q[fb] & fill_busy_q[fb];
+ // Track whether this fill buffer expects to receive beats of data
+ assign fill_rvd_exp[fb] = fill_busy_q[fb] & ~fill_rvd_done[fb];
+ // Count the number of rvalid beats received
+ assign fill_rvd_cnt_d[fb] = fill_alloc[fb] ? '0 :
+ (fill_rvd_cnt_q[fb] +
+ {{LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
+ // External data is complete when all issued external requests have received their data
+ assign fill_rvd_done[fb] = (fill_ext_done_q[fb] & ~fill_ext_hold_q[fb]) &
+ (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);
+ //////////////////////////////////////
+ // Fill buffer data output tracking //
+ //////////////////////////////////////
+ // Send data to the IF stage for requests that are not stale, have not completed their
+ // data output, and have data available to send.
+ // Data is available if:
+ // - The request hit in the cache
+ // - The current beat is an error (since a PMP error might not actually receive any data)
+ // - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
+ // - Data is available from the bus this cycle (fill_rvd_arb)
+ assign fill_out_req[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
+ (fill_hit_ic1[fb] | fill_hit_q[fb] |
+ (fill_err_q[fb][fill_out_cnt_q[fb][LINE_BEATS_W-1:0]]) |
+ (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);
+ // Calculate when a beat of data is output. Any ECC error squashes the output that cycle.
+ // NOTE(review): the expression below has no explicit ECC term; the squash presumably takes
+ // effect through fill_hit_ic1 (masked by ecc_err_ic1), which feeds fill_out_req - confirm.
+ assign fill_out_grant[fb] = fill_out_arb[fb] & output_ready;
+ // Count the beats of data output to the IF stage
+ // (the count starts at the beat offset of the lookup address within the line)
+ assign fill_out_cnt_d[fb] = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[LINE_W-1:BUS_W]} :
+ (fill_out_cnt_q[fb] +
+ {{LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
+ // Data output complete when the counter fills
+ assign fill_out_done[fb] = fill_out_cnt_q[fb][LINE_BEATS_W];
+ //////////////////////////////////////
+ // Fill buffer ram request tracking //
+ //////////////////////////////////////
+ // make a fill request once all data beats received
+ assign fill_ram_req[fb] = fill_busy_q[fb] & fill_rvd_cnt_q[fb][LINE_BEATS_W] &
+ // unless the request hit, was non-allocating or got an error
+ ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
+ // or the request was already completed
+ ~fill_ram_done_q[fb];
+ // Record when a cache allocation request has been completed
+ assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);
+ //////////////////////////////
+ // Fill buffer line offsets //
+ //////////////////////////////
+ // When we branch into the middle of a line, the output count will not start from zero. This
+ // beat count is used to know which incoming rdata beats are relevant.
+ // *_beat = starting beat of the line address + number of beats requested / received so far
+ // *_off  = the same value without its top bit, i.e. the beat index within the line
+ assign fill_ext_beat[fb] = {1'b0,fill_addr_q[fb][LINE_W-1:BUS_W]} +
+ fill_ext_cnt_q[fb][LINE_BEATS_W:0];
+ assign fill_ext_off[fb] = fill_ext_beat[fb][LINE_BEATS_W-1:0];
+ assign fill_rvd_beat[fb] = {1'b0,fill_addr_q[fb][LINE_W-1:BUS_W]} +
+ fill_rvd_cnt_q[fb][LINE_BEATS_W:0];
+ assign fill_rvd_off[fb] = fill_rvd_beat[fb][LINE_BEATS_W-1:0];
+ /////////////////////////////
+ // Fill buffer arbitration //
+ /////////////////////////////
+ // Age based arbitration - all these signals are one-hot
+ // A buffer wins when it is requesting and no older buffer is making the same kind of request.
+ assign fill_ext_arb[fb] = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
+ assign fill_ram_arb[fb] = fill_ram_req[fb] & fill_grant_ic0 & ~|(fill_ram_req & fill_older_q[fb]);
+ // Calculate which fill buffer is the oldest one which still needs to output data to IF
+ assign fill_data_sel[fb] = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
+ fill_older_q[fb]);
+ // Arbitrate the request which has data available to send, and is the oldest outstanding
+ assign fill_out_arb[fb] = fill_out_req[fb] & fill_data_sel[fb];
+ // Assign incoming rvalid data to the oldest fill buffer expecting it
+ assign fill_rvd_arb[fb] = instr_rvalid_i & fill_rvd_exp[fb] & ~|(fill_rvd_exp & fill_older_q[fb]);
+ /////////////////////////////
+ // Fill buffer data muxing //
+ /////////////////////////////
+ // Output data muxing controls
+ // 1. Select data from the fill buffer data register
+ assign fill_data_reg[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] &
+ ~fill_out_done[fb] & fill_data_sel[fb] &
+ // The incoming data is already ahead of the output count
+ ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
+ (|fill_err_q[fb]));
+ // 2. Select IC1 hit data
+ assign fill_data_hit[fb] = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
+ // 3. Select incoming instr_rdata_i
+ assign fill_data_rvd[fb] = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
+ ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
+ // The incoming data lines up with the output count
+ (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];
+ ///////////////////////////
+ // Fill buffer registers //
+ ///////////////////////////
+ // Fill buffer general enable
+ // (state registers only update while the buffer is being allocated or is busy)
+ assign fill_entry_en[fb] = fill_alloc[fb] | fill_busy_q[fb];
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fill_busy_q[fb] <= 1'b0;
+ fill_older_q[fb] <= '0;
+ fill_stale_q[fb] <= 1'b0;
+ fill_cache_q[fb] <= 1'b0;
+ fill_hit_q[fb] <= 1'b0;
+ fill_ext_cnt_q[fb] <= '0;
+ fill_ext_hold_q[fb] <= 1'b0;
+ fill_ext_done_q[fb] <= 1'b0;
+ fill_rvd_cnt_q[fb] <= '0;
+ fill_ram_done_q[fb] <= 1'b0;
+ fill_out_cnt_q[fb] <= '0;
+ end else if (fill_entry_en[fb]) begin
+ fill_busy_q[fb] <= fill_busy_d[fb];
+ fill_older_q[fb] <= fill_older_d[fb];
+ fill_stale_q[fb] <= fill_stale_d[fb];
+ fill_cache_q[fb] <= fill_cache_d[fb];
+ fill_hit_q[fb] <= fill_hit_d[fb];
+ fill_ext_cnt_q[fb] <= fill_ext_cnt_d[fb];
+ fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];
+ fill_ext_done_q[fb] <= fill_ext_done_d[fb];
+ fill_rvd_cnt_q[fb] <= fill_rvd_cnt_d[fb];
+ fill_ram_done_q[fb] <= fill_ram_done_d[fb];
+ fill_out_cnt_q[fb] <= fill_out_cnt_d[fb];
+ end
+ end
+ ////////////////////////////////////////
+ // Fill buffer address / data storage //
+ ////////////////////////////////////////
+ // The address is captured at allocation (IC0); the way to fill is captured one stage later,
+ // when the lookup owned by this buffer is valid in IC1. Neither register has a reset.
+ assign fill_addr_en[fb] = fill_alloc[fb];
+ assign fill_way_en[fb] = (lookup_valid_ic1 & fill_in_ic1[fb]);
+ always_ff @(posedge clk_i) begin
+ if (fill_addr_en[fb]) begin
+ fill_addr_q[fb] <= lookup_addr_ic0;
+ end
+ end
+ always_ff @(posedge clk_i) begin
+ if (fill_way_en[fb]) begin
+ fill_way_q[fb] <= sel_way_ic1;
+ end
+ end
+ // Data either comes from the cache or the bus. If there was an ECC error, we must take
+ // the incoming bus data since the cache hit data is corrupted.
+ // The bus data is replicated across the line; fill_data_en picks which beat is stored.
+ assign fill_data_d[fb] = fill_hit_ic1[fb] ? hit_data_ic1[LineSize-1:0] :
+ {LINE_BEATS{instr_rdata_i}};
+ for (genvar b = 0; b < LINE_BEATS; b++) begin : gen_data_buf
+ // Error tracking (per beat)
+ // Either a PMP error on a speculative request,
+ assign fill_err_d[fb][b] = (instr_pmp_err_i & fill_alloc[fb] & fill_spec_req &
+ (lookup_addr_ic0[LINE_W-1:BUS_W] == b[LINE_BEATS_W-1:0])) |
+ // a PMP error on a fill buffer ext req
+ (instr_pmp_err_i & fill_ext_arb[fb] &
+ (fill_ext_off[fb] == b[LINE_BEATS_W-1:0])) |
+ // Or a data error with instr_rvalid_i
+ (fill_rvd_arb[fb] & instr_err_i &
+ (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0])) |
+ // Hold the error once recorded
+ (fill_busy_q[fb] & fill_err_q[fb][b]);
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fill_err_q[fb][b] <= '0;
+ end else if (fill_entry_en[fb]) begin
+ fill_err_q[fb][b] <= fill_err_d[fb][b];
+ end
+ end
+ // Enable the relevant part of the data register (or all for cache hits)
+ // Ignore incoming rvalid data when we already have cache hit data
+ assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
+ (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
+ (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0]));
+ always_ff @(posedge clk_i) begin
+ if (fill_data_en[fb][b]) begin
+ fill_data_q[fb][b*BusWidth+:BusWidth] <= fill_data_d[fb][b*BusWidth+:BusWidth];
+ end
+ end
+ end
+ end
+ ////////////////////////////////
+ // Fill buffer one-hot muxing //
+ ////////////////////////////////
+ // External req info
+ // Beat address (line address plus beat offset) of the fill buffer that won external request
+ // arbitration; zero when no buffer is arbitrated.
+ always_comb begin
+   fill_ext_req_addr = '0;
+   for (int fb_idx = 0; fb_idx < NUM_FB; fb_idx++) begin
+     if (fill_ext_arb[fb_idx]) begin
+       fill_ext_req_addr = fill_ext_req_addr |
+                           {fill_addr_q[fb_idx][ADDR_W-1:LINE_W], fill_ext_off[fb_idx]};
+     end
+   end
+ end
+ // Cache req info
+ // Address, destination way and line data of the fill buffer that won cache write arbitration.
+ always_comb begin
+   fill_ram_req_addr = '0;
+   fill_ram_req_way  = '0;
+   fill_ram_req_data = '0;
+   for (int fb_idx = 0; fb_idx < NUM_FB; fb_idx++) begin
+     if (fill_ram_arb[fb_idx]) begin
+       fill_ram_req_addr = fill_ram_req_addr | fill_addr_q[fb_idx];
+       fill_ram_req_way  = fill_ram_req_way  | fill_way_q[fb_idx];
+       fill_ram_req_data = fill_ram_req_data | fill_data_q[fb_idx];
+     end
+   end
+ end
+ // IF stage output data
+ // Buffered line data and per-beat error flags of the fill buffer selected to drive the output
+ // from its data register.
+ always_comb begin
+   fill_out_data = '0;
+   fill_out_err  = '0;
+   for (int fb_idx = 0; fb_idx < NUM_FB; fb_idx++) begin
+     if (fill_data_reg[fb_idx]) begin
+       fill_out_data = fill_out_data | fill_data_q[fb_idx];
+       // Ignore any speculative errors accumulated on cache hits
+       fill_out_err = fill_out_err | (fill_err_q[fb_idx] & ~{LINE_BEATS{fill_hit_q[fb_idx]}});
+     end
+   end
+ end
+ ///////////////////////
+ // External requests //
+ ///////////////////////
+ // A bus request is made either speculatively from IC0 (cache disabled, or a branch /
+ // mispredict, together with a granted lookup) or on behalf of any fill buffer.
+ assign instr_req = ((~icache_enable_i | branch_or_mispredict) & lookup_grant_ic0) |
+ (|fill_ext_req);
+ // Fill buffer requests take priority over the IC0 lookup address
+ assign instr_addr = |fill_ext_req ? fill_ext_req_addr :
+ lookup_addr_ic0[ADDR_W-1:BUS_W];
+ assign instr_req_o = instr_req;
+ // The external address is aligned to the bus width: the low BUS_W bits are driven to zero
+ assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}};
+ ////////////////////////
+ // Output data muxing //
+ ////////////////////////
+ // Mux between line-width data sources
+ // IC1 hit data takes priority over buffered fill data; hit data never carries an error flag.
+ assign line_data = |fill_data_hit ? hit_data_ic1[LineSize-1:0] : fill_out_data;
+ assign line_err = |fill_data_hit ? {LINE_BEATS{1'b0}} : fill_out_err;
+ // Mux the relevant beat of line data, based on the output address
+ always_comb begin
+   line_data_muxed = '0;
+   line_err_muxed = 1'b0;
+   for (int i = 0; i < LINE_BEATS; i++) begin
+     // When data has been skidded, the output address is behind by one
+     if ((output_addr_q[LINE_W-1:BUS_W] + {{LINE_BEATS_W-1{1'b0}},skid_valid_q}) ==
+         i[LINE_BEATS_W-1:0]) begin
+       // Slice one bus-width beat out of the line. The beat width follows BusWidth (as in the
+       // fill data register writes) rather than a hard-coded 32.
+       line_data_muxed |= line_data[i*BusWidth+:BusWidth];
+       line_err_muxed |= line_err[i];
+     end
+   end
+ end
+ // Mux between incoming rdata and the muxed line data
+ // Bus data (and its error flag) is forwarded directly when a fill buffer selects it.
+ assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed;
+ assign output_err = |fill_data_rvd ? instr_err_i : line_err_muxed;
+ // Output data is valid (from any of the three possible sources). Note that fill_out_arb
+ // must be used here rather than fill_out_req because data can become valid out of order
+ // (e.g. cache hit data can become available ahead of an older outstanding miss).
+ assign data_valid = |fill_out_arb;
+ // Skid buffer data
+ // The skid buffer keeps the upper halfword of the output data, plus its error flag, so that an
+ // instruction straddling two beats can be assembled from it and the next beat.
+ assign skid_data_d = output_data[31:16];
+ assign skid_en = data_valid & (ready_i | skid_ready);
+ always_ff @(posedge clk_i) begin
+ if (skid_en) begin
+ skid_data_q <= skid_data_d;
+ skid_err_q <= output_err;
+ end
+ end
+ // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
+ // an error (no need to wait for the second half)
+ // (low two bits != 2'b11 identifies a compressed instruction)
+ assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);
+ // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
+ assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);
+ // No new beat is consumed while the skid buffer alone satisfies the next instruction
+ assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;
+ assign output_compressed = (rdata_o[1:0] != 2'b11);
+ assign skid_valid_d =
+ // Branches invalidate the skid buffer
+ branch_or_mispredict ? 1'b0 :
+ // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
+ (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
+ // The skid buffer becomes valid when:
+ // - we branch to an unaligned uncompressed instruction
+ (((output_addr_q[1] & (~output_compressed | output_err)) |
+ // - a compressed instruction misaligns the stream
+ (~output_addr_q[1] & output_compressed & ~output_err & ready_i)) & data_valid));
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ skid_valid_q <= 1'b0;
+ end else begin
+ skid_valid_q <= skid_valid_d;
+ end
+ end
+ // Signal that valid data is available to the IF stage
+ // Note that if the first half of an unaligned instruction reports an error, we do not need
+ // to wait for the second half (and for PMP errors we might not have fetched the second half)
+ // Compressed instruction completely satisfied by skid buffer
+ assign output_valid = skid_complete_instr |
+ // Output data available and, output stream aligned, or skid data available,
+ (data_valid & (~output_addr_q[1] | skid_valid_q |
+ // or this is an error or an unaligned compressed instruction
+ output_err | (output_data[17:16] != 2'b11)));
+ // Update the address on branches and every time an instruction is driven
+ assign output_addr_en = branch_or_mispredict | (ready_i & valid_o);
+ // Increment the address by two every time a compressed instruction is popped
+ assign addr_incr_two = output_compressed & ~err_o;
+ // Next IF stage PC
+ // (output_addr_q holds address bits [31:1], so an increment of 2'b01 adds two bytes and
+ // 2'b10 adds four bytes)
+ assign output_addr_incr = (output_addr_q[31:1] +
+ // Increment address by 4 or 2
+ {29'd0, ~addr_incr_two, addr_incr_two});
+ // Redirect the address on branches or mispredicts
+ // (a taken branch has priority over a mispredict redirect)
+ assign output_addr_d = branch_i ? addr_i[31:1] :
+ branch_mispredict_i ? branch_mispredict_addr[31:1] :
+ output_addr_incr;
+ // No reset on the output address register
+ always_ff @(posedge clk_i) begin
+ if (output_addr_en) begin
+ output_addr_q <= output_addr_d;
+ end
+ end
+ // Mux the data from BusWidth to halfword
+ // This muxing realigns data when instruction words are split across BUS_W e.g.
+ // word 1 |----|*h1*|
+ // word 0 |*h0*|----| --> |*h1*|*h0*|
+ // 31 15 0 31 15 0
+ // Lower halfword: the halfword of the current beat addressed by output_addr_q
+ always_comb begin
+   output_data_lo = '0;
+   for (int hw = 0; hw < OUTPUT_BEATS; hw++) begin
+     if (output_addr_q[BUS_W-1:1] == hw[BUS_W-2:0]) begin
+       output_data_lo = output_data_lo | output_data[hw*16+:16];
+     end
+   end
+ end
+ // Upper halfword: the halfword following the addressed one. When the addressed halfword is
+ // the last of the beat, the upper half is taken from the bottom halfword of output_data.
+ always_comb begin
+   output_data_hi = '0;
+   for (int hw = 0; hw < OUTPUT_BEATS-1; hw++) begin
+     if (output_addr_q[BUS_W-1:1] == hw[BUS_W-2:0]) begin
+       output_data_hi = output_data_hi | output_data[(hw+1)*16+:16];
+     end
+   end
+   if (&output_addr_q[BUS_W-1:1]) begin
+     output_data_hi = output_data_hi | output_data[15:0];
+   end
+ end
+ // IF stage outputs
+ // Valid is suppressed in the cycle a branch mispredict is signalled
+ assign valid_o = output_valid & ~branch_mispredict_i;
+ // The lower halfword comes from the skid buffer whenever it holds valid data
+ assign rdata_o = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
+ // output_addr_q stores address bits [31:1]; bit 0 is always zero
+ assign addr_o = {output_addr_q, 1'b0};
+ // Error in the skid (first) half, or in the current output data unless the skid buffer already
+ // holds a complete instruction
+ assign err_o = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
+ // Error caused by the second half of a misaligned uncompressed instruction
+ // (only relevant when err_o is set)
+ assign err_plus2_o = skid_valid_q & ~skid_err_q;
+ ///////////////////
+ // Invalidations //
+ ///////////////////
+ // Invalidate on reset, or when instructed. If an invalidation request is received while a
+ // previous invalidation is ongoing, it does not need to be restarted.
+ // reset_inval_q is 0 only in the first cycle after reset, which triggers the initial
+ // invalidation.
+ assign start_inval = (~reset_inval_q | icache_inval_i) & ~inval_prog_q;
+ assign inval_prog_d = start_inval | (inval_prog_q & ~inval_done);
+ // Done once the index counter has reached its all-ones value
+ assign inval_done = &inval_index_q;
+ // The index restarts from zero on a new invalidation and otherwise increments by one
+ assign inval_index_d = start_inval ? '0 :
+ (inval_index_q + {{INDEX_W-1{1'b0}},1'b1});
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ inval_prog_q <= 1'b0;
+ reset_inval_q <= 1'b0;
+ end else begin
+ inval_prog_q <= inval_prog_d;
+ reset_inval_q <= 1'b1;
+ end
+ end
+ // The index register has no reset; it is loaded with zero when an invalidation starts
+ always_ff @(posedge clk_i) begin
+ if (inval_prog_d) begin
+ inval_index_q <= inval_index_d;
+ end
+ end
+ /////////////////
+ // Busy status //
+ /////////////////
+ // Only busy (for WFI purposes) while an invalidation is in-progress, or external requests are
+ // outstanding.
+ // A fill buffer counts as outstanding while it is busy and has not yet received all of the
+ // data for the requests it issued.
+ assign busy_o = inval_prog_q | (|(fill_busy_q & ~fill_rvd_done));
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..f366518
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,316 @@
+/**
+ * Prefetcher Buffer for 32 bit memory interface
+ *
+ * Prefetch Buffer that caches instructions. This cuts overly long critical
+ * paths to the instruction cache.
+ */
+module brq_ifu_prefetch_buffer #(
+ parameter bit BranchPredictor = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ // request / redirect interface from the IF stage
+ input logic req_i,
+ input logic branch_i,
+ input logic branch_spec_i,
+ input logic predicted_branch_i,
+ input logic branch_mispredict_i,
+ input logic [31:0] addr_i,
+ // fetched instruction output (ready/valid handshake)
+ input logic ready_i,
+ output logic valid_o,
+ output logic [31:0] rdata_o,
+ output logic [31:0] addr_o,
+ output logic err_o,
+ output logic err_plus2_o,
+ // goes to instruction memory / instruction cache
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic instr_err_i,
+ input logic instr_pmp_err_i,
+ input logic instr_rvalid_i,
+ // Prefetch Buffer Status
+ output logic busy_o
+);
+ // Maximum number of requests that may be outstanding on the bus at once
+ localparam int unsigned NUM_REQS = 2;
+ logic branch_suppress;
+ logic valid_new_req, valid_req;
+ logic valid_req_d, valid_req_q;
+ logic discard_req_d, discard_req_q;
+ logic gnt_or_pmp_err, rvalid_or_pmp_err;
+ logic [NUM_REQS-1:0] rdata_outstanding_n, rdata_outstanding_s, rdata_outstanding_q;
+ logic [NUM_REQS-1:0] branch_discard_n, branch_discard_s, branch_discard_q;
+ logic [NUM_REQS-1:0] rdata_pmp_err_n, rdata_pmp_err_s, rdata_pmp_err_q;
+ logic [NUM_REQS-1:0] rdata_outstanding_rev;
+ logic [31:0] stored_addr_d, stored_addr_q;
+ logic stored_addr_en;
+ logic [31:0] fetch_addr_d, fetch_addr_q;
+ logic fetch_addr_en;
+ logic [31:0] branch_mispredict_addr;
+ logic [31:0] instr_addr, instr_addr_w_aligned;
+ logic instr_or_pmp_err;
+ logic fifo_valid;
+ logic [31:0] fifo_addr;
+ logic fifo_ready;
+ logic fifo_clear;
+ logic [NUM_REQS-1:0] fifo_busy;
+ logic valid_raw;
+ logic [31:0] addr_next;
+ logic branch_or_mispredict;
+ ////////////////////////////
+ // Prefetch buffer status //
+ ////////////////////////////
+ // Busy while any response is still outstanding or a request is being made
+ assign busy_o = (|rdata_outstanding_q) | instr_req_o;
+ assign branch_or_mispredict = branch_i | branch_mispredict_i;
+ //////////////////////////////////////////////
+ // Fetch fifo - consumes addresses and data //
+ //////////////////////////////////////////////
+ // Instruction fetch errors are valid on the data phase of a request
+ // PMP errors are generated in the address phase, and registered into a fake data phase
+ assign instr_or_pmp_err = instr_err_i | rdata_pmp_err_q[0];
+ // A branch will invalidate any previously fetched instructions.
+ // Note that the FENCE.I instruction relies on this flushing behaviour on branch. If it is
+ // altered the FENCE.I implementation may require changes.
+ assign fifo_clear = branch_or_mispredict;
+ // Reversed version of rdata_outstanding_q which can be overlaid with fifo fill state
+ for (genvar i = 0; i < NUM_REQS; i++) begin : gen_rd_rev
+ assign rdata_outstanding_rev[i] = rdata_outstanding_q[NUM_REQS-1-i];
+ end
+ // The fifo is ready to accept a new request if it is not full - including space reserved for
+ // requests already outstanding.
+ // Overlay the fifo fill state with the outstanding requests to see if there is space.
+ assign fifo_ready = ~&(fifo_busy | rdata_outstanding_rev);
+ // NOTE(review): no parameter override is passed to the FIFO, so its depth relies on the
+ // FIFO's own default matching NUM_REQS (fifo_busy is NUM_REQS bits wide) - confirm against
+ // brq_ifu_fifo.
+ brq_ifu_fifo #(
+ ) fifo_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .clear_i ( fifo_clear ),
+ .busy_o ( fifo_busy ),
+ .in_valid_i ( fifo_valid ),
+ .in_addr_i ( fifo_addr ),
+ .in_rdata_i ( instr_rdata_i ),
+ .in_err_i ( instr_or_pmp_err ),
+ .out_valid_o ( valid_raw ),
+ .out_ready_i ( ready_i ),
+ .out_rdata_o ( rdata_o ),
+ .out_addr_o ( addr_o ),
+ .out_addr_next_o ( addr_next ),
+ .out_err_o ( err_o ),
+ .out_err_plus2_o ( err_plus2_o )
+ );
+ //////////////
+ // Requests //
+ //////////////
+ // Suppress a new request on a not-taken branch (as the external address will be incorrect)
+ assign branch_suppress = branch_spec_i & ~branch_i;
+ // Make a new request any time there is space in the FIFO, and space in the request queue
+ assign valid_new_req = ~branch_suppress & req_i & (fifo_ready | branch_or_mispredict) &
+ ~rdata_outstanding_q[NUM_REQS-1];
+ assign valid_req = valid_req_q | valid_new_req;
+ // If a request address triggers a PMP error, the external bus request is suppressed. We might
+ // therefore never receive a grant for such a request. The grant is faked in this case to make
+ // sure the request proceeds and the error is pushed to the FIFO.
+ assign gnt_or_pmp_err = instr_gnt_i | instr_pmp_err_i;
+ // As with the grant, the rvalid must be faked for a PMP error, since the request was suppressed.
+ assign rvalid_or_pmp_err = rdata_outstanding_q[0] & (instr_rvalid_i | rdata_pmp_err_q[0]);
+ // Hold the request stable for requests that didn't get granted
+ assign valid_req_d = valid_req & ~gnt_or_pmp_err;
+ // Record whether an outstanding bus request is cancelled by a branch
+ assign discard_req_d = valid_req_q & (branch_or_mispredict | discard_req_q);
+ ////////////////
+ // Fetch addr //
+ ////////////////
+ // Two addresses are tracked in the prefetch buffer:
+ // 1. stored_addr_q - This is the address issued on the bus. It stays stable until
+ // the request is granted.
+ // 2. fetch_addr_q - This is our next address to fetch from. It is updated on branches to
+ // capture the new address, and then for each new request issued.
+ // A third address is tracked in the fetch FIFO itself:
+ // 3. instr_addr_q - This is the address at the head of the FIFO, effectively our oldest fetched
+ // address. This address is updated on branches, and does its own increment
+ // each time the FIFO is popped.
+ // 1. stored_addr_q
+ // Only update stored_addr_q for new ungranted requests
+ assign stored_addr_en = valid_new_req & ~valid_req_q & ~gnt_or_pmp_err;
+ // Store whatever address was issued on the bus
+ assign stored_addr_d = instr_addr;
+ // CPU resets with a branch, so no need to reset these addresses
+ always_ff @(posedge clk_i) begin
+ if (stored_addr_en) begin
+ stored_addr_q <= stored_addr_d;
+ end
+ end
+ if (BranchPredictor) begin : g_branch_predictor
+ // Where the branch predictor is present record what address followed a predicted branch. If
+ // that branch is predicted taken but mispredicted (so not-taken) this is used to resume on
+ // the not-taken code path.
+ logic [31:0] branch_mispredict_addr_q;
+ logic branch_mispredict_addr_en;
+ assign branch_mispredict_addr_en = branch_i & predicted_branch_i;
+ always_ff @(posedge clk_i) begin
+ if (branch_mispredict_addr_en) begin
+ branch_mispredict_addr_q <= addr_next;
+ end
+ end
+ assign branch_mispredict_addr = branch_mispredict_addr_q;
+ end else begin : g_no_branch_predictor
+ // Without a branch predictor these inputs are only tied to unused signals and the
+ // mispredict address is constant zero.
+ logic unused_predicted_branch;
+ logic [31:0] unused_addr_next;
+ assign unused_predicted_branch = predicted_branch_i;
+ assign unused_addr_next = addr_next;
+ assign branch_mispredict_addr = '0;
+ end
+ // 2. fetch_addr_q
+ // Update on a branch or as soon as a request is issued
+ assign fetch_addr_en = branch_or_mispredict | (valid_new_req & ~valid_req_q);
+ // Base address (branch target, mispredict address or current fetch address, the latter two
+ // word aligned) plus 4 whenever a new request is issued this cycle
+ assign fetch_addr_d = (branch_i ? addr_i :
+ branch_mispredict_i ? {branch_mispredict_addr[31:2], 2'b00} :
+ {fetch_addr_q[31:2], 2'b00}) +
+ // Current address + 4
+ {{29{1'b0}},(valid_new_req & ~valid_req_q),2'b00};
+ always_ff @(posedge clk_i) begin
+ if (fetch_addr_en) begin
+ fetch_addr_q <= fetch_addr_d;
+ end
+ end
+ // Address mux
+ // Priority: held ungranted request, speculative branch target, mispredict address, then the
+ // sequential fetch address
+ assign instr_addr = valid_req_q ? stored_addr_q :
+ branch_spec_i ? addr_i :
+ branch_mispredict_i ? branch_mispredict_addr :
+ fetch_addr_q;
+ assign instr_addr_w_aligned = {instr_addr[31:2], 2'b00};
+ ///////////////////////////////
+ // Request outstanding queue //
+ ///////////////////////////////
+ for (genvar i = 0; i < NUM_REQS; i++) begin : g_outstanding_reqs
+ // Request 0 (always the oldest outstanding request)
+ if (i == 0) begin : g_req0
+ // A request becomes outstanding once granted, and is cleared once the rvalid is received.
+ // Outstanding requests shift down the queue towards entry 0.
+ assign rdata_outstanding_n[i] = (valid_req & gnt_or_pmp_err) |
+ rdata_outstanding_q[i];
+ // If a branch is received at any point while a request is outstanding, it must be tracked
+ // to ensure we discard the data once received
+ assign branch_discard_n[i] = (valid_req & gnt_or_pmp_err & discard_req_d) |
+ (branch_or_mispredict & rdata_outstanding_q[i]) |
+ branch_discard_q[i];
+ // Record whether this request received a PMP error
+ assign rdata_pmp_err_n[i] = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i) |
+ rdata_pmp_err_q[i];
+ end else begin : g_reqtop
+ // Entries > 0 consider the FIFO fill state to calculate their next state (by checking
+ // whether the previous entry is valid)
+ assign rdata_outstanding_n[i] = (valid_req & gnt_or_pmp_err &
+ rdata_outstanding_q[i-1]) |
+ rdata_outstanding_q[i];
+ assign branch_discard_n[i] = (valid_req & gnt_or_pmp_err & discard_req_d &
+ rdata_outstanding_q[i-1]) |
+ (branch_or_mispredict & rdata_outstanding_q[i]) |
+ branch_discard_q[i];
+ assign rdata_pmp_err_n[i] = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i &
+ rdata_outstanding_q[i-1]) |
+ rdata_pmp_err_q[i];
+ end
+ end
+ // Shift the entries down on each instr_rvalid_i
+ assign rdata_outstanding_s = rvalid_or_pmp_err ? {1'b0,rdata_outstanding_n[NUM_REQS-1:1]} :
+ rdata_outstanding_n;
+ assign branch_discard_s = rvalid_or_pmp_err ? {1'b0,branch_discard_n[NUM_REQS-1:1]} :
+ branch_discard_n;
+ assign rdata_pmp_err_s = rvalid_or_pmp_err ? {1'b0,rdata_pmp_err_n[NUM_REQS-1:1]} :
+ rdata_pmp_err_n;
+ // Push a new entry to the FIFO once complete (and not cancelled by a branch)
+ assign fifo_valid = rvalid_or_pmp_err & ~branch_discard_q[0];
+ // Redirect address handed to the FIFO: the branch target, else the mispredict address
+ assign fifo_addr = branch_i ? addr_i : branch_mispredict_addr;
+ ///////////////
+ // Registers //
+ ///////////////
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ valid_req_q <= 1'b0;
+ discard_req_q <= 1'b0;
+ rdata_outstanding_q <= 'b0;
+ branch_discard_q <= 'b0;
+ rdata_pmp_err_q <= 'b0;
+ end else begin
+ valid_req_q <= valid_req_d;
+ discard_req_q <= discard_req_d;
+ rdata_outstanding_q <= rdata_outstanding_s;
+ branch_discard_q <= branch_discard_s;
+ rdata_pmp_err_q <= rdata_pmp_err_s;
+ end
+ end
+ /////////////
+ // Outputs //
+ /////////////
+ assign instr_req_o = valid_req;
+ assign instr_addr_o = instr_addr_w_aligned;
+ // Valid is suppressed in the cycle a branch mispredict is signalled
+ assign valid_o = valid_raw & ~branch_mispredict_i;
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..a87191a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,489 @@
+/**
+ * Load Store Unit
+ *
+ * Load Store Unit, used to eliminate multiple access during processor stalls,
+ * and to align bytes and halfwords.
+ *
+ * Misaligned word accesses, and half-word accesses that cross a word boundary,
+ * are split into two word-aligned bus transactions. The address of the second
+ * transaction is requested from the ID/EX stage through addr_incr_req_o.
+ *
+ * FSM states (ls_fsm_e):
+ *   IDLE                      - no address phase pending; a new request may start
+ *   WAIT_GNT_MIS              - first half of a split access is waiting for grant
+ *   WAIT_RVALID_MIS           - first half granted; second half is being requested
+ *                               while the first response is awaited
+ *   WAIT_GNT                  - aligned access, or second half of a split access,
+ *                               is waiting for grant
+ *   WAIT_RVALID_MIS_GNTS_DONE - both halves granted, first response still pending
+ */
+module brq_lsu
+(
+    input  logic         clk_i,
+    input  logic         rst_ni,
+    // data interface
+    output logic         data_req_o,
+    input  logic         data_gnt_i,
+    input  logic         data_rvalid_i,
+    input  logic         data_err_i,
+    input  logic         data_pmp_err_i,
+    output logic [31:0]  data_addr_o,
+    output logic         data_we_o,
+    output logic [3:0]   data_be_o,
+    output logic [31:0]  data_wdata_o,
+    input  logic [31:0]  data_rdata_i,
+    // signals to/from ID/EX stage
+    input  logic         lsu_we_i,             // write enable                     -> from ID/EX
+    input  logic [1:0]   lsu_type_i,           // data type: word, half word, byte -> from ID/EX
+    input  logic [31:0]  lsu_wdata_i,          // data to write to memory          -> from ID/EX
+    input  logic         lsu_sign_ext_i,       // sign extension                   -> from ID/EX
+    output logic [31:0]  lsu_rdata_o,          // requested data                   -> to ID/EX
+    output logic         lsu_rdata_valid_o,
+    input  logic         lsu_req_i,            // data request                     -> from ID/EX
+    input  logic [31:0]  adder_result_ex_i,    // address computed in ALU          -> from ID/EX
+    output logic         addr_incr_req_o,      // request address increment for
+                                               // misaligned accesses              -> to ID/EX
+    output logic [31:0]  addr_last_o,          // address of last transaction      -> to controller
+                                               // -> mtval
+                                               // -> AGU for misaligned accesses
+    output logic         lsu_req_done_o,       // Signals that data request is complete
+                                               // (only need to await final data
+                                               // response)                        -> to ID/EX
+    output logic         lsu_resp_valid_o,     // LSU has response from transaction -> to ID/EX
+    // exception signals
+    output logic         load_err_o,
+    output logic         store_err_o,
+    output logic         busy_o,
+    output logic         perf_load_o,
+    output logic         perf_store_o
+);
+  logic [31:0]  data_addr;
+  logic [31:0]  data_addr_w_aligned;
+  logic [31:0]  addr_last_q;
+  logic         addr_update;
+  logic         ctrl_update;
+  logic         rdata_update;
+  logic [31:8]  rdata_q;
+  logic [1:0]   rdata_offset_q;
+  logic [1:0]   data_type_q;
+  logic         data_sign_ext_q;
+  logic         data_we_q;
+  logic [1:0]   data_offset;   // mux control for data to be written to memory
+  logic [3:0]   data_be;
+  logic [31:0]  data_wdata;
+  logic [31:0]  data_rdata_ext;
+  logic [31:0]  rdata_w_ext; // word realignment for misaligned loads
+  logic [31:0]  rdata_h_ext; // sign extension for half words
+  logic [31:0]  rdata_b_ext; // sign extension for bytes
+  logic         split_misaligned_access;
+  logic         handle_misaligned_q, handle_misaligned_d; // high after receiving grant for first
+                                                          // part of a misaligned access
+  logic         pmp_err_q, pmp_err_d;
+  logic         lsu_err_q, lsu_err_d;
+  logic         data_or_pmp_err;
+  // FSM state encoding. The member list was missing; the five states below are
+  // the ones referenced by the FSM (a 3-bit enum is needed for five states).
+  typedef enum logic [2:0]  {
+    IDLE, WAIT_GNT_MIS, WAIT_RVALID_MIS, WAIT_GNT,
+    WAIT_RVALID_MIS_GNTS_DONE
+  } ls_fsm_e;
+  ls_fsm_e ls_fsm_cs, ls_fsm_ns;
+  assign data_addr   = adder_result_ex_i;
+  assign data_offset = data_addr[1:0];
+  ///////////////////
+  // BE generation //
+  ///////////////////
+  // Byte enables are derived from the access size and the byte offset within
+  // the word. For split accesses handle_misaligned_q selects between the
+  // enables of the first and the second bus transaction.
+  // No default branches: every 2-bit selector value is listed explicitly.
+  always_comb begin
+    unique case (lsu_type_i) // Data type 00 Word, 01 Half word, 11,10 byte
+      2'b00: begin // Writing a word
+        if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
+          unique case (data_offset)
+            2'b00:   data_be = 4'b1111;
+            2'b01:   data_be = 4'b1110;
+            2'b10:   data_be = 4'b1100;
+            2'b11:   data_be = 4'b1000;
+          endcase // case (data_offset)
+        end else begin // second part of misaligned transaction
+          unique case (data_offset)
+            2'b00:   data_be = 4'b0000; // this is not used, but included for completeness
+            2'b01:   data_be = 4'b0001;
+            2'b10:   data_be = 4'b0011;
+            2'b11:   data_be = 4'b0111;
+          endcase // case (data_offset)
+        end
+      end
+      2'b01: begin // Writing a half word
+        if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
+          unique case (data_offset)
+            2'b00:   data_be = 4'b0011;
+            2'b01:   data_be = 4'b0110;
+            2'b10:   data_be = 4'b1100;
+            2'b11:   data_be = 4'b1000;
+          endcase // case (data_offset)
+        end else begin // second part of misaligned transaction
+          data_be = 4'b0001;
+        end
+      end
+      2'b10,
+      2'b11: begin // Writing a byte
+        unique case (data_offset)
+          2'b00:   data_be = 4'b0001;
+          2'b01:   data_be = 4'b0010;
+          2'b10:   data_be = 4'b0100;
+          2'b11:   data_be = 4'b1000;
+        endcase // case (data_offset)
+      end
+    endcase // case (lsu_type_i)
+  end
+  /////////////////////
+  // WData alignment //
+  /////////////////////
+  // prepare data to be written to the memory
+  // we handle misaligned accesses, half word and byte accesses here
+  // (the write data is rotated left by data_offset bytes)
+  always_comb begin
+    unique case (data_offset)
+      2'b00:   data_wdata =  lsu_wdata_i[31:0];
+      2'b01:   data_wdata = {lsu_wdata_i[23:0], lsu_wdata_i[31:24]};
+      2'b10:   data_wdata = {lsu_wdata_i[15:0], lsu_wdata_i[31:16]};
+      2'b11:   data_wdata = {lsu_wdata_i[ 7:0], lsu_wdata_i[31: 8]};
+    endcase // case (data_offset)
+  end
+  /////////////////////
+  // RData alignment //
+  /////////////////////
+  // register for unaligned rdata
+  // (upper three bytes of the first response of a split load)
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rdata_q <= '0;
+    end else if (rdata_update) begin
+      rdata_q <= data_rdata_i[31:8];
+    end
+  end
+  // registers for transaction control
+  // (captured when the address phase is accepted, used in the response phase)
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rdata_offset_q  <= 2'h0;
+      data_type_q     <= 2'h0;
+      data_sign_ext_q <= 1'b0;
+      data_we_q       <= 1'b0;
+    end else if (ctrl_update) begin
+      rdata_offset_q  <= data_offset;
+      data_type_q     <= lsu_type_i;
+      data_sign_ext_q <= lsu_sign_ext_i;
+      data_we_q       <= lsu_we_i;
+    end
+  end
+  // Store last address for mtval + AGU for misaligned transactions.
+  // Do not update in case of errors, mtval needs the (first) failing address
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      addr_last_q <= '0;
+    end else if (addr_update) begin
+      addr_last_q <= data_addr;
+    end
+  end
+  // take care of misaligned words
+  // (low bytes come from the stored first response, high bytes from the current one)
+  always_comb begin
+    unique case (rdata_offset_q)
+      2'b00:   rdata_w_ext =  data_rdata_i[31:0];
+      2'b01:   rdata_w_ext = {data_rdata_i[ 7:0], rdata_q[31:8]};
+      2'b10:   rdata_w_ext = {data_rdata_i[15:0], rdata_q[31:16]};
+      2'b11:   rdata_w_ext = {data_rdata_i[23:0], rdata_q[31:24]};
+    endcase
+  end
+  ////////////////////
+  // Sign extension //
+  ////////////////////
+  // sign extension for half words
+  always_comb begin
+    unique case (rdata_offset_q)
+      2'b00: begin
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[15:0]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[15]}}, data_rdata_i[15:0]};
+        end
+      end
+      2'b01: begin
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[23:8]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[23]}}, data_rdata_i[23:8]};
+        end
+      end
+      2'b10: begin
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[31:16]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[31]}}, data_rdata_i[31:16]};
+        end
+      end
+      2'b11: begin
+        // half word straddles two words: upper byte from the current response,
+        // lower byte from the stored first response
+        if (!data_sign_ext_q) begin
+          rdata_h_ext = {16'h0000, data_rdata_i[7:0], rdata_q[31:24]};
+        end else begin
+          rdata_h_ext = {{16{data_rdata_i[7]}}, data_rdata_i[7:0], rdata_q[31:24]};
+        end
+      end
+    endcase // case (rdata_offset_q)
+  end
+  // sign extension for bytes
+  always_comb begin
+    unique case (rdata_offset_q)
+      2'b00: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[7]}}, data_rdata_i[7:0]};
+        end
+      end
+      2'b01: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[15:8]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[15]}}, data_rdata_i[15:8]};
+        end
+      end
+      2'b10: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[23:16]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[23]}}, data_rdata_i[23:16]};
+        end
+      end
+      2'b11: begin
+        if (!data_sign_ext_q) begin
+          rdata_b_ext = {24'h00_0000, data_rdata_i[31:24]};
+        end else begin
+          rdata_b_ext = {{24{data_rdata_i[31]}}, data_rdata_i[31:24]};
+        end
+      end
+    endcase // case (rdata_offset_q)
+  end
+  // select word, half word or byte sign extended version
+  always_comb begin
+    unique case (data_type_q)
+      2'b00:       data_rdata_ext = rdata_w_ext;
+      2'b01:       data_rdata_ext = rdata_h_ext;
+      2'b10,2'b11: data_rdata_ext = rdata_b_ext;
+    endcase // case (data_type_q)
+  end
+  /////////////
+  // LSU FSM //
+  /////////////
+  // check for misaligned accesses that need to be split into two word-aligned accesses
+  assign split_misaligned_access =
+      ((lsu_type_i == 2'b00) && (data_offset != 2'b00)) || // misaligned word access
+      ((lsu_type_i == 2'b01) && (data_offset == 2'b11));   // misaligned half-word access
+  // FSM
+  always_comb begin
+    // defaults: hold state and registered flags, no request, no updates
+    ls_fsm_ns           = ls_fsm_cs;
+    data_req_o          = 1'b0;
+    addr_incr_req_o     = 1'b0;
+    handle_misaligned_d = handle_misaligned_q;
+    pmp_err_d           = pmp_err_q;
+    lsu_err_d           = lsu_err_q;
+    addr_update         = 1'b0;
+    ctrl_update         = 1'b0;
+    rdata_update        = 1'b0;
+    perf_load_o         = 1'b0;
+    perf_store_o        = 1'b0;
+    unique case (ls_fsm_cs)
+      IDLE: begin
+        pmp_err_d = 1'b0;
+        if (lsu_req_i) begin
+          data_req_o   = 1'b1;
+          pmp_err_d    = data_pmp_err_i;
+          lsu_err_d    = 1'b0;
+          perf_load_o  = ~lsu_we_i;
+          perf_store_o = lsu_we_i;
+          if (data_gnt_i) begin
+            ctrl_update         = 1'b1;
+            addr_update         = 1'b1;
+            handle_misaligned_d = split_misaligned_access;
+            ls_fsm_ns           = split_misaligned_access ? WAIT_RVALID_MIS : IDLE;
+          end else begin
+            ls_fsm_ns           = split_misaligned_access ? WAIT_GNT_MIS    : WAIT_GNT;
+          end
+        end
+      end
+      WAIT_GNT_MIS: begin
+        data_req_o = 1'b1;
+        // data_pmp_err_i is valid during the address phase of a request. An error will block the
+        // external request and so a data_gnt_i might never be signalled. The registered version
+        // pmp_err_q is only updated for new address phases and so can be used in WAIT_GNT* and
+        // WAIT_RVALID* states
+        if (data_gnt_i || pmp_err_q) begin
+          addr_update         = 1'b1;
+          ctrl_update         = 1'b1;
+          handle_misaligned_d = 1'b1;
+          ls_fsm_ns           = WAIT_RVALID_MIS;
+        end
+      end
+      // Case label restored: IDLE and WAIT_GNT_MIS both transition to this state.
+      WAIT_RVALID_MIS: begin
+        // push out second request
+        data_req_o = 1'b1;
+        // tell ID/EX stage to update the address
+        addr_incr_req_o = 1'b1;
+        // first part rvalid is received, or gets a PMP error
+        if (data_rvalid_i || pmp_err_q) begin
+          // Update the PMP error for the second part
+          pmp_err_d = data_pmp_err_i;
+          // Record the error status of the first part
+          lsu_err_d = data_err_i | pmp_err_q;
+          // Capture the first rdata for loads
+          rdata_update = ~data_we_q;
+          // If already granted, wait for second rvalid
+          ls_fsm_ns = data_gnt_i ? IDLE : WAIT_GNT;
+          // Update the address for the second part, if no error
+          addr_update = data_gnt_i & ~(data_err_i | pmp_err_q);
+          // clear handle_misaligned if second request is granted
+          handle_misaligned_d = ~data_gnt_i;
+        end else begin
+          // first part rvalid is NOT received
+          if (data_gnt_i) begin
+            // second grant is received
+            // (transition restored: without it the state below is unreachable
+            // and the second request would be issued again)
+            ls_fsm_ns           = WAIT_RVALID_MIS_GNTS_DONE;
+            handle_misaligned_d = 1'b0;
+          end
+        end
+      end
+      WAIT_GNT: begin
+        // tell ID/EX stage to update the address
+        addr_incr_req_o = handle_misaligned_q;
+        data_req_o      = 1'b1;
+        if (data_gnt_i || pmp_err_q) begin
+          ctrl_update         = 1'b1;
+          // Update the address, unless there was an error
+          addr_update         = ~lsu_err_q;
+          ls_fsm_ns           = IDLE;
+          handle_misaligned_d = 1'b0;
+        end
+      end
+      // Case label restored: both halves are granted, first response outstanding.
+      WAIT_RVALID_MIS_GNTS_DONE: begin
+        // tell ID/EX stage to update the address (to make sure the
+        // second address can be captured correctly for mtval and PMP checking)
+        addr_incr_req_o = 1'b1;
+        // Wait for the first rvalid, second request is already granted
+        if (data_rvalid_i) begin
+          // Update the pmp error for the second part
+          pmp_err_d = data_pmp_err_i;
+          // The first part cannot see a PMP error in this state
+          lsu_err_d = data_err_i;
+          // Now we can update the address for the second part if no error
+          addr_update = ~data_err_i;
+          // Capture the first rdata for loads
+          rdata_update = ~data_we_q;
+          // Wait for second rvalid
+          ls_fsm_ns = IDLE;
+        end
+      end
+      default: begin
+        ls_fsm_ns = IDLE;
+      end
+    endcase
+  end
+  // The request is done in the cycle in which the FSM (re-)enters IDLE
+  assign lsu_req_done_o = (lsu_req_i | (ls_fsm_cs != IDLE)) & (ls_fsm_ns == IDLE);
+  // registers for FSM
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ls_fsm_cs           <= IDLE;
+      handle_misaligned_q <= '0;
+      pmp_err_q           <= '0;
+      lsu_err_q           <= '0;
+    end else begin
+      ls_fsm_cs           <= ls_fsm_ns;
+      handle_misaligned_q <= handle_misaligned_d;
+      pmp_err_q           <= pmp_err_d;
+      lsu_err_q           <= lsu_err_d;
+    end
+  end
+  /////////////
+  // Outputs //
+  /////////////
+  // Error of either half of the access (stored first-half error, current bus
+  // error or registered PMP error)
+  assign data_or_pmp_err    = lsu_err_q | data_err_i | pmp_err_q;
+  assign lsu_resp_valid_o   = (data_rvalid_i | pmp_err_q) & (ls_fsm_cs == IDLE);
+  assign lsu_rdata_valid_o  = (ls_fsm_cs == IDLE) & data_rvalid_i & ~data_or_pmp_err & ~data_we_q;
+  // output to register file
+  assign lsu_rdata_o = data_rdata_ext;
+  // output data address must be word aligned
+  assign data_addr_w_aligned = {data_addr[31:2], 2'b00};
+  // output to data interface
+  assign data_addr_o   = data_addr_w_aligned;
+  assign data_wdata_o  = data_wdata;
+  assign data_we_o     = lsu_we_i;
+  assign data_be_o     = data_be;
+  // output to ID stage: mtval + AGU for misaligned transactions
+  assign addr_last_o   = addr_last_q;
+  // Signal a load or store error depending on the transaction type outstanding
+  assign load_err_o    = data_or_pmp_err & ~data_we_q & lsu_resp_valid_o;
+  assign store_err_o   = data_or_pmp_err &  data_we_q & lsu_resp_valid_o;
+  assign busy_o = (ls_fsm_cs != IDLE);
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..fa44b1d
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,535 @@
+// Copyright lowRISC contributors.
+// Copyright 2017 ETH Zurich and University of Bologna, see also
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Package with constants used by Ibex
+ */
+package brq_pkg;
+// Parameter Enums //
+// Integer-valued enums intended for use as configuration parameter values.
+// The meaning of each member is not visible in this package; the notes below
+// are inferred from the member names only and should be confirmed against
+// the modules that consume them.
+// regfile_e: presumably selects the register file implementation
+// (flip-flop / FPGA / latch based) - TODO confirm.
+typedef enum integer {
+ RegFileFF = 0,
+ RegFileFPGA = 1,
+ RegFileLatch = 2
+} regfile_e;
+// rv32m_e: presumably selects the multiplier/divider (M extension) variant,
+// with RV32MNone meaning "not implemented" - TODO confirm.
+typedef enum integer {
+ RV32MNone = 0,
+ RV32MSlow = 1,
+ RV32MFast = 2,
+ RV32MSingleCycle = 3
+} rv32m_e;
+// rv32b_e: presumably selects the bit-manipulation (B extension) subset - TODO confirm.
+typedef enum integer {
+ RV32BNone = 0,
+ RV32BBalanced = 1,
+ RV32BFull = 2
+} rv32b_e;
+// floating point
+// rvfloat_e: presumably selects the floating-point configuration - TODO confirm.
+// The quad-precision member is commented out, so value 3 is currently unassigned.
+typedef enum integer {
+ RV32FNone = 0,
+ RV32FSingle = 1,
+ RV64FDouble = 2
+ // RV32FQuad = 3
+} rvfloat_e;
+// Opcodes //
+typedef enum logic [6:0] {
+ OPCODE_LOAD = 7'h03,
+ OPCODE_MISC_MEM = 7'h0f,
+ OPCODE_OP_IMM = 7'h13,
+ OPCODE_AUIPC = 7'h17,
+ OPCODE_STORE = 7'h23,
+ OPCODE_OP = 7'h33,
+ OPCODE_LUI = 7'h37,
+ OPCODE_BRANCH = 7'h63,
+ OPCODE_JALR = 7'h67,
+ OPCODE_JAL = 7'h6f,
+ OPCODE_SYSTEM = 7'h73,
+ // Floating Point
+ OPCODE_LOAD_FP = 7'h07,
+ OPCODE_STORE_FP = 7'h27,
+ OPCODE_MADD_FP = 7'h43,
+ OPCODE_MSUB_FP = 7'h47,
+ OPCODE_NMSUB_FP = 7'h4b,
+ OPCODE_NMADD_FP = 7'h4f,
+ OPCODE_OP_FP = 7'h53
+} opcode_e;
+// ALU operations //
+typedef enum logic [5:0] {
+ // Arithmetics
+ // Logics
+ // RV32B
+ // Shifts
+ // RV32B
+ // Comparisons
+ // RV32B
+ // Pack
+ // RV32B
+ // Sign-Extend
+ // RV32B
+ // Bitcounting
+ // RV32B
+ // Set lower than
+ // Ternary Bitmanip Operations
+ // RV32B
+ // Single-Bit Operations
+ // RV32B
+ // Bit Extract / Deposit
+ // RV32B
+ // Bit Field Place
+ // RV32B
+ // Carry-less Multiply
+ // RV32B
+ // Cyclic Redundancy Check
+ ALU_CRC32_B,
+ ALU_CRC32_H,
+ ALU_CRC32_W,
+} alu_op_e;
+typedef enum logic [1:0] {
+ // Multiplier/divider
+} md_op_e;
+// define which type instruction
+// is catered
+typedef enum logic {
+} fp_type_e;
+// Control and status registers //
+// CSR operations
+typedef enum logic [1:0] {
+} csr_op_e;
+// Privileged mode
+typedef enum logic[1:0] {
+ PRIV_LVL_M = 2'b11,
+ PRIV_LVL_H = 2'b10,
+ PRIV_LVL_S = 2'b01,
+ PRIV_LVL_U = 2'b00
+} priv_lvl_e;
+// Constants for the dcsr.xdebugver fields
+typedef enum logic[3:0] {
+ XDEBUGVER_NO = 4'd0, // no external debug support
+ XDEBUGVER_STD = 4'd4, // external debug according to RISC-V debug spec
+ XDEBUGVER_NONSTD = 4'd15 // debug not conforming to RISC-V debug spec
+} x_debug_ver_e;
+// WB stage //
+// Type of instruction present in writeback stage
+typedef enum logic[1:0] {
+ WB_INSTR_LOAD, // Instruction is awaiting load data
+ WB_INSTR_STORE, // Instruction is awaiting store response
+ WB_INSTR_OTHER // Instruction doesn't fit into above categories
+} wb_instr_type_e;
+// ID stage //
+// Operand a selection
+typedef enum logic[1:0] {
+} op_a_sel_e;
+// Immediate a selection
+typedef enum logic {
+ IMM_A_Z,
+} imm_a_sel_e;
+// Operand b selection
+typedef enum logic {
+} op_b_sel_e;
+// Immediate b selection
+typedef enum logic [2:0] {
+ IMM_B_I,
+ IMM_B_S,
+ IMM_B_B,
+ IMM_B_U,
+ IMM_B_J,
+} imm_b_sel_e;
+// Regfile write data selection
+typedef enum logic {
+} rf_wd_sel_e;
+// IF stage //
+// PC mux selection
+typedef enum logic [2:0] {
+} pc_sel_e;
+// Exception PC mux selection
+typedef enum logic [1:0] {
+ EXC_PC_DBG_EXC // Exception while in debug mode
+} exc_pc_sel_e;
+// Interrupt requests
+typedef struct packed {
+ logic irq_software;
+ logic irq_timer;
+ logic irq_external;
+ logic [14:0] irq_fast; // 15 fast interrupts,
+ // one interrupt is reserved for NMI (not visible through mip/mie)
+} irqs_t;
+// Exception cause
+typedef enum logic [5:0] {
+ EXC_CAUSE_IRQ_SOFTWARE_M = {1'b1, 5'd03},
+ EXC_CAUSE_IRQ_TIMER_M = {1'b1, 5'd07},
+ EXC_CAUSE_IRQ_EXTERNAL_M = {1'b1, 5'd11},
+ // EXC_CAUSE_IRQ_FAST_0 = {1'b1, 5'd16},
+ // EXC_CAUSE_IRQ_FAST_14 = {1'b1, 5'd30},
+ EXC_CAUSE_IRQ_NM = {1'b1, 5'd31}, // == EXC_CAUSE_IRQ_FAST_15
+ EXC_CAUSE_INSN_ADDR_MISA = {1'b0, 5'd00},
+ EXC_CAUSE_ILLEGAL_INSN = {1'b0, 5'd02},
+ EXC_CAUSE_BREAKPOINT = {1'b0, 5'd03},
+ EXC_CAUSE_LOAD_ACCESS_FAULT = {1'b0, 5'd05},
+ EXC_CAUSE_ECALL_UMODE = {1'b0, 5'd08},
+ EXC_CAUSE_ECALL_MMODE = {1'b0, 5'd11}
+} exc_cause_e;
+// Debug cause
+typedef enum logic [2:0] {
+ DBG_CAUSE_NONE = 3'h0,
+} dbg_cause_e;
+// PMP constants
+// Upper bound on the number of PMP regions and width of one configuration field.
+// NOTE(review): pmp_cfg_t below packs 6 bits while PMP_CFG_W is 8 - the two
+// remaining bits are presumably reserved bits of the CSR layout; confirm.
+parameter int unsigned PMP_MAX_REGIONS = 16;
+parameter int unsigned PMP_CFG_W = 8;
+// PMP access type
+// PMP_I / PMP_D: presumably the channel indices of the instruction-side and
+// data-side checkers - TODO confirm against the instantiating module.
+parameter int unsigned PMP_I = 0;
+parameter int unsigned PMP_D = 1;
+// Kind of access presented to a PMP channel. brq_pmp compares the request
+// type against these values to pick the exec / write / read permission bit.
+typedef enum logic [1:0] {
+ PMP_ACC_EXEC = 2'b00,
+ PMP_ACC_WRITE = 2'b01,
+ PMP_ACC_READ = 2'b10
+} pmp_req_e;
+// PMP cfg structures
+// Address-matching mode of a region; brq_pmp treats OFF as "never matches",
+// NA4 and NAPOT as masked equality and TOR as a range comparison.
+typedef enum logic [1:0] {
+ PMP_MODE_OFF = 2'b00,
+ PMP_MODE_TOR = 2'b01,
+ PMP_MODE_NA4 = 2'b10,
+ PMP_MODE_NAPOT = 2'b11
+} pmp_cfg_mode_e;
+// Per-region configuration: lock bit, matching mode and the three permission
+// bits (packed, most significant field first).
+typedef struct packed {
+ logic lock;
+ pmp_cfg_mode_e mode;
+ logic exec;
+ logic write;
+ logic read;
+} pmp_cfg_t;
+// CSRs
+typedef enum logic[11:0] {
+ // Machine information
+ CSR_MHARTID = 12'hF14,
+ // Machine trap setup
+ CSR_MSTATUS = 12'h300,
+ CSR_MISA = 12'h301,
+ CSR_MIE = 12'h304,
+ CSR_MTVEC = 12'h305,
+ // Machine trap handling
+ CSR_MSCRATCH = 12'h340,
+ CSR_MEPC = 12'h341,
+ CSR_MCAUSE = 12'h342,
+ CSR_MTVAL = 12'h343,
+ CSR_MIP = 12'h344,
+ // Physical memory protection
+ CSR_PMPCFG0 = 12'h3A0,
+ CSR_PMPCFG1 = 12'h3A1,
+ CSR_PMPCFG2 = 12'h3A2,
+ CSR_PMPCFG3 = 12'h3A3,
+ CSR_PMPADDR0 = 12'h3B0,
+ CSR_PMPADDR1 = 12'h3B1,
+ CSR_PMPADDR2 = 12'h3B2,
+ CSR_PMPADDR3 = 12'h3B3,
+ CSR_PMPADDR4 = 12'h3B4,
+ CSR_PMPADDR5 = 12'h3B5,
+ CSR_PMPADDR6 = 12'h3B6,
+ CSR_PMPADDR7 = 12'h3B7,
+ CSR_PMPADDR8 = 12'h3B8,
+ CSR_PMPADDR9 = 12'h3B9,
+ CSR_PMPADDR10 = 12'h3BA,
+ CSR_PMPADDR11 = 12'h3BB,
+ CSR_PMPADDR12 = 12'h3BC,
+ CSR_PMPADDR13 = 12'h3BD,
+ CSR_PMPADDR14 = 12'h3BE,
+ CSR_PMPADDR15 = 12'h3BF,
+ // Debug trigger
+ CSR_TSELECT = 12'h7A0,
+ CSR_TDATA1 = 12'h7A1,
+ CSR_TDATA2 = 12'h7A2,
+ CSR_TDATA3 = 12'h7A3,
+ CSR_MCONTEXT = 12'h7A8,
+ // Debug/trace
+ CSR_DCSR = 12'h7b0,
+ CSR_DPC = 12'h7b1,
+ // Debug
+ CSR_DSCRATCH0 = 12'h7b2, // optional
+ CSR_DSCRATCH1 = 12'h7b3, // optional
+ // Machine Counter/Timers
+ CSR_MHPMEVENT3 = 12'h323,
+ CSR_MHPMEVENT4 = 12'h324,
+ CSR_MHPMEVENT5 = 12'h325,
+ CSR_MHPMEVENT6 = 12'h326,
+ CSR_MHPMEVENT7 = 12'h327,
+ CSR_MHPMEVENT8 = 12'h328,
+ CSR_MHPMEVENT9 = 12'h329,
+ CSR_MHPMEVENT10 = 12'h32A,
+ CSR_MHPMEVENT11 = 12'h32B,
+ CSR_MHPMEVENT12 = 12'h32C,
+ CSR_MHPMEVENT13 = 12'h32D,
+ CSR_MHPMEVENT14 = 12'h32E,
+ CSR_MHPMEVENT15 = 12'h32F,
+ CSR_MHPMEVENT16 = 12'h330,
+ CSR_MHPMEVENT17 = 12'h331,
+ CSR_MHPMEVENT18 = 12'h332,
+ CSR_MHPMEVENT19 = 12'h333,
+ CSR_MHPMEVENT20 = 12'h334,
+ CSR_MHPMEVENT21 = 12'h335,
+ CSR_MHPMEVENT22 = 12'h336,
+ CSR_MHPMEVENT23 = 12'h337,
+ CSR_MHPMEVENT24 = 12'h338,
+ CSR_MHPMEVENT25 = 12'h339,
+ CSR_MHPMEVENT26 = 12'h33A,
+ CSR_MHPMEVENT27 = 12'h33B,
+ CSR_MHPMEVENT28 = 12'h33C,
+ CSR_MHPMEVENT29 = 12'h33D,
+ CSR_MHPMEVENT30 = 12'h33E,
+ CSR_MHPMEVENT31 = 12'h33F,
+ CSR_MCYCLE = 12'hB00,
+ CSR_MINSTRET = 12'hB02,
+ CSR_MHPMCOUNTER16 = 12'hB10,
+ CSR_MHPMCOUNTER17 = 12'hB11,
+ CSR_MHPMCOUNTER18 = 12'hB12,
+ CSR_MHPMCOUNTER19 = 12'hB13,
+ CSR_MHPMCOUNTER20 = 12'hB14,
+ CSR_MHPMCOUNTER21 = 12'hB15,
+ CSR_MHPMCOUNTER22 = 12'hB16,
+ CSR_MHPMCOUNTER23 = 12'hB17,
+ CSR_MHPMCOUNTER24 = 12'hB18,
+ CSR_MHPMCOUNTER25 = 12'hB19,
+ CSR_MCYCLEH = 12'hB80,
+ CSR_MINSTRETH = 12'hB82,
+ CSR_CPUCTRL = 12'h7C0,
+ // Floating point fcsr
+ CSR_FCSR = 12'h003,
+ CSR_FRM = 12'h002,
+ CSR_FFLAG = 12'h001
+} csr_num_e;
+// CSR pmp-related offsets
+parameter logic [11:0] CSR_OFF_PMP_CFG = 12'h3A0; // pmp_cfg @ 12'h3a0 - 12'h3a3
+parameter logic [11:0] CSR_OFF_PMP_ADDR = 12'h3B0; // pmp_addr @ 12'h3b0 - 12'h3bf
+// CSR status bits
+parameter int unsigned CSR_MSTATUS_MIE_BIT = 3;
+parameter int unsigned CSR_MSTATUS_MPIE_BIT = 7;
+parameter int unsigned CSR_MSTATUS_MPP_BIT_LOW = 11;
+parameter int unsigned CSR_MSTATUS_MPP_BIT_HIGH = 12;
+parameter int unsigned CSR_MSTATUS_MPRV_BIT = 17;
+parameter int unsigned CSR_MSTATUS_TW_BIT = 21;
+// CSR machine ISA
+parameter logic [1:0] CSR_MISA_MXL = 2'd1; // M-XLEN: XLEN in M-Mode for RV32
+// CSR interrupt pending/enable bits
+parameter int unsigned CSR_MSIX_BIT = 3;
+parameter int unsigned CSR_MTIX_BIT = 7;
+parameter int unsigned CSR_MEIX_BIT = 11;
+parameter int unsigned CSR_MFIX_BIT_LOW = 16;
+parameter int unsigned CSR_MFIX_BIT_HIGH = 30;
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..0a5e800
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,125 @@
+// Physical Memory Protection checker.
+//
+// For every access channel the requested address is compared against all
+// implemented regions. A region matches according to its mode (OFF / TOR /
+// NA4 / NAPOT); the lowest-numbered matching region decides whether the
+// access faults. Without any match, M-mode accesses are allowed and all other
+// privilege levels are denied. The module is purely combinational; clk_i and
+// rst_ni are not used by the logic below.
+module brq_pmp #(
+    // Granularity of NAPOT access,
+    // 0 = No restriction, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte, etc.
+    parameter int unsigned PMPGranularity = 0,
+    // Number of access channels (e.g. i-side + d-side)
+    parameter int unsigned PMPNumChan     = 2,
+    // Number of implemented regions
+    parameter int unsigned PMPNumRegions  = 4
+) (
+    // Clock and Reset
+    input  logic                    clk_i,
+    input  logic                    rst_ni,
+    // Interface to CSRs
+    input  brq_pkg::pmp_cfg_t       csr_pmp_cfg_i  [PMPNumRegions],
+    input  logic [33:0]             csr_pmp_addr_i [PMPNumRegions],
+    input  brq_pkg::priv_lvl_e      priv_mode_i    [PMPNumChan],
+    // Access checking channels
+    input  logic [33:0]             pmp_req_addr_i [PMPNumChan],
+    input  brq_pkg::pmp_req_e       pmp_req_type_i [PMPNumChan],
+    output logic                    pmp_req_err_o  [PMPNumChan]
+);
+  import brq_pkg::*;
+  // Lowest address bit that takes part in the NAPOT "all lower bits set"
+  // reduction. Address bits [1:0] lie below the 4-byte PMP resolution and must
+  // never enter the reduction. The previous lower bound, PMPGranularity+1,
+  // evaluates to 1 for PMPGranularity == 0 and therefore pulled in bit 1.
+  // If that bit is driven as constant zero by the CSR block (assumed - TODO
+  // confirm), every mask bit above bit 2 stayed set and NAPOT regions larger
+  // than 8 bytes never matched.
+  localparam int unsigned NapotMaskLsb = (PMPGranularity == 0) ? 2 : PMPGranularity + 1;
+  // Access Checking Signals
+  logic [33:0]                                region_start_addr [PMPNumRegions];
+  logic [33:PMPGranularity+2]                 region_addr_mask  [PMPNumRegions];
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_gt;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_lt;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_eq;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_match_all;
+  logic [PMPNumChan-1:0][PMPNumRegions-1:0]   region_perm_check;
+  logic [PMPNumChan-1:0]                      access_fault;
+  // ---------------
+  // Access checking
+  // ---------------
+  for (genvar r = 0; r < PMPNumRegions; r++) begin : g_addr_exp
+    // Start address for TOR matching
+    // (region 0 starts at address zero, region r at the address of region r-1)
+    if (r == 0) begin : g_entry0
+      assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? 34'h000000000 :
+                                                                              csr_pmp_addr_i[r];
+    end else begin : g_oth
+      assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? csr_pmp_addr_i[r-1] :
+                                                                              csr_pmp_addr_i[r];
+    end
+    // Address mask for NA matching
+    for (genvar b = PMPGranularity+2; b < 34; b++) begin : g_bitmask
+      if (b == 2) begin : g_bit0
+        // Always mask bit 2 for NAPOT
+        assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT);
+      end else begin : g_others
+        // We will mask this bit if it is within the programmed granule
+        // i.e. addr = yyyy 0111
+        //                  ^
+        //                  | This bit pos is the top of the mask, all lower bits set
+        // thus mask = 1111 0000
+        assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT) |
+                                        ~&csr_pmp_addr_i[r][b-1:NapotMaskLsb];
+      end
+    end
+  end
+  for (genvar c = 0; c < PMPNumChan; c++) begin : g_access_check
+    for (genvar r = 0; r < PMPNumRegions; r++) begin : g_regions
+      // Comparators are sized according to granularity
+      assign region_match_eq[c][r] = (pmp_req_addr_i[c][33:PMPGranularity+2] &
+                                      region_addr_mask[r]) ==
+                                     (region_start_addr[r][33:PMPGranularity+2] &
+                                      region_addr_mask[r]);
+      assign region_match_gt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] >
+                                     region_start_addr[r][33:PMPGranularity+2];
+      assign region_match_lt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] <
+                                     csr_pmp_addr_i[r][33:PMPGranularity+2];
+      // Combine the comparator results according to the region's mode
+      always_comb begin
+        region_match_all[c][r] = 1'b0;
+        unique case (csr_pmp_cfg_i[r].mode)
+          PMP_MODE_OFF   : region_match_all[c][r] = 1'b0;
+          PMP_MODE_NA4   : region_match_all[c][r] = region_match_eq[c][r];
+          PMP_MODE_NAPOT : region_match_all[c][r] = region_match_eq[c][r];
+          PMP_MODE_TOR   : begin
+            // start <= addr < top
+            region_match_all[c][r] = (region_match_eq[c][r] | region_match_gt[c][r]) &
+                                     region_match_lt[c][r];
+          end
+          default        : region_match_all[c][r] = 1'b0;
+        endcase
+      end
+      // Check specific required permissions
+      assign region_perm_check[c][r] =
+          ((pmp_req_type_i[c] == PMP_ACC_EXEC)  & csr_pmp_cfg_i[r].exec) |
+          ((pmp_req_type_i[c] == PMP_ACC_WRITE) & csr_pmp_cfg_i[r].write) |
+          ((pmp_req_type_i[c] == PMP_ACC_READ)  & csr_pmp_cfg_i[r].read);
+    end
+    // Access fault determination / prioritization
+    always_comb begin
+      // Default is allow for M-mode, deny for other modes
+      access_fault[c] = (priv_mode_i[c] != PRIV_LVL_M);
+      // PMP entries are statically prioritized, from 0 to N-1
+      // The lowest-numbered PMP entry which matches an address determines accessibility
+      // (the loop runs from N-1 down to 0 so that the last assignment, i.e. the
+      // lowest-numbered matching region, wins)
+      for (int r = PMPNumRegions-1; r >= 0; r--) begin
+        if (region_match_all[c][r]) begin
+          access_fault[c] = (priv_mode_i[c] == PRIV_LVL_M) ?
+              // For M-mode, any region which matches with the L-bit clear, or with sufficient
+              // access permissions will be allowed
+              (csr_pmp_cfg_i[r].lock & ~region_perm_check[c][r]) :
+              // For other modes, the lock bit doesn't matter
+              ~region_perm_check[c][r];
+        end
+      end
+    end
+    assign pmp_req_err_o[c] = access_fault[c];
+  end
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..9aa643e
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,98 @@
+/**
+ * RISC-V register file
+ *
+ * Register file with 31 or 15x 32 bit wide registers. Register 0 is fixed to 0.
+ * This register file is based on flip flops. Use this register file when
+ * targeting FPGA synthesis or Verilator simulation.
+ *
+ * Two combinational read ports (A, B) and one synchronous write port.
+ * Parameters:
+ *   RV32E             - when set, only 16 words are implemented (4 address bits)
+ *   DataWidth         - width of each register
+ *   DummyInstructions - when set, register 0 is backed by a real flop that is
+ *                       written and read only while dummy_instr_id_i is high
+ */
+module brq_register_file_ff #(
+    parameter bit          RV32E             = 0,
+    parameter int unsigned DataWidth         = 32,
+    parameter bit          DummyInstructions = 0
+) (
+    // Clock and Reset
+    input  logic                 clk_i,
+    input  logic                 rst_ni,
+    input  logic                 test_en_i,
+    input  logic                 dummy_instr_id_i,
+    //Read port R1
+    input  logic [4:0]           raddr_a_i,
+    output logic [DataWidth-1:0] rdata_a_o,
+    //Read port R2
+    input  logic [4:0]           raddr_b_i,
+    output logic [DataWidth-1:0] rdata_b_o,
+    // Write port W1
+    input  logic [4:0]           waddr_a_i,
+    input  logic [DataWidth-1:0] wdata_a_i,
+    input  logic                 we_a_i
+);
+  localparam int unsigned ADDR_WIDTH = RV32E ? 4 : 5;
+  localparam int unsigned NUM_WORDS  = 2**ADDR_WIDTH;
+  // rf_reg is the read view (including entry 0); rf_reg_q holds the flops 1..N-1
+  logic [NUM_WORDS-1:0][DataWidth-1:0] rf_reg;
+  logic [NUM_WORDS-1:1][DataWidth-1:0] rf_reg_q;
+  logic [NUM_WORDS-1:1]                we_a_dec;
+  // One-hot write enable: only the addressed register sees we_a_i
+  always_comb begin : we_a_decoder
+    for (int unsigned i = 1; i < NUM_WORDS; i++) begin
+      we_a_dec[i] = (waddr_a_i == 5'(i)) ? we_a_i : 1'b0;
+    end
+  end
+  // No flops for R0 as it's hard-wired to 0
+  for (genvar i = 1; i < NUM_WORDS; i++) begin : g_rf_flops
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        rf_reg_q[i] <= '0;
+      end else if(we_a_dec[i]) begin
+        rf_reg_q[i] <= wdata_a_i;
+      end
+    end
+  end
+  // With dummy instructions enabled, R0 behaves as a real register but will always return 0 for
+  // real instructions.
+  if (DummyInstructions) begin : g_dummy_r0
+    logic                 we_r0_dummy;
+    logic [DataWidth-1:0] rf_r0_q;
+    // Write enable for dummy R0 register (waddr_a_i will always be 0 for dummy instructions)
+    assign we_r0_dummy = we_a_i & dummy_instr_id_i;
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        rf_r0_q <= '0;
+      end else if (we_r0_dummy) begin
+        rf_r0_q <= wdata_a_i;
+      end
+    end
+    // Output the dummy data for dummy instructions, otherwise R0 reads as zero
+    assign rf_reg[0] = dummy_instr_id_i ? rf_r0_q : '0;
+  end else begin : g_normal_r0
+    logic unused_dummy_instr_id;
+    assign unused_dummy_instr_id = dummy_instr_id_i;
+    // R0 is nil
+    assign rf_reg[0] = '0;
+  end
+  assign rf_reg[NUM_WORDS-1:1] = rf_reg_q[NUM_WORDS-1:1];
+  // Combinational reads. The read addresses are always 5 bits wide, also when
+  // RV32E reduces the register file to 16 words.
+  assign rdata_a_o = rf_reg[raddr_a_i];
+  assign rdata_b_o = rf_reg[raddr_b_i];
+  // Signal not used in FF register file
+  logic unused_test_en;
+  assign unused_test_en = test_en_i;
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..95e801c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,219 @@
+ * Writeback Stage
+ *
+ * Writeback is an optional third pipeline stage. It writes data back to the register file that was
+ * produced in the ID/EX stage or awaits a response to a load/store (LSU writes direct to register
+ * file for load data). If the writeback stage is not present (WritebackStage == 0) this acts as
+ * a simple passthrough to write data direct to the register file.
+ */
+module brq_wbu #(
+ // 1: register the ID/EX results in a writeback stage; 0: pass them straight through
+ parameter bit WritebackStage = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ // Instruction entering writeback from ID/EX
+ input logic en_wb_i,
+ input brq_pkg::wb_instr_type_e instr_type_wb_i,
+ input logic [31:0] pc_id_i,
+ input logic instr_is_compressed_id_i,
+ input logic instr_perf_count_id_i,
+ // Stage status
+ output logic ready_wb_o,
+ output logic rf_write_wb_o,
+ output logic outstanding_load_wb_o,
+ output logic outstanding_store_wb_o,
+ output logic [31:0] pc_wb_o,
+ output logic perf_instr_ret_wb_o,
+ output logic perf_instr_ret_compressed_wb_o,
+ // Register file write request produced in ID/EX
+ input logic [4:0] rf_waddr_id_i,
+ input logic [31:0] rf_wdata_id_i,
+ input logic rf_we_id_i,
+ // Register file write request produced by the LSU (load data)
+ input logic [31:0] rf_wdata_lsu_i,
+ input logic rf_we_lsu_i,
+ // Forwarding data and the merged integer register file write port
+ output logic [31:0] rf_wdata_fwd_wb_o,
+ output logic [4:0] rf_waddr_wb_o,
+ output logic [31:0] rf_wdata_wb_o,
+ output logic rf_we_wb_o,
+ input logic lsu_resp_valid_i,
+ input logic lsu_resp_err_i,
+ output logic instr_done_wb_o,
+ // floating point
+ output logic fp_rf_write_wb_o,
+ output logic fp_rf_wen_wb_o,
+ output logic [4:0] fp_rf_waddr_wb_o,
+ // NOTE(review): fp_rf_waddr_id_i is not read anywhere in this module; the FP write address
+ // reuses rf_waddr_id_i.
+ input logic [4:0] fp_rf_waddr_id_i,
+ input logic fp_rf_wen_id_i,
+ output logic [31:0] fp_rf_wdata_wb_o,
+ // Selects whether LSU load data goes to the FP (1) or integer (0) register file. The module
+ // only reads this signal, so it is an input; it was previously declared as an output, which
+ // left it without a driver.
+ input logic fp_load_i
+ import brq_pkg::*;
+ // 0 == RF write from ID
+ // 1 == RF write from LSU
+ logic [31:0] rf_wdata_wb_mux[2];
+ logic [1:0] rf_wdata_wb_mux_we;
+ logic [31:0] fp_rf_wdata_wb_mux[2];
+ logic [1:0] fp_rf_wdata_wb_mux_we;
+ if(WritebackStage) begin : g_writeback_stage
+ logic [31:0] rf_wdata_wb_q;
+ logic rf_we_wb_q;
+ logic [4:0] rf_waddr_wb_q;
+ logic wb_done;
+ logic wb_valid_q;
+ logic [31:0] wb_pc_q;
+ logic wb_compressed_q;
+ logic wb_count_q;
+ wb_instr_type_e wb_instr_type_q;
+ logic wb_valid_d;
+ // floating point
+ //logic [31:0] fp_rf_wdata_wb_q;
+ logic fp_rf_we_wb_q;
+ //logic [4:0] fp_rf_waddr_wb_q;
+ logic fp_load_q;
+ // Stage becomes valid if an instruction enters for ID/EX and valid is cleared when instruction
+ // is done
+ assign wb_valid_d = (en_wb_i & ready_wb_o) | (wb_valid_q & ~wb_done);
+ // Writeback for non load/store instructions always completes in a cycle (so instantly done)
+ // Writeback for load/store must wait for response to be received by the LSU
+ // Signal only relevant if wb_valid_q set
+ assign wb_done = (wb_instr_type_q == WB_INSTR_OTHER) | lsu_resp_valid_i;
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if(~rst_ni) begin
+ wb_valid_q <= 1'b0;
+ end else begin
+ wb_valid_q <= wb_valid_d;
+ end
+ end
+ // Payload registers have no reset; they are qualified by wb_valid_q wherever they are used
+ always_ff @(posedge clk_i) begin
+ if(en_wb_i) begin
+ rf_we_wb_q <= rf_we_id_i;
+ rf_waddr_wb_q <= rf_waddr_id_i;
+ rf_wdata_wb_q <= rf_wdata_id_i;
+ wb_instr_type_q <= instr_type_wb_i;
+ wb_pc_q <= pc_id_i;
+ wb_compressed_q <= instr_is_compressed_id_i;
+ wb_count_q <= instr_perf_count_id_i;
+ // added for floating point registers for wb stage
+ fp_rf_we_wb_q <= fp_rf_wen_id_i;
+ // fp_rf_waddr_wb_q <= rf_waddr_id_i;
+ //fp_rf_wdata_wb_q <= rf_wdata_id_i;
+ fp_load_q <= fp_load_i;
+ end
+ end
+ assign rf_waddr_wb_o = rf_waddr_wb_q;
+ assign rf_wdata_wb_mux[0] = rf_wdata_wb_q;
+ assign rf_wdata_wb_mux_we[0] = rf_we_wb_q & wb_valid_q;
+ assign fp_rf_waddr_wb_o = rf_waddr_wb_q; // no separate datapath for rd address
+ assign fp_rf_wdata_wb_mux[0] = rf_wdata_wb_q; // no separate datapath for data bus
+ assign fp_rf_wdata_wb_mux_we[0] = fp_rf_we_wb_q & wb_valid_q;
+ assign ready_wb_o = ~wb_valid_q | wb_done;
+ // Instruction in writeback will be writing to register file if either rf_we is set or writeback
+ // is awaiting load data. This is used for determining RF read hazards in ID/EX
+ assign rf_write_wb_o = wb_valid_q & (rf_we_wb_q | (wb_instr_type_q == WB_INSTR_LOAD));
+ assign fp_rf_write_wb_o = wb_valid_q & (fp_rf_we_wb_q | (wb_instr_type_q == WB_INSTR_LOAD));
+ assign outstanding_load_wb_o = wb_valid_q & (wb_instr_type_q == WB_INSTR_LOAD);
+ assign outstanding_store_wb_o = wb_valid_q & (wb_instr_type_q == WB_INSTR_STORE);
+ assign pc_wb_o = wb_pc_q;
+ assign instr_done_wb_o = wb_valid_q & wb_done;
+ // Increment instruction retire counters for valid instructions which are not lsu errors
+ assign perf_instr_ret_wb_o = instr_done_wb_o & wb_count_q &
+ ~(lsu_resp_valid_i & lsu_resp_err_i);
+ assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & wb_compressed_q;
+ // Forward data that will be written to the RF back to ID to resolve data hazards. The flopped
+ // rf_wdata_wb_q is used rather than rf_wdata_wb_o as the latter includes read data from memory
+ // that returns too late to be used on the forwarding path.
+ assign rf_wdata_fwd_wb_o = rf_wdata_wb_q;
+ // LSU load data is steered to exactly one register file by the registered fp_load_q
+ assign rf_wdata_wb_mux[1] = rf_wdata_lsu_i;
+ assign rf_wdata_wb_mux_we[1] = rf_we_lsu_i & ~fp_load_q;
+ assign fp_rf_wdata_wb_mux[1] = rf_wdata_lsu_i;
+ assign fp_rf_wdata_wb_mux_we[1] = rf_we_lsu_i & fp_load_q;
+ end else begin : g_bypass_wb
+ // without writeback stage just pass through register write signals
+ assign rf_waddr_wb_o = rf_waddr_id_i;
+ assign rf_wdata_wb_mux[0] = rf_wdata_id_i;
+ assign rf_wdata_wb_mux_we[0] = rf_we_id_i;
+ // for floating point unit
+ assign fp_rf_waddr_wb_o = rf_waddr_id_i; // no separate datapath for rd address
+ assign fp_rf_wdata_wb_mux[0] = rf_wdata_id_i; // no separate datapath for data bus
+ assign fp_rf_wdata_wb_mux_we[0] = fp_rf_wen_id_i;
+ // Increment instruction retire counters for valid instructions which are not lsu errors
+ assign perf_instr_ret_wb_o = instr_perf_count_id_i & en_wb_i &
+ ~(lsu_resp_valid_i & lsu_resp_err_i);
+ assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & instr_is_compressed_id_i;
+ // ready needs to be constant 1 without writeback stage (otherwise ID/EX stage will stall)
+ assign ready_wb_o = 1'b1;
+ // Unused Writeback stage only IO & wiring
+ // Assign inputs and internal wiring to unused signals to satisfy lint checks
+ // Tie-off outputs to constant values
+ logic unused_clk;
+ logic unused_rst;
+ wb_instr_type_e unused_instr_type_wb;
+ logic [31:0] unused_pc_id;
+ assign unused_clk = clk_i;
+ assign unused_rst = rst_ni;
+ assign unused_instr_type_wb = instr_type_wb_i;
+ assign unused_pc_id = pc_id_i;
+ assign outstanding_load_wb_o = 1'b0;
+ assign outstanding_store_wb_o = 1'b0;
+ assign pc_wb_o = '0;
+ assign rf_write_wb_o = 1'b0;
+ // Tie off the FP counterpart of rf_write_wb_o as well; it previously had no driver in this
+ // branch.
+ assign fp_rf_write_wb_o = 1'b0;
+ assign rf_wdata_fwd_wb_o = 32'b0;
+ assign instr_done_wb_o = 1'b0;
+ // LSU load data is steered to exactly one register file directly by fp_load_i
+ assign rf_wdata_wb_mux[1] = rf_wdata_lsu_i;
+ assign rf_wdata_wb_mux_we[1] = rf_we_lsu_i & ~fp_load_i;
+ assign fp_rf_wdata_wb_mux[1] = rf_wdata_lsu_i;
+ assign fp_rf_wdata_wb_mux_we[1] = rf_we_lsu_i & fp_load_i;
+ end
+ // RF write data can come from ID results (all RF writes that aren't because of loads will come
+ // from here) or the LSU (RF writes for load data)
+ assign rf_wdata_wb_o = (rf_wdata_wb_mux_we[0]) ? rf_wdata_wb_mux[0] :
+ rf_wdata_wb_mux[1];
+ assign rf_we_wb_o = |rf_wdata_wb_mux_we;
+ // Same selection for the floating point register file
+ assign fp_rf_wdata_wb_o = fp_rf_wdata_wb_mux_we[0] ? fp_rf_wdata_wb_mux[0] :
+ fp_rf_wdata_wb_mux[1];
+ assign fp_rf_wen_wb_o = |fp_rf_wdata_wb_mux_we;
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..9f35a44
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,61 @@
+// Copyright 2016 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
+/// This package contains a collection of mathematical functions that are commonly used when defining
+/// the value of constants in HDL code. These functions are implemented as Verilog constant
+/// functions. Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
+/// function whose value can be evaluated at compile time or during elaboration. A constant function
+/// must be called with arguments that are constants.
+package cf_math_pkg;
+  /// Ceiled division of two natural numbers.
+  ///
+  /// Returns `dividend / divisor` rounded towards plus infinity, computed by counting how many
+  /// times `divisor` has to be subtracted until nothing positive is left. A non-positive
+  /// `dividend` therefore yields 0.
+  function automatic integer ceil_div (input longint dividend, input longint divisor);
+    automatic longint left;
+    // pragma translate_off
+    `ifndef VERILATOR
+    // Argument checks; these are inside the translate_off region and skipped under Verilator.
+    if (dividend < 0) $fatal(1, "Dividend %0d is not a natural number!", dividend);
+    if (divisor < 0)  $fatal(1, "Divisor %0d is not a natural number!", divisor);
+    if (divisor == 0) $fatal(1, "Division by zero!");
+    `endif
+    // pragma translate_on
+    ceil_div = 0;
+    left     = dividend;
+    while (left > 0) begin
+      left = left - divisor;
+      ceil_div++;
+    end
+  endfunction
+  /// Width of a binary-encoded index able to address `num_idx` entries.
+  ///
+  /// The result is never smaller than `1`, so an index signal always has at least one bit,
+  /// regardless of parametrization.
+  ///
+  /// Sample usage in type definition:
+  /// As parameter:
+  ///   `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]`
+  /// As typedef:
+  ///   `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t`
+  function automatic integer unsigned idx_width (input integer unsigned num_idx);
+    idx_width = (num_idx > 32'd1) ? unsigned'($clog2(num_idx)) : 32'd1;
+  endfunction
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..4c5364d
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,3413 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Company: IIS @ ETHZ - Federal Institute of Technology //
+// //
+// Engineers: Lei Li //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 04/03/2018 //
+// Design Name: FPU //
+// Module Name: //
+// Project Name: Private FPU //
+// Language: SystemVerilog //
+// //
+// Description: the control logic of div and sqrt //
+// //
+// Revision Date: 12/04/2018 //
+// Lei Li //
+// To address some requirements by Stefan and add low power //
+// control for special cases //
+// Revision Date: 13/04/2018 //
+// Lei Li //
+// To fix some bug found in Control FSM //
+// when Iteration_unit_num_S = 2'b10 //
+// //
+// //
+// //
+import defs_div_sqrt_mvp::*;
+module control_mvp
+ (//Input
+ input logic Clk_CI,
+ input logic Rst_RBI,
+ input logic Div_start_SI ,
+ input logic Sqrt_start_SI,
+ input logic Start_SI,
+ input logic Kill_SI,
+ input logic Special_case_SBI,
+ input logic Special_case_dly_SBI,
+ input logic [C_PC-1:0] Precision_ctl_SI,
+ input logic [1:0] Format_sel_SI,
+ input logic [C_MANT_FP64:0] Numerator_DI,
+ input logic [C_EXP_FP64:0] Exp_num_DI,
+ input logic [C_MANT_FP64:0] Denominator_DI,
+ input logic [C_EXP_FP64:0] Exp_den_DI,
+ output logic Div_start_dly_SO ,
+ output logic Sqrt_start_dly_SO,
+ output logic Div_enable_SO,
+ output logic Sqrt_enable_SO,
+ //To next stage
+ output logic Full_precision_SO,
+ output logic FP32_SO,
+ output logic FP64_SO,
+ output logic FP16_SO,
+ output logic FP16ALT_SO,
+ output logic Ready_SO,
+ output logic Done_SO,
+ output logic [C_MANT_FP64+4:0] Mant_result_prenorm_DO,
+ // output logic [3:0] Round_bit_DO,
+ output logic [C_EXP_FP64+1:0] Exp_result_prenorm_DO
+ );
+ logic [C_MANT_FP64+1+4:0] Partial_remainder_DN,Partial_remainder_DP; //58bits,r=q+2
+ logic [C_MANT_FP64+4:0] Quotient_DP; //57bits
+ /////////////////////////////////////////////////////////////////////////////
+ // Assign Inputs //
+ /////////////////////////////////////////////////////////////////////////////
+ logic [C_MANT_FP64+1:0] Numerator_se_D; //sign extension and hidden bit
+ logic [C_MANT_FP64+1:0] Denominator_se_D; //signa extension and hidden bit
+ logic [C_MANT_FP64+1:0] Denominator_se_DB; //1's complement
+ assign Numerator_se_D={1'b0,Numerator_DI};
+ assign Denominator_se_D={1'b0,Denominator_DI};
+ always_comb
+   begin
+     // Invert the extended denominator. For every format except FP64 only the upper
+     // (mantissa width + 2) bits are kept; the remaining low bits are forced to zero.
+     if (FP32_SO)
+       Denominator_se_DB = ~Denominator_se_D &
+                           { {(C_MANT_FP32+2){1'b1}}, {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
+     else if (FP64_SO)
+       Denominator_se_DB = ~Denominator_se_D;
+     else if (FP16_SO)
+       Denominator_se_DB = ~Denominator_se_D &
+                           { {(C_MANT_FP16+2){1'b1}}, {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+     else // remaining format select value (FP16ALT)
+       Denominator_se_DB = ~Denominator_se_D &
+                           { {(C_MANT_FP16ALT+2){1'b1}}, {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+   end
+ logic [C_MANT_FP64+1:0] Mant_D_sqrt_Norm;
+ assign Mant_D_sqrt_Norm=Exp_num_DI[0]?{1'b0,Numerator_DI}:{Numerator_DI,1'b0}; //for sqrt
+ /////////////////////////////////////////////////////////////////////////////
+ // Format Selection //
+ /////////////////////////////////////////////////////////////////////////////
+ logic [1:0] Format_sel_S;
+ always_ff @(posedge Clk_CI, negedge Rst_RBI)
+   begin
+     // Capture the requested format when a start is accepted (Start_SI while Ready_SO);
+     // the register keeps its value in every other cycle.
+     if (~Rst_RBI)
+       Format_sel_S <= 'b0;
+     else if (Start_SI && Ready_SO)
+       Format_sel_S <= Format_sel_SI;
+   end
+ assign FP32_SO = (Format_sel_S==2'b00);
+ assign FP64_SO = (Format_sel_S==2'b01);
+ assign FP16_SO = (Format_sel_S==2'b10);
+ assign FP16ALT_SO = (Format_sel_S==2'b11);
+ /////////////////////////////////////////////////////////////////////////////
+ // Precision Control //
+ /////////////////////////////////////////////////////////////////////////////
+ logic [C_PC-1:0] Precision_ctl_S;
+ always_ff @(posedge Clk_CI, negedge Rst_RBI)
+   begin
+     // Capture the requested precision when a start is accepted (Start_SI while Ready_SO);
+     // the register keeps its value in every other cycle.
+     if (~Rst_RBI)
+       Precision_ctl_S <= 'b0;
+     else if (Start_SI && Ready_SO)
+       Precision_ctl_S <= Precision_ctl_SI;
+   end
+ assign Full_precision_SO = (Precision_ctl_S==6'h00);
+ logic [5:0] State_ctl_S;
+ logic [5:0] State_Two_iteration_unit_S;
+ logic [5:0] State_Four_iteration_unit_S;
+ assign State_Two_iteration_unit_S = Precision_ctl_S[C_PC-1:1]; //Two iteration units
+ assign State_Four_iteration_unit_S = Precision_ctl_S[C_PC-1:2]; //Four iteration units
+ always_comb
+   begin
+     // State_ctl_S is the counter value that Crtl_cnt_S is compared against to detect the final
+     // state. It depends on the number of iteration units, the latched format and the latched
+     // precision control.
+     case (Iteration_unit_num_S)
+       // ---------------- one iteration unit ----------------
+       2'b00:
+         begin
+           case (Format_sel_S)
+             2'b00: // FP32
+               if (Full_precision_SO) State_ctl_S = 6'h1b; //24+4 more iterations for rounding bits
+               else                   State_ctl_S = Precision_ctl_S;
+             2'b01: // FP64
+               if (Full_precision_SO) State_ctl_S = 6'h38; //53+4 more iterations for rounding bits
+               else                   State_ctl_S = Precision_ctl_S;
+             2'b10: // FP16
+               if (Full_precision_SO) State_ctl_S = 6'h0e; //11+4 more iterations for rounding bits
+               else                   State_ctl_S = Precision_ctl_S;
+             2'b11: // FP16ALT
+               if (Full_precision_SO) State_ctl_S = 6'h0b; //8+4 more iterations for rounding bits
+               else                   State_ctl_S = Precision_ctl_S;
+           endcase
+         end
+       // ---------------- two iteration units ----------------
+       2'b01:
+         begin
+           case (Format_sel_S)
+             2'b00: // FP32
+               if (Full_precision_SO) State_ctl_S = 6'h0d; //24+4 more iterations for rounding bits
+               else                   State_ctl_S = State_Two_iteration_unit_S;
+             2'b01: // FP64
+               if (Full_precision_SO) State_ctl_S = 6'h1b; //53+3 more iterations for rounding bits
+               else                   State_ctl_S = State_Two_iteration_unit_S;
+             2'b10: // FP16
+               if (Full_precision_SO) State_ctl_S = 6'h06; //11+3 more iterations for rounding bits
+               else                   State_ctl_S = State_Two_iteration_unit_S;
+             2'b11: // FP16ALT
+               if (Full_precision_SO) State_ctl_S = 6'h05; //8+4 more iterations for rounding bits
+               else                   State_ctl_S = State_Two_iteration_unit_S;
+           endcase
+         end
+       // ---------------- three iteration units ----------------
+       // The precision control is mapped through an explicit table; values without an entry
+       // fall back to the same count as full precision (6'h00).
+       2'b10:
+         begin
+           case (Format_sel_S)
+             2'b00: // FP32
+               case (Precision_ctl_S)
+                 6'h00:             State_ctl_S = 6'h08; //24+3 more iterations for rounding bits
+                 6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                 6'h09,6'h0a,6'h0b: State_ctl_S = 6'h03;
+                 6'h0c,6'h0d,6'h0e: State_ctl_S = 6'h04;
+                 6'h0f,6'h10,6'h11: State_ctl_S = 6'h05;
+                 6'h12,6'h13,6'h14: State_ctl_S = 6'h06;
+                 6'h15,6'h16,6'h17: State_ctl_S = 6'h07;
+                 default:           State_ctl_S = 6'h08; //24+3 more iterations for rounding bits
+               endcase
+             2'b01: // FP64
+               case (Precision_ctl_S)
+                 6'h00:             State_ctl_S = 6'h12; //53+4 more iterations for rounding bits
+                 6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                 6'h09,6'h0a,6'h0b: State_ctl_S = 6'h03;
+                 6'h0c,6'h0d,6'h0e: State_ctl_S = 6'h04;
+                 6'h0f,6'h10,6'h11: State_ctl_S = 6'h05;
+                 6'h12,6'h13,6'h14: State_ctl_S = 6'h06;
+                 6'h15,6'h16,6'h17: State_ctl_S = 6'h07;
+                 6'h18,6'h19,6'h1a: State_ctl_S = 6'h08;
+                 6'h1b,6'h1c,6'h1d: State_ctl_S = 6'h09;
+                 6'h1e,6'h1f,6'h20: State_ctl_S = 6'h0a;
+                 6'h21,6'h22,6'h23: State_ctl_S = 6'h0b;
+                 6'h24,6'h25,6'h26: State_ctl_S = 6'h0c;
+                 6'h27,6'h28,6'h29: State_ctl_S = 6'h0d;
+                 6'h2a,6'h2b,6'h2c: State_ctl_S = 6'h0e;
+                 6'h2d,6'h2e,6'h2f: State_ctl_S = 6'h0f;
+                 6'h30,6'h31,6'h32: State_ctl_S = 6'h10;
+                 6'h33,6'h34,6'h35: State_ctl_S = 6'h11;
+                 default:           State_ctl_S = 6'h12; //53+4 more iterations for rounding bits
+               endcase
+             2'b10: // FP16
+               case (Precision_ctl_S)
+                 6'h00:             State_ctl_S = 6'h04; //12+3 more iterations for rounding bits
+                 6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                 6'h09,6'h0a,6'h0b: State_ctl_S = 6'h03;
+                 default:           State_ctl_S = 6'h04; //12+3 more iterations for rounding bits
+               endcase
+             2'b11: // FP16ALT
+               case (Precision_ctl_S)
+                 6'h00:             State_ctl_S = 6'h03; //8+4 more iterations for rounding bits
+                 6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                 default:           State_ctl_S = 6'h03; //8+4 more iterations for rounding bits
+               endcase
+           endcase
+         end
+       // ---------------- four iteration units ----------------
+       2'b11:
+         begin
+           case (Format_sel_S)
+             2'b00: // FP32
+               if (Full_precision_SO) State_ctl_S = 6'h06; //24+4 more iterations for rounding bits
+               else                   State_ctl_S = State_Four_iteration_unit_S;
+             2'b01: // FP64
+               if (Full_precision_SO) State_ctl_S = 6'h0d; //53+3 more iterations for rounding bits
+               else                   State_ctl_S = State_Four_iteration_unit_S;
+             2'b10: // FP16
+               if (Full_precision_SO) State_ctl_S = 6'h03; //11+4 more iterations for rounding bits
+               else                   State_ctl_S = State_Four_iteration_unit_S;
+             2'b11: // FP16ALT
+               if (Full_precision_SO) State_ctl_S = 6'h02; //8+4 more iterations for rounding bits
+               else                   State_ctl_S = State_Four_iteration_unit_S;
+           endcase
+         end
+     endcase
+   end
+ /////////////////////////////////////////////////////////////////////////////
+ // control logic //
+ /////////////////////////////////////////////////////////////////////////////
+ logic Div_start_dly_S;
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) // Div_start_dly_S: accepted divide start, one cycle later
+   begin
+     if (~Rst_RBI)                       Div_start_dly_S <= 1'b0;
+     else if (Div_start_SI && Ready_SO)  Div_start_dly_S <= 1'b1;
+     else                                Div_start_dly_S <= 1'b0;
+   end
+ assign Div_start_dly_SO=Div_start_dly_S;
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) begin // Div_enable_SO: set on accepted start, cleared on done
+   if (~Rst_RBI)
+     Div_enable_SO <= 1'b0;
+   else if (Kill_SI)                    // synchronous clear; wins over a simultaneous start
+     Div_enable_SO <= 1'b0;
+   else if (Div_start_SI && Ready_SO)
+     Div_enable_SO <= 1'b1;
+   else if (Done_SO)
+     Div_enable_SO <= 1'b0;
+   // no other case changes the flag
+ end
+ logic Sqrt_start_dly_S;
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) // Sqrt_start_dly_S: accepted sqrt start, one cycle later
+   begin
+     if (~Rst_RBI)                        Sqrt_start_dly_S <= 1'b0;
+     else if (Sqrt_start_SI && Ready_SO)  Sqrt_start_dly_S <= 1'b1;
+     else                                 Sqrt_start_dly_S <= 1'b0;
+   end
+ assign Sqrt_start_dly_SO=Sqrt_start_dly_S;
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) begin // Sqrt_enable_SO: set on accepted start, cleared on done
+   if (~Rst_RBI)
+     Sqrt_enable_SO <= 1'b0;
+   else if (Kill_SI)                     // synchronous clear; wins over a simultaneous start
+     Sqrt_enable_SO <= 1'b0;
+   else if (Sqrt_start_SI && Ready_SO)
+     Sqrt_enable_SO <= 1'b1;
+   else if (Done_SO)
+     Sqrt_enable_SO <= 1'b0;
+   // no other case changes the flag
+ end
+ logic [5:0] Crtl_cnt_S;
+ logic Start_dly_S;
+ assign Start_dly_S=Div_start_dly_S |Sqrt_start_dly_S;
+ logic Fsm_enable_S;
+ assign Fsm_enable_S=( (Start_dly_S | (| Crtl_cnt_S)) && (~Kill_SI) && Special_case_dly_SBI);
+ logic Final_state_S;
+ assign Final_state_S= (Crtl_cnt_S==State_ctl_S);
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) //control_FSM
+   begin
+     // Iteration counter: advances while Fsm_enable_S is set and returns to zero on reset,
+     // on reaching the final state, on Kill_SI, or whenever the FSM is not enabled.
+     if (~Rst_RBI)
+       Crtl_cnt_S <= '0;
+     else if (Final_state_S | Kill_SI)
+       Crtl_cnt_S <= '0;
+     else if (Fsm_enable_S)
+       Crtl_cnt_S <= Crtl_cnt_S+1;
+     else
+       Crtl_cnt_S <= '0;
+   end // always_ff
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) // Done_SO, shared by divide and square root
+   begin
+     if (~Rst_RBI)
+       Done_SO <= 1'b0;
+     else if (Start_SI && Ready_SO)
+       begin
+         // An accepted start with Special_case_SBI low is reported done on the next cycle
+         if (~Special_case_SBI) Done_SO <= 1'b1;
+         else                   Done_SO <= 1'b0;
+       end
+     else if (Final_state_S)
+       Done_SO <= 1'b1;
+     else
+       Done_SO <= 1'b0;
+   end
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) // Ready_SO: high out of reset, low while iterating
+   begin
+     if (~Rst_RBI)
+       Ready_SO <= 1'b1;
+     else if (Start_SI && Ready_SO)
+       begin
+         // An accepted start with Special_case_SBI low leaves the unit ready
+         if (~Special_case_SBI) Ready_SO <= 1'b1;
+         else                   Ready_SO <= 1'b0;
+       end
+     else if (Final_state_S | Kill_SI)
+       Ready_SO <= 1'b1;
+     // no other case changes Ready_SO
+   end
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b00, start //
+ ////////////////////////////////////////////////////////////////////////////
+ logic Qcnt_one_0;
+ logic Qcnt_one_1;
+ logic [1:0] Qcnt_one_2;
+ logic [2:0] Qcnt_one_3;
+ logic [3:0] Qcnt_one_4;
+ logic [4:0] Qcnt_one_5;
+ logic [5:0] Qcnt_one_6;
+ logic [6:0] Qcnt_one_7;
+ logic [7:0] Qcnt_one_8;
+ logic [8:0] Qcnt_one_9;
+ logic [9:0] Qcnt_one_10;
+ logic [10:0] Qcnt_one_11;
+ logic [11:0] Qcnt_one_12;
+ logic [12:0] Qcnt_one_13;
+ logic [13:0] Qcnt_one_14;
+ logic [14:0] Qcnt_one_15;
+ logic [15:0] Qcnt_one_16;
+ logic [16:0] Qcnt_one_17;
+ logic [17:0] Qcnt_one_18;
+ logic [18:0] Qcnt_one_19;
+ logic [19:0] Qcnt_one_20;
+ logic [20:0] Qcnt_one_21;
+ logic [21:0] Qcnt_one_22;
+ logic [22:0] Qcnt_one_23;
+ logic [23:0] Qcnt_one_24;
+ logic [24:0] Qcnt_one_25;
+ logic [25:0] Qcnt_one_26;
+ logic [26:0] Qcnt_one_27;
+ logic [27:0] Qcnt_one_28;
+ logic [28:0] Qcnt_one_29;
+ logic [29:0] Qcnt_one_30;
+ logic [30:0] Qcnt_one_31;
+ logic [31:0] Qcnt_one_32;
+ logic [32:0] Qcnt_one_33;
+ logic [33:0] Qcnt_one_34;
+ logic [34:0] Qcnt_one_35;
+ logic [35:0] Qcnt_one_36;
+ logic [36:0] Qcnt_one_37;
+ logic [37:0] Qcnt_one_38;
+ logic [38:0] Qcnt_one_39;
+ logic [39:0] Qcnt_one_40;
+ logic [40:0] Qcnt_one_41;
+ logic [41:0] Qcnt_one_42;
+ logic [42:0] Qcnt_one_43;
+ logic [43:0] Qcnt_one_44;
+ logic [44:0] Qcnt_one_45;
+ logic [45:0] Qcnt_one_46;
+ logic [46:0] Qcnt_one_47;
+ logic [47:0] Qcnt_one_48;
+ logic [48:0] Qcnt_one_49;
+ logic [49:0] Qcnt_one_50;
+ logic [50:0] Qcnt_one_51;
+ logic [51:0] Qcnt_one_52;
+ logic [52:0] Qcnt_one_53;
+ logic [53:0] Qcnt_one_54;
+ logic [54:0] Qcnt_one_55;
+ logic [55:0] Qcnt_one_56;
+ logic [56:0] Qcnt_one_57;
+ logic [57:0] Qcnt_one_58;
+ logic [58:0] Qcnt_one_59;
+ logic [59:0] Qcnt_one_60;
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b00, end //
+ ////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b01, start //
+ ////////////////////////////////////////////////////////////////////////////
+ logic [1:0] Qcnt_two_0;
+ logic [2:0] Qcnt_two_1;
+ logic [4:0] Qcnt_two_2;
+ logic [6:0] Qcnt_two_3;
+ logic [8:0] Qcnt_two_4;
+ logic [10:0] Qcnt_two_5;
+ logic [12:0] Qcnt_two_6;
+ logic [14:0] Qcnt_two_7;
+ logic [16:0] Qcnt_two_8;
+ logic [18:0] Qcnt_two_9;
+ logic [20:0] Qcnt_two_10;
+ logic [22:0] Qcnt_two_11;
+ logic [24:0] Qcnt_two_12;
+ logic [26:0] Qcnt_two_13;
+ logic [28:0] Qcnt_two_14;
+ logic [30:0] Qcnt_two_15;
+ logic [32:0] Qcnt_two_16;
+ logic [34:0] Qcnt_two_17;
+ logic [36:0] Qcnt_two_18;
+ logic [38:0] Qcnt_two_19;
+ logic [40:0] Qcnt_two_20;
+ logic [42:0] Qcnt_two_21;
+ logic [44:0] Qcnt_two_22;
+ logic [46:0] Qcnt_two_23;
+ logic [48:0] Qcnt_two_24;
+ logic [50:0] Qcnt_two_25;
+ logic [52:0] Qcnt_two_26;
+ logic [54:0] Qcnt_two_27;
+ logic [56:0] Qcnt_two_28;
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b01, end //
+ ////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b10, start //
+ ////////////////////////////////////////////////////////////////////////////
+ logic [2:0] Qcnt_three_0;
+ logic [4:0] Qcnt_three_1;
+ logic [7:0] Qcnt_three_2;
+ logic [10:0] Qcnt_three_3;
+ logic [13:0] Qcnt_three_4;
+ logic [16:0] Qcnt_three_5;
+ logic [19:0] Qcnt_three_6;
+ logic [22:0] Qcnt_three_7;
+ logic [25:0] Qcnt_three_8;
+ logic [28:0] Qcnt_three_9;
+ logic [31:0] Qcnt_three_10;
+ logic [34:0] Qcnt_three_11;
+ logic [37:0] Qcnt_three_12;
+ logic [40:0] Qcnt_three_13;
+ logic [43:0] Qcnt_three_14;
+ logic [46:0] Qcnt_three_15;
+ logic [49:0] Qcnt_three_16;
+ logic [52:0] Qcnt_three_17;
+ logic [55:0] Qcnt_three_18;
+ logic [58:0] Qcnt_three_19;
+ logic [61:0] Qcnt_three_20;
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b10, end //
+ ////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b11, start //
+ ////////////////////////////////////////////////////////////////////////////
+ logic [3:0] Qcnt_four_0;
+ logic [6:0] Qcnt_four_1;
+ logic [10:0] Qcnt_four_2;
+ logic [14:0] Qcnt_four_3;
+ logic [18:0] Qcnt_four_4;
+ logic [22:0] Qcnt_four_5;
+ logic [26:0] Qcnt_four_6;
+ logic [30:0] Qcnt_four_7;
+ logic [34:0] Qcnt_four_8;
+ logic [38:0] Qcnt_four_9;
+ logic [42:0] Qcnt_four_10;
+ logic [46:0] Qcnt_four_11;
+ logic [50:0] Qcnt_four_12;
+ logic [54:0] Qcnt_four_13;
+ logic [58:0] Qcnt_four_14;
+ /////////////////////////////////////////////////////////////////////////////
+ // Declarations for square root when Iteration_unit_num_S = 2'b11, end //
+ ////////////////////////////////////////////////////////////////////////////
+ logic [C_MANT_FP64+1+4:0] Sqrt_R0,Sqrt_Q0,Q_sqrt0,Q_sqrt_com_0;
+ logic [C_MANT_FP64+1+4:0] Sqrt_R1,Sqrt_Q1,Q_sqrt1,Q_sqrt_com_1;
+ logic [C_MANT_FP64+1+4:0] Sqrt_R2,Sqrt_Q2,Q_sqrt2,Q_sqrt_com_2;
+ logic [C_MANT_FP64+1+4:0] Sqrt_R3,Sqrt_Q3,Q_sqrt3,Q_sqrt_com_3,Sqrt_R4; //Sqrt_Q4;
+ logic [1:0] Sqrt_DI [3:0];
+ logic [1:0] Sqrt_DO [3:0];
+ logic Sqrt_carry_DO;
+ logic [C_MANT_FP64+1+4:0] Iteration_cell_a_D [3:0];
+ logic [C_MANT_FP64+1+4:0] Iteration_cell_b_D [3:0];
+ logic [C_MANT_FP64+1+4:0] Iteration_cell_a_BMASK_D [3:0];
+ logic [C_MANT_FP64+1+4:0] Iteration_cell_b_BMASK_D [3:0];
+ logic Iteration_cell_carry_D [3:0];
+ logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_D [3:0];
+ logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_AMASK_D [3:0];
+ logic [3:0] Sqrt_quotinent_S;
+ always_comb
+   begin
+     // Per format: collect one quotient bit from each of the four iteration cells (cell 0 ends
+     // up in bit 3, cell 3 in bit 0) and build the inverted Q_sqrt values. For every format
+     // except FP64 the inverted value is limited to the low (mantissa width + 6) bits and
+     // zero-padded above.
+     case (Format_sel_S)
+       2'b00: // FP32
+         begin
+           for (int u = 0; u < 4; u++)
+             Sqrt_quotinent_S[3-u] = ~Iteration_cell_sum_AMASK_D[u][C_MANT_FP32+5];
+           Q_sqrt_com_0 = { {(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt0[C_MANT_FP32+5:0] };
+           Q_sqrt_com_1 = { {(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt1[C_MANT_FP32+5:0] };
+           Q_sqrt_com_2 = { {(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt2[C_MANT_FP32+5:0] };
+           Q_sqrt_com_3 = { {(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt3[C_MANT_FP32+5:0] };
+         end
+       2'b01: // FP64: quotient bits come from the cell carries, full-width inversion
+         begin
+           for (int u = 0; u < 4; u++)
+             Sqrt_quotinent_S[3-u] = Iteration_cell_carry_D[u];
+           Q_sqrt_com_0 = ~Q_sqrt0;
+           Q_sqrt_com_1 = ~Q_sqrt1;
+           Q_sqrt_com_2 = ~Q_sqrt2;
+           Q_sqrt_com_3 = ~Q_sqrt3;
+         end
+       2'b10: // FP16
+         begin
+           for (int u = 0; u < 4; u++)
+             Sqrt_quotinent_S[3-u] = ~Iteration_cell_sum_AMASK_D[u][C_MANT_FP16+5];
+           Q_sqrt_com_0 = { {(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt0[C_MANT_FP16+5:0] };
+           Q_sqrt_com_1 = { {(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt1[C_MANT_FP16+5:0] };
+           Q_sqrt_com_2 = { {(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt2[C_MANT_FP16+5:0] };
+           Q_sqrt_com_3 = { {(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt3[C_MANT_FP16+5:0] };
+         end
+       2'b11: // FP16ALT
+         begin
+           for (int u = 0; u < 4; u++)
+             Sqrt_quotinent_S[3-u] = ~Iteration_cell_sum_AMASK_D[u][C_MANT_FP16ALT+5];
+           Q_sqrt_com_0 = { {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt0[C_MANT_FP16ALT+5:0] };
+           Q_sqrt_com_1 = { {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt1[C_MANT_FP16ALT+5:0] };
+           Q_sqrt_com_2 = { {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt2[C_MANT_FP16ALT+5:0] };
+           Q_sqrt_com_3 = { {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt3[C_MANT_FP16ALT+5:0] };
+         end
+     endcase
+   end
+ assign Qcnt_one_0= {1'b0}; // Qcnt_one_N = Quotient_DP[N-1:0] (N=0 is a constant 0); read by the sqrt operand mux when Iteration_unit_num_S = 2'b00
+ assign Qcnt_one_1= {Quotient_DP[0]};
+ assign Qcnt_one_2= {Quotient_DP[1:0]};
+ assign Qcnt_one_3= {Quotient_DP[2:0]};
+ assign Qcnt_one_4= {Quotient_DP[3:0]};
+ assign Qcnt_one_5= {Quotient_DP[4:0]};
+ assign Qcnt_one_6= {Quotient_DP[5:0]};
+ assign Qcnt_one_7= {Quotient_DP[6:0]};
+ assign Qcnt_one_8= {Quotient_DP[7:0]};
+ assign Qcnt_one_9= {Quotient_DP[8:0]};
+ assign Qcnt_one_10= {Quotient_DP[9:0]};
+ assign Qcnt_one_11= {Quotient_DP[10:0]};
+ assign Qcnt_one_12= {Quotient_DP[11:0]};
+ assign Qcnt_one_13= {Quotient_DP[12:0]};
+ assign Qcnt_one_14= {Quotient_DP[13:0]};
+ assign Qcnt_one_15= {Quotient_DP[14:0]};
+ assign Qcnt_one_16= {Quotient_DP[15:0]};
+ assign Qcnt_one_17= {Quotient_DP[16:0]};
+ assign Qcnt_one_18= {Quotient_DP[17:0]};
+ assign Qcnt_one_19= {Quotient_DP[18:0]};
+ assign Qcnt_one_20= {Quotient_DP[19:0]};
+ assign Qcnt_one_21= {Quotient_DP[20:0]};
+ assign Qcnt_one_22= {Quotient_DP[21:0]};
+ assign Qcnt_one_23= {Quotient_DP[22:0]};
+ assign Qcnt_one_24= {Quotient_DP[23:0]};
+ assign Qcnt_one_25= {Quotient_DP[24:0]};
+ assign Qcnt_one_26= {Quotient_DP[25:0]};
+ assign Qcnt_one_27= {Quotient_DP[26:0]};
+ assign Qcnt_one_28= {Quotient_DP[27:0]};
+ assign Qcnt_one_29= {Quotient_DP[28:0]};
+ assign Qcnt_one_30= {Quotient_DP[29:0]};
+ assign Qcnt_one_31= {Quotient_DP[30:0]};
+ assign Qcnt_one_32= {Quotient_DP[31:0]};
+ assign Qcnt_one_33= {Quotient_DP[32:0]};
+ assign Qcnt_one_34= {Quotient_DP[33:0]};
+ assign Qcnt_one_35= {Quotient_DP[34:0]};
+ assign Qcnt_one_36= {Quotient_DP[35:0]};
+ assign Qcnt_one_37= {Quotient_DP[36:0]};
+ assign Qcnt_one_38= {Quotient_DP[37:0]};
+ assign Qcnt_one_39= {Quotient_DP[38:0]};
+ assign Qcnt_one_40= {Quotient_DP[39:0]};
+ assign Qcnt_one_41= {Quotient_DP[40:0]};
+ assign Qcnt_one_42= {Quotient_DP[41:0]};
+ assign Qcnt_one_43= {Quotient_DP[42:0]};
+ assign Qcnt_one_44= {Quotient_DP[43:0]};
+ assign Qcnt_one_45= {Quotient_DP[44:0]};
+ assign Qcnt_one_46= {Quotient_DP[45:0]};
+ assign Qcnt_one_47= {Quotient_DP[46:0]};
+ assign Qcnt_one_48= {Quotient_DP[47:0]};
+ assign Qcnt_one_49= {Quotient_DP[48:0]};
+ assign Qcnt_one_50= {Quotient_DP[49:0]};
+ assign Qcnt_one_51= {Quotient_DP[50:0]};
+ assign Qcnt_one_52= {Quotient_DP[51:0]};
+ assign Qcnt_one_53= {Quotient_DP[52:0]};
+ assign Qcnt_one_54= {Quotient_DP[53:0]};
+ assign Qcnt_one_55= {Quotient_DP[54:0]};
+ assign Qcnt_one_56= {Quotient_DP[55:0]};
+ assign Qcnt_one_57= {Quotient_DP[56:0]};
+ assign Qcnt_two_0 = {1'b0, Sqrt_quotinent_S[3]}; // Qcnt_two_N = {Quotient_DP[2N-1:0], Sqrt_quotinent_S[3]} (N=0 uses 1'b0 in place of the slice); read when Iteration_unit_num_S = 2'b01
+ assign Qcnt_two_1 = {Quotient_DP[1:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_2 = {Quotient_DP[3:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_3 = {Quotient_DP[5:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_4 = {Quotient_DP[7:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_5 = {Quotient_DP[9:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_6 = {Quotient_DP[11:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_7 = {Quotient_DP[13:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_8 = {Quotient_DP[15:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_9 = {Quotient_DP[17:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_10 = {Quotient_DP[19:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_11 = {Quotient_DP[21:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_12 = {Quotient_DP[23:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_13 = {Quotient_DP[25:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_14 = {Quotient_DP[27:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_15 = {Quotient_DP[29:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_16 = {Quotient_DP[31:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_17 = {Quotient_DP[33:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_18 = {Quotient_DP[35:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_19 = {Quotient_DP[37:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_20 = {Quotient_DP[39:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_21 = {Quotient_DP[41:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_22 = {Quotient_DP[43:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_23 = {Quotient_DP[45:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_24 = {Quotient_DP[47:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_25 = {Quotient_DP[49:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_26 = {Quotient_DP[51:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_27 = {Quotient_DP[53:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_two_28 = {Quotient_DP[55:0],Sqrt_quotinent_S[3]};
+ assign Qcnt_three_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; // Qcnt_three_N = {Quotient_DP[3N-1:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2]} (N=0 uses 1'b0); read when Iteration_unit_num_S = 2'b10
+ assign Qcnt_three_1 = {Quotient_DP[2:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_2 = {Quotient_DP[5:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_3 = {Quotient_DP[8:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_4 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_5 = {Quotient_DP[14:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_6 = {Quotient_DP[17:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_7 = {Quotient_DP[20:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_8 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_9 = {Quotient_DP[26:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_10 = {Quotient_DP[29:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_11 = {Quotient_DP[32:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_12 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_13 = {Quotient_DP[38:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_14 = {Quotient_DP[41:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_15 = {Quotient_DP[44:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_16 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_17 = {Quotient_DP[50:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_18 = {Quotient_DP[53:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_three_19 = {Quotient_DP[56:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+ assign Qcnt_four_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; // Qcnt_four_N = {Quotient_DP[4N-1:0], Sqrt_quotinent_S[3], [2], [1]} (N=0 uses 1'b0); presumably read when Iteration_unit_num_S = 2'b11 -- that branch is not visible here, confirm
+ assign Qcnt_four_1 = {Quotient_DP[3:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_2 = {Quotient_DP[7:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_3 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_4 = {Quotient_DP[15:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_5 = {Quotient_DP[19:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_6 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_7 = {Quotient_DP[27:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_8 = {Quotient_DP[31:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_9 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_10 = {Quotient_DP[39:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_11 = {Quotient_DP[43:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_12 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_13 = {Quotient_DP[51:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ assign Qcnt_four_14 = {Quotient_DP[55:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]};
+ always_comb begin // the intermediate operands for sqrt
+ case(Iteration_unit_num_S)
+ 2'b00:
+ begin
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b00, start //
+ /////////////////////////////////////////////////////////////////////////////
+ case(Crtl_cnt_S)
+ 6'b000000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_0};
+ Sqrt_Q0=Q_sqrt_com_0;
+ end
+ 6'b000001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_1};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b000010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+ Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_one_2};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b000011:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+ Q_sqrt0={{(C_MANT_FP64+3){1'b0}},Qcnt_one_3};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b000100:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+ Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_one_4};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b000101:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+ Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_one_5};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b000110:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+ Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_one_6};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b000111:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+ Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_one_7};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+ Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_one_8};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+ Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_one_9};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+ Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_one_10};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001011:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+ Q_sqrt0={{(C_MANT_FP64-5){1'b0}},Qcnt_one_11};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001100:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+ Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_one_12};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001101:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+ Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_one_13};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001110:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+ Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_one_14};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b001111:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+ Q_sqrt0={{(C_MANT_FP64-9){1'b0}},Qcnt_one_15};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+ Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_one_16};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+ Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_one_17};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+ Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_one_18};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010011:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+ Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_one_19};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010100:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+ Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_one_20};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010101:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+ Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_one_21};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010110:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+ Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_one_22};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b010111:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+ Q_sqrt0={{(C_MANT_FP64-17){1'b0}},Qcnt_one_23};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+ Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_one_24};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+ Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_one_25};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+ Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_one_26};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-21){1'b0}},Qcnt_one_27};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_one_28};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011101:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_one_29};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011110:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_one_30};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b011111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_one_31};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_one_32};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_one_33};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_one_34};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-29){1'b0}},Qcnt_one_35};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_one_36};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100101:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_one_37};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100110:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_one_38};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b100111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-33){1'b0}},Qcnt_one_39};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_one_40};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_one_41};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_one_42};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_one_43};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_one_44};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101101:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_one_45};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101110:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_one_46};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b101111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-41){1'b0}},Qcnt_one_47};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_one_48};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_one_49};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_one_50};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-45){1'b0}},Qcnt_one_51};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_one_52};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110101:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_one_53};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110110:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_one_54};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b110111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_one_55};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ 6'b111000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_one_56};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ end
+ default:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0='0;
+ Sqrt_Q0='0;
+ end
+ endcase
+ end
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b00, end //
+ /////////////////////////////////////////////////////////////////////////////
+ 2'b01:
+ begin
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b01, start //
+ /////////////////////////////////////////////////////////////////////////////
+ case(Crtl_cnt_S)
+ 6'b000000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+ Sqrt_Q0=Q_sqrt_com_0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+ Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b000001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+ Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_two_1[2:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+ Q_sqrt1={{(C_MANT_FP64+3){1'b0}},Qcnt_two_1[2:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b000010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+ Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_two_2[4:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+ Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_two_2[4:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b000011:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+ Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_two_3[6:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+ Q_sqrt1={{(C_MANT_FP64-1){1'b0}},Qcnt_two_3[6:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b000100:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+ Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_two_4[8:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+ Q_sqrt1={{(C_MANT_FP64-3){1'b0}},Qcnt_two_4[8:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b000101:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+ Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_two_5[10:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+ Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_two_5[10:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b000110:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+ Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_two_6[12:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+ Q_sqrt1={{(C_MANT_FP64-7){1'b0}},Qcnt_two_6[12:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b000111:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+ Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_two_7[14:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+ Q_sqrt1={{(C_MANT_FP64-9){1'b0}},Qcnt_two_7[14:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+ Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_two_8[16:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+ Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_two_8[16:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+ Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_two_9[18:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+ Q_sqrt1={{(C_MANT_FP64-13){1'b0}},Qcnt_two_9[18:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+ Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_two_10[20:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+ Q_sqrt1={{(C_MANT_FP64-15){1'b0}},Qcnt_two_10[20:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001011:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+ Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_two_11[22:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+ Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_two_11[22:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001100:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+ Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_two_12[24:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+ Q_sqrt1={{(C_MANT_FP64-19){1'b0}},Qcnt_two_12[24:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001101:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+ Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_two_13[26:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-21){1'b0}},Qcnt_two_13[26:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001110:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_two_14[28:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_two_14[28:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b001111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_two_15[30:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-25){1'b0}},Qcnt_two_15[30:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_two_16[32:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-27){1'b0}},Qcnt_two_16[32:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_two_17[34:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_two_17[34:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_two_18[36:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-31){1'b0}},Qcnt_two_18[36:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_two_19[38:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-33){1'b0}},Qcnt_two_19[38:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_two_20[40:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_two_20[40:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010101:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_two_21[42:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-37){1'b0}},Qcnt_two_21[42:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010110:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_two_22[44:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-39){1'b0}},Qcnt_two_22[44:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b010111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_two_23[46:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_two_23[46:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b011000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_two_24[48:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-43){1'b0}},Qcnt_two_24[48:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b011001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_two_25[50:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-45){1'b0}},Qcnt_two_25[50:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b011010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_two_26[52:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_two_26[52:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b011011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_two_27[54:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-49){1'b0}},Qcnt_two_27[54:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ 6'b011100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_two_28[56:1]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-51){1'b0}},Qcnt_two_28[56:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ default:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+ Sqrt_Q0=Q_sqrt_com_0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+ Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ end
+ endcase
+ end
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b01, end //
+ /////////////////////////////////////////////////////////////////////////////
+ 2'b10:
+ begin
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b10, start //
+ /////////////////////////////////////////////////////////////////////////////
+ case(Crtl_cnt_S)
+ 6'b000000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+ Sqrt_Q0=Q_sqrt_com_0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+ Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+ Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b000001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+ Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_three_1[4:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+ Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_three_1[4:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+ Q_sqrt2={{(C_MANT_FP64){1'b0}},Qcnt_three_1[4:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b000010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+ Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_three_2[7:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+ Q_sqrt1={{(C_MANT_FP64-2){1'b0}},Qcnt_three_2[7:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+ Q_sqrt2={{(C_MANT_FP64-3){1'b0}},Qcnt_three_2[7:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b000011:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+ Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_three_3[10:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+ Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_three_3[10:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+ Q_sqrt2={{(C_MANT_FP64-6){1'b0}},Qcnt_three_3[10:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b000100:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+ Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_three_4[13:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+ Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_three_4[13:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+ Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_three_4[13:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b000101:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+ Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_three_5[16:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+ Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_three_5[16:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+ Q_sqrt2={{(C_MANT_FP64-12){1'b0}},Qcnt_three_5[16:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b000110:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+ Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_three_6[19:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+ Q_sqrt1={{(C_MANT_FP64-14){1'b0}},Qcnt_three_6[19:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+ Q_sqrt2={{(C_MANT_FP64-15){1'b0}},Qcnt_three_6[19:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b000111:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+ Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_three_7[22:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+ Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_three_7[22:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+ Q_sqrt2={{(C_MANT_FP64-18){1'b0}},Qcnt_three_7[22:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+ Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_three_8[25:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+ Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_three_8[25:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+ Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_three_8[25:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_three_9[28:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_three_9[28:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-24){1'b0}},Qcnt_three_9[28:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_three_10[31:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-26){1'b0}},Qcnt_three_10[31:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-27){1'b0}},Qcnt_three_10[31:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_three_11[34:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_three_11[34:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-30){1'b0}},Qcnt_three_11[34:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_three_12[37:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_three_12[37:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_three_12[37:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001101:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_three_13[40:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_three_13[40:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-36){1'b0}},Qcnt_three_13[40:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001110:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_three_14[43:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-38){1'b0}},Qcnt_three_14[43:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-39){1'b0}},Qcnt_three_14[43:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b001111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_three_15[46:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_three_15[46:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-42){1'b0}},Qcnt_three_15[46:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b010000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_three_16[49:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_three_16[49:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_three_16[49:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b010001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_three_17[52:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_three_17[52:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-48){1'b0}},Qcnt_three_17[52:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ 6'b010010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_three_18[55:2]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-50){1'b0}},Qcnt_three_18[55:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-51){1'b0}},Qcnt_three_18[55:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ default :
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+ Sqrt_Q0=Q_sqrt_com_0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+ Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+ Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ end
+ endcase
+ end
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b10, end //
+ /////////////////////////////////////////////////////////////////////////////
+ 2'b11:
+ begin
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b11, start //
+ /////////////////////////////////////////////////////////////////////////////
+ case(Crtl_cnt_S)
+ 6'b000000:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+ Sqrt_Q0=Q_sqrt_com_0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+ Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+ Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+ Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b000001:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+ Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_four_1[6:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+ Q_sqrt1={{(C_MANT_FP64){1'b0}},Qcnt_four_1[6:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+ Q_sqrt2={{(C_MANT_FP64-1){1'b0}},Qcnt_four_1[6:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+ Q_sqrt3={{(C_MANT_FP64-2){1'b0}},Qcnt_four_1[6:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b000010:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+ Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_four_2[10:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+ Q_sqrt1={{(C_MANT_FP64-4){1'b0}},Qcnt_four_2[10:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+ Q_sqrt2={{(C_MANT_FP64-5){1'b0}},Qcnt_four_2[10:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+ Q_sqrt3={{(C_MANT_FP64-6){1'b0}},Qcnt_four_2[10:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b000011:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+ Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_four_3[14:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+ Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_four_3[14:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+ Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_four_3[14:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+ Q_sqrt3={{(C_MANT_FP64-10){1'b0}},Qcnt_four_3[14:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b000100:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+ Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_four_4[18:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+ Q_sqrt1={{(C_MANT_FP64-12){1'b0}},Qcnt_four_4[18:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+ Q_sqrt2={{(C_MANT_FP64-13){1'b0}},Qcnt_four_4[18:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+ Q_sqrt3={{(C_MANT_FP64-14){1'b0}},Qcnt_four_4[18:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b000101:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+ Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_four_5[22:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+ Q_sqrt1={{(C_MANT_FP64-16){1'b0}},Qcnt_four_5[22:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+ Q_sqrt2={{(C_MANT_FP64-17){1'b0}},Qcnt_four_5[22:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+ Q_sqrt3={{(C_MANT_FP64-18){1'b0}},Qcnt_four_5[22:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b000110:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+ Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_four_6[26:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+ Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_four_6[26:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+ Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_four_6[26:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-22){1'b0}},Qcnt_four_6[26:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b000111:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_four_7[30:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-24){1'b0}},Qcnt_four_7[30:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-25){1'b0}},Qcnt_four_7[30:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-26){1'b0}},Qcnt_four_7[30:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b001000:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_four_8[34:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-28){1'b0}},Qcnt_four_8[34:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-29){1'b0}},Qcnt_four_8[34:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-30){1'b0}},Qcnt_four_8[34:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b001001:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_four_9[38:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_four_9[38:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_four_9[38:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-34){1'b0}},Qcnt_four_9[38:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b001010:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_four_10[42:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-36){1'b0}},Qcnt_four_10[42:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-37){1'b0}},Qcnt_four_10[42:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-38){1'b0}},Qcnt_four_10[42:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b001011:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_four_11[46:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-40){1'b0}},Qcnt_four_11[46:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-41){1'b0}},Qcnt_four_11[46:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-42){1'b0}},Qcnt_four_11[46:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b001100:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_four_12[50:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_four_12[50:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_four_12[50:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-46){1'b0}},Qcnt_four_12[50:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ 6'b001101:
+ begin
+ Sqrt_DI[0]=2'b00;
+ Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_four_13[54:3]};
+ Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+ Sqrt_DI[1]=2'b00;
+ Q_sqrt1={{(C_MANT_FP64-48){1'b0}},Qcnt_four_13[54:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=2'b00;
+ Q_sqrt2={{(C_MANT_FP64-49){1'b0}},Qcnt_four_13[54:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=2'b00;
+ Q_sqrt3={{(C_MANT_FP64-50){1'b0}},Qcnt_four_13[54:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ default:
+ begin
+ Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+ Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+ Sqrt_Q0=Q_sqrt_com_0;
+ Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+ Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+ Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+ Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+ Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+ Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+ Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+ Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+ end
+ endcase
+ end
+ endcase
+ /////////////////////////////////////////////////////////////////////////////
+ // Operands for square root when Iteration_unit_num_S = 2'b11, end //
+ /////////////////////////////////////////////////////////////////////////////
+ end
+ assign Sqrt_R0= ((Sqrt_start_dly_S)?'0:{Partial_remainder_DP[C_MANT_FP64+5:0]}); // sqrt remainder fed to cell 0: all zeros while Sqrt_start_dly_S is set, otherwise the registered partial remainder
+ assign Sqrt_R1= {Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+2:0],Sqrt_DO[0]} ; // from cell 0's masked sum: top bit kept, bits [C_MANT_FP64+4:C_MANT_FP64+3] skipped, Sqrt_DO[0] appended as the low bits
+ assign Sqrt_R2= {Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+2:0],Sqrt_DO[1]}; // same construction from cell 1's masked sum and Sqrt_DO[1]
+ assign Sqrt_R3= {Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+2:0],Sqrt_DO[2]}; // same construction from cell 2's masked sum and Sqrt_DO[2]
+ assign Sqrt_R4= {Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+2:0],Sqrt_DO[3]}; // same construction from cell 3's masked sum and Sqrt_DO[3]
+ logic [C_MANT_FP64+5:0] Denominator_se_format_DB; // Denominator_se_DB with one per-format slot bit overridden, then FP64_SO and three zero bits appended
+ assign Denominator_se_format_DB={Denominator_se_DB[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-1]}, // FP16ALT slot: takes FP16ALT_SO when it is set, otherwise the original bit passes through
+ Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-1]}, // FP16 slot: same override, controlled by FP16_SO
+ Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-1]}, // FP32 slot: same override, controlled by FP32_SO
+ Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} ; // FP64 slot is FP64_SO itself; the three LSBs are always zero
+ // for iteration cell_U0: operand a/b selection for the first iteration cell (division operands here, sqrt operands muxed in at the end)
+ logic [C_MANT_FP64+5:0] First_iteration_cell_div_a_D,First_iteration_cell_div_b_D;
+ logic Sel_b_for_first_S;
+ assign First_iteration_cell_div_a_D=(Div_start_dly_S)?{Numerator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-1]}, // while Div_start_dly_S is set: Numerator_se_D with the active format's slot bit overridden (same layout as Denominator_se_format_DB)
+ Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16-1]}, // FP16 slot
+ Numerator_se_D[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP32-1]}, // FP32 slot
+ Numerator_se_D[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} // FP64 slot, then three zero LSBs
+ :{Partial_remainder_DP[C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+2]}, // otherwise: registered partial remainder moved up one bit position; the active format's slot takes Quotient_DP[0]
+ Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+2]}, // FP16 slot
+ Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+2]}, // FP32 slot
+ Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Quotient_DP[0],3'b0}; // FP64 slot is Quotient_DP[0] gated by FP64_SO; three zero LSBs
+ assign Sel_b_for_first_S=(Div_start_dly_S)?1:Quotient_DP[0]; // forced to 1 while Div_start_dly_S is set, afterwards follows the last quotient bit
+ assign First_iteration_cell_div_b_D=Sel_b_for_first_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; // select 1: formatted Denominator_se_DB; select 0: Denominator_se_D padded with four zero LSBs
+ assign Iteration_cell_a_BMASK_D[0]=Sqrt_enable_SO?Sqrt_R0:{First_iteration_cell_div_a_D}; // sqrt operand replaces the division operand when Sqrt_enable_SO is set
+ assign Iteration_cell_b_BMASK_D[0]=Sqrt_enable_SO?Sqrt_Q0:{First_iteration_cell_div_b_D}; // likewise for operand b
+ // for iteration cell_U1: operands derived from cell 0's masked sum; generated only when Iteration_unit_num_S is non-zero
+ logic [C_MANT_FP64+5:0] Sec_iteration_cell_div_a_D,Sec_iteration_cell_div_b_D;
+ logic Sel_b_for_sec_S;
+ generate
+ if(|Iteration_unit_num_S)
+ begin
+ assign Sel_b_for_sec_S=~Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5]; // select = inverted top bit of cell 0's masked sum
+ assign Sec_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+2]}, // cell 0's masked sum moved up one bit position; the active format's slot takes Sel_b_for_sec_S
+ Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+2]}, // FP16 slot
+ Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+2]}, // FP32 slot
+ Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_sec_S,3'b0}; // FP64 slot gated by FP64_SO; three zero LSBs
+ assign Sec_iteration_cell_div_b_D=Sel_b_for_sec_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; // select 1: formatted Denominator_se_DB; select 0: Denominator_se_D padded with four zero LSBs
+ assign Iteration_cell_a_BMASK_D[1]=Sqrt_enable_SO?Sqrt_R1:{Sec_iteration_cell_div_a_D}; // sqrt operand replaces the division operand when Sqrt_enable_SO is set
+ assign Iteration_cell_b_BMASK_D[1]=Sqrt_enable_SO?Sqrt_Q1:{Sec_iteration_cell_div_b_D}; // likewise for operand b
+ end
+ endgenerate
+ // for iteration cell_U2: operands derived from cell 1's masked sum; generated only when Iteration_unit_num_S is 2'b10 or 2'b11
+ logic [C_MANT_FP64+5:0] Thi_iteration_cell_div_a_D,Thi_iteration_cell_div_b_D;
+ logic Sel_b_for_thi_S;
+ generate
+ if((Iteration_unit_num_S==2'b10) | (Iteration_unit_num_S==2'b11))
+ begin
+ assign Sel_b_for_thi_S=~Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5]; // select = inverted top bit of cell 1's masked sum
+ assign Thi_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+2]}, // cell 1's masked sum moved up one bit position; the active format's slot takes Sel_b_for_thi_S
+ Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+2]}, // FP16 slot
+ Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+2]}, // FP32 slot
+ Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_thi_S,3'b0}; // FP64 slot gated by FP64_SO; three zero LSBs
+ assign Thi_iteration_cell_div_b_D=Sel_b_for_thi_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; // select 1: formatted Denominator_se_DB; select 0: Denominator_se_D padded with four zero LSBs
+ assign Iteration_cell_a_BMASK_D[2]=Sqrt_enable_SO?Sqrt_R2:{Thi_iteration_cell_div_a_D}; // sqrt operand replaces the division operand when Sqrt_enable_SO is set
+ assign Iteration_cell_b_BMASK_D[2]=Sqrt_enable_SO?Sqrt_Q2:{Thi_iteration_cell_div_b_D}; // likewise for operand b
+ end
+ endgenerate
+ // for iteration cell_U3: operands derived from cell 2's masked sum; generated only when Iteration_unit_num_S is 2'b11
+ logic [C_MANT_FP64+5:0] Fou_iteration_cell_div_a_D,Fou_iteration_cell_div_b_D;
+ logic Sel_b_for_fou_S;
+ generate
+ if(Iteration_unit_num_S==2'b11)
+ begin
+ assign Sel_b_for_fou_S=~Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5]; // select = inverted top bit of cell 2's masked sum
+ assign Fou_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+2]}, // cell 2's masked sum moved up one bit position; the active format's slot takes Sel_b_for_fou_S
+ Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+2]}, // FP16 slot
+ Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+2]}, // FP32 slot
+ Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_fou_S,3'b0}; // FP64 slot gated by FP64_SO; three zero LSBs
+ assign Fou_iteration_cell_div_b_D=Sel_b_for_fou_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; // select 1: formatted Denominator_se_DB; select 0: Denominator_se_D padded with four zero LSBs
+ assign Iteration_cell_a_BMASK_D[3]=Sqrt_enable_SO?Sqrt_R3:{Fou_iteration_cell_div_a_D}; // sqrt operand replaces the division operand when Sqrt_enable_SO is set
+ assign Iteration_cell_b_BMASK_D[3]=Sqrt_enable_SO?Sqrt_Q3:{Fou_iteration_cell_div_b_D}; // likewise for operand b
+ end
+ endgenerate
+ /////////////////////////////////////////////////////////////////////////////
+ // Masking Control //
+ /////////////////////////////////////////////////////////////////////////////
+ logic [C_MANT_FP64+1+4:0] Mask_bits_ctl_S; // per-bit enable ANDed into every iteration-cell operand and sum (kept for extension)
+ assign Mask_bits_ctl_S ={(C_MANT_FP64+6){1'b1}}; // all ones, i.e. no masking: per-format handling is already done by the operand formatting above. Sized from C_MANT_FP64 so it always matches the declaration instead of a hard-coded 58-bit literal.
+ /////////////////////////////////////////////////////////////////////////////
+ // Iteration Instances with masking control //
+ /////////////////////////////////////////////////////////////////////////////
+ logic Div_enable_SI [3:0]; // per-cell copy of Div_enable_SO
+ logic Div_start_dly_SI [3:0]; // per-cell copy of Div_start_dly_S
+ logic Sqrt_enable_SI [3:0]; // per-cell copy of Sqrt_enable_SO
+ generate
+ genvar i,j;
+ for (i=0; i <= Iteration_unit_num_S ; i++) // one iteration cell per index 0..Iteration_unit_num_S
+ begin
+ for (j = 0; j <= C_MANT_FP64+5; j++) begin // bit-by-bit gating with Mask_bits_ctl_S
+ assign Iteration_cell_a_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_a_BMASK_D[i][j]; // masked operand a
+ assign Iteration_cell_b_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_b_BMASK_D[i][j]; // masked operand b
+ assign Iteration_cell_sum_AMASK_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_sum_D[i][j]; // masked cell result, consumed by the next cell and the remainder register
+ end
+ assign Div_enable_SI[i] = Div_enable_SO; // every cell sees the same control values
+ assign Div_start_dly_SI[i] = Div_start_dly_S;
+ assign Sqrt_enable_SI[i] = Sqrt_enable_SO;
+ iteration_div_sqrt_mvp #(C_MANT_FP64+6) iteration_div_sqrt // NOTE(review): parameter is presumably the operand width (matches the [C_MANT_FP64+5:0] vectors) - confirm in iteration_div_sqrt_mvp
+ (
+ .A_DI (Iteration_cell_a_D[i] ),
+ .B_DI (Iteration_cell_b_D[i] ),
+ .Div_enable_SI (Div_enable_SI[i] ),
+ .Div_start_dly_SI (Div_start_dly_SI[i] ),
+ .Sqrt_enable_SI (Sqrt_enable_SI[i] ),
+ .D_DI (Sqrt_DI[i] ),
+ .D_DO (Sqrt_DO[i] ),
+ .Sum_DO (Iteration_cell_sum_D[i] ),
+ .Carry_out_DO (Iteration_cell_carry_D[i] )
+ );
+ end
+ endgenerate
+ always_comb // next partial remainder: result of the highest-indexed instantiated cell, or hold when the FSM is not enabled
+ begin
+ case (Iteration_unit_num_S)
+ 2'b00: // cell 0 is the last cell
+ begin
+ if(Fsm_enable_S)
+ Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R1:Iteration_cell_sum_AMASK_D[0]; // sqrt: Sqrt_R1 (built from cell 0); division: cell 0's masked sum
+ else
+ Partial_remainder_DN = Partial_remainder_DP; // hold
+ end
+ 2'b01: // cell 1 is the last cell
+ begin
+ if(Fsm_enable_S)
+ Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R2:Iteration_cell_sum_AMASK_D[1]; // sqrt: Sqrt_R2 (built from cell 1); division: cell 1's masked sum
+ else
+ Partial_remainder_DN = Partial_remainder_DP; // hold
+ end
+ 2'b10: // cell 2 is the last cell
+ begin
+ if(Fsm_enable_S)
+ Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R3:Iteration_cell_sum_AMASK_D[2]; // sqrt: Sqrt_R3 (built from cell 2); division: cell 2's masked sum
+ else
+ Partial_remainder_DN = Partial_remainder_DP; // hold
+ end
+ 2'b11: // cell 3 is the last cell
+ begin
+ if(Fsm_enable_S)
+ Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R4:Iteration_cell_sum_AMASK_D[3]; // sqrt: Sqrt_R4 (built from cell 3); division: cell 3's masked sum
+ else
+ Partial_remainder_DN = Partial_remainder_DP; // hold
+ end
+ endcase
+ end
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) // partial-remainder register with asynchronous active-low reset
+ begin
+ if(~Rst_RBI)
+ begin
+ Partial_remainder_DP <= '0; // cleared on reset
+ end
+ else
+ begin
+ Partial_remainder_DP <= Partial_remainder_DN; // otherwise load the next-state value every clock edge
+ end
+ end
+ logic [C_MANT_FP64+4:0] Quotient_DN; // next-state value of the quotient register
+ always_comb // Shifts one new result bit per instantiated cell into the quotient: Sqrt_quotinent_S bits for sqrt, cell carry-outs for division
+ begin
+ case (Iteration_unit_num_S)
+ 2'b00: // one cell: one new bit per enabled cycle
+ begin
+ if(Fsm_enable_S)
+ Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+3:0],Sqrt_quotinent_S[3]} :{Quotient_DP[C_MANT_FP64+3:0],Iteration_cell_carry_D[0]};
+ else
+ Quotient_DN= Quotient_DP; // hold
+ end
+ 2'b01: // two cells: two new bits, cell 0's bit is the more significant
+ begin
+ if(Fsm_enable_S)
+ Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+2:0],Sqrt_quotinent_S[3:2]} :{Quotient_DP[C_MANT_FP64+2:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1]};
+ else
+ Quotient_DN= Quotient_DP; // hold
+ end
+ 2'b10: // three cells: three new bits
+ begin
+ if(Fsm_enable_S)
+ Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+1:0],Sqrt_quotinent_S[3:1]} : {Quotient_DP[C_MANT_FP64+1:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2]};
+ else
+ Quotient_DN= Quotient_DP; // hold
+ end
+ 2'b11: // four cells: four new bits
+ begin
+ if(Fsm_enable_S)
+ Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64:0],Sqrt_quotinent_S } : {Quotient_DP[C_MANT_FP64:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]};
+ else
+ Quotient_DN= Quotient_DP; // hold
+ end
+ endcase
+ end
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) // Quotient register with asynchronous active-low reset
+ begin
+ if(~Rst_RBI)
+ begin
+ Quotient_DP <= '0; // cleared on reset
+ end
+ else
+ Quotient_DP <= Quotient_DN; // otherwise load the next-state value every clock edge
+ end
+ /////////////////////////////////////////////////////////////////////////////
+ // Precision Control for outputs //
+ /////////////////////////////////////////////////////////////////////////////
+//////////////////////one iteration unit, start///////////////////////////////////////
+ generate
+ if(Iteration_unit_num_S==2'b00)
+ begin
+ always_comb
+ begin
+ case (Format_sel_S)
+ 2'b00:
+ begin
+ case (Precision_ctl_S)
+ 6'h00:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+ end
+ 6'h17:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h16:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-1:0],{(C_MANT_FP64-C_MANT_FP32+4+1){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h15:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h14:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h13:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h12:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-5:0],{(C_MANT_FP64-C_MANT_FP32+4+5){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h11:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h10:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-7:0],{(C_MANT_FP64-C_MANT_FP32+4+7){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0f:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0e:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0d:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0c:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-11:0],{(C_MANT_FP64-C_MANT_FP32+4+11){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0b:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0a:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-13:0],{(C_MANT_FP64-C_MANT_FP32+4+13){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h09:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h08:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h07:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+ end
+ default :
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+ end
+ endcase
+ end
+ 2'b01:
+ begin
+ case (Precision_ctl_S)
+ 6'h00:
+ begin
+ Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+ end
+ 6'h34:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64:0],{(4){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h33:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h32:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h31:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h30:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-4:0],{(4+4){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h2f:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h2e:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-6:0],{(4+6){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h2d:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h2c:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h2b:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h2a:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-10:0],{(4+10){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h29:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h28:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-12:0],{(4+12){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h27:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h26:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h25:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h24:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-16:0],{(4+16){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h23:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h22:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-18:0],{(4+18){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h21:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h20:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h1f:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h1e:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-22:0],{(4+22){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h1d:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h1c:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-24:0],{(4+24){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h1b:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h1a:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h19:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h18:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-28:0],{(4+28){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h17:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h16:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-30:0],{(4+30){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h15:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h14:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h13:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h12:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-34:0],{(4+34){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h11:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h10:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-36:0],{(4+36){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0f:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0e:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0d:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0c:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-40:0],{(4+40){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0b:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h0a:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-42:0],{(4+42){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h09:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h08:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h07:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}}}; //Precision_ctl_S+1
+ end
+ default:
+ begin
+ Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+ end
+ endcase
+ end
+ 2'b10:
+ begin
+ case (Precision_ctl_S)
+ 6'b00:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+ end
+ 6'h0a:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16:0],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h09:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h08:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}}}; //Precision_ctl_S+1
+ end
+ 6'h07:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}}}; //Precision_ctl_S+1
+ end
+ default :
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+ end
+ endcase
+ end
+ 2'b11:
+ begin
+ case (Precision_ctl_S)
+ 6'b00:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+ end
+ 6'h07:
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}}}; //Precision_ctl_S+1
+ end
+ default :
+ begin
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+ end
+ endcase
+ end
+ endcase
+ end
+ end
+ endgenerate
+//////////////////////one iteration unit, end//////////////////////////////////////////
+//////////////////////two iteration units, start///////////////////////////////////////
+ // Mantissa pre-normalisation mux for the two-iteration-unit build
+ // (Iteration_unit_num_S == 2'b01).
+ //
+ // Format_sel_S selects the arm (2'b00 uses the FP32 constants, 2'b01 the
+ // FP64 ones, 2'b10 FP16, 2'b11 FP16ALT). Precision_ctl_S then selects which
+ // low slice of Quotient_DP is kept; in this table the precision codes are
+ // grouped in pairs. Each arm left-aligns its slice and pads zeros on the
+ // right so every concatenation is C_MANT_FP64+5 bits wide.
+ //
+ // Precision_ctl_S == 0 and every code not listed explicitly (default) are
+ // meant to produce the same full-width slice.
+ generate
+   if (Iteration_unit_num_S == 2'b01)
+     begin
+       always_comb
+         begin
+           case (Format_sel_S)
+             // ---------------- FP32 ----------------
+             2'b00:
+               case (Precision_ctl_S)
+                 6'h00:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}};
+                 6'h17,6'h16: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}};
+                 6'h15,6'h14: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}};
+                 6'h13,6'h12: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}};
+                 6'h11,6'h10: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}};
+                 6'h0f,6'h0e: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}};
+                 6'h0d,6'h0c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}};
+                 6'h0b,6'h0a: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}};
+                 6'h09,6'h08: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}};
+                 6'h07,6'h06: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}};
+                 default:     Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}};
+               endcase
+             // ---------------- FP64 ----------------
+             2'b01:
+               case (Precision_ctl_S)
+                 6'h00:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0};
+                 // Bit 0 of the quotient is dropped for this code.
+                 6'h34:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} };
+                 6'h33,6'h32: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}} };
+                 6'h31,6'h30: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}} };
+                 6'h2f,6'h2e: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} };
+                 6'h2d,6'h2c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}} };
+                 6'h2b,6'h2a: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}} };
+                 6'h29,6'h28: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} };
+                 6'h27,6'h26: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}} };
+                 6'h25,6'h24: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}} };
+                 6'h23,6'h22: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} };
+                 6'h21,6'h20: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}} };
+                 6'h1f,6'h1e: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}} };
+                 6'h1d,6'h1c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} };
+                 6'h1b,6'h1a: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}} };
+                 6'h19,6'h18: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}} };
+                 6'h17,6'h16: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} };
+                 6'h15,6'h14: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}} };
+                 6'h13,6'h12: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}} };
+                 6'h11,6'h10: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} };
+                 6'h0f,6'h0e: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}} };
+                 6'h0d,6'h0c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}} };
+                 6'h0b,6'h0a: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} };
+                 6'h09,6'h08: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}} };
+                 6'h07:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}} };
+                 default:     Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0};
+               endcase
+             // ---------------- FP16 ----------------
+             2'b10:
+               case (Precision_ctl_S)
+                 6'h00:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} };
+                 // Bit 0 of the quotient is dropped for this code.
+                 6'h0a:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} };
+                 6'h09,6'h08: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}} };
+                 6'h07:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} };
+                 // Fixed: this fallback used the [C_MANT_FP16+4:0] slice, which is
+                 // one bit wider than the 6'h00 slice above and shifts the
+                 // quotient by one position. It now mirrors the 6'h00 arm, as
+                 // the default of every other format in this table does.
+                 default:     Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} };
+               endcase
+             // ---------------- FP16ALT ----------------
+             2'b11:
+               case (Precision_ctl_S)
+                 6'h00:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+                 6'h07:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} };
+                 default:     Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+               endcase
+           endcase
+         end
+     end
+ endgenerate
+//////////////////////two iteration units, end//////////////////////////////////////////
+//////////////////////three iteration units, start///////////////////////////////////////
+ // Mantissa pre-normalisation mux for the three-iteration-unit build
+ // (Iteration_unit_num_S == 2'b10).
+ //
+ // Format_sel_S selects the arm (2'b00 uses the FP32 constants, 2'b01 the
+ // FP64 ones, 2'b10 FP16, 2'b11 FP16ALT). Precision_ctl_S then selects which
+ // low slice of Quotient_DP is kept; in this table the precision codes are
+ // mostly grouped in threes. Each arm left-aligns its slice and pads zeros on
+ // the right so every result is C_MANT_FP64+5 bits wide. Precision code 0 and
+ // all unlisted codes (default) produce the same value.
+ generate
+   if (Iteration_unit_num_S == 2'b10)
+     begin
+       always_comb
+         begin
+           case (Format_sel_S)
+             // ---------------- FP32 ----------------
+             2'b00:
+               case (Precision_ctl_S)
+                 6'h00:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}};
+                 6'h17,6'h16,6'h15: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}};
+                 6'h14,6'h13,6'h12: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}};
+                 6'h11,6'h10,6'h0f: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}};
+                 6'h0e,6'h0d,6'h0c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}};
+                 6'h0b,6'h0a,6'h09: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}};
+                 6'h08,6'h07,6'h06: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}};
+                 default:           Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}};
+               endcase
+             // ---------------- FP64 ----------------
+             2'b01:
+               case (Precision_ctl_S)
+                 6'h00:             Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0];
+                 // Bit 0 of the quotient is dropped for these two codes.
+                 6'h34,6'h33:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} };
+                 6'h32,6'h31,6'h30: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}} };
+                 6'h2f,6'h2e,6'h2d: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} };
+                 6'h2c,6'h2b,6'h2a: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}} };
+                 6'h29,6'h28,6'h27: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} };
+                 6'h26,6'h25,6'h24: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}} };
+                 6'h23,6'h22,6'h21: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} };
+                 6'h20,6'h1f,6'h1e: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}} };
+                 6'h1d,6'h1c,6'h1b: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} };
+                 6'h1a,6'h19,6'h18: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}} };
+                 6'h17,6'h16,6'h15: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} };
+                 6'h14,6'h13,6'h12: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}} };
+                 6'h11,6'h10,6'h0f: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} };
+                 6'h0e,6'h0d,6'h0c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}} };
+                 6'h0b,6'h0a,6'h09: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} };
+                 6'h08,6'h07,6'h06: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}} };
+                 default:           Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0];
+               endcase
+             // ---------------- FP16 ----------------
+             2'b10:
+               case (Precision_ctl_S)
+                 6'h00:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+                 // Bit 0 of the quotient is dropped for these two codes.
+                 6'h0a,6'h09:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} };
+                 6'h08,6'h07,6'h06: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}} };
+                 default:           Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+               endcase
+             // ---------------- FP16ALT ----------------
+             2'b11:
+               case (Precision_ctl_S)
+                 6'h00:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+                 // Bit 0 of the quotient is dropped for these two codes.
+                 6'h07,6'h06:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+1:1],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} };
+                 default:           Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+               endcase
+           endcase
+         end
+     end
+ endgenerate
+//////////////////////three iteration units, end//////////////////////////////////////////
+//////////////////////four iteration units, start///////////////////////////////////////
+ // Mantissa pre-normalisation mux for the four-iteration-unit build
+ // (Iteration_unit_num_S == 2'b11).
+ //
+ // Format_sel_S selects the arm (2'b00 uses the FP32 constants, 2'b01 the
+ // FP64 ones, 2'b10 FP16, 2'b11 FP16ALT). Precision_ctl_S then selects which
+ // low slice of Quotient_DP is kept; in this table the precision codes are
+ // mostly grouped in fours. Each arm left-aligns its slice and pads zeros on
+ // the right so every result is C_MANT_FP64+5 bits wide. Precision code 0 and
+ // all unlisted codes (default) produce the same value.
+ generate
+   if (Iteration_unit_num_S == 2'b11)
+     begin
+       always_comb
+         begin
+           case (Format_sel_S)
+             // ---------------- FP32 ----------------
+             2'b00:
+               case (Precision_ctl_S)
+                 6'h00:                   Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}};
+                 6'h17,6'h16,6'h15,6'h14: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}};
+                 6'h13,6'h12,6'h11,6'h10: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}};
+                 6'h0f,6'h0e,6'h0d,6'h0c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}};
+                 6'h0b,6'h0a,6'h09,6'h08: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}};
+                 6'h07,6'h06:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}};
+                 default:                 Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}};
+               endcase
+             // ---------------- FP64 ----------------
+             2'b01:
+               case (Precision_ctl_S)
+                 // Codes 6'h00 and 6'h34 select the same slice in this table.
+                 6'h00,6'h34:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}};
+                 6'h33,6'h32,6'h31,6'h30: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(5){1'b0}} };
+                 6'h2f,6'h2e,6'h2d,6'h2c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(9){1'b0}} };
+                 6'h2b,6'h2a,6'h29,6'h28: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(13){1'b0}} };
+                 6'h27,6'h26,6'h25,6'h24: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(17){1'b0}} };
+                 6'h23,6'h22,6'h21,6'h20: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(21){1'b0}} };
+                 6'h1f,6'h1e,6'h1d,6'h1c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(25){1'b0}} };
+                 6'h1b,6'h1a,6'h19,6'h18: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(29){1'b0}} };
+                 6'h17,6'h16,6'h15,6'h14: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(33){1'b0}} };
+                 6'h13,6'h12,6'h11,6'h10: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(37){1'b0}} };
+                 6'h0f,6'h0e,6'h0d,6'h0c: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(41){1'b0}} };
+                 6'h0b,6'h0a,6'h09,6'h08: Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(45){1'b0}} };
+                 6'h07,6'h06:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(49){1'b0}} };
+                 default:                 Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}};
+               endcase
+             // ---------------- FP16 ----------------
+             2'b10:
+               case (Precision_ctl_S)
+                 6'h00:                   Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} };
+                 // Bit 0 of the quotient is dropped for these three codes.
+                 6'h0a,6'h09,6'h08:       Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} };
+                 6'h07,6'h06:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1-4:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} };
+                 default:                 Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} };
+               endcase
+             // ---------------- FP16ALT ----------------
+             2'b11:
+               case (Precision_ctl_S)
+                 6'h00:                   Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+                 6'h07,6'h06:             Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} };
+                 default:                 Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+               endcase
+           endcase
+         end
+     end
+ endgenerate
+//////////////////////four iteration units, end///////////////////////////////////////
+// Resultant exponent (before normalisation).
+// A three-operand sum (Exp_add_a_D + Exp_add_b_D + Exp_add_c_D) is captured
+// into Exp_result_prenorm_DP while Start_dly_S is high and held otherwise.
+ logic [C_EXP_FP64+1:0] Exp_result_prenorm_DN,Exp_result_prenorm_DP;
+ logic [C_EXP_FP64+1:0] Exp_add_a_D;
+ logic [C_EXP_FP64+1:0] Exp_add_b_D;
+ logic [C_EXP_FP64+1:0] Exp_add_c_D;
+ // NOTE(review): every arm of this format case is empty, so the block assigns
+ // nothing in the visible source. C_BIAS_AONE and C_HALF_BIAS (used below) are
+ // not declared in the visible part of this file; presumably this case was
+ // meant to select them per format - confirm against the original source.
+ always_comb
+ begin //
+ case (Format_sel_S)
+ 2'b00:
+ begin
+ end
+ 2'b01:
+ begin
+ end
+ 2'b10:
+ begin
+ end
+ 2'b11:
+ begin
+ end
+ endcase
+ end
+//For division, exponent=(Exp_a_D-LZ1)-(Exp_b_D-LZ2)+BIAS
+//For square root, exponent=(Exp_a_D-LZ1)/2+(Exp_a_D-LZ1)%2+C_HALF_BIAS
+//For exponent, in the preprocess module, (Exp_a_D-LZ1) and (Exp_b_D-LZ2) have already been adjusted for denormal numbers.
+ // Operand a: for square root, Exp_num_DI without its LSB (i.e. shifted right
+ // by one) with the top bit replicated three times; otherwise Exp_num_DI with
+ // the top bit replicated twice.
+ assign Exp_add_a_D = {Sqrt_start_dly_S?{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64:1]}:{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI}};
+ // Operand b: for square root, only the LSB of Exp_num_DI (zero-extended);
+ // otherwise the bitwise inverse of Exp_den_DI with the inverted top bit
+ // replicated twice. NOTE(review): the "+1" that would turn this inverse into
+ // a two's-complement negation presumably comes from C_BIAS_AONE - confirm.
+ assign Exp_add_b_D = {Sqrt_start_dly_S?{1'b0,{C_EXP_ZERO_FP64},Exp_num_DI[0]}:{~Exp_den_DI[C_EXP_FP64],~Exp_den_DI[C_EXP_FP64],~Exp_den_DI}};
+ // Operand c: C_BIAS_AONE when Div_start_dly_S is high, C_HALF_BIAS otherwise.
+ assign Exp_add_c_D = {Div_start_dly_S?{{C_BIAS_AONE}}:{{C_HALF_BIAS}}};
+ // Next-state value: new sum while Start_dly_S is high, else hold the register.
+ assign Exp_result_prenorm_DN = (Start_dly_S)?{Exp_add_a_D + Exp_add_b_D + Exp_add_c_D}:Exp_result_prenorm_DP;
+ // Exponent register: cleared asynchronously when Rst_RBI is low.
+ always_ff @(posedge Clk_CI, negedge Rst_RBI)
+ begin
+ if(~Rst_RBI)
+ begin
+ Exp_result_prenorm_DP <= '0;
+ end
+ else
+ begin
+ Exp_result_prenorm_DP<= Exp_result_prenorm_DN;
+ end
+ end
+ assign Exp_result_prenorm_DO = Exp_result_prenorm_DP;
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..19fb93e
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,67 @@
+// data_mem_top: connects a TL-UL port (tl_d_i / tl_d_o) to an SRAM-style
+// interface through a tlul_sram_adapter instance. It drives the SRAM with
+// inverted request/write strobes and a 4-bit write mask derived from the
+// adapter's 32-bit mask.
+module data_mem_top
+ input clk_i,
+ input rst_ni,
+// tl-ul interface
+ input tlul_pkg::tl_h2d_t tl_d_i,
+ output tlul_pkg::tl_d2h_t tl_d_o,
+// sram interface
+ output logic csb, // driven as ~tl_req: low while the adapter requests an access
+ output logic [11:0] addr_o,
+ output logic [31:0] wdata_o,
+ output logic [3:0] wmask_o, // one bit per byte of wdata_o
+ output logic we_o, // driven as ~we_i: low when the adapter signals a write
+ input logic [31:0] rdata_i
+ // Internal signals between the adapter and the SRAM-side glue logic.
+ logic tl_req; // adapter req_o
+ logic [31:0] tl_wmask; // adapter wmask_o (32 bits wide)
+ logic we_i; // adapter we_o
+ logic rvalid_o; // read-valid strobe fed back to the adapter's rvalid_i
+ // Reduce the adapter's 32-bit write mask to one bit per byte lane: a lane is
+ // enabled when any mask bit of that byte is set. The comparison already
+ // yields a 1-bit result, so no ternary is needed (the previous form mixed
+ // 1'b1 with a 2-bit 2'b0 on lanes 2 and 3).
+ assign wmask_o[0] = (tl_wmask[7:0] != 8'b0);
+ assign wmask_o[1] = (tl_wmask[15:8] != 8'b0);
+ assign wmask_o[2] = (tl_wmask[23:16] != 8'b0);
+ assign wmask_o[3] = (tl_wmask[31:24] != 8'b0);
+ // The SRAM-side write enable and chip select are the inverted adapter strobes.
+ assign we_o = ~we_i;
+ assign csb = ~tl_req;
+// TL-UL to SRAM adapter instance. Address and data widths match the
+// addr_o[11:0] / wdata_o[31:0] ports of this module.
+tlul_sram_adapter #(
+ .SramAw (12),
+ .SramDw (32),
+ .Outstanding (4),
+ .ByteAccess (1),
+ .ErrOnWrite (0), // 1: Writes not allowed, automatically error
+ .ErrOnRead (0)
+) data_mem (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_i (tl_d_i),
+ .tl_o (tl_d_o),
+ .req_o (tl_req),
+ .gnt_i (1'b1), // requests are always granted
+ .we_o (we_i),
+ .addr_o (addr_o),
+ .wdata_o (wdata_o),
+ .wmask_o (tl_wmask),
+ .rdata_i (rst_ni? rdata_i: '0), // read data is forced to zero while rst_ni is low
+ .rvalid_i (rvalid_o),
+ .rerror_i (2'b0) // tied off: no read error is ever reported
+ // Read-valid strobe: follows tl_req with one clock of delay, and is forced
+ // low on the clock edge when rst_ni is low or the adapter signals a write.
+ always_ff @(posedge clk_i) begin
+   if (!rst_ni || we_i) rvalid_o <= 1'b0;
+   else                 rvalid_o <= tl_req;
+ end
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..528c62c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,65 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: $filename.v
+ *
+ * Description: Auto-generated bootrom
+ */
+// Auto-generated code
+// Read-only memory of RomSize (19) 64-bit words. A request latches a word
+// index taken from addr_i; rdata_o is driven combinationally from that
+// latched index.
+module debug_rom (
+ input logic clk_i,
+ input logic req_i, // when high, the word index is sampled on the rising clk_i edge
+ input logic [63:0] addr_i, // only bits [$clog2(RomSize)+2:3] are used
+ output logic [63:0] rdata_o
+ localparam int unsigned RomSize = 19;
+ // Packed array filled by a single concatenation: the first literal listed
+ // lands in mem[RomSize-1] and the last one in mem[0].
+ logic [RomSize-1:0][63:0] mem;
+ assign mem = {
+ 64'h00000000_7b200073,
+ 64'h7b202473_7b302573,
+ 64'h10852423_f1402473,
+ 64'ha85ff06f_7b202473,
+ 64'h7b302573_10052223,
+ 64'h00100073_7b202473,
+ 64'h7b302573_10052623,
+ 64'h00c51513_00c55513,
+ 64'h00000517_fd5ff06f,
+ 64'hfa041ce3_00247413,
+ 64'h40044403_00a40433,
+ 64'hf1402473_02041c63,
+ 64'h00147413_40044403,
+ 64'h00a40433_10852023,
+ 64'hf1402473_00c51513,
+ 64'h00c55513_00000517,
+ 64'h7b351073_7b241073,
+ 64'h0ff0000f_04c0006f,
+ 64'h07c0006f_00c0006f
+ };
+ // Latched word index; it has no reset and keeps its value while req_i is low.
+ logic [$clog2(RomSize)-1:0] addr_q;
+ always_ff @(posedge clk_i) begin
+ if (req_i) begin
+ // Dropping the three low address bits selects one 64-bit word.
+ addr_q <= addr_i[$clog2(RomSize)-1+3:3];
+ end
+ end
+ // this prevents spurious Xes from propagating into
+ // the speculative fetch stage of the core
+ always_comb begin : p_outmux
+ rdata_o = '0;
+ // Indices at or beyond RomSize read as zero instead of indexing past mem.
+ if (addr_q < $clog2(RomSize)'(RomSize)) begin
+ rdata_o = mem[addr_q];
+ end
+ end
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..3e88406
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,60 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: $filename.v
+ *
+ * Description: Auto-generated bootrom
+ */
+// Auto-generated code
+// Read-only memory of RomSize (13) 64-bit words. A request latches a word
+// index taken from addr_i; rdata_o is driven combinationally from that
+// latched index.
+module debug_rom_one_scratch (
+ input logic clk_i,
+ input logic req_i, // when high, the word index is sampled on the rising clk_i edge
+ input logic [63:0] addr_i, // only bits [$clog2(RomSize)+2:3] are used
+ output logic [63:0] rdata_o
+ localparam int unsigned RomSize = 13;
+ // Packed array filled by a single concatenation: the first literal listed
+ // lands in mem[RomSize-1] and the last one in mem[0].
+ logic [RomSize-1:0][63:0] mem;
+ assign mem = {
+ 64'h00000000_7b200073,
+ 64'h7b202473_10802423,
+ 64'hf1402473_ab1ff06f,
+ 64'h7b202473_10002223,
+ 64'h00100073_7b202473,
+ 64'h10002623_fddff06f,
+ 64'hfc0418e3_00247413,
+ 64'h40044403_f1402473,
+ 64'h02041263_00147413,
+ 64'h40044403_10802023,
+ 64'hf1402473_7b241073,
+ 64'h0ff0000f_0340006f,
+ 64'h0500006f_00c0006f
+ };
+ // Latched word index; it has no reset and keeps its value while req_i is low.
+ logic [$clog2(RomSize)-1:0] addr_q;
+ always_ff @(posedge clk_i) begin
+ if (req_i) begin
+ // Dropping the three low address bits selects one 64-bit word.
+ addr_q <= addr_i[$clog2(RomSize)-1+3:3];
+ end
+ end
+ // this prevents spurious Xes from propagating into
+ // the speculative fetch stage of the core
+ always_comb begin : p_outmux
+ rdata_o = '0;
+ // Indices at or beyond RomSize read as zero instead of indexing past mem.
+ if (addr_q < $clog2(RomSize)'(RomSize)) begin
+ rdata_o = mem[addr_q];
+ end
+ end
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..b3f41fe
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// This file contains all div_sqrt_top_mvp parameters
+// Authors : Lei Li (
+// Shared constants for the divide / square-root unit: control-field widths,
+// rounding-mode encodings, the build-time iteration-unit selection, and
+// per-format (FP64, FP32, FP16, FP16ALT) widths, biases and special bit
+// patterns.
+package defs_div_sqrt_mvp;
+ // op command
+ localparam C_RM = 3; // width of the rounding-mode field
+ localparam C_RM_NEAREST = 3'h0;
+ localparam C_RM_TRUNC = 3'h1;
+ localparam C_RM_PLUSINF = 3'h2;
+ localparam C_RM_MINUSINF = 3'h3;
+ localparam C_PC = 6; // Precision Control
+ localparam C_FS = 2; // Format Selection
+ localparam C_IUNC = 2; // Iteration Unit Number Control
+ // Build-time selector compared against 2'b01 / 2'b10 / 2'b11 by the
+ // "two / three / four iteration units" generate blocks; 2'b10 enables the
+ // three-unit variant.
+ localparam Iteration_unit_num_S = 2'b10;
+ // FP64
+ localparam C_OP_FP64 = 64; // operand width in bits
+ localparam C_MANT_FP64 = 52; // mantissa field width
+ localparam C_EXP_FP64 = 11; // exponent field width
+ localparam C_BIAS_FP64 = 1023;
+ localparam C_BIAS_AONE_FP64 = 11'h400; // 1024 = C_BIAS_FP64 + 1
+ localparam C_HALF_BIAS_FP64 = 511; // C_BIAS_FP64 / 2, rounded down
+ localparam C_EXP_ZERO_FP64 = 11'h000;
+ localparam C_EXP_ONE_FP64 = 13'h001; // Bit width is in agreement with in norm
+ localparam C_EXP_INF_FP64 = 11'h7FF; // all-ones exponent
+ localparam C_MANT_ZERO_FP64 = 52'h0;
+ localparam C_MANT_NAN_FP64 = 52'h8_0000_0000_0000; // only the mantissa MSB set
+ localparam C_PZERO_FP64 = 64'h0000_0000_0000_0000;
+ localparam C_MZERO_FP64 = 64'h8000_0000_0000_0000; // only the sign bit set
+ localparam C_QNAN_FP64 = 64'h7FF8_0000_0000_0000; // all-ones exponent + mantissa MSB
+ // FP32
+ localparam C_OP_FP32 = 32; // operand width in bits
+ localparam C_MANT_FP32 = 23; // mantissa field width
+ localparam C_EXP_FP32 = 8; // exponent field width
+ localparam C_BIAS_FP32 = 127;
+ localparam C_BIAS_AONE_FP32 = 8'h80; // 128 = C_BIAS_FP32 + 1
+ localparam C_HALF_BIAS_FP32 = 63; // C_BIAS_FP32 / 2, rounded down
+ localparam C_EXP_ZERO_FP32 = 8'h00;
+ localparam C_EXP_INF_FP32 = 8'hFF; // all-ones exponent
+ localparam C_MANT_ZERO_FP32 = 23'h0;
+ localparam C_PZERO_FP32 = 32'h0000_0000;
+ localparam C_MZERO_FP32 = 32'h8000_0000; // only the sign bit set
+ localparam C_QNAN_FP32 = 32'h7FC0_0000; // all-ones exponent + mantissa MSB
+ // FP16
+ localparam C_OP_FP16 = 16; // operand width in bits
+ localparam C_MANT_FP16 = 10; // mantissa field width
+ localparam C_EXP_FP16 = 5; // exponent field width
+ localparam C_BIAS_FP16 = 15;
+ localparam C_BIAS_AONE_FP16 = 5'h10; // 16 = C_BIAS_FP16 + 1
+ localparam C_HALF_BIAS_FP16 = 7; // C_BIAS_FP16 / 2, rounded down
+ localparam C_EXP_ZERO_FP16 = 5'h00;
+ localparam C_EXP_INF_FP16 = 5'h1F; // all-ones exponent
+ localparam C_MANT_ZERO_FP16 = 10'h0;
+ localparam C_PZERO_FP16 = 16'h0000;
+ localparam C_MZERO_FP16 = 16'h8000; // only the sign bit set
+ localparam C_QNAN_FP16 = 16'h7E00; // all-ones exponent + mantissa MSB
+ // FP16alt
+ // Same exponent width and bias as FP32, with a 7-bit mantissa. No
+ // C_PZERO / C_MZERO constants are defined for this format.
+ localparam C_OP_FP16ALT = 16; // operand width in bits
+ localparam C_MANT_FP16ALT = 7; // mantissa field width
+ localparam C_EXP_FP16ALT = 8; // exponent field width
+ localparam C_BIAS_FP16ALT = 127;
+ localparam C_BIAS_AONE_FP16ALT = 8'h80; // 128 = C_BIAS_FP16ALT + 1
+ localparam C_HALF_BIAS_FP16ALT = 63; // C_BIAS_FP16ALT / 2, rounded down
+ localparam C_EXP_ZERO_FP16ALT = 8'h00;
+ localparam C_EXP_INF_FP16ALT = 8'hFF; // all-ones exponent
+ localparam C_MANT_ZERO_FP16ALT = 7'h0;
+ localparam C_QNAN_FP16ALT = 16'h7FC0; // all-ones exponent + mantissa MSB
+endpackage : defs_div_sqrt_mvp
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..3af6081
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,180 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Company: IIS @ ETHZ - Federal Institute of Technology //
+// //
+// Engineers: Lei Li -- //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 03/03/2018 //
+// Design Name: div_sqrt_top_mvp //
+// Module Name: //
+// Project Name: The shared divisor and square root //
+// Language: SystemVerilog //
+// //
+// Description: The top of div and sqrt //
+// //
+// //
+// Revision Date: 12/04/2018 //
+// Lei Li //
+// To address some requirements by Stefan and add low power //
+// control for special cases //
+import defs_div_sqrt_mvp::*;
+// Top level of the shared divide / square-root unit. It is purely structural
+// and chains three sub-modules:
+//   preprocess_U0 -> nrbd_nrsc_U0 -> fpu_norm_U0
+// preprocess_U0 supplies the operand exponents/mantissas and special-case
+// flags, nrbd_nrsc_U0 supplies the raw result exponent/mantissa plus the
+// Ready/Done handshake, and fpu_norm_U0 drives Result_DO and Fflags_SO.
+module div_sqrt_top_mvp
+ (//Input
+ input logic Clk_CI,
+ input logic Rst_RBI,
+ input logic Div_start_SI,
+ input logic Sqrt_start_SI,
+ //Input Operands
+ input logic [C_OP_FP64-1:0] Operand_a_DI,
+ input logic [C_OP_FP64-1:0] Operand_b_DI,
+ // Input Control
+ input logic [C_RM-1:0] RM_SI, //Rounding Mode
+ input logic [C_PC-1:0] Precision_ctl_SI, // Precision Control
+ input logic [C_FS-1:0] Format_sel_SI, // Format Selection,
+ input logic Kill_SI,
+ //Output Result
+ output logic [C_OP_FP64-1:0] Result_DO,
+ //Output-Flags
+ output logic [4:0] Fflags_SO,
+ output logic Ready_SO,
+ output logic Done_SO
+ );
+ //Operand components
+ // Exponents and mantissas are one bit wider than the FP64 field widths.
+ logic [C_EXP_FP64:0] Exp_a_D;
+ logic [C_EXP_FP64:0] Exp_b_D;
+ logic [C_MANT_FP64:0] Mant_a_D;
+ logic [C_MANT_FP64:0] Mant_b_D;
+ // Raw (pre-normalisation) result passed from nrbd_nrsc_U0 to fpu_norm_U0.
+ logic [C_EXP_FP64+1:0] Exp_z_D;
+ logic [C_MANT_FP64+4:0] Mant_z_D;
+ logic Sign_z_D;
+ logic Start_S;
+ logic [C_RM-1:0] RM_dly_S;
+ logic Div_enable_S;
+ logic Sqrt_enable_S;
+ // Operand classification flags produced by preprocess_U0.
+ logic Inf_a_S;
+ logic Inf_b_S;
+ logic Zero_a_S;
+ logic Zero_b_S;
+ logic NaN_a_S;
+ logic NaN_b_S;
+ logic SNaN_S;
+ logic Special_case_SB,Special_case_dly_SB;
+ // Precision / format indications produced by nrbd_nrsc_U0.
+ logic Full_precision_S;
+ logic FP32_S;
+ logic FP64_S;
+ logic FP16_S;
+ logic FP16ALT_S;
+ // Stage 1: operand pre-processing. Ready_SO is fed back in as Ready_SI.
+ preprocess_mvp preprocess_U0
+ (
+ .Clk_CI (Clk_CI ),
+ .Rst_RBI (Rst_RBI ),
+ .Div_start_SI (Div_start_SI ),
+ .Sqrt_start_SI (Sqrt_start_SI ),
+ .Ready_SI (Ready_SO ),
+ .Operand_a_DI (Operand_a_DI ),
+ .Operand_b_DI (Operand_b_DI ),
+ .RM_SI (RM_SI ),
+ .Format_sel_SI (Format_sel_SI ),
+ .Start_SO (Start_S ),
+ .Exp_a_DO_norm (Exp_a_D ),
+ .Exp_b_DO_norm (Exp_b_D ),
+ .Mant_a_DO_norm (Mant_a_D ),
+ .Mant_b_DO_norm (Mant_b_D ),
+ .RM_dly_SO (RM_dly_S ),
+ .Sign_z_DO (Sign_z_D ),
+ .Inf_a_SO (Inf_a_S ),
+ .Inf_b_SO (Inf_b_S ),
+ .Zero_a_SO (Zero_a_S ),
+ .Zero_b_SO (Zero_b_S ),
+ .NaN_a_SO (NaN_a_S ),
+ .NaN_b_SO (NaN_b_S ),
+ .SNaN_SO (SNaN_S ),
+ .Special_case_SBO (Special_case_SB ),
+ .Special_case_dly_SBO (Special_case_dly_SB)
+ );
+ // Stage 2: divide / square-root core. Drives the Ready_SO and Done_SO ports.
+ nrbd_nrsc_mvp nrbd_nrsc_U0
+ (
+ .Clk_CI (Clk_CI ),
+ .Rst_RBI (Rst_RBI ),
+ .Div_start_SI (Div_start_SI ) ,
+ .Sqrt_start_SI (Sqrt_start_SI ),
+ .Start_SI (Start_S ),
+ .Kill_SI (Kill_SI ),
+ .Special_case_SBI (Special_case_SB ),
+ .Special_case_dly_SBI (Special_case_dly_SB),
+ .Div_enable_SO (Div_enable_S ),
+ .Sqrt_enable_SO (Sqrt_enable_S ),
+ .Precision_ctl_SI (Precision_ctl_SI ),
+ .Format_sel_SI (Format_sel_SI ),
+ .Exp_a_DI (Exp_a_D ),
+ .Exp_b_DI (Exp_b_D ),
+ .Mant_a_DI (Mant_a_D ),
+ .Mant_b_DI (Mant_b_D ),
+ .Full_precision_SO (Full_precision_S ),
+ .FP32_SO (FP32_S ),
+ .FP64_SO (FP64_S ),
+ .FP16_SO (FP16_S ),
+ .FP16ALT_SO (FP16ALT_S ),
+ .Ready_SO (Ready_SO ),
+ .Done_SO (Done_SO ),
+ .Exp_z_DO (Exp_z_D ),
+ .Mant_z_DO (Mant_z_D )
+ );
+ // Stage 3: normalisation. It has no clock or reset connection here and
+ // receives the delayed rounding mode RM_dly_S rather than RM_SI.
+ norm_div_sqrt_mvp fpu_norm_U0
+ (
+ .Mant_in_DI (Mant_z_D ),
+ .Exp_in_DI (Exp_z_D ),
+ .Sign_in_DI (Sign_z_D ),
+ .Div_enable_SI (Div_enable_S ),
+ .Sqrt_enable_SI (Sqrt_enable_S ),
+ .Inf_a_SI (Inf_a_S ),
+ .Inf_b_SI (Inf_b_S ),
+ .Zero_a_SI (Zero_a_S ),
+ .Zero_b_SI (Zero_b_S ),
+ .NaN_a_SI (NaN_a_S ),
+ .NaN_b_SI (NaN_b_S ),
+ .SNaN_SI (SNaN_S ),
+ .RM_SI (RM_dly_S ),
+ .Full_precision_SI (Full_precision_S ),
+ .FP32_SI (FP32_S ),
+ .FP64_SI (FP64_S ),
+ .FP16_SI (FP16_S ),
+ .FP16ALT_SI (FP16ALT_S ),
+ .Result_DO (Result_DO ),
+ .Fflags_SO (Fflags_SO ) //{NV,DZ,OF,UF,NX}
+ );
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..e74d320
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,630 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:
+ * Author: Florian Zaruba <>
+ * Date: 30.6.2018
+ *
+ * Description: Debug CSRs. Communication over Debug Transport Module (DTM)
+ */
+// dm_csrs: register file of the debug module, accessed through the DMI
+// request/response channel. The port list below is closed with `);` (the
+// terminator was missing, which left the module header unparseable).
+module dm_csrs #(
+  parameter int unsigned        NrHarts         = 1,   // number of harts served
+  parameter int unsigned        BusWidth        = 32,  // system bus width in bits
+  parameter logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}} // mask applied to havereset
+) (
+  input  logic                              clk_i,           // Clock
+  input  logic                              rst_ni,          // Asynchronous reset active low
+  input  logic                              testmode_i,
+  input  logic                              dmi_rst_ni,      // Debug Module Intf reset active-low
+  input  logic                              dmi_req_valid_i,
+  output logic                              dmi_req_ready_o,
+  input  dm::dmi_req_t                      dmi_req_i,
+  // every request needs a response one cycle later
+  output logic                              dmi_resp_valid_o,
+  input  logic                              dmi_resp_ready_i,
+  output dm::dmi_resp_t                     dmi_resp_o,
+  // global ctrl
+  output logic                              ndmreset_o,      // non-debug module reset active-high
+  output logic                              dmactive_o,      // 1 -> debug-module is active,
+                                                             // 0 -> synchronous re-set
+  // hart status
+  input  dm::hartinfo_t [NrHarts-1:0]       hartinfo_i,      // static hartinfo
+  input  logic [NrHarts-1:0]                halted_i,        // hart is halted
+  input  logic [NrHarts-1:0]                unavailable_i,   // e.g.: powered down
+  input  logic [NrHarts-1:0]                resumeack_i,     // hart acknowledged resume request
+  // hart control
+  output logic [19:0]                       hartsel_o,       // hartselect to ctrl module
+  output logic [NrHarts-1:0]                haltreq_o,       // request to halt a hart
+  output logic [NrHarts-1:0]                resumereq_o,     // request hart to resume
+  output logic                              clear_resumeack_o,
+  output logic                              cmd_valid_o,       // debugger writing to cmd field
+  output dm::command_t                      cmd_o,             // abstract command
+  input  logic                              cmderror_valid_i,  // an error occurred
+  input  dm::cmderr_e                       cmderror_i,        // this error occurred
+  input  logic                              cmdbusy_i,         // cmd is currently busy executing
+  output logic [dm::ProgBufSize-1:0][31:0]  progbuf_o, // to system bus
+  output logic [dm::DataCount-1:0][31:0]    data_o,
+  input  logic [dm::DataCount-1:0][31:0]    data_i,
+  input  logic                              data_valid_i,
+  // system bus access module (SBA)
+  output logic [BusWidth-1:0]               sbaddress_o,
+  input  logic [BusWidth-1:0]               sbaddress_i,
+  output logic                              sbaddress_write_valid_o,
+  // control signals in
+  output logic                              sbreadonaddr_o,
+  output logic                              sbautoincrement_o,
+  output logic [2:0]                        sbaccess_o,
+  // data out
+  output logic                              sbreadondata_o,
+  output logic [BusWidth-1:0]               sbdata_o,
+  output logic                              sbdata_read_valid_o,
+  output logic                              sbdata_write_valid_o,
+  // read data in
+  input  logic [BusWidth-1:0]               sbdata_i,
+  input  logic                              sbdata_valid_i,
+  // control signals
+  input  logic                              sbbusy_i,
+  input  logic                              sberror_valid_i, // bus error occurred
+  input  logic [2:0]                        sberror_i // bus error occurred
+);
+ // the amount of bits we need to represent all harts
+ // (at least one bit, even for a single hart)
+ localparam int unsigned HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts);
+ // NrHarts rounded up to the next power of two, so that any HartSelLen-bit
+ // index stays inside the *_aligned vectors declared further down
+ localparam int unsigned NrHartsAligned = 2**HartSelLen;
+ // operation field of the current DMI request, cast to its enum type
+ dm::dtm_op_e dtm_op;
+ assign dtm_op = dm::dtm_op_e'(dmi_req_i.op);
+ // data returned for the current request; pushed into the response FIFO
+ logic [31:0] resp_queue_data;
+ // last address of the Data0.. and ProgBuf0.. register ranges (inclusive);
+ // used as the upper bound of the `case inside` ranges in csr_read_write
+ localparam dm::dm_csr_e DataEnd = dm::dm_csr_e'(dm::Data0 + {4'b0, dm::DataCount} - 8'h1);
+ localparam dm::dm_csr_e ProgBufEnd = dm::dm_csr_e'(dm::ProgBuf0 + {4'b0, dm::ProgBufSize} - 8'h1);
+ // halt summary registers and the intermediate vectors used to build them:
+ //   halted            halted_i zero-padded to a multiple of 32 bits
+ //   halted_reshapedN  the level-N vector viewed as an array of 32-bit words
+ //   halted_flatN      one bit per word of level N-1 (OR-reduction), padded
+ logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3;
+ logic [((NrHarts-1)/2**5 + 1) * 32 - 1 : 0] halted;
+ logic [(NrHarts-1)/2**5:0][31:0] halted_reshaped0;
+ logic [(NrHarts-1)/2**10:0][31:0] halted_reshaped1;
+ logic [(NrHarts-1)/2**15:0][31:0] halted_reshaped2;
+ logic [((NrHarts-1)/2**10+1)*32-1:0] halted_flat1;
+ logic [((NrHarts-1)/2**15+1)*32-1:0] halted_flat2;
+ logic [31:0] halted_flat3;
+ // haltsum0: the 32 halted bits of the hart group selected by hartsel_o[19:5]
+ logic [14:0] hartsel_idx0;
+ always_comb begin : p_haltsum0
+   // group index = hartsel without its five least-significant bits
+   hartsel_idx0        = hartsel_o[19:5];
+   // zero-pad halted_i to whole 32-bit words, then view it word by word
+   halted              = '0;
+   halted[NrHarts-1:0] = halted_i;
+   halted_reshaped0    = halted;
+   // groups beyond the implemented harts read back as zero
+   haltsum0            = '0;
+   if (hartsel_idx0 < 15'((NrHarts-1)/2**5+1)) begin
+     haltsum0 = halted_reshaped0[hartsel_idx0];
+   end
+ end
+ // haltsum1: one bit per 32-hart group, word selected by hartsel_o[19:10]
+ logic [9:0] hartsel_idx1;
+ always_comb begin : p_reduction1
+   hartsel_idx1 = hartsel_o[19:10];
+   // bit w is set when any hart in level-0 word w is halted
+   halted_flat1 = '0;
+   for (int unsigned w = 0; w < (NrHarts-1)/2**5+1; w++) begin
+     halted_flat1[w] = |halted_reshaped0[w];
+   end
+   halted_reshaped1 = halted_flat1;
+   // words beyond the implemented range read back as zero
+   haltsum1 = '0;
+   if (hartsel_idx1 < 10'(((NrHarts-1)/2**10+1))) begin
+     haltsum1 = halted_reshaped1[hartsel_idx1];
+   end
+ end
+ // haltsum2: one bit per 1024-hart group, word selected by hartsel_o[19:15]
+ logic [4:0] hartsel_idx2;
+ always_comb begin : p_reduction2
+   hartsel_idx2 = hartsel_o[19:15];
+   // bit w is set when any bit of level-1 word w is set
+   halted_flat2 = '0;
+   for (int unsigned w = 0; w < (NrHarts-1)/2**10+1; w++) begin
+     halted_flat2[w] = |halted_reshaped1[w];
+   end
+   halted_reshaped2 = halted_flat2;
+   // words beyond the implemented range read back as zero
+   haltsum2 = '0;
+   if (hartsel_idx2 < 5'(((NrHarts-1)/2**15+1))) begin
+     haltsum2 = halted_reshaped2[hartsel_idx2];
+   end
+ end
+ // haltsum3: one bit per level-2 word (OR-reduction of halted_reshaped2).
+ // The loop bound is the number of words in halted_reshaped2, which is
+ // declared as [(NrHarts-1)/2**15:0]. The previous bound, NrHarts/2**15+1,
+ // is one larger whenever NrHarts is a multiple of 2**15 and then indexed a
+ // word that does not exist. Both bounds are equal for every other NrHarts.
+ always_comb begin : p_reduction3
+   halted_flat3 = '0;
+   for (int unsigned k = 0; k < (NrHarts-1)/2**15+1; k++) begin
+     halted_flat3[k] = |halted_reshaped2[k];
+   end
+   haltsum3 = halted_flat3;
+ end
+ // Register state. Names ending in _q are the flops written in p_regs,
+ // names ending in _d are their next-state values computed in csr_read_write.
+ // dmstatus and abstractcs have no flop: they are rebuilt combinationally.
+ dm::dmstatus_t dmstatus;
+ dm::dmcontrol_t dmcontrol_d, dmcontrol_q;
+ dm::abstractcs_t abstractcs;
+ dm::cmderr_e cmderr_d, cmderr_q;
+ dm::command_t command_d, command_q;
+ logic cmd_valid_d, cmd_valid_q;
+ dm::abstractauto_t abstractauto_d, abstractauto_q;
+ dm::sbcs_t sbcs_d, sbcs_q;
+ // system-bus address/data are kept 64 bit wide internally; only the low
+ // BusWidth bits are driven out (see the assigns below)
+ logic [63:0] sbaddr_d, sbaddr_q;
+ logic [63:0] sbdata_d, sbdata_q;
+ logic [NrHarts-1:0] havereset_d, havereset_q;
+ // program buffer
+ logic [dm::ProgBufSize-1:0][31:0] progbuf_d, progbuf_q;
+ logic [dm::DataCount-1:0][31:0] data_d, data_q;
+ // low HartSelLen bits of hartsel_o; assigned in p_outmux
+ logic [HartSelLen-1:0] selected_hart;
+ // a successful response returns zero
+ assign dmi_resp_o.resp = dm::DTM_SUCCESS;
+ // SBA
+ assign sbautoincrement_o = sbcs_q.sbautoincrement;
+ assign sbreadonaddr_o = sbcs_q.sbreadonaddr;
+ assign sbreadondata_o = sbcs_q.sbreadondata;
+ assign sbaccess_o = sbcs_q.sbaccess;
+ assign sbdata_o = sbdata_q[BusWidth-1:0];
+ assign sbaddress_o = sbaddr_q[BusWidth-1:0];
+ // full 20-bit hart select, high part concatenated above low part
+ assign hartsel_o = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello};
+ // Per-hart vectors resized to NrHartsAligned entries so that indexing with
+ // the HartSelLen-bit selected_hart never leaves the vector (avoids lint
+ // warnings).
+ logic [NrHartsAligned-1:0] havereset_d_aligned;
+ logic [NrHartsAligned-1:0] havereset_q_aligned;
+ logic [NrHartsAligned-1:0] resumeack_aligned;
+ logic [NrHartsAligned-1:0] unavailable_aligned;
+ logic [NrHartsAligned-1:0] halted_aligned;
+ // inputs and register state, widened
+ assign halted_aligned      = NrHartsAligned'(halted_i);
+ assign unavailable_aligned = NrHartsAligned'(unavailable_i);
+ assign resumeack_aligned   = NrHartsAligned'(resumeack_i);
+ assign havereset_q_aligned = NrHartsAligned'(havereset_q);
+ // next-state value, narrowed back to the real hart count
+ assign havereset_d         = NrHarts'(havereset_d_aligned);
+ // hartinfo gets the same treatment; padding entries are all-zero
+ dm::hartinfo_t [NrHartsAligned-1:0] hartinfo_aligned;
+ always_comb begin : p_hartinfo_align
+   hartinfo_aligned              = '0;
+   hartinfo_aligned[NrHarts-1:0] = hartinfo_i;
+ end
+ // scratch variables used inside csr_read_write
+ dm::sbcs_t       sbcs;
+ dm::dmcontrol_t  dmcontrol;
+ dm::abstractcs_t a_abstractcs;
+ logic [4:0]      autoexecdata_idx;
+ // csr_read_write: combinational decode of the current DMI request.
+ //  - rebuilds the read-only dmstatus / abstractcs views,
+ //  - computes the next-state (_d) value of every register,
+ //  - produces resp_queue_data for reads and the one-cycle strobes
+ //    (cmd_valid_d, sb*_valid_o, clear_resumeack_o).
+ // Later statements override earlier ones, so the blocks after the write
+ // decode (hart errors, data_valid_i, bus updates, forced dmcontrol/sbcs
+ // fields) take precedence over DMI writes.
+ // The write data operand `dmi_req_i.data` had been lost from every write
+ // statement (leaving `=;` and `'(;`); it is restored here.
+ always_comb begin : csr_read_write
+   // --------------------
+   // Static Values (R/O)
+   // --------------------
+   // dmstatus
+   dmstatus    = '0;
+   dmstatus.version = dm::DbgVersion013;
+   // no authentication implemented
+   dmstatus.authenticated = 1'b1;
+   // we do not support halt-on-reset sequence
+   dmstatus.hasresethaltreq = 1'b0;
+   // TODO(zarubaf) things need to change here if we implement the array mask
+   dmstatus.allhavereset = havereset_q_aligned[selected_hart];
+   dmstatus.anyhavereset = havereset_q_aligned[selected_hart];
+   dmstatus.allresumeack = resumeack_aligned[selected_hart];
+   dmstatus.anyresumeack = resumeack_aligned[selected_hart];
+   dmstatus.allunavail   = unavailable_aligned[selected_hart];
+   dmstatus.anyunavail   = unavailable_aligned[selected_hart];
+   // as soon as we are out of the legal Hart region tell the debugger
+   // that there are only non-existent harts
+   dmstatus.allnonexistent = logic'(32'(hartsel_o) > (NrHarts - 1));
+   dmstatus.anynonexistent = logic'(32'(hartsel_o) > (NrHarts - 1));
+   // We are not allowed to be in multiple states at once. This is a to
+   // make the running/halted and unavailable states exclusive.
+   dmstatus.allhalted    = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+   dmstatus.anyhalted    = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+   dmstatus.allrunning   = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+   dmstatus.anyrunning   = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+   // abstractcs
+   abstractcs = '0;
+   abstractcs.datacount = dm::DataCount;
+   abstractcs.progbufsize = dm::ProgBufSize;
+   abstractcs.busy = cmdbusy_i;
+   abstractcs.cmderr = cmderr_q;
+   // abstractautoexec
+   abstractauto_d = abstractauto_q;
+   abstractauto_d.zero0 = '0;
+   // default assignments
+   havereset_d_aligned = NrHartsAligned'(havereset_q);
+   dmcontrol_d         = dmcontrol_q;
+   cmderr_d            = cmderr_q;
+   command_d           = command_q;
+   progbuf_d           = progbuf_q;
+   data_d              = data_q;
+   sbcs_d              = sbcs_q;
+   // the address register tracks the value fed back on sbaddress_i
+   sbaddr_d            = 64'(sbaddress_i);
+   sbdata_d            = sbdata_q;
+   resp_queue_data         = 32'b0;
+   cmd_valid_d             = 1'b0;
+   sbaddress_write_valid_o = 1'b0;
+   sbdata_read_valid_o     = 1'b0;
+   sbdata_write_valid_o    = 1'b0;
+   clear_resumeack_o       = 1'b0;
+   // helper variables
+   sbcs         = '0;
+   dmcontrol    = '0;
+   a_abstractcs = '0;
+   // index of the addressed data register relative to Data0
+   autoexecdata_idx    = dmi_req_i.addr[4:0] - 5'(dm::Data0);
+   // localparam int unsigned DataCountAlign = $clog2(dm::DataCount);
+   // reads
+   if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin
+     unique case ({1'b0, dmi_req_i.addr}) inside
+       [(dm::Data0):DataEnd]: begin
+         // logic [$clog2(dm::DataCount)-1:0] resp_queue_idx;
+         // resp_queue_idx = dmi_req_i.addr[4:0] - int'(dm::Data0);
+         resp_queue_data = data_q[$clog2(dm::DataCount)'(autoexecdata_idx)];
+         if (!cmdbusy_i) begin
+           // check whether we need to re-execute the command (just give a cmd_valid)
+           if (autoexecdata_idx < $bits(abstractauto_q.autoexecdata)) begin
+             cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx];
+           end
+         end
+       end
+       dm::DMControl:    resp_queue_data = dmcontrol_q;
+       dm::DMStatus:     resp_queue_data = dmstatus;
+       dm::Hartinfo:     resp_queue_data = hartinfo_aligned[selected_hart];
+       dm::AbstractCS:   resp_queue_data = abstractcs;
+       dm::AbstractAuto: resp_queue_data = abstractauto_q;
+       // command is read-only
+       dm::Command:    resp_queue_data = '0;
+       [(dm::ProgBuf0):ProgBufEnd]: begin
+         resp_queue_data = progbuf_q[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]];
+         if (!cmdbusy_i) begin
+           // check whether we need to re-execute the command (just give a cmd_valid)
+           // range of autoexecprogbuf is 31:16
+           cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}];
+         end
+       end
+       dm::HaltSum0: resp_queue_data = haltsum0;
+       dm::HaltSum1: resp_queue_data = haltsum1;
+       dm::HaltSum2: resp_queue_data = haltsum2;
+       dm::HaltSum3: resp_queue_data = haltsum3;
+       dm::SBCS: begin
+         resp_queue_data = sbcs_q;
+       end
+       dm::SBAddress0: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           resp_queue_data = sbaddr_q[31:0];
+         end
+       end
+       dm::SBAddress1: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           resp_queue_data = sbaddr_q[63:32];
+         end
+       end
+       dm::SBData0: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           // reading the low data word raises the read strobe, unless an
+           // earlier bus error is still pending
+           sbdata_read_valid_o = (sbcs_q.sberror == '0);
+           resp_queue_data = sbdata_q[31:0];
+         end
+       end
+       dm::SBData1: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           resp_queue_data = sbdata_q[63:32];
+         end
+       end
+       default:;
+     endcase
+   end
+   // write
+   if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_WRITE) begin
+     unique case (dm::dm_csr_e'({1'b0, dmi_req_i.addr})) inside
+       [(dm::Data0):DataEnd]: begin
+         // attempts to write them while busy is set does not change their value
+         if (!cmdbusy_i && dm::DataCount > 0) begin
+           data_d[dmi_req_i.addr[$clog2(dm::DataCount)-1:0]] = dmi_req_i.data;
+           // check whether we need to re-execute the command (just give a cmd_valid)
+           if (autoexecdata_idx < $bits(abstractauto_q.autoexecdata)) begin
+             cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx];
+           end
+         end
+       end
+       dm::DMControl: begin
+         dmcontrol = dm::dmcontrol_t'(dmi_req_i.data);
+         // clear the havreset of the selected hart
+         if (dmcontrol.ackhavereset) begin
+           havereset_d_aligned[selected_hart] = 1'b0;
+         end
+         dmcontrol_d = dmi_req_i.data;
+       end
+       dm::DMStatus:; // write are ignored to R/O register
+       dm::Hartinfo:; // hartinfo is R/O
+       // only command error is write-able
+       dm::AbstractCS: begin // W1C
+         // Gets set if an abstract command fails. The bits in this
+         // field remain set until they are cleared by writing 1 to
+         // them. No abstract command is started until the value is
+         // reset to 0.
+         a_abstractcs = dm::abstractcs_t'(dmi_req_i.data);
+         // writes during abstract command execution are not allowed
+         if (!cmdbusy_i) begin
+           cmderr_d = dm::cmderr_e'(~a_abstractcs.cmderr & cmderr_q);
+         end else if (cmderr_q == dm::CmdErrNone) begin
+           cmderr_d = dm::CmdErrBusy;
+         end
+       end
+       dm::Command: begin
+         // writes are ignored if a command is already busy
+         if (!cmdbusy_i) begin
+           cmd_valid_d = 1'b1;
+           command_d = dm::command_t'(dmi_req_i.data);
+         // if there was an attempted to write during a busy execution
+         // and the cmderror field is zero set the busy error
+         end else if (cmderr_q == dm::CmdErrNone) begin
+           cmderr_d = dm::CmdErrBusy;
+         end
+       end
+       dm::AbstractAuto: begin
+         // this field can only be written legally when there is no command executing
+         if (!cmdbusy_i) begin
+           abstractauto_d                 = 32'b0;
+           // only the bits backed by an implemented data/progbuf register are kept
+           abstractauto_d.autoexecdata    = 12'(dmi_req_i.data[dm::DataCount-1:0]);
+           abstractauto_d.autoexecprogbuf = 16'(dmi_req_i.data[dm::ProgBufSize-1+16:16]);
+         end else if (cmderr_q == dm::CmdErrNone) begin
+           cmderr_d = dm::CmdErrBusy;
+         end
+       end
+       [(dm::ProgBuf0):ProgBufEnd]: begin
+         // attempts to write them while busy is set does not change their value
+         if (!cmdbusy_i) begin
+           progbuf_d[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]] = dmi_req_i.data;
+           // check whether we need to re-execute the command (just give a cmd_valid)
+           // this should probably throw an error if executed during another command
+           // was busy
+           // range of autoexecprogbuf is 31:16
+           cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}];
+         end
+       end
+       dm::SBCS: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           sbcs = dm::sbcs_t'(dmi_req_i.data);
+           sbcs_d = sbcs;
+           // R/W1C
+           sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror);
+           sbcs_d.sberror     = sbcs_q.sberror     & (~sbcs.sberror);
+         end
+       end
+       dm::SBAddress0: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           sbaddr_d[31:0] = dmi_req_i.data;
+           sbaddress_write_valid_o = (sbcs_q.sberror == '0);
+         end
+       end
+       dm::SBAddress1: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           sbaddr_d[63:32] = dmi_req_i.data;
+         end
+       end
+       dm::SBData0: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           sbdata_d[31:0] = dmi_req_i.data;
+           sbdata_write_valid_o = (sbcs_q.sberror == '0);
+         end
+       end
+       dm::SBData1: begin
+         // access while the SBA was busy
+         if (sbbusy_i) begin
+           sbcs_d.sbbusyerror = 1'b1;
+         end else begin
+           sbdata_d[63:32] = dmi_req_i.data;
+         end
+       end
+       default:;
+     endcase
+   end
+   // hart threw a command error and has precedence over bus writes
+   if (cmderror_valid_i) begin
+     cmderr_d = cmderror_i;
+   end
+   // update data registers
+   if (data_valid_i) begin
+     data_d = data_i;
+   end
+   // set the havereset flag when we did a ndmreset
+   if (ndmreset_o) begin
+     havereset_d_aligned[NrHarts-1:0] = '1;
+   end
+   // -------------
+   // System Bus
+   // -------------
+   // set bus error
+   if (sberror_valid_i) begin
+     sbcs_d.sberror = sberror_i;
+   end
+   // update read data
+   if (sbdata_valid_i) begin
+     sbdata_d = 64'(sbdata_i);
+   end
+   // dmcontrol
+   // TODO(zarubaf) we currently do not implement the hartarry mask
+   dmcontrol_d.hasel           = 1'b0;
+   // we do not support resetting an individual hart
+   dmcontrol_d.hartreset       = 1'b0;
+   dmcontrol_d.setresethaltreq = 1'b0;
+   dmcontrol_d.clrresethaltreq = 1'b0;
+   dmcontrol_d.zero1           = '0;
+   dmcontrol_d.zero0           = '0;
+   // Non-writeable, clear only
+   dmcontrol_d.ackhavereset    = 1'b0;
+   // a rising resumereq clears the resume acknowledge of the hart
+   if (!dmcontrol_q.resumereq && dmcontrol_d.resumereq) begin
+     clear_resumeack_o = 1'b1;
+   end
+   // drop the request once a resume acknowledge is seen on resumeack_i
+   if (dmcontrol_q.resumereq && resumeack_i) begin
+     dmcontrol_d.resumereq = 1'b0;
+   end
+   // static values for sbcs
+   sbcs_d.sbversion            = 3'd1;
+   sbcs_d.sbbusy               = sbbusy_i;
+   sbcs_d.sbasize              = $bits(sbcs_d.sbasize)'(BusWidth);
+   sbcs_d.sbaccess128          = 1'b0;
+   sbcs_d.sbaccess64           = logic'(BusWidth == 32'd64);
+   sbcs_d.sbaccess32           = logic'(BusWidth == 32'd32);
+   sbcs_d.sbaccess16           = 1'b0;
+   sbcs_d.sbaccess8            = 1'b0;
+   sbcs_d.sbaccess             = (BusWidth == 32'd64) ? 3'd3 : 3'd2;
+ end
+ // output multiplexer: route haltreq/resumereq to the selected hart only
+ always_comb begin : p_outmux
+   // nothing is requested from any hart by default
+   resumereq_o   = '0;
+   haltreq_o     = '0;
+   // low bits of the hart select; also used by csr_read_write
+   selected_hart = hartsel_o[HartSelLen-1:0];
+   // a selection beyond NrHarts drives no request at all
+   if (selected_hart < (HartSelLen+1)'(NrHarts)) begin
+     resumereq_o[selected_hart] = dmcontrol_q.resumereq;
+     haltreq_o[selected_hart]   = dmcontrol_q.haltreq;
+   end
+ end
+ // registered state exported unchanged on the module outputs
+ assign ndmreset_o  = dmcontrol_q.ndmreset;
+ assign dmactive_o  = dmcontrol_q.dmactive;
+ assign cmd_valid_o = cmd_valid_q;
+ assign cmd_o       = command_q;
+ assign data_o      = data_q;
+ assign progbuf_o   = progbuf_q;
+ // testmode_i is not used by this module; tie it to a named sink
+ logic unused_testmode;
+ assign unused_testmode = testmode_i;
+ // response FIFO: every accepted DMI request pushes resp_queue_data; the
+ // read side forms the DMI response channel. rdata_o was left unconnected,
+ // so dmi_resp_o.data was never driven - it is connected here (the .resp
+ // field of the same struct is assigned separately in this module).
+ fifo_sync #(
+   .Width (32),
+   .Pass  (1'b0),
+   .Depth (2)
+ ) i_fifo (
+   .clk_i   ( clk_i            ),
+   .rst_ni  ( dmi_rst_ni       ), // reset only when system is re-set
+   .clr_i   ( 1'b0             ),
+   .wdata_i ( resp_queue_data  ),
+   .wvalid_i( dmi_req_valid_i  ),
+   .wready_o( dmi_req_ready_o  ),
+   .rdata_o ( dmi_resp_o.data  ),
+   .rvalid_o( dmi_resp_valid_o ),
+   .rready_i( dmi_resp_ready_i ),
+   .depth_o (                  ) // Doesn't use
+ );
+ // p_regs: state registers of the debug module.
+ //  - asynchronous reset (rst_ni low) clears everything; havereset_q is set,
+ //  - while dmcontrol_q.dmactive is low the module is held in a synchronous
+ //    reset in which only dmactive itself can be written,
+ //  - otherwise every register takes its _d value from csr_read_write.
+ // havereset_q is updated on every clock, independent of dmactive.
+ always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+   // PoR
+   if (!rst_ni) begin
+     havereset_q    <= '1;
+     dmcontrol_q    <= '0;
+     abstractauto_q <= '0;
+     command_q      <= '0;
+     cmd_valid_q    <= '0;
+     cmderr_q       <= dm::CmdErrNone;
+     data_q         <= '0;
+     progbuf_q      <= '0;
+     sbcs_q         <= '0;
+     sbaddr_q       <= '0;
+     sbdata_q       <= '0;
+   end else begin
+     // only harts enabled in SelectableHarts can report havereset
+     havereset_q <= SelectableHarts & havereset_d;
+     // synchronous re-set of debug module, active-low, except for dmactive
+     if (!dmcontrol_q.dmactive) begin
+       // this is the only write-able bit during reset
+       dmcontrol_q.dmactive         <= dmcontrol_d.dmactive;
+       // every other dmcontrol field is cleared
+       dmcontrol_q.ndmreset         <= '0;
+       dmcontrol_q.clrresethaltreq  <= '0;
+       dmcontrol_q.setresethaltreq  <= '0;
+       dmcontrol_q.zero0            <= '0;
+       dmcontrol_q.hartselhi        <= '0;
+       dmcontrol_q.hartsello        <= '0;
+       dmcontrol_q.hasel            <= '0;
+       dmcontrol_q.zero1            <= '0;
+       dmcontrol_q.ackhavereset     <= '0;
+       dmcontrol_q.hartreset        <= '0;
+       dmcontrol_q.resumereq        <= '0;
+       dmcontrol_q.haltreq          <= '0;
+       // remaining registers return to their reset values
+       abstractauto_q <= '0;
+       command_q      <= '0;
+       cmd_valid_q    <= '0;
+       cmderr_q       <= dm::CmdErrNone;
+       data_q         <= '0;
+       progbuf_q      <= '0;
+       sbcs_q         <= '0;
+       sbaddr_q       <= '0;
+       sbdata_q       <= '0;
+     end else begin
+       // normal operation: take the combinationally computed next state
+       dmcontrol_q    <= dmcontrol_d;
+       abstractauto_q <= abstractauto_d;
+       command_q      <= command_d;
+       cmd_valid_q    <= cmd_valid_d;
+       cmderr_q       <= cmderr_d;
+       data_q         <= data_d;
+       progbuf_q      <= progbuf_d;
+       sbcs_q         <= sbcs_d;
+       sbaddr_q       <= sbaddr_d;
+       sbdata_q       <= sbdata_d;
+     end
+   end
+ end
+ //pragma translate_on
+endmodule : dm_csrs
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..4ef7a26
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,523 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License. You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+* File:
+* Author: Florian Zaruba <>
+* Date: 11.7.2018
+* Description: Memory module for execution-based debug clients
+*/
+// dm_mem: memory-mapped window through which the harts execute debug code.
+// The port list below is closed with `);` (the terminator was missing, which
+// left the module header unparseable).
+module dm_mem #(
+  parameter int unsigned        NrHarts          = 1,   // number of harts served
+  parameter int unsigned        BusWidth         = 32,  // width of the SRAM-style bus
+  parameter logic [NrHarts-1:0] SelectableHarts  = {NrHarts{1'b1}},
+  parameter int unsigned        DmBaseAddress    = '0   // non-zero enables the a0/dscratch1 scheme
+) (
+  input  logic                             clk_i,       // Clock
+  input  logic                             rst_ni,      // debug module reset
+  output logic [NrHarts-1:0]               debug_req_o,
+  input  logic [19:0]                      hartsel_i,
+  // from Ctrl and Status register
+  input  logic [NrHarts-1:0]               haltreq_i,
+  input  logic [NrHarts-1:0]               resumereq_i,
+  input  logic                             clear_resumeack_i,
+  // state bits
+  output logic [NrHarts-1:0]               halted_o,    // hart acknowledge halt
+  output logic [NrHarts-1:0]               resuming_o,  // hart is resuming
+  input  logic [dm::ProgBufSize-1:0][31:0] progbuf_i,    // program buffer to expose
+  input  logic [dm::DataCount-1:0][31:0]   data_i,       // data in
+  output logic [dm::DataCount-1:0][31:0]   data_o,       // data out
+  output logic                             data_valid_o, // data out is valid
+  // abstract command interface
+  input  logic                             cmd_valid_i,
+  input  dm::command_t                     cmd_i,
+  output logic                             cmderror_valid_o,
+  output dm::cmderr_e                      cmderror_o,
+  output logic                             cmdbusy_o,
+  // data interface
+  // SRAM interface
+  input  logic                             req_i,
+  input  logic                             we_i,
+  input  logic [BusWidth-1:0]              addr_i,
+  input  logic [BusWidth-1:0]              wdata_i,
+  input  logic [BusWidth/8-1:0]            be_i,
+  output logic [BusWidth-1:0]              rdata_o
+);
+ // number of low address bits decoded by the read/write logic
+ // (addr_i[DbgAddressBits-1:0])
+ localparam int unsigned DbgAddressBits = 12;
+ // hart-select width and the hart count rounded up to a power of two, so a
+ // HartSelLen-bit index always stays inside the *_aligned vectors
+ localparam int unsigned HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts);
+ localparam int unsigned NrHartsAligned = 2**HartSelLen;
+ // exclusive upper bound for ac_ar.aarsize in the abstract command ROM
+ localparam int unsigned MaxAar = (BusWidth == 64) ? 4 : 3;
+ // set when the debug module does not sit at address zero
+ localparam bit HasSndScratch = (DmBaseAddress != 0);
+ // Depending on whether we are at the zero page or not we either use `x0` or `x10/a0`
+ localparam logic [4:0] LoadBaseAddr = (DmBaseAddress == 0) ? 5'd0 : 5'd10;
+ // Address map, all derived from dm::DataAddr:
+ //   data registers    [DataAddr, DataAddr + 4*DataCount)
+ //   program buffer    the 4*ProgBufSize bytes directly below the data
+ //   abstract command  the 4*10 bytes directly below the program buffer
+ localparam logic [DbgAddressBits-1:0] DataBaseAddr = (dm::DataAddr);
+ localparam logic [DbgAddressBits-1:0] DataEndAddr = (dm::DataAddr + 4*dm::DataCount - 1);
+ localparam logic [DbgAddressBits-1:0] ProgBufBaseAddr = (dm::DataAddr - 4*dm::ProgBufSize);
+ localparam logic [DbgAddressBits-1:0] ProgBufEndAddr = (dm::DataAddr - 1);
+ localparam logic [DbgAddressBits-1:0] AbstractCmdBaseAddr = (ProgBufBaseAddr - 4*10);
+ localparam logic [DbgAddressBits-1:0] AbstractCmdEndAddr = (ProgBufBaseAddr - 1);
+ // fixed addresses: the variable jump word, the per-hart flag bytes and
+ // the four locations a hart writes to report its state
+ localparam logic [DbgAddressBits-1:0] WhereToAddr = 'h300;
+ localparam logic [DbgAddressBits-1:0] FlagsBaseAddr = 'h400;
+ localparam logic [DbgAddressBits-1:0] FlagsEndAddr = 'h7FF;
+ localparam logic [DbgAddressBits-1:0] HaltedAddr = 'h100;
+ localparam logic [DbgAddressBits-1:0] GoingAddr = 'h104;
+ localparam logic [DbgAddressBits-1:0] ResumingAddr = 'h108;
+ localparam logic [DbgAddressBits-1:0] ExceptionAddr = 'h10C;
+ // program buffer viewed as 64-bit words (two 32-bit entries each)
+ logic [dm::ProgBufSize/2-1:0][63:0] progbuf;
+ // eight 64-bit slots holding the generated abstract command
+ logic [7:0][63:0] abstract_cmd;
+ logic [NrHarts-1:0] halted_d, halted_q;
+ logic [NrHarts-1:0] resuming_d, resuming_q;
+ // resume/go are the flag bits returned to a polling hart; going is the
+ // pulse raised when a hart writes GoingAddr
+ logic resume, go, going;
+ logic exception;
+ logic unsupported_command;
+ logic [63:0] rom_rdata;
+ logic [63:0] rdata_d, rdata_q;
+ // selects the upper 32-bit half of the read word on a 32-bit bus
+ logic word_enable32_q;
+ // this is needed to avoid lint warnings related to array indexing
+ // resize hartsel to valid range
+ logic [HartSelLen-1:0] hartsel, wdata_hartsel;
+ assign hartsel = hartsel_i[HartSelLen-1:0];
+ assign wdata_hartsel = wdata_i[HartSelLen-1:0];
+ logic [NrHartsAligned-1:0] resumereq_aligned, haltreq_aligned,
+ halted_d_aligned, halted_q_aligned,
+ halted_aligned, resumereq_wdata_aligned,
+ resuming_d_aligned, resuming_q_aligned;
+ assign resumereq_aligned = NrHartsAligned'(resumereq_i);
+ assign haltreq_aligned = NrHartsAligned'(haltreq_i);
+ // same value as resumereq_aligned; it is the copy indexed with
+ // wdata_hartsel in the read path
+ assign resumereq_wdata_aligned = NrHartsAligned'(resumereq_i);
+ assign halted_q_aligned = NrHartsAligned'(halted_q);
+ assign halted_d = NrHarts'(halted_d_aligned);
+ assign resuming_q_aligned = NrHartsAligned'(resuming_q);
+ assign resuming_d = NrHarts'(resuming_d_aligned);
+ // distinguish whether we need to forward data from the ROM or the FSM
+ // latch the address for this
+ logic fwd_rom_d, fwd_rom_q;
+ dm::ac_ar_cmd_t ac_ar;
+ // Abstract Command Access Register
+ assign ac_ar = dm::ac_ar_cmd_t'(cmd_i.control);
+ // halt requests are passed straight through as debug requests
+ assign debug_req_o = haltreq_i;
+ assign halted_o = halted_q;
+ assign resuming_o = resuming_q;
+ // reshape progbuf
+ assign progbuf = progbuf_i;
+ // states of the hart control FSM (p_hart_ctrl_queue)
+ typedef enum logic [1:0] { Idle, Go, Resume, CmdExecuting } state_e;
+ state_e state_d, state_q;
+ // hart ctrl queue: FSM that sequences abstract commands and resume
+ // requests for the hart selected by hartsel, and reports command errors.
+ always_comb begin : p_hart_ctrl_queue
+   // defaults: hold the state, report busy, raise no flag and no error
+   state_d          = state_q;
+   cmdbusy_o        = 1'b1;
+   resume           = 1'b0;
+   go               = 1'b0;
+   cmderror_o       = dm::CmdErrNone;
+   cmderror_valid_o = 1'b0;
+   unique case (state_q)
+     Idle: begin
+       // the only state in which a new command is accepted
+       cmdbusy_o = 1'b0;
+       if (cmd_valid_i && halted_q_aligned[hartsel] && !unsupported_command) begin
+         // give the go signal
+         state_d = Go;
+       end else if (cmd_valid_i) begin
+         // hart must be halted for all requests
+         cmderror_valid_o = 1'b1;
+         cmderror_o       = dm::CmdErrorHaltResume;
+       end
+       // CSRs want to resume, the request is ignored when the hart is
+       // requested to halt or it didn't clear the resuming_q bit before.
+       // Evaluated after the command check, so it wins when both apply.
+       if (resumereq_aligned[hartsel] && !resuming_q_aligned[hartsel] &&
+           !haltreq_aligned[hartsel] && halted_q_aligned[hartsel]) begin
+         state_d = Resume;
+       end
+     end
+     Go: begin
+       // busy (default) since the execution of a program is scheduled
+       go = 1'b1;
+       // the thread is now executing the command, track its state
+       if (going) begin
+         state_d = CmdExecuting;
+       end
+     end
+     Resume: begin
+       // keep the resume flag up until the hart reports that it is resuming
+       resume = 1'b1;
+       if (resuming_q_aligned[hartsel]) begin
+         state_d = Idle;
+       end
+     end
+     CmdExecuting: begin
+       // go stays low (default); wait until the hart has halted again
+       if (halted_aligned[hartsel]) begin
+         state_d = Idle;
+       end
+     end
+     //default: ;
+   endcase
+   // only signal once that cmd is unsupported so that we can clear cmderr
+   // in subsequent writes to abstractcs
+   if (unsupported_command && cmd_valid_i) begin
+     cmderror_valid_o = 1'b1;
+     cmderror_o       = dm::CmdErrNotSupported;
+   end
+   // an exception reported by the hart overrides any error chosen above
+   if (exception) begin
+     cmderror_valid_o = 1'b1;
+     cmderror_o       = dm::CmdErrorException;
+   end
+ end
+ // Read-data mux: pick the ROM word or the registered read word, then on a
+ // 32-bit bus return the half selected by word_enable32_q.
+ logic [63:0] word_mux;
+ assign word_mux = (fwd_rom_q) ? rom_rdata : rdata_q;
+ if (BusWidth == 64) begin : gen_word_mux64
+   // 64-bit bus: the whole word is returned
+   assign rdata_o = word_mux;
+ end else begin : gen_word_mux32
+   // 32-bit bus: upper half when word_enable32_q is set, lower half otherwise
+   assign rdata_o = (word_enable32_q) ? word_mux[63:32] : word_mux[31:0];
+ end
+ // read/write logic
+ // Decodes accesses on the SRAM-style port (req_i / we_i / addr_i):
+ //  - writes to the fixed addresses update the halted/resuming state or
+ //    raise the going / exception pulses; writes to the data range are
+ //    merged byte-wise into data_o,
+ //  - reads compute rdata_d: the variable jump at WhereToAddr, the data
+ //    registers, the program buffer, the abstract command or the flags.
+ // Statement order matters: later assignments override earlier ones.
+ logic [63:0] data_bits;
+ logic [7:0][7:0] rdata;
+ always_comb begin : p_rw_logic
+ // defaults: keep the registered state
+ halted_d_aligned = NrHartsAligned'(halted_q);
+ resuming_d_aligned = NrHartsAligned'(resuming_q);
+ rdata_d = rdata_q;
+ // convert the data in bits representation
+ data_bits = data_i;
+ rdata = '0;
+ // write data in csr register
+ data_valid_o = 1'b0;
+ exception = 1'b0;
+ halted_aligned = '0;
+ going = 1'b0;
+ // The resume ack signal is lowered when the resume request is deasserted
+ if (clear_resumeack_i) begin
+ resuming_d_aligned[hartsel] = 1'b0;
+ end
+ // we've got a new request
+ if (req_i) begin
+ // this is a write
+ if (we_i) begin
+ unique case (addr_i[DbgAddressBits-1:0]) inside
+ // the hart index is taken from the written data (wdata_hartsel);
+ // halted_aligned is the one-cycle pulse, halted_d_aligned the sticky state
+ HaltedAddr: begin
+ halted_aligned[wdata_hartsel] = 1'b1;
+ halted_d_aligned[wdata_hartsel] = 1'b1;
+ end
+ GoingAddr: begin
+ going = 1'b1;
+ end
+ ResumingAddr: begin
+ // clear the halted flag as the hart resumed execution
+ halted_d_aligned[wdata_hartsel] = 1'b0;
+ // set the resuming flag which needs to be cleared by the debugger
+ resuming_d_aligned[wdata_hartsel] = 1'b1;
+ end
+ // an exception occurred during execution
+ ExceptionAddr: exception = 1'b1;
+ // core can write data registers
+ [DataBaseAddr:DataEndAddr]: begin
+ data_valid_o = 1'b1;
+ // merge only the enabled bytes; byte lane i always maps to bits
+ // [8*i+7:8*i] of data_bits, the address offset is not applied here
+ for (int i = 0; i < $bits(be_i); i++) begin
+ if (be_i[i]) begin
+ data_bits[i*8+:8] = wdata_i[i*8+:8];
+ end
+ end
+ end
+ default ;
+ endcase
+ // this is a read
+ end else begin
+ unique case (addr_i[DbgAddressBits-1:0]) inside
+ // variable ROM content
+ WhereToAddr: begin
+ // variable jump to abstract cmd, program_buffer or resume
+ // NOTE(review): the hart index comes from wdata_i although this is a
+ // read access - confirm that callers drive wdata_i on reads
+ if (resumereq_wdata_aligned[wdata_hartsel]) begin
+ rdata_d = {32'b0, dm::jal('0, 21'(dm::ResumeAddress[11:0])-21'(WhereToAddr))};
+ end
+ // there is a command active so jump there
+ // (checked second, so it overrides the resume jump above)
+ if (cmdbusy_o) begin
+ // transfer not set is shortcut to the program buffer if postexec is set
+ // keep this statement narrow to not catch invalid commands
+ if (cmd_i.cmdtype == dm::AccessRegister &&
+ !ac_ar.transfer && ac_ar.postexec) begin
+ rdata_d = {32'b0, dm::jal('0, 21'(ProgBufBaseAddr)-21'(WhereToAddr))};
+ // this is a legit abstract cmd -> execute it
+ end else begin
+ rdata_d = {32'b0, dm::jal('0, 21'(AbstractCmdBaseAddr)-21'(WhereToAddr))};
+ end
+ end
+ end
+ // returns two consecutive data registers as one 64-bit word
+ // NOTE(review): the index is sized with $clog2(dm::ProgBufSize) and is the
+ // 64-bit word offset, not twice that offset - verify for DataCount > 2
+ [DataBaseAddr:DataEndAddr]: begin
+ rdata_d = {
+ data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+ DataBaseAddr[DbgAddressBits-1:3] + 1'b1)],
+ data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+ DataBaseAddr[DbgAddressBits-1:3])]
+ };
+ end
+ // 64-bit program buffer word at the addressed offset
+ [ProgBufBaseAddr:ProgBufEndAddr]: begin
+ rdata_d = progbuf[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+ ProgBufBaseAddr[DbgAddressBits-1:3])];
+ end
+ // two slots for abstract command
+ [AbstractCmdBaseAddr:AbstractCmdEndAddr]: begin
+ // return the correct address index
+ rdata_d = abstract_cmd[3'(addr_i[DbgAddressBits-1:3] -
+ AbstractCmdBaseAddr[DbgAddressBits-1:3])];
+ end
+ // harts are polling for flags here
+ [FlagsBaseAddr:FlagsEndAddr]: begin
+ // release the corresponding hart
+ // one flag byte per hart: the 64-bit word holding the selected hart's
+ // byte carries {resume, go} in its two low bits, all other bytes are zero
+ if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBaseAddr[DbgAddressBits-1:0]) ==
+ (DbgAddressBits'(hartsel) & {{(DbgAddressBits-3){1'b1}}, 3'b0})) begin
+ rdata[DbgAddressBits'(hartsel) & DbgAddressBits'(3'b111)] = {6'b0, resume, go};
+ end
+ rdata_d = rdata;
+ end
+ default: ;
+ endcase
+ end
+ end
+ // data_o is data_i, with any bytes written in this cycle merged in
+ data_o = data_bits;
+ end
+ // ---------------------------------------------------------------------------
+ // Abstract command "ROM"
+ //
+ // Builds the instruction sequence the hart executes for the abstract command
+ // currently held in cmd_i. Each abstract_cmd[n] entry is 64 bit wide and holds
+ // two 32-bit instructions: [31:0] is executed first, [63:32] second.
+ //
+ //   entry 0 : save a0 (x10) / load DM base address into a0
+ //   entry 1 : clear the lowest 12 bit of a0
+ //   entry 2 : command specific (defaults to nop)
+ //   entry 3 : command specific (defaults to nop)
+ //   entry 4 : restore a0, then ebreak (or nop to fall into the program buffer)
+ //   entry 5-7 : zero
+ //
+ // unsupported_command is raised whenever the request cannot be served; in that
+ // case the first instruction is replaced by an ebreak.
+ // ---------------------------------------------------------------------------
+ always_comb begin : p_abstract_cmd_rom
+   // cleared by default, set below for every request we cannot serve
+   unsupported_command = 1'b0;
+   // default memory
+   // if ac_ar.transfer is not set then we can take a shortcut to the program buffer
+   abstract_cmd[0][31:0] = dm::illegal();
+   // load debug module base address into a0, this is shared among all commands
+   abstract_cmd[0][63:32] = HasSndScratch ? dm::auipc(5'd10, '0) : dm::nop();
+   // clr lowest 12b -> DM base offset
+   abstract_cmd[1][31:0] = HasSndScratch ? dm::srli(5'd10, 5'd10, 6'd12) : dm::nop();
+   abstract_cmd[1][63:32] = HasSndScratch ? dm::slli(5'd10, 5'd10, 6'd12) : dm::nop();
+   abstract_cmd[2][31:0] = dm::nop();
+   abstract_cmd[2][63:32] = dm::nop();
+   abstract_cmd[3][31:0] = dm::nop();
+   abstract_cmd[3][63:32] = dm::nop();
+   // restore a0 from dscratch1 (counterpart of the save in entry 0)
+   abstract_cmd[4][31:0] = HasSndScratch ? dm::csrr(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+   abstract_cmd[4][63:32] = dm::ebreak();
+   abstract_cmd[7:5] = '0;
+   // this depends on the command being executed
+   unique case (cmd_i.cmdtype)
+     // --------------------
+     // Access Register
+     // --------------------
+     dm::AccessRegister: begin
+       if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && ac_ar.write) begin
+         // store a0 in dscratch1
+         // This has to be a CSR *write*: a0 is overwritten by the auipc that
+         // follows and is restored from dscratch1 in entry 4.
+         abstract_cmd[0][31:0] = HasSndScratch ? dm::csrw(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+         // this range is reserved
+         if (ac_ar.regno[15:14] != '0) begin
+           abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+           unsupported_command = 1'b1;
+         // A0 access needs to be handled separately, as we use A0 to load
+         // the DM address offset need to access DSCRATCH1 in this case
+         end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+           // store s0 in dscratch
+           abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+           // load from data register
+           abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+           // and store it in the corresponding CSR
+           abstract_cmd[3][31:0] = dm::csrw(dm::CSR_DSCRATCH1, 5'd8);
+           // restore s0 again from dscratch
+           abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+         // GPR/FPR access
+         end else if (ac_ar.regno[12]) begin
+           // determine whether we want to access the floating point register or not
+           if (ac_ar.regno[5]) begin
+             abstract_cmd[2][31:0] =
+                 dm::float_load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+           end else begin
+             abstract_cmd[2][31:0] =
+                 dm::load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+           end
+         // CSR access
+         end else begin
+           // data register to CSR
+           // store s0 in dscratch
+           abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+           // load from data register
+           abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+           // and store it in the corresponding CSR
+           abstract_cmd[3][31:0] = dm::csrw(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+           // restore s0 again from dscratch
+           abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+         end
+       end else if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && !ac_ar.write) begin
+         // store a0 in dscratch1 (CSR write, see the note in the write path)
+         abstract_cmd[0][31:0] = HasSndScratch ?
+                                 dm::csrw(dm::CSR_DSCRATCH1, LoadBaseAddr) :
+                                 dm::nop();
+         // this range is reserved
+         if (ac_ar.regno[15:14] != '0) begin
+           abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+           unsupported_command = 1'b1;
+         // A0 access needs to be handled separately, as we use A0 to load
+         // the DM address offset need to access DSCRATCH1 in this case
+         end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+           // store s0 in dscratch
+           abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+           // read value from CSR into s0
+           abstract_cmd[2][63:32] = dm::csrr(dm::CSR_DSCRATCH1, 5'd8);
+           // and store s0 into data section
+           abstract_cmd[3][31:0] = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+           // restore s0 again from dscratch
+           abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+         // GPR/FPR access
+         end else if (ac_ar.regno[12]) begin
+           // determine whether we want to access the floating point register or not
+           if (ac_ar.regno[5]) begin
+             abstract_cmd[2][31:0] =
+                 dm::float_store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+           end else begin
+             abstract_cmd[2][31:0] =
+                 dm::store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+           end
+         // CSR access
+         end else begin
+           // CSR register to data
+           // store s0 in dscratch
+           abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+           // read value from CSR into s0
+           abstract_cmd[2][63:32] = dm::csrr(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+           // and store s0 into data section
+           abstract_cmd[3][31:0] = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+           // restore s0 again from dscratch
+           abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+         end
+       end else if (32'(ac_ar.aarsize) >= MaxAar || ac_ar.aarpostincrement == 1'b1) begin
+         // this should happen when e.g. ac_ar.aarsize >= MaxAar
+         // Openocd will try to do an access with aarsize=64 bits
+         // first before falling back to 32 bits.
+         abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+         unsupported_command = 1'b1;
+       end
+       // Check whether we need to execute the program buffer. When we
+       // get an unsupported command we really should abort instead of
+       // still trying to execute the program buffer, makes it easier
+       // for the debugger to recover
+       if (ac_ar.postexec && !unsupported_command) begin
+         // issue a nop, we will automatically run into the program buffer
+         abstract_cmd[4][63:32] = dm::nop();
+       end
+     end
+     // not supported at the moment
+     // dm::QuickAccess:;
+     // dm::AccessMemory:;
+     default: begin
+       abstract_cmd[0][31:0] = dm::ebreak();
+       unsupported_command = 1'b1;
+     end
+   endcase
+ end
+ // Address handed to the debug ROM: addr_i cast to 64 bit.
+ logic [63:0] rom_addr;
+ assign rom_addr = 64'(addr_i);
+ // Two flavours of the debug ROM exist. If the debug module sits at the
+ // zero page a simplified ROM that needs only one scratch register per
+ // hart is sufficient. In every other case two registers per hart have
+ // to be set aside, which in turn requires two scratch registers.
+ if (HasSndScratch) begin : gen_rom_snd_scratch
+   debug_rom i_debug_rom (
+     .clk_i   ( clk_i     ),
+     .req_i   ( req_i     ),
+     .addr_i  ( rom_addr  ),
+     .rdata_o ( rom_rdata )
+   );
+ end else begin : gen_rom_one_scratch
+   // This variant uses the zero register (`x0`) as the base of its
+   // loads, so there is no base register that would need saving.
+   debug_rom_one_scratch i_debug_rom (
+     .clk_i   ( clk_i     ),
+     .req_i   ( req_i     ),
+     .addr_i  ( rom_addr  ),
+     .rdata_o ( rom_rdata )
+   );
+ end
+ // ROM starts at the HaltAddress of the core e.g.: it immediately jumps to
+ // the ROM base address
+ // fwd_rom_d is set for every access whose (truncated) address is at or above
+ // the (truncated) HaltAddress; the comparison uses the low DbgAddressBits only.
+ assign fwd_rom_d = logic'(addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]);
+ // Main register stage (asynchronous, active-low reset via rst_ni):
+ // fwd_rom_q, rdata_q and state_q simply register their *_d counterparts,
+ // word_enable32_q registers bit 2 of the current address.
+ always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+ if (!rst_ni) begin
+ fwd_rom_q <= 1'b0;
+ rdata_q <= '0;
+ state_q <= Idle;
+ word_enable32_q <= 1'b0;
+ end else begin
+ fwd_rom_q <= fwd_rom_d;
+ rdata_q <= rdata_d;
+ state_q <= state_d;
+ word_enable32_q <= addr_i[2];
+ end
+ end
+ // Per-hart halted/resuming flags (asynchronous, active-low reset via rst_ni).
+ // The next-state values are masked with SelectableHarts, so bits that are
+ // zero in SelectableHarts always read back as zero.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ halted_q <= 1'b0;
+ resuming_q <= 1'b0;
+ end else begin
+ halted_q <= SelectableHarts & halted_d;
+ resuming_q <= SelectableHarts & resuming_d;
+ end
+ end
+endmodule : dm_mem
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..1b7d0f5
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,414 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:
+ * Author: Florian Zaruba <>
+ * Date: 30.6.2018
+ *
+ * Description: Debug-module package, contains common system definitions.
+ *
+ */
+// Debug-module package: shared constants, register address map, packed
+// register/command layouts and small helper functions that assemble 32-bit
+// RISC-V instruction words.
+//
+// All `struct packed` types below list their fields MSB first; the bit ranges
+// written on the fields document the position inside the 32-bit (or 24-bit)
+// word and must not be reordered.
+package dm;
+ // NOTE(review): presumably the value reported in dmstatus.version for
+ // debug spec 0.13 - confirm against the user of this constant.
+ localparam logic [3:0] DbgVersion013 = 4'h2;
+ // size of program buffer in chunks of 32-bit words
+ localparam logic [4:0] ProgBufSize = 5'h8;
+ // amount of data count registers implemented
+ localparam logic [3:0] DataCount = 4'h2;
+ // address to which a hart should jump when it was requested to halt
+ localparam logic [63:0] HaltAddress = 64'h800;
+ // resume and exception entry points follow the halt entry, one word apart
+ localparam logic [63:0] ResumeAddress = HaltAddress + 4;
+ localparam logic [63:0] ExceptionAddress = HaltAddress + 8;
+ // address where data0-15 is shadowed or if shadowed in a CSR
+ // address of the first CSR used for shadowing the data
+ localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here
+ // debug registers
+ // (8-bit register addresses; ProgBuf0..ProgBuf15 only name both ends of the range)
+ typedef enum logic [7:0] {
+ Data0 = 8'h04,
+ Data1 = 8'h05,
+ Data2 = 8'h06,
+ Data3 = 8'h07,
+ Data4 = 8'h08,
+ Data5 = 8'h09,
+ Data6 = 8'h0A,
+ Data7 = 8'h0B,
+ Data8 = 8'h0C,
+ Data9 = 8'h0D,
+ Data10 = 8'h0E,
+ Data11 = 8'h0F,
+ DMControl = 8'h10,
+ DMStatus = 8'h11, // r/o
+ Hartinfo = 8'h12,
+ HaltSum1 = 8'h13,
+ HAWindowSel = 8'h14,
+ HAWindow = 8'h15,
+ AbstractCS = 8'h16,
+ Command = 8'h17,
+ AbstractAuto = 8'h18,
+ DevTreeAddr0 = 8'h19,
+ DevTreeAddr1 = 8'h1A,
+ DevTreeAddr2 = 8'h1B,
+ DevTreeAddr3 = 8'h1C,
+ NextDM = 8'h1D,
+ ProgBuf0 = 8'h20,
+ ProgBuf15 = 8'h2F,
+ AuthData = 8'h30,
+ HaltSum2 = 8'h34,
+ HaltSum3 = 8'h35,
+ SBAddress3 = 8'h37,
+ SBCS = 8'h38,
+ SBAddress0 = 8'h39,
+ SBAddress1 = 8'h3A,
+ SBAddress2 = 8'h3B,
+ SBData0 = 8'h3C,
+ SBData1 = 8'h3D,
+ SBData2 = 8'h3E,
+ SBData3 = 8'h3F,
+ HaltSum0 = 8'h40
+ } dm_csr_e;
+ // debug causes
+ // (3 bit wide, same width as the `cause` field of dcsr_t below)
+ localparam logic [2:0] CauseBreakpoint = 3'h1;
+ localparam logic [2:0] CauseTrigger = 3'h2;
+ localparam logic [2:0] CauseRequest = 3'h3;
+ localparam logic [2:0] CauseSingleStep = 3'h4;
+ // layout of the DMStatus register (32 bit)
+ typedef struct packed {
+ logic [31:23] zero1;
+ logic impebreak;
+ logic [21:20] zero0;
+ logic allhavereset;
+ logic anyhavereset;
+ logic allresumeack;
+ logic anyresumeack;
+ logic allnonexistent;
+ logic anynonexistent;
+ logic allunavail;
+ logic anyunavail;
+ logic allrunning;
+ logic anyrunning;
+ logic allhalted;
+ logic anyhalted;
+ logic authenticated;
+ logic authbusy;
+ logic hasresethaltreq;
+ logic devtreevalid;
+ logic [3:0] version;
+ } dmstatus_t;
+ // layout of the DMControl register (32 bit)
+ typedef struct packed {
+ logic haltreq;
+ logic resumereq;
+ logic hartreset;
+ logic ackhavereset;
+ logic zero1;
+ logic hasel;
+ logic [25:16] hartsello;
+ logic [15:6] hartselhi;
+ logic [5:4] zero0;
+ logic setresethaltreq;
+ logic clrresethaltreq;
+ logic ndmreset;
+ logic dmactive;
+ } dmcontrol_t;
+ // layout of the Hartinfo register (32 bit)
+ typedef struct packed {
+ logic [31:24] zero1;
+ logic [23:20] nscratch;
+ logic [19:17] zero0;
+ logic dataaccess;
+ logic [15:12] datasize;
+ logic [11:0] dataaddr;
+ } hartinfo_t;
+ // abstract command error codes; values are assigned implicitly from 0
+ // (CmdErrNone) to 5 (CmdErrorBus), CmdErrorOther is pinned to 7
+ typedef enum logic [2:0] {
+ CmdErrNone, CmdErrBusy, CmdErrNotSupported,
+ CmdErrorException, CmdErrorHaltResume,
+ CmdErrorBus, CmdErrorOther = 7
+ } cmderr_e;
+ // layout of the AbstractCS register (32 bit)
+ typedef struct packed {
+ logic [31:29] zero3;
+ logic [28:24] progbufsize;
+ logic [23:13] zero2;
+ logic busy;
+ logic zero1;
+ cmderr_e cmderr;
+ logic [7:4] zero0;
+ logic [3:0] datacount;
+ } abstractcs_t;
+ // abstract command types (upper byte of the Command register)
+ typedef enum logic [7:0] {
+ AccessRegister = 8'h0,
+ QuickAccess = 8'h1,
+ AccessMemory = 8'h2
+ } cmd_e;
+ // layout of the Command register: 8-bit type followed by 24 command specific bits
+ typedef struct packed {
+ cmd_e cmdtype;
+ logic [23:0] control;
+ } command_t;
+ // layout of the AbstractAuto register (32 bit)
+ typedef struct packed {
+ logic [31:16] autoexecprogbuf;
+ logic [15:12] zero0;
+ logic [11:0] autoexecdata;
+ } abstractauto_t;
+ // 24-bit `control` field of command_t when cmdtype is AccessRegister
+ typedef struct packed {
+ logic zero1;
+ logic [22:20] aarsize;
+ logic aarpostincrement;
+ logic postexec;
+ logic transfer;
+ logic write;
+ logic [15:0] regno;
+ } ac_ar_cmd_t;
+ // DTM
+ // operation requested over the DMI
+ typedef enum logic [1:0] {
+ DTM_NOP = 2'h0,
+ DTM_READ = 2'h1,
+ DTM_WRITE = 2'h2
+ } dtm_op_e;
+ // layout of the SBCS (system bus access control and status) register (32 bit)
+ typedef struct packed {
+ logic [31:29] sbversion;
+ logic [28:23] zero0;
+ logic sbbusyerror;
+ logic sbbusy;
+ logic sbreadonaddr;
+ logic [19:17] sbaccess;
+ logic sbautoincrement;
+ logic sbreadondata;
+ logic [14:12] sberror;
+ logic [11:5] sbasize;
+ logic sbaccess128;
+ logic sbaccess64;
+ logic sbaccess32;
+ logic sbaccess16;
+ logic sbaccess8;
+ } sbcs_t;
+ // DMI response code for a successful operation
+ localparam logic [1:0] DTM_SUCCESS = 2'h0;
+ // DMI request: 7-bit register address, operation and 32-bit write data
+ typedef struct packed {
+ logic [6:0] addr;
+ dtm_op_e op;
+ logic [31:0] data;
+ } dmi_req_t;
+ // DMI response: 32-bit read data and 2-bit response code
+ typedef struct packed {
+ logic [31:0] data;
+ logic [1:0] resp;
+ } dmi_resp_t;
+ // privilege levels
+ typedef enum logic[1:0] {
+ PRIV_LVL_M = 2'b11,
+ PRIV_LVL_S = 2'b01,
+ PRIV_LVL_U = 2'b00
+ } priv_lvl_t;
+ // debugregs in core
+ // layout of the dcsr CSR (32 bit)
+ typedef struct packed {
+ logic [31:28] xdebugver;
+ logic [27:16] zero2;
+ logic ebreakm;
+ logic zero1;
+ logic ebreaks;
+ logic ebreaku;
+ logic stepie;
+ logic stopcount;
+ logic stoptime;
+ logic [8:6] cause;
+ logic zero0;
+ logic mprven;
+ logic nmip;
+ logic step;
+ priv_lvl_t prv;
+ } dcsr_t;
+ // CSRs
+ // (12-bit CSR addresses; used as the `csr` argument of csrw()/csrr() below)
+ typedef enum logic [11:0] {
+ // Floating-Point CSRs
+ CSR_FFLAGS = 12'h001,
+ CSR_FRM = 12'h002,
+ CSR_FCSR = 12'h003,
+ CSR_FTRAN = 12'h800,
+ // Supervisor Mode CSRs
+ CSR_SSTATUS = 12'h100,
+ CSR_SIE = 12'h104,
+ CSR_STVEC = 12'h105,
+ CSR_SCOUNTEREN = 12'h106,
+ CSR_SSCRATCH = 12'h140,
+ CSR_SEPC = 12'h141,
+ CSR_SCAUSE = 12'h142,
+ CSR_STVAL = 12'h143,
+ CSR_SIP = 12'h144,
+ CSR_SATP = 12'h180,
+ // Machine Mode CSRs
+ CSR_MSTATUS = 12'h300,
+ CSR_MISA = 12'h301,
+ CSR_MEDELEG = 12'h302,
+ CSR_MIDELEG = 12'h303,
+ CSR_MIE = 12'h304,
+ CSR_MTVEC = 12'h305,
+ CSR_MCOUNTEREN = 12'h306,
+ CSR_MSCRATCH = 12'h340,
+ CSR_MEPC = 12'h341,
+ CSR_MCAUSE = 12'h342,
+ CSR_MTVAL = 12'h343,
+ CSR_MIP = 12'h344,
+ CSR_PMPCFG0 = 12'h3A0,
+ CSR_PMPADDR0 = 12'h3B0,
+ CSR_MVENDORID = 12'hF11,
+ CSR_MARCHID = 12'hF12,
+ CSR_MIMPID = 12'hF13,
+ CSR_MHARTID = 12'hF14,
+ CSR_MCYCLE = 12'hB00,
+ CSR_MINSTRET = 12'hB02,
+ CSR_DCACHE = 12'h701,
+ CSR_ICACHE = 12'h700,
+ CSR_TSELECT = 12'h7A0,
+ CSR_TDATA1 = 12'h7A1,
+ CSR_TDATA2 = 12'h7A2,
+ CSR_TDATA3 = 12'h7A3,
+ CSR_TINFO = 12'h7A4,
+ // Debug CSR
+ CSR_DCSR = 12'h7b0,
+ CSR_DPC = 12'h7b1,
+ CSR_DSCRATCH0 = 12'h7b2, // optional
+ CSR_DSCRATCH1 = 12'h7b3, // optional
+ // Counters and Timers
+ CSR_CYCLE = 12'hC00,
+ CSR_TIME = 12'hC01,
+ CSR_INSTRET = 12'hC02
+ } csr_reg_t;
+ // Instruction Generation Helpers
+ // Each helper returns one 32-bit instruction word assembled from its
+ // arguments; none of them has side effects.
+ // jal rd, imm - imm[0] is not encoded
+ function automatic logic [31:0] jal (logic [4:0] rd,
+ logic [20:0] imm);
+ // OpCode Jal
+ return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h6f};
+ endfunction
+ // jalr rd, offset(rs1)
+ function automatic logic [31:0] jalr (logic [4:0] rd,
+ logic [4:0] rs1,
+ logic [11:0] offset);
+ // OpCode Jalr
+ return {offset[11:0], rs1, 3'b0, rd, 7'h67};
+ endfunction
+ // andi rd, rs1, imm
+ function automatic logic [31:0] andi (logic [4:0] rd,
+ logic [4:0] rs1,
+ logic [11:0] imm);
+ // OpCode andi
+ return {imm[11:0], rs1, 3'h7, rd, 7'h13};
+ endfunction
+ // slli rd, rs1, shamt (6-bit shift amount)
+ function automatic logic [31:0] slli (logic [4:0] rd,
+ logic [4:0] rs1,
+ logic [5:0] shamt);
+ // OpCode slli
+ return {6'b0, shamt[5:0], rs1, 3'h1, rd, 7'h13};
+ endfunction
+ // srli rd, rs1, shamt (6-bit shift amount)
+ function automatic logic [31:0] srli (logic [4:0] rd,
+ logic [4:0] rs1,
+ logic [5:0] shamt);
+ // OpCode srli
+ return {6'b0, shamt[5:0], rs1, 3'h5, rd, 7'h13};
+ endfunction
+ // integer load: dest <- mem[base + offset]; `size` is placed in funct3
+ function automatic logic [31:0] load (logic [2:0] size,
+ logic [4:0] dest,
+ logic [4:0] base,
+ logic [11:0] offset);
+ // OpCode Load
+ return {offset[11:0], base, size, dest, 7'h03};
+ endfunction
+ // auipc rd, imm
+ // NOTE(review): the immediate is arranged exactly like in jal() above; within
+ // the visible code this helper is only called with imm = '0, where the
+ // arrangement makes no difference - confirm before using a non-zero immediate.
+ function automatic logic [31:0] auipc (logic [4:0] rd,
+ logic [20:0] imm);
+ // OpCode Auipc
+ return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h17};
+ endfunction
+ // integer store: mem[base + offset] <- src; `size` is placed in funct3
+ function automatic logic [31:0] store (logic [2:0] size,
+ logic [4:0] src,
+ logic [4:0] base,
+ logic [11:0] offset);
+ // OpCode Store
+ return {offset[11:5], src, base, size, offset[4:0], 7'h23};
+ endfunction
+ // floating-point load: same field layout as load(), different opcode
+ function automatic logic [31:0] float_load (logic [2:0] size,
+ logic [4:0] dest,
+ logic [4:0] base,
+ logic [11:0] offset);
+ // OpCode Load
+ return {offset[11:0], base, size, dest, 7'b00_001_11};
+ endfunction
+ // floating-point store: same field layout as store(), different opcode
+ function automatic logic [31:0] float_store (logic [2:0] size,
+ logic [4:0] src,
+ logic [4:0] base,
+ logic [11:0] offset);
+ // OpCode Store
+ return {offset[11:5], src, base, size, offset[4:0], 7'b01_001_11};
+ endfunction
+ // write rs1 into `csr`; the destination register field is x0, i.e. the old
+ // CSR value is discarded
+ function automatic logic [31:0] csrw (csr_reg_t csr,
+ logic [4:0] rs1);
+ // CSRRW, rd, OpCode System
+ return {csr, rs1, 3'h1, 5'h0, 7'h73};
+ endfunction
+ // read `csr` into dest; the source register field is x0, i.e. no bits are set
+ function automatic logic [31:0] csrr (csr_reg_t csr,
+ logic [4:0] dest);
+ // rs1, CSRRS, rd, OpCode System
+ return {csr, 5'h0, 3'h2, dest, 7'h73};
+ endfunction
+ // conditional branch; the comparison is selected by funct3
+ function automatic logic [31:0] branch(logic [4:0] src2,
+ logic [4:0] src1,
+ logic [2:0] funct3,
+ logic [11:0] offset);
+ // OpCode Branch
+ return {offset[11], offset[9:4], src2, src1, funct3,
+ offset[3:0], offset[10], 7'b11_000_11};
+ endfunction
+ // fixed encodings
+ function automatic logic [31:0] ebreak ();
+ return 32'h00100073;
+ endfunction
+ function automatic logic [31:0] wfi ();
+ return 32'h10500073;
+ endfunction
+ function automatic logic [31:0] nop ();
+ return 32'h00000013;
+ endfunction
+ // all-zero word, used as a placeholder that must never be executed
+ function automatic logic [31:0] illegal ();
+ return 32'h00000000;
+ endfunction
+endpackage : dm
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..c97f956
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,172 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License. You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+* File:
+* Author: Florian Zaruba <>
+* Date: 1.8.2018
+* Description: System Bus Access Module
+*/
+// System Bus Access (SBA) master.
+//
+// Small FSM that turns the debugger's sbaddress/sbdata register accesses into
+// single requests on a req/gnt + r_valid master port:
+//   Idle      -> Read / Write   on the trigger conditions listed in the FSM
+//   Read      -> WaitRead       once the request is granted
+//   Write     -> WaitWrite      once the request is granted
+//   Wait*     -> Idle           once master_r_valid_i is seen
+// While leaving a Wait* state the address is advanced by (1 << sbaccess_i) if
+// sbautoincrement_i is set. An access with sbaccess_i > 3 that is already in
+// flight is aborted and reported with sberror_o = 3.
+//
+// Parameters:
+//   BusWidth - width of address and data on the master port (and of the
+//              sbaddress/sbdata values), in bits
+module dm_sba #(
+  parameter int unsigned BusWidth = 32
+) (
+  input  logic                   clk_i, // Clock
+  input  logic                   rst_ni,
+  input  logic                   dmactive_i, // synchronous reset active low
+  // bus master port
+  output logic                   master_req_o,
+  output logic [BusWidth-1:0]    master_add_o,
+  output logic                   master_we_o,
+  output logic [BusWidth-1:0]    master_wdata_o,
+  output logic [BusWidth/8-1:0]  master_be_o,
+  input  logic                   master_gnt_i,
+  input  logic                   master_r_valid_i,
+  input  logic [BusWidth-1:0]    master_r_rdata_i,
+  // address in
+  input  logic [BusWidth-1:0]    sbaddress_i,
+  input  logic                   sbaddress_write_valid_i,
+  // control signals in
+  input  logic                   sbreadonaddr_i,
+  output logic [BusWidth-1:0]    sbaddress_o,
+  input  logic                   sbautoincrement_i,
+  input  logic [2:0]             sbaccess_i,
+  // data in
+  input  logic                   sbreadondata_i,
+  input  logic [BusWidth-1:0]    sbdata_i,
+  input  logic                   sbdata_read_valid_i,
+  input  logic                   sbdata_write_valid_i,
+  // read data out
+  output logic [BusWidth-1:0]    sbdata_o,
+  output logic                   sbdata_valid_o,
+  // control signals
+  output logic                   sbbusy_o,
+  output logic                   sberror_valid_o, // bus error occurred
+  output logic [2:0]             sberror_o // bus error occurred
+);
+  typedef enum logic [2:0] { Idle, Read, Write, WaitRead, WaitWrite } state_e;
+  state_e state_d, state_q;
+  logic [BusWidth-1:0]           address;
+  logic                          req;
+  logic                          gnt;
+  logic                          we;
+  logic [BusWidth/8-1:0]         be;
+  // index of the addressed byte lane inside one bus word
+  logic [$clog2(BusWidth/8)-1:0] be_idx;
+  // busy as long as an access is pending or in flight
+  assign sbbusy_o = logic'(state_q != Idle);
+  always_comb begin : p_fsm
+    req             = 1'b0;
+    address         = sbaddress_i;
+    we              = 1'b0;
+    be              = '0;
+    be_idx          = sbaddress_i[$clog2(BusWidth/8)-1:0];
+    sberror_o       = '0;
+    sberror_valid_o = 1'b0;
+    sbaddress_o     = sbaddress_i;
+    state_d         = state_q;
+    unique case (state_q)
+      Idle: begin
+        // The three conditions are evaluated in order, a later one that is
+        // true overrides an earlier one.
+        // debugger requested a read
+        if (sbaddress_write_valid_i && sbreadonaddr_i) state_d = Read;
+        // debugger requested a write
+        if (sbdata_write_valid_i) state_d = Write;
+        // perform another read
+        if (sbdata_read_valid_i && sbreadondata_i) state_d = Read;
+      end
+      Read: begin
+        req = 1'b1;
+        if (gnt) state_d = WaitRead;
+      end
+      Write: begin
+        req = 1'b1;
+        we  = 1'b1;
+        // generate byte enable mask
+        unique case (sbaccess_i)
+          // 8 bit: only the addressed byte lane
+          3'b000: begin
+            be[be_idx] = '1;
+          end
+          // 16 bit: the aligned pair of lanes containing the addressed byte
+          3'b001: begin
+            be[int'({be_idx[$high(be_idx):1], 1'b0}) +: 2] = '1;
+          end
+          // 32 bit: aligned half of a 64-bit bus, otherwise all lanes
+          3'b010: begin
+            if (BusWidth == 32'd64) be[int'({be_idx[$high(be_idx)], 2'h0}) +: 4] = '1;
+            else be = '1;
+          end
+          // 64 bit: all lanes
+          3'b011: be = '1;
+          default: ;
+        endcase
+        if (gnt) state_d = WaitWrite;
+      end
+      WaitRead: begin
+        if (sbdata_valid_o) begin
+          state_d = Idle;
+          // auto-increment address
+          if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i);
+        end
+      end
+      WaitWrite: begin
+        if (sbdata_valid_o) begin
+          state_d = Idle;
+          // auto-increment address
+          if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i);
+        end
+      end
+      default: state_d = Idle; // catch parasitic state
+    endcase
+    // handle error case
+    // (overrides whatever the case statement above decided)
+    if (sbaccess_i > 3 && state_q != Idle) begin
+      req             = 1'b0;
+      state_d         = Idle;
+      sberror_valid_o = 1'b1;
+      sberror_o       = 3'd3;
+    end
+    // further error handling should go here ...
+  end
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      state_q <= Idle;
+    end else begin
+      state_q <= state_d;
+    end
+  end
+  // plain wiring between the FSM and the master port
+  assign master_req_o   = req;
+  assign master_add_o   = address[BusWidth-1:0];
+  assign master_we_o    = we;
+  assign master_wdata_o = sbdata_i[BusWidth-1:0];
+  assign master_be_o    = be[BusWidth/8-1:0];
+  assign gnt            = master_gnt_i;
+  assign sbdata_valid_o = master_r_valid_i;
+  assign sbdata_o       = master_r_rdata_i[BusWidth-1:0];
+  //pragma translate_off
+  `ifndef VERILATOR
+  // maybe bump severity to $error if not handled at runtime
+  // The check is disabled while the debug module is not active (dmactive_i is
+  // an active-low reset), i.e. whenever dmactive_i is anything but 1.
+  dm_sba_access_size: assert property(@(posedge clk_i) disable iff (dmactive_i !== 1'b1)
+      (state_d != Idle) |-> (sbaccess_i < 4))
+      else $warning ("accesses > 8 byte not supported at the moment");
+  `endif
+  //pragma translate_on
+endmodule : dm_sba
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..c1c1de2
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,85 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License. You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+* File:
+* Author: Andreas Traber <>
+* Author: Florian Zaruba <>
+* Description: Clock domain crossings for JTAG to DMI, very heavily based
+* on previous work by Andreas Traber for the PULP project.
+* This is mainly a wrapper around the existing CDCs.
+*/
+// Clock domain crossing between the JTAG clock (tck_i) and the core clock
+// (clk_i) for DMI traffic. Two asynchronous FIFOs of depth 4 are used:
+//   i_cdc_req  : requests,  written in the tck_i domain, read in the clk_i domain
+//   i_cdc_resp : responses, written in the clk_i domain, read in the tck_i domain
+// Both directions use a valid/ready handshake; the FIFO fill levels are left
+// unconnected.
+module dmi_cdc (
+  // JTAG side (master side)
+  input  logic          tck_i,
+  input  logic          trst_ni,
+  input  dm::dmi_req_t  jtag_dmi_req_i,
+  output logic          jtag_dmi_ready_o,
+  input  logic          jtag_dmi_valid_i,
+  output dm::dmi_resp_t jtag_dmi_resp_o,
+  output logic          jtag_dmi_valid_o,
+  input  logic          jtag_dmi_ready_i,
+  // core side (slave side)
+  input  logic          clk_i,
+  input  logic          rst_ni,
+  output dm::dmi_req_t  core_dmi_req_o,
+  output logic          core_dmi_valid_o,
+  input  logic          core_dmi_ready_i,
+  input  dm::dmi_resp_t core_dmi_resp_i,
+  output logic          core_dmi_ready_o,
+  input  logic          core_dmi_valid_i
+);
+  // TODO: Make it clean for synthesis.
+  // request path: JTAG -> core
+  fifo_async #(
+    .Width ( $bits(dm::dmi_req_t) ),
+    .Depth ( 4 )
+  ) i_cdc_req (
+    .clk_wr_i  ( tck_i            ),
+    .rst_wr_ni ( trst_ni          ),
+    .wvalid_i  ( jtag_dmi_valid_i ),
+    .wready_o  ( jtag_dmi_ready_o ), // wrclk
+    .wdata_i   ( jtag_dmi_req_i   ),
+    .wdepth_o  (                  ),
+    .clk_rd_i  ( clk_i            ),
+    .rst_rd_ni ( rst_ni           ),
+    .rvalid_o  ( core_dmi_valid_o ),
+    .rready_i  ( core_dmi_ready_i ),
+    .rdata_o   ( core_dmi_req_o   ),
+    .rdepth_o  (                  )
+  );
+  // response path: core -> JTAG
+  fifo_async #(
+    .Width ( $bits(dm::dmi_resp_t) ),
+    .Depth ( 4 )
+  ) i_cdc_resp (
+    .clk_wr_i  ( clk_i            ),
+    .rst_wr_ni ( rst_ni           ),
+    .wvalid_i  ( core_dmi_valid_i ),
+    .wready_o  ( core_dmi_ready_o ), // wrclk
+    .wdata_i   ( core_dmi_resp_i  ),
+    .wdepth_o  (                  ),
+    .clk_rd_i  ( tck_i            ),
+    .rst_rd_ni ( trst_ni          ),
+    .rvalid_o  ( jtag_dmi_valid_o ),
+    .rready_i  ( jtag_dmi_ready_i ),
+    .rdata_o   ( jtag_dmi_resp_o  ),
+    .rdepth_o  (                  )
+  );
+endmodule : dmi_cdc
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..917cdc6
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,264 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License. You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+* File:
+* Author: Florian Zaruba <>
+* Date: 19.7.2018
+* Description: JTAG DMI (debug module interface)
+*/
+// JTAG debug transport module (DTM).
+//
+// Contains the `dmi` data register (address / data / op), a small FSM that
+// turns a scanned-in dmi value into one DMI read or write request, the JTAG
+// TAP (dmi_jtag_tap) and the clock domain crossing towards the debug module
+// (dmi_cdc). All logic in this module apart from the CDC runs on tck_i.
+//
+// Parameters:
+//   IdcodeValue - value handed to the TAP as JTAG IDCODE
+module dmi_jtag #(
+  parameter logic [31:0] IdcodeValue = 32'h00000001
+) (
+  input  logic          clk_i, // DMI Clock
+  input  logic          rst_ni, // Asynchronous reset active low
+  input  logic          testmode_i,
+  output logic          dmi_rst_no, // hard reset
+  output dm::dmi_req_t  dmi_req_o,
+  output logic          dmi_req_valid_o,
+  input  logic          dmi_req_ready_i,
+  input  dm::dmi_resp_t dmi_resp_i,
+  output logic          dmi_resp_ready_o,
+  input  logic          dmi_resp_valid_i,
+  input  logic          tck_i, // JTAG test clock pad
+  input  logic          tms_i, // JTAG test mode select pad
+  input  logic          trst_ni, // JTAG test reset pad
+  input  logic          td_i, // JTAG test data input pad
+  output logic          td_o, // JTAG test data output pad
+  output logic          tdo_oe_o // Data out output enable
+);
+  // the DMI reset simply follows the asynchronous reset input
+  assign dmi_rst_no = rst_ni;
+  // TAP status/control signals
+  logic test_logic_reset;
+  logic shift_dr;
+  logic update_dr;
+  logic capture_dr;
+  logic dmi_access;
+  logic dtmcs_select;
+  logic dmi_reset;
+  logic dmi_tdi;
+  logic dmi_tdo;
+  // request/response channel towards the CDC (tck_i domain)
+  dm::dmi_req_t  dmi_req;
+  logic          dmi_req_ready;
+  logic          dmi_req_valid;
+  dm::dmi_resp_t dmi_resp;
+  logic          dmi_resp_valid;
+  logic          dmi_resp_ready;
+  // layout of the dmi data register as it is shifted through JTAG
+  typedef struct packed {
+    logic [6:0]  address;
+    logic [31:0] data;
+    logic [1:0]  op;
+  } dmi_t;
+  // status reported in the op field when the register is captured
+  typedef enum logic [1:0] {
+    DMINoError = 2'h0, DMIReservedError = 2'h1,
+    DMIOPFailed = 2'h2, DMIBusy = 2'h3
+  } dmi_error_e;
+  typedef enum logic [2:0] { Idle, Read, WaitReadValid, Write, WaitWriteValid } state_e;
+  state_e state_d, state_q;
+  logic [$bits(dmi_t)-1:0] dr_d, dr_q;
+  logic [6:0]              address_d, address_q;
+  logic [31:0]             data_d, data_q;
+  // structured view of the shift register contents
+  dmi_t dmi;
+  assign dmi = dmi_t'(dr_q);
+  // the request always carries the latched address and data; everything that
+  // is not a write is sent as a read
+  assign dmi_req.addr = address_q;
+  assign dmi_req.data = data_q;
+  assign dmi_req.op   = (state_q == Write) ? dm::DTM_WRITE : dm::DTM_READ;
+  // we will always be ready to accept the data we requested
+  assign dmi_resp_ready = 1'b1;
+  logic error_dmi_busy;
+  dmi_error_e error_d, error_q;
+  always_comb begin : p_fsm
+    error_dmi_busy = 1'b0;
+    // default assignments
+    state_d       = state_q;
+    address_d     = address_q;
+    data_d        = data_q;
+    error_d       = error_q;
+    dmi_req_valid = 1'b0;
+    unique case (state_q)
+      Idle: begin
+        // make sure that no error is sticky
+        if (dmi_access && update_dr && (error_q == DMINoError)) begin
+          // save address and value
+          address_d = dmi.address;
+          data_d    = dmi.data;
+          if (dm::dtm_op_e'(dmi.op) == dm::DTM_READ) begin
+            state_d = Read;
+          end else if (dm::dtm_op_e'(dmi.op) == dm::DTM_WRITE) begin
+            state_d = Write;
+          end
+          // else this is a nop and we can stay here
+        end
+      end
+      Read: begin
+        dmi_req_valid = 1'b1;
+        if (dmi_req_ready) begin
+          state_d = WaitReadValid;
+        end
+      end
+      WaitReadValid: begin
+        // load data into register and shift out
+        if (dmi_resp_valid) begin
+          data_d  = dmi_resp.data;
+          state_d = Idle;
+        end
+      end
+      Write: begin
+        dmi_req_valid = 1'b1;
+        // got a valid answer go back to idle
+        if (dmi_req_ready) begin
+          state_d = Idle;
+        end
+      end
+      // WaitWriteValid (and any unexpected encoding) ends up here
+      default: begin
+        // just wait for idle here
+        if (dmi_resp_valid) begin
+          state_d = Idle;
+        end
+      end
+    endcase
+    // update_dr means we got another request but we didn't finish
+    // the one in progress, this state is sticky
+    if (update_dr && state_q != Idle) begin
+      error_dmi_busy = 1'b1;
+    end
+    // if capture_dr goes high while we are in the read state
+    // or in the corresponding wait state we are not giving back a valid word
+    // -> throw an error
+    if (capture_dr && state_q inside {Read, WaitReadValid}) begin
+      error_dmi_busy = 1'b1;
+    end
+    if (error_dmi_busy) begin
+      error_d = DMIBusy;
+    end
+    // clear sticky error flag
+    // (takes precedence over a busy error raised in the same cycle)
+    if (dmi_reset && dtmcs_select) begin
+      error_d = DMINoError;
+    end
+  end
+  // shift register
+  // the LSB is shifted out first, new bits enter at the MSB
+  assign dmi_tdo = dr_q[0];
+  always_comb begin : p_shift
+    dr_d = dr_q;
+    // capture: present the latched address/data together with the status;
+    // for any other error value the register keeps its contents
+    if (capture_dr) begin
+      if (dmi_access) begin
+        if (error_q == DMINoError && !error_dmi_busy) begin
+          dr_d = {address_q, data_q, DMINoError};
+        // DMI was busy, report an error
+        end else if (error_q == DMIBusy || error_dmi_busy) begin
+          dr_d = {address_q, data_q, DMIBusy};
+        end
+      end
+    end
+    if (shift_dr) begin
+      if (dmi_access) begin
+        dr_d = {dmi_tdi, dr_q[$bits(dr_q)-1:1]};
+      end
+    end
+    // Test-Logic-Reset clears the register and wins over capture and shift
+    if (test_logic_reset) begin
+      dr_d = '0;
+    end
+  end
+  always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs
+    if (!trst_ni) begin
+      dr_q      <= '0;
+      state_q   <= Idle;
+      address_q <= '0;
+      data_q    <= '0;
+      error_q   <= DMINoError;
+    end else begin
+      dr_q      <= dr_d;
+      state_q   <= state_d;
+      address_q <= address_d;
+      data_q    <= data_d;
+      error_q   <= error_d;
+    end
+  end
+  // ---------
+  // TAP
+  // ---------
+  dmi_jtag_tap #(
+    .IrLength    (5),
+    .IdcodeValue (IdcodeValue)
+  ) i_dmi_jtag_tap (
+    .tck_i              ( tck_i            ),
+    .tms_i              ( tms_i            ),
+    .trst_ni            ( trst_ni          ),
+    .td_i               ( td_i             ),
+    .td_o               ( td_o             ),
+    .tdo_oe_o           ( tdo_oe_o         ),
+    .testmode_i         ( testmode_i       ),
+    .test_logic_reset_o ( test_logic_reset ),
+    .shift_dr_o         ( shift_dr         ),
+    .update_dr_o        ( update_dr        ),
+    .capture_dr_o       ( capture_dr       ),
+    .dmi_access_o       ( dmi_access       ),
+    .dtmcs_select_o     ( dtmcs_select     ),
+    .dmi_reset_o        ( dmi_reset        ),
+    .dmi_error_i        ( error_q          ),
+    .dmi_tdi_o          ( dmi_tdi          ),
+    .dmi_tdo_i          ( dmi_tdo          )
+  );
+  // ---------
+  // CDC
+  // ---------
+  dmi_cdc i_dmi_cdc (
+    // JTAG side (master side)
+    .tck_i            ( tck_i            ),
+    .trst_ni          ( trst_ni          ),
+    .jtag_dmi_req_i   ( dmi_req          ),
+    .jtag_dmi_ready_o ( dmi_req_ready    ),
+    .jtag_dmi_valid_i ( dmi_req_valid    ),
+    .jtag_dmi_resp_o  ( dmi_resp         ),
+    .jtag_dmi_valid_o ( dmi_resp_valid   ),
+    .jtag_dmi_ready_i ( dmi_resp_ready   ),
+    // core side
+    .clk_i,
+    .rst_ni,
+    .core_dmi_req_o   ( dmi_req_o        ),
+    .core_dmi_valid_o ( dmi_req_valid_o  ),
+    .core_dmi_ready_i ( dmi_req_ready_i  ),
+    .core_dmi_resp_i  ( dmi_resp_i       ),
+    .core_dmi_ready_o ( dmi_resp_ready_o ),
+    .core_dmi_valid_i ( dmi_resp_valid_i )
+  );
+endmodule : dmi_jtag
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..e90f914
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,345 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:
+ * Author: Florian Zaruba <>
+ * Date: 19.7.2018
+ *
+ * Description: JTAG TAP for DMI (according to debug spec 0.13)
+ *
+ */
+module dmi_jtag_tap #(
+ parameter int unsigned IrLength = 5,
+ // JTAG IDCODE Value
+ parameter logic [31:0] IdcodeValue = 32'h00000001
+ // xxxx version
+ // xxxxxxxxxxxxxxxx part number
+ // xxxxxxxxxxx manufacturer id
+ // 1 required by standard
+) (
+ input logic tck_i, // JTAG test clock pad
+ input logic tms_i, // JTAG test mode select pad
+ input logic trst_ni, // JTAG test reset pad
+ input logic td_i, // JTAG test data input pad
+ output logic td_o, // JTAG test data output pad
+ output logic tdo_oe_o, // Data out output enable
+ input logic testmode_i,
+ output logic test_logic_reset_o,
+ output logic shift_dr_o,
+ output logic update_dr_o,
+ output logic capture_dr_o,
+ // we want to access DMI register
+ output logic dmi_access_o,
+ // JTAG is interested in writing the DTM CSR register
+ output logic dtmcs_select_o,
+ // clear error state
+ output logic dmi_reset_o,
+ input logic [1:0] dmi_error_i,
+ // test data to submodule
+ output logic dmi_tdi_o,
+ // test data in from submodule
+ input logic dmi_tdo_i
+ // to submodule
+ assign dmi_tdi_o = td_i;
+ typedef enum logic [3:0] {
+ TestLogicReset, RunTestIdle, SelectDrScan,
+ CaptureDr, ShiftDr, Exit1Dr, PauseDr, Exit2Dr,
+ UpdateDr, SelectIrScan, CaptureIr, ShiftIr,
+ Exit1Ir, PauseIr, Exit2Ir, UpdateIr
+ } tap_state_e;
+ tap_state_e tap_state_q, tap_state_d;
+ typedef enum logic [IrLength-1:0] {
+ BYPASS0 = 'h0,
+ IDCODE = 'h1,
+ DTMCSR = 'h10,
+ DMIACCESS = 'h11,
+ BYPASS1 = 'h1f
+ } ir_reg_e;
+ typedef struct packed {
+ logic [31:18] zero1;
+ logic dmihardreset;
+ logic dmireset;
+ logic zero0;
+ logic [14:12] idle;
+ logic [11:10] dmistat;
+ logic [9:4] abits;
+ logic [3:0] version;
+ } dtmcs_t;
+ // ----------------
+ // IR logic
+ // ----------------
+ // shift register
+ logic [IrLength-1:0] jtag_ir_shift_d, jtag_ir_shift_q;
+ // IR register -> this gets captured from shift register upon update_ir
+ ir_reg_e jtag_ir_d, jtag_ir_q;
+ logic capture_ir, shift_ir, update_ir; // pause_ir
+ // Next-value logic of the instruction register (IR).
+ // jtag_ir_shift_d is the next value of the serial shift stage, jtag_ir_d the
+ // next value of the active instruction. The statements are ordered by
+ // increasing priority: a later assignment overrides an earlier one, so the
+ // test-logic-reset branch at the end always wins.
+ always_comb begin : p_jtag
+ // default: hold both registers
+ jtag_ir_shift_d = jtag_ir_shift_q;
+ jtag_ir_d = jtag_ir_q;
+ // IR shift register
+ // td_i enters at the MSB, the register moves one position towards bit 0
+ if (shift_ir) begin
+ jtag_ir_shift_d = {td_i, jtag_ir_shift_q[IrLength-1:1]};
+ end
+ // capture IR register
+ // a fixed pattern is loaded whose two least significant bits are "01"
+ if (capture_ir) begin
+ jtag_ir_shift_d = IrLength'(4'b0101);
+ end
+ // update IR register
+ // the shifted-in value becomes the active instruction
+ if (update_ir) begin
+ jtag_ir_d = ir_reg_e'(jtag_ir_shift_q);
+ end
+ // synchronous test-logic reset
+ // clears the shift stage and selects IDCODE as the active instruction
+ if (test_logic_reset_o) begin
+ jtag_ir_shift_d = '0;
+ jtag_ir_d = IDCODE;
+ end
+ end
+ // Instruction-register flops: the active instruction and the IR shift stage.
+ // Both are clocked by tck_i; while trst_ni is low the active instruction is
+ // IDCODE and the shift stage is cleared.
+ always_ff @(posedge tck_i, negedge trst_ni) begin : p_jtag_ir_reg
+   if (!trst_ni) begin
+     jtag_ir_q       <= IDCODE;
+     jtag_ir_shift_q <= '0;
+   end else begin
+     jtag_ir_q       <= jtag_ir_d;
+     jtag_ir_shift_q <= jtag_ir_shift_d;
+   end
+ end
+ // ----------------
+ // TAP DR Regs
+ // ----------------
+ // - Bypass
+ // - IDCODE
+ // - DTM CS
+ logic [31:0] idcode_d, idcode_q;
+ logic idcode_select;
+ logic bypass_select;
+ dtmcs_t dtmcs_d, dtmcs_q;
+ logic bypass_d, bypass_q; // this is a 1-bit register
+ assign dmi_reset_o = dtmcs_q.dmireset;
+ // Next-value logic of the TAP-internal data registers (IDCODE, BYPASS and
+ // DTMCS). Which register reacts is decided by the *_select signals; the
+ // statements are ordered by increasing priority (later assignments win).
+ // NOTE(review): dtmcs_d is not modified in the test_logic_reset_o branch,
+ // unlike idcode_d and bypass_d - confirm that this is intended.
+ always_comb begin
+ // default: hold all registers
+ idcode_d = idcode_q;
+ bypass_d = bypass_q;
+ dtmcs_d = dtmcs_q;
+ // Capture-DR: load the parallel value of the selected register
+ if (capture_dr_o) begin
+ if (idcode_select) idcode_d = IdcodeValue;
+ if (bypass_select) bypass_d = 1'b0;
+ if (dtmcs_select_o) begin
+ dtmcs_d = '{
+ zero1 : '0,
+ dmihardreset : 1'b0,
+ dmireset : 1'b0,
+ zero0 : '0,
+ idle : 3'd1, // 1: Enter Run-Test/Idle and leave it immediately
+ dmistat : dmi_error_i, // 0: No error, 1: Op failed, 2: too fast
+ abits : 6'd7, // The size of address in dmi
+ version : 4'd1 // Version described in spec version 0.13 (and later?)
+ };
+ end
+ end
+ // Shift-DR: td_i enters at the MSB, the register moves towards bit 0
+ if (shift_dr_o) begin
+ if (idcode_select) idcode_d = {td_i, 31'(idcode_q >> 1)};
+ if (bypass_select) bypass_d = td_i;
+ if (dtmcs_select_o) dtmcs_d = {td_i, 31'(dtmcs_q >> 1)};
+ end
+ // Test-Logic-Reset: restore IDCODE and clear the bypass bit
+ if (test_logic_reset_o) begin
+ idcode_d = IdcodeValue;
+ bypass_d = 1'b0;
+ end
+ end
+ // ----------------
+ // Data reg select
+ // ----------------
+ // Decode the active instruction into one data-register select.
+ // Both bypass encodings and every unlisted instruction select the bypass
+ // register.
+ always_comb begin : p_data_reg_sel
+   // default: nothing selected
+   bypass_select  = 1'b0;
+   idcode_select  = 1'b0;
+   dtmcs_select_o = 1'b0;
+   dmi_access_o   = 1'b0;
+   unique case (jtag_ir_q)
+     IDCODE:           idcode_select  = 1'b1;
+     DTMCSR:           dtmcs_select_o = 1'b1;
+     DMIACCESS:        dmi_access_o   = 1'b1;
+     BYPASS0, BYPASS1: bypass_select  = 1'b1;
+     default:          bypass_select  = 1'b1;
+   endcase
+ end
+ // ----------------
+ // Output select
+ // ----------------
+ logic tdo_mux;
+ // Select the bit that is presented on TDO.
+ // While the IR is being shifted, bit 0 of the IR shift stage is used;
+ // otherwise the active instruction chooses bit 0 of the matching data
+ // register (or the DMI serial output supplied from outside this module).
+ always_comb begin : p_out_sel
+ // we are shifting out the IR register
+ if (shift_ir) begin
+ tdo_mux = jtag_ir_shift_q[0];
+ // here we are shifting the DR register
+ end else begin
+ unique case (jtag_ir_q)
+ IDCODE: tdo_mux = idcode_q[0]; // Reading ID code
+ // version is the last (least significant) field of the packed dtmcs_t
+ // struct, so version[0] is bit 0 of the DTMCS register
+ DTMCSR: tdo_mux = dtmcs_q.version[0];
+ DMIACCESS: tdo_mux = dmi_tdo_i; // Read from DMI TDO
+ default: tdo_mux = bypass_q; // BYPASS instruction
+ endcase
+ end
+ end
+ // ----------------
+ // DFT
+ // ----------------
+ logic tck_n;
+ // Clock inverter cell that derives tck_n from tck_i; tck_n clocks the TDO
+ // output registers. The cell is instantiated with HasScanMode set and
+ // receives testmode_i as its scan-mode input.
+ // NOTE(review): presumably the inversion is bypassed in scan mode - confirm
+ // against the prim_generic_clock_inv implementation.
+ prim_generic_clock_inv #(
+ .HasScanMode(1'b1)
+ ) i_tck_inv (
+ .clk_i ( tck_i ),
+ .clk_no ( tck_n ),
+ .scanmode_i ( testmode_i )
+ );
+ // TDO changes state at negative edge of TCK
+ // TDO output stage, registered on the rising edge of tck_n (the output of the
+ // clock inverter i_tck_inv) and cleared asynchronously while trst_ni is low.
+ always_ff @(posedge tck_n, negedge trst_ni) begin : p_tdo_regs
+   if (!trst_ni) begin
+     tdo_oe_o <= 1'b0;
+     td_o     <= 1'b0;
+   end else begin
+     // the output enable is active only while the IR or a DR is being shifted
+     tdo_oe_o <= (shift_dr_o | shift_ir);
+     td_o     <= tdo_mux;
+   end
+ end
+ // ----------------
+ // TAP FSM
+ // ----------------
+ // Determination of next state; purely combinatorial
+ // TAP controller: next-state and strobe generation.
+ // tap_state_d is derived from the current state and tms_i only. Each strobe
+ // (test_logic_reset_o, capture/shift/update for DR and IR) defaults to 0 and
+ // is raised while the controller is in the corresponding state.
+ // tap_state_d has no default assignment; the case relies on tap_state_e
+ // enumerating all 16 encodings of its 4-bit base type.
+ always_comb begin : p_tap_fsm
+ test_logic_reset_o = 1'b0;
+ capture_dr_o = 1'b0;
+ shift_dr_o = 1'b0;
+ update_dr_o = 1'b0;
+ capture_ir = 1'b0;
+ shift_ir = 1'b0;
+ // pause_ir = 1'b0; unused
+ update_ir = 1'b0;
+ unique case (tap_state_q)
+ // stay here while tms_i is high, leave to Run-Test/Idle when it is low
+ TestLogicReset: begin
+ tap_state_d = (tms_i) ? TestLogicReset : RunTestIdle;
+ test_logic_reset_o = 1'b1;
+ end
+ RunTestIdle: begin
+ tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+ end
+ // DR Path
+ SelectDrScan: begin
+ tap_state_d = (tms_i) ? SelectIrScan : CaptureDr;
+ end
+ CaptureDr: begin
+ capture_dr_o = 1'b1;
+ tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
+ end
+ ShiftDr: begin
+ shift_dr_o = 1'b1;
+ tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
+ end
+ Exit1Dr: begin
+ tap_state_d = (tms_i) ? UpdateDr : PauseDr;
+ end
+ PauseDr: begin
+ tap_state_d = (tms_i) ? Exit2Dr : PauseDr;
+ end
+ Exit2Dr: begin
+ tap_state_d = (tms_i) ? UpdateDr : ShiftDr;
+ end
+ UpdateDr: begin
+ update_dr_o = 1'b1;
+ tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+ end
+ // IR Path
+ SelectIrScan: begin
+ tap_state_d = (tms_i) ? TestLogicReset : CaptureIr;
+ end
+ // In this controller state, the shift register bank in the
+ // Instruction Register parallel loads a pattern of fixed values on
+ // the rising edge of TCK. The two least significant bits must always
+ // be "01".
+ CaptureIr: begin
+ capture_ir = 1'b1;
+ tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
+ end
+ // In this controller state, the instruction register gets connected
+ // between TDI and TDO, and the captured pattern gets shifted on
+ // each rising edge of TCK. The instruction available on the TDI
+ // pin is also shifted in to the instruction register.
+ ShiftIr: begin
+ shift_ir = 1'b1;
+ tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
+ end
+ Exit1Ir: begin
+ tap_state_d = (tms_i) ? UpdateIr : PauseIr;
+ end
+ PauseIr: begin
+ // pause_ir = 1'b1; // unused
+ tap_state_d = (tms_i) ? Exit2Ir : PauseIr;
+ end
+ Exit2Ir: begin
+ tap_state_d = (tms_i) ? UpdateIr : ShiftIr;
+ end
+ // In this controller state, the instruction in the instruction
+ // shift register is transferred to the active Instruction Register.
+ // In this implementation the transfer is registered on the rising
+ // edge of tck_i (see p_jtag_ir_reg). The instruction becomes the
+ // current instruction once it is registered.
+ UpdateIr: begin
+ update_ir = 1'b1;
+ tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+ end
+ //default: ; // can't actually happen since case is full
+ endcase
+ end
+ // TAP state and TAP-internal data registers, clocked by tck_i and reset
+ // asynchronously while trst_ni is low.
+ // NOTE(review): the asynchronous reset places the controller in RunTestIdle,
+ // not TestLogicReset. IEEE 1149.1 expects TRST* to force Test-Logic-Reset;
+ // confirm whether this deviation is intended before changing it, since
+ // drivers may rely on starting in Run-Test/Idle.
+ always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs
+ if (!trst_ni) begin
+ tap_state_q <= RunTestIdle;
+ idcode_q <= IdcodeValue;
+ bypass_q <= 1'b0;
+ dtmcs_q <= '0;
+ end else begin
+ // take over the combinationally computed next values
+ tap_state_q <= tap_state_d;
+ idcode_q <= idcode_d;
+ bypass_q <= bypass_d;
+ dtmcs_q <= dtmcs_d;
+ end
+ end
+endmodule : dmi_jtag_tap
diff --git a/verilog/rtl/down_clocking_even.v b/verilog/rtl/down_clocking_even.v
new file mode 100644
index 0000000..49a06ca
--- /dev/null
+++ b/verilog/rtl/down_clocking_even.v
@@ -0,0 +1,37 @@
+/*Down clocking module
+Output clock frequency is the original frequency divided by an even number
+module down_clocking_even(
+input clk_i,
+input rst_ni,
+input [15:0]i_divisor,
+output o_clk
+wire [15:0]divisor;
+wire borrow;
+minus_one minus_one_0(
+wire go;
+assign go=((i_divisor!=0)&&rst_ni);
+reg [15:0]ct;
+reg clk;
+// Counter and toggle flop of the even divider.
+// rst_ni is sampled on the rising edge of clk_i (synchronous reset) and
+// clears both ct and clk. While go is high, ct counts up by one per clock;
+// once ct has reached divisor, ct returns to 0 and clk toggles.
+// go already contains rst_ni, so inside the else branch it reduces to
+// (i_divisor != 0).
+// NOTE(review): divisor is driven by the minus_one instance whose connections
+// are not visible here - presumably i_divisor - 1; confirm.
+always@(posedge clk_i )
+ if(!rst_ni)begin
+ ct<=0;
+ clk<=0;
+ end
+ else if(go)begin
+ if(ct>=divisor)begin
+ ct<=0;
+ clk<=~clk;
+ end
+ else ct<=ct+1;
+ end
+assign o_clk=go?clk:clk_i;
\ No newline at end of file
diff --git a/verilog/rtl/down_clocking_odd.v b/verilog/rtl/down_clocking_odd.v
new file mode 100644
index 0000000..3395908
--- /dev/null
+++ b/verilog/rtl/down_clocking_odd.v
@@ -0,0 +1,69 @@
+/*Author: Zhuxu
+Down clocking module
+Output clock frequency is the original frequency divided by an odd number
+module down_clocking_odd(
+input clk_i,
+input rst_ni,
+input [15:0]i_divisor,
+output o_clk
+reg a,b;
+wire c;
+assign c=(~a)&(~b);
+wire [15:0]divisor;
+wire borrow;
+minus_one minus_one_0(
+wire go;
+assign go=((i_divisor!=0)&&rst_ni);
+reg [15:0]ct_0;
+// Rising-edge phase of the odd divider.
+// rst_ni is sampled on the rising edge of clk_i and clears a and ct_0.
+// While go is high:
+//  - if a is set, ct_0 counts up; once it has reached divisor, ct_0 and a are
+//    cleared;
+//  - if a is clear and c (= ~a & ~b) is high, a is set.
+always@(posedge clk_i )
+ if(!rst_ni)begin
+ a<=0;
+ ct_0<=0;
+ end
+ else if(go)begin
+ if(a)begin
+ if(ct_0>=divisor)begin
+ ct_0<=0;
+ a<=0;
+ end
+ else ct_0<=ct_0+1;
+ end
+ else if(c)a<=c;
+ end
+reg [15:0]ct_1;
+// Falling-edge phase of the odd divider; same structure as the rising-edge
+// phase, but clocked on the falling edge of clk_i and operating on b / ct_1.
+// rst_ni is sampled on the falling edge of clk_i and clears b and ct_1.
+// While go is high:
+//  - if b is set, ct_1 counts up; once it has reached divisor, ct_1 and b are
+//    cleared;
+//  - if b is clear and c (= ~a & ~b) is high, b is set.
+always@(negedge clk_i )
+ if(!rst_ni)begin
+ b<=0;
+ ct_1<=0;
+ end
+ else if(go)begin
+ if(b)begin
+ if(ct_1>=divisor)begin
+ ct_1<=0;
+ b<=0;
+ end
+ else ct_1<=ct_1+1;
+ end
+ else if(c)b<=c;
+ end
+reg clk;
+// Output toggle flop: clk changes state on every rising edge of c.
+// The reset is asynchronous. c is (~a)&(~b), and while rst_ni is low both a
+// and b are cleared, which holds c constantly at 1. A reset that is only
+// sampled on a rising edge of c would therefore depend on the start-up values
+// of a and b to see an edge at all and could leave clk uninitialised.
+always@(posedge c or negedge rst_ni)
+ if(!rst_ni)clk<=0;
+ else clk<=~clk;
+assign o_clk=go?clk:clk_i;
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..0add43c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,203 @@
+// Generic asynchronous fifo for use in a variety of devices.
+module fifo_async #(
+ parameter int unsigned Width = 16,
+ parameter int unsigned Depth = 3,
+ localparam int unsigned DepthW = $clog2(Depth+1) // derived parameter representing [0..Depth]
+) (
+ // write port
+ input clk_wr_i,
+ input rst_wr_ni,
+ input wvalid_i,
+ output wready_o,
+ input [Width-1:0] wdata_i,
+ output [DepthW-1:0] wdepth_o,
+ // read port
+ input clk_rd_i,
+ input rst_rd_ni,
+ output rvalid_o,
+ input rready_i,
+ output [Width-1:0] rdata_o,
+ output [DepthW-1:0] rdepth_o
+ localparam int unsigned PTRV_W = $clog2(Depth);
+ localparam logic [PTRV_W-1:0] DepthMinus1 = PTRV_W'(Depth - 1);
+ localparam int unsigned PTR_WIDTH = PTRV_W+1;
+ logic [PTR_WIDTH-1:0] fifo_wptr, fifo_rptr;
+ logic [PTR_WIDTH-1:0] fifo_wptr_sync_combi, fifo_rptr_sync;
+ logic [PTR_WIDTH-1:0] fifo_wptr_gray_sync, fifo_rptr_gray_sync;
+ logic [PTR_WIDTH-1:0] fifo_wptr_gray, fifo_rptr_gray;
+ logic fifo_incr_wptr, fifo_incr_rptr, empty;
+ logic full_wclk, full_rclk;
+ assign wready_o = !full_wclk;
+ assign rvalid_o = !empty;
+ // create the write and read pointers
+ assign fifo_incr_wptr = wvalid_i & wready_o;
+ assign fifo_incr_rptr = rvalid_o & rready_i;
+ ///////////////////
+ // write pointer //
+ ///////////////////
+ // Next write-pointer value. The lower PTR_WIDTH-1 bits index the storage; when
+ // they equal DepthMinus1 they restart at zero and the MSB is inverted,
+ // otherwise the pointer is incremented by one.
+ logic [PTR_WIDTH-1:0] fifo_wptr_nxt;
+ always_comb begin
+   if (fifo_wptr[PTR_WIDTH-2:0] == DepthMinus1) begin
+     fifo_wptr_nxt = {~fifo_wptr[PTR_WIDTH-1], {(PTR_WIDTH-1){1'b0}}};
+   end else begin
+     fifo_wptr_nxt = fifo_wptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
+   end
+ end
+ // binary write pointer, advanced on every accepted write
+ always_ff @(posedge clk_wr_i or negedge rst_wr_ni) begin
+   if (!rst_wr_ni) begin
+     fifo_wptr <= '0;
+   end else if (fifo_incr_wptr) begin
+     fifo_wptr <= fifo_wptr_nxt;
+   end
+ end
+ // gray-coded copy of the write pointer, kept in step with the binary one
+ always_ff @(posedge clk_wr_i or negedge rst_wr_ni) begin
+   if (!rst_wr_ni) begin
+     fifo_wptr_gray <= '0;
+   end else if (fifo_incr_wptr) begin
+     fifo_wptr_gray <= dec2gray(fifo_wptr_nxt);
+   end
+ end
+ prim_generic_flop_2sync #(.Width(PTR_WIDTH)) sync_wptr (
+ .clk_i (clk_rd_i),
+ .rst_ni (rst_rd_ni),
+ .d_i (fifo_wptr_gray),
+ .q_o (fifo_wptr_gray_sync));
+ assign fifo_wptr_sync_combi = gray2dec(fifo_wptr_gray_sync);
+ //////////////////
+ // read pointer //
+ //////////////////
+ // Next read-pointer value. The lower PTR_WIDTH-1 bits index the storage; when
+ // they equal DepthMinus1 they restart at zero and the MSB is inverted,
+ // otherwise the pointer is incremented by one.
+ logic [PTR_WIDTH-1:0] fifo_rptr_nxt;
+ always_comb begin
+   if (fifo_rptr[PTR_WIDTH-2:0] == DepthMinus1) begin
+     fifo_rptr_nxt = {~fifo_rptr[PTR_WIDTH-1], {(PTR_WIDTH-1){1'b0}}};
+   end else begin
+     fifo_rptr_nxt = fifo_rptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
+   end
+ end
+ // binary read pointer, advanced on every accepted read
+ always_ff @(posedge clk_rd_i or negedge rst_rd_ni) begin
+   if (!rst_rd_ni) begin
+     fifo_rptr <= '0;
+   end else if (fifo_incr_rptr) begin
+     fifo_rptr <= fifo_rptr_nxt;
+   end
+ end
+ // gray-coded copy of the read pointer, kept in step with the binary one
+ always_ff @(posedge clk_rd_i or negedge rst_rd_ni) begin
+   if (!rst_rd_ni) begin
+     fifo_rptr_gray <= '0;
+   end else if (fifo_incr_rptr) begin
+     fifo_rptr_gray <= dec2gray(fifo_rptr_nxt);
+   end
+ end
+ prim_generic_flop_2sync #(.Width(PTR_WIDTH)) sync_rptr (
+ .clk_i (clk_wr_i),
+ .rst_ni (rst_wr_ni),
+ .d_i (fifo_rptr_gray),
+ .q_o (fifo_rptr_gray_sync));
+ // Read pointer as seen in the write clock domain: the synchronized gray-coded
+ // read pointer is converted back to binary and registered once more on
+ // clk_wr_i. (The write pointer seen on the read side is converted
+ // combinationally instead, see fifo_wptr_sync_combi.)
+ always_ff @(posedge clk_wr_i or negedge rst_wr_ni)
+ if (!rst_wr_ni) begin
+ fifo_rptr_sync <= {PTR_WIDTH{1'b0}};
+ end else begin
+ fifo_rptr_sync <= gray2dec(fifo_rptr_gray_sync);
+ end
+ //////////////////
+ // empty / full //
+ //////////////////
+ assign full_wclk = (fifo_wptr == (fifo_rptr_sync ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));
+ assign full_rclk = (fifo_wptr_sync_combi == (fifo_rptr ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));
+ // Current depth in the write clock side
+ logic wptr_msb;
+ logic rptr_sync_msb;
+ logic [PTRV_W-1:0] wptr_value;
+ logic [PTRV_W-1:0] rptr_sync_value;
+ assign wptr_msb = fifo_wptr[PTR_WIDTH-1];
+ assign rptr_sync_msb = fifo_rptr_sync[PTR_WIDTH-1];
+ assign wptr_value = fifo_wptr[0+:PTRV_W];
+ assign rptr_sync_value = fifo_rptr_sync[0+:PTRV_W];
+ assign wdepth_o = (full_wclk) ? DepthW'(Depth) :
+ (wptr_msb == rptr_sync_msb) ? DepthW'(wptr_value) - DepthW'(rptr_sync_value) :
+ (DepthW'(Depth) - DepthW'(rptr_sync_value) + DepthW'(wptr_value)) ;
+ // Same again in the read clock side
+ assign empty = (fifo_wptr_sync_combi == fifo_rptr);
+ logic rptr_msb;
+ logic wptr_sync_msb;
+ logic [PTRV_W-1:0] rptr_value;
+ logic [PTRV_W-1:0] wptr_sync_value;
+ assign wptr_sync_msb = fifo_wptr_sync_combi[PTR_WIDTH-1];
+ assign rptr_msb = fifo_rptr[PTR_WIDTH-1];
+ assign wptr_sync_value = fifo_wptr_sync_combi[0+:PTRV_W];
+ assign rptr_value = fifo_rptr[0+:PTRV_W];
+ assign rdepth_o = (full_rclk) ? DepthW'(Depth) :
+ (wptr_sync_msb == rptr_msb) ? DepthW'(wptr_sync_value) - DepthW'(rptr_value) :
+ (DepthW'(Depth) - DepthW'(rptr_value) + DepthW'(wptr_sync_value)) ;
+ /////////////
+ // storage //
+ /////////////
+ logic [Width-1:0] storage [Depth];
+ // Storage write port: on an accepted write, wdata_i is stored at the entry
+ // addressed by the lower bits of the write pointer. The storage has no reset.
+ always_ff @(posedge clk_wr_i)
+ if (fifo_incr_wptr) begin
+ storage[fifo_wptr[PTR_WIDTH-2:0]] <= wdata_i;
+ end
+ assign rdata_o = storage[fifo_rptr[PTR_WIDTH-2:0]];
+ // gray code conversion functions. algorithm walks up from 0..N-1
+ // then flips the upper bit and walks down from N-1 to 0.
+ // Convert a pointer {wrap bit, index} into its gray-coded form.
+ //   decval  : binary pointer; the MSB is the wrap bit, the lower bits the index
+ //   returns : {wrap bit, gray code of the (possibly mirrored) index}
+ function automatic [PTR_WIDTH-1:0] dec2gray(input logic [PTR_WIDTH-1:0] decval);
+ logic [PTR_WIDTH-1:0] decval_sub;
+ logic [PTR_WIDTH-2:0] decval_in;
+ logic unused_decval_msb;
+ // mirrored index: Depth - 1 - index
+ decval_sub = (PTR_WIDTH)'(Depth) - {1'b0, decval[PTR_WIDTH-2:0]} - 1'b1;
+ // use the mirrored index when the wrap bit is set, the plain index otherwise;
+ // the top bit of the selected value is discarded
+ {unused_decval_msb, decval_in} = decval[PTR_WIDTH-1] ? decval_sub : decval;
+ // Was done in two assigns for low bits and top bit
+ // but that generates a (bogus) verilator warning, so do in one assign
+ // lower bits: value XOR (value >> 1); top bit: the wrap bit unchanged
+ dec2gray = {decval[PTR_WIDTH-1],
+ {1'b0,decval_in[PTR_WIDTH-2:1]} ^ decval_in[PTR_WIDTH-2:0]};
+ endfunction
+ // Inverse of dec2gray: convert a gray-coded pointer back to {wrap bit, index}.
+ //   grayval : gray-coded pointer; the MSB is the wrap bit
+ //   returns : binary pointer with the same wrap bit
+ function automatic [PTR_WIDTH-1:0] gray2dec(input logic [PTR_WIDTH-1:0] grayval);
+ logic [PTR_WIDTH-2:0] dec_tmp, dec_tmp_sub;
+ logic unused_decsub_msb;
+ // gray -> binary for the lower bits: start at the top bit and XOR downwards
+ dec_tmp[PTR_WIDTH-2] = grayval[PTR_WIDTH-2];
+ for (int i = PTR_WIDTH-3; i >= 0; i--)
+ dec_tmp[i] = dec_tmp[i+1]^grayval[i];
+ // mirrored index: Depth - 1 - index; the top bit of the result is discarded
+ {unused_decsub_msb, dec_tmp_sub} = (PTR_WIDTH-1)'(Depth) - {1'b0, dec_tmp} - 1'b1;
+ // with the wrap bit set the index was mirrored before encoding, so undo it
+ if (grayval[PTR_WIDTH-1])
+ gray2dec = {1'b1,dec_tmp_sub};
+ else
+ gray2dec = {1'b0,dec_tmp};
+ endfunction
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..b0ab38a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,147 @@
+// Generic synchronous fifo for use in a variety of devices.
+module fifo_sync #(
+ parameter int unsigned Width = 16,
+ parameter bit Pass = 1'b1, // if == 1 allow requests to pass through empty FIFO
+ parameter int unsigned Depth = 4,
+ parameter bit OutputZeroIfEmpty = 1'b1, // if == 1 always output 0 when FIFO is empty
+ // derived parameter
+ localparam int DepthW = tlul_pkg::vbits(Depth+1)
+) (
+ input clk_i,
+ input rst_ni,
+ // synchronous clear / flush port
+ input clr_i,
+ // write port
+ input wvalid_i,
+ output wready_o,
+ input [Width-1:0] wdata_i,
+ // read port
+ output rvalid_o,
+ input rready_i,
+ output [Width-1:0] rdata_o,
+ // occupancy
+ output [DepthW-1:0] depth_o
+ // FIFO is in complete passthrough mode
+ if (Depth == 0) begin : gen_passthru_fifo
+ assign depth_o = 1'b0; //output is meaningless
+ // device facing
+ assign rvalid_o = wvalid_i;
+ assign rdata_o = wdata_i;
+ // host facing
+ assign wready_o = rready_i;
+ // this avoids lint warnings
+ logic unused_clr;
+ assign unused_clr = clr_i;
+ // Normal FIFO construction
+ end else begin : gen_normal_fifo
+ localparam int unsigned PTRV_W = tlul_pkg::vbits(Depth);
+ localparam int unsigned PTR_WIDTH = PTRV_W+1;
+ logic [PTR_WIDTH-1:0] fifo_wptr, fifo_rptr;
+ logic fifo_incr_wptr, fifo_incr_rptr, fifo_empty;
+ // create the write and read pointers
+ logic full, empty;
+ logic wptr_msb;
+ logic rptr_msb;
+ logic [PTRV_W-1:0] wptr_value;
+ logic [PTRV_W-1:0] rptr_value;
+ assign wptr_msb = fifo_wptr[PTR_WIDTH-1];
+ assign rptr_msb = fifo_rptr[PTR_WIDTH-1];
+ assign wptr_value = fifo_wptr[0+:PTRV_W];
+ assign rptr_value = fifo_rptr[0+:PTRV_W];
+ assign depth_o = (full) ? DepthW'(Depth) :
+ (wptr_msb == rptr_msb) ? DepthW'(wptr_value) - DepthW'(rptr_value) :
+ (DepthW'(Depth) - DepthW'(rptr_value) + DepthW'(wptr_value)) ;
+ assign fifo_incr_wptr = wvalid_i & wready_o;
+ assign fifo_incr_rptr = rvalid_o & rready_i;
+ assign wready_o = ~full;
+ assign rvalid_o = ~empty;
+ // Write pointer. It is cleared by the asynchronous reset and by the
+ // synchronous clear clr_i. On an accepted write the lower bits count up to
+ // Depth-1; on wrap they restart at zero and the MSB is inverted.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fifo_wptr <= {(PTR_WIDTH){1'b0}};
+ end else if (clr_i) begin
+ fifo_wptr <= {(PTR_WIDTH){1'b0}};
+ end else if (fifo_incr_wptr) begin
+ if (fifo_wptr[PTR_WIDTH-2:0] == (PTR_WIDTH-1)'(Depth-1)) begin
+ fifo_wptr <= {~fifo_wptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+ end else begin
+ fifo_wptr <= fifo_wptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+ end
+ end
+ end
+ // Read pointer. It is cleared by the asynchronous reset and by the
+ // synchronous clear clr_i. On an accepted read the lower bits count up to
+ // Depth-1; on wrap they restart at zero and the MSB is inverted.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fifo_rptr <= {(PTR_WIDTH){1'b0}};
+ end else if (clr_i) begin
+ fifo_rptr <= {(PTR_WIDTH){1'b0}};
+ end else if (fifo_incr_rptr) begin
+ if (fifo_rptr[PTR_WIDTH-2:0] == (PTR_WIDTH-1)'(Depth-1)) begin
+ fifo_rptr <= {~fifo_rptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+ end else begin
+ fifo_rptr <= fifo_rptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+ end
+ end
+ end
+ assign full = (fifo_wptr == (fifo_rptr ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));
+ assign fifo_empty = (fifo_wptr == fifo_rptr);
+ // the generate blocks below are needed to avoid lint errors due to array indexing
+ // in the case where the fifo only has one storage element
+ logic [Depth-1:0][Width-1:0] storage;
+ logic [Width-1:0] storage_rdata;
+ if (Depth == 1) begin : gen_depth_eq1
+ assign storage_rdata = storage[0];
+ always_ff @(posedge clk_i)
+ if (fifo_incr_wptr) begin
+ storage[0] <= wdata_i;
+ end
+ // fifo with more than one storage element
+ end else begin : gen_depth_gt1
+ assign storage_rdata = storage[fifo_rptr[PTR_WIDTH-2:0]];
+ always_ff @(posedge clk_i)
+ if (fifo_incr_wptr) begin
+ storage[fifo_wptr[PTR_WIDTH-2:0]] <= wdata_i;
+ end
+ end
+ logic [Width-1:0] rdata_int;
+ if (Pass == 1'b1) begin : gen_pass
+ assign rdata_int = (fifo_empty && wvalid_i) ? wdata_i : storage_rdata;
+ assign empty = fifo_empty & ~wvalid_i;
+ end else begin : gen_nopass
+ assign rdata_int = storage_rdata;
+ assign empty = fifo_empty;
+ end
+ if (OutputZeroIfEmpty == 1'b1) begin : gen_output_zero
+ assign rdata_o = empty ? 'b0 : rdata_int;
+ end else begin : gen_no_output_zero
+ assign rdata_o = rdata_int;
+ end
+ end // block: gen_normal_fifo
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..98ed07b
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,758 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+module fpnew_cast_multi #(
+ parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
+ parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1,
+ // FPU configuration
+ parameter int unsigned NumPipeRegs = 0,
+ parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+ parameter type TagType = logic,
+ parameter type AuxType = logic,
+ // Do not change
+ localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig),
+ fpnew_pkg::max_int_width(IntFmtConfig)),
+ localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ // Input signals
+ input logic [WIDTH-1:0] operands_i, // 1 operand
+ input logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand
+ input fpnew_pkg::roundmode_e rnd_mode_i,
+ input fpnew_pkg::operation_e op_i,
+ input logic op_mod_i,
+ input fpnew_pkg::fp_format_e src_fmt_i,
+ input fpnew_pkg::fp_format_e dst_fmt_i,
+ input fpnew_pkg::int_format_e int_fmt_i,
+ input TagType tag_i,
+ input AuxType aux_i,
+ // Input Handshake
+ input logic in_valid_i,
+ output logic in_ready_o,
+ input logic flush_i,
+ // Output signals
+ output logic [WIDTH-1:0] result_o,
+ output fpnew_pkg::status_t status_o,
+ output logic extension_bit_o,
+ output TagType tag_o,
+ output AuxType aux_o,
+ // Output handshake
+ output logic out_valid_o,
+ input logic out_ready_i,
+ // Indication of valid data in flight
+ output logic busy_o
+ // ----------
+ // Constants
+ // ----------
+ localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+ localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig);
+ localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+ localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+ localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+ localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1;
+ // The internal mantissa includes normal bit or an entire integer
+ localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH);
+ // If needed, there will be a LZC for renormalization
+ localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH);
+ // The internal exponent must be able to represent the smallest denormal input value as signed
+ // or the number of bits in an integer
+ localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH),
+ fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1;
+ // Pipelines
+ localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+ : 0); // no regs here otherwise
+ localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+ : 0); // no regs here otherwise
+ localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? (NumPipeRegs / 3) // Last to get distributed regs
+ : 0); // no regs here otherwise
+ // ---------------
+ // Input pipeline
+ // ---------------
+ // Selected pipeline output signals as non-arrays
+ logic [WIDTH-1:0] operands_q;
+ logic [NUM_FORMATS-1:0] is_boxed_q;
+ logic op_mod_q;
+ fpnew_pkg::fp_format_e src_fmt_q;
+ fpnew_pkg::fp_format_e dst_fmt_q;
+ fpnew_pkg::int_format_e int_fmt_q;
+ // Input pipeline signals, index i holds signal after i register stages
+ logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q;
+ logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q;
+ fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
+ fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
+ logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
+ fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q;
+ fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
+ fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q;
+ TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
+ AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
+ logic [0:NUM_INP_REGS] inp_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_INP_REGS] inp_pipe_ready;
+ // Input stage: First element of pipeline is taken from inputs
+ assign inp_pipe_operands_q[0] = operands_i;
+ assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+ assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+ assign inp_pipe_op_q[0] = op_i;
+ assign inp_pipe_op_mod_q[0] = op_mod_i;
+ assign inp_pipe_src_fmt_q[0] = src_fmt_i;
+ assign inp_pipe_dst_fmt_q[0] = dst_fmt_i;
+ assign inp_pipe_int_fmt_q[0] = int_fmt_i;
+ assign inp_pipe_tag_q[0] = tag_i;
+ assign inp_pipe_aux_q[0] = aux_i;
+ assign inp_pipe_valid_q[0] = in_valid_i;
+ // Input stage: Propagate pipeline ready signal to upstream circuitry
+ assign in_ready_o = inp_pipe_ready[0];
+ // Generate the register stages
+ for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers
+ `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+ `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+ `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+ `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
+ `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
+ `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+ `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+ `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0))
+ `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: assign selected pipe outputs to signals for later use
+ assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+ assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS];
+ assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS];
+ assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS];
+ assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+ assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS];
+ // -----------------
+ // Input processing
+ // -----------------
+ logic src_is_int, dst_is_int; // if 0, it's a float
+ assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F);
+ assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I);
+ logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
+ logic [NUM_FORMATS-1:0] fmt_sign;
+ logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
+ logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa;
+ logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC
+ fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info;
+ logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val;
+ logic int_sign;
+ logic [INT_MAN_WIDTH-1:0] int_value, int_mantissa;
+ // FP Input initialization
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+ // Set up some constants
+ localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ if (FpFmtConfig[fmt]) begin : active_format
+ // Classify input
+ fpnew_classifier #(
+ .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
+ .NumOperands ( 1 )
+ ) i_fpnew_classifier (
+ .operands_i ( operands_q[FP_WIDTH-1:0] ),
+ .is_boxed_i ( is_boxed_q[fmt] ),
+ .info_o ( info[fmt] )
+ );
+ assign fmt_sign[fmt] = operands_q[FP_WIDTH-1];
+ assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]});
+ assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad
+ // Compensation for the difference in mantissa widths used for leading-zero count
+ assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS);
+ end else begin : inactive_format
+ assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled
+ assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ end
+ end
+ // Sign-extend INT input: one extended copy of the operand per integer format, stored in ifmt_input_val[ifmt]
+ for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int
+ // Width of this integer format, from fpnew_pkg::int_width
+ localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+ if (IntFmtConfig[ifmt]) begin : active_format // only active formats
+ always_comb begin : sign_ext_input
+ // sign-extend value only if it's signed: fill every bit with the operand MSB, or with 0 when op_mod_q is set
+ ifmt_input_val[ifmt] = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q};
+ ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0]; // then overwrite the low INT_WIDTH bits with the operand itself
+ end
+ end else begin : inactive_format
+ assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ end
+ end
+ // Construct input mantissa from integer: take the extended value of the selected integer format
+ assign int_value = ifmt_input_val[int_fmt_q];
+ assign int_sign = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative
+ assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative (two's-complement negate)
+ // select mantissa with source format: integer magnitude for int sources, per-format FP mantissa otherwise
+ assign encoded_mant = src_is_int ? int_mantissa : fmt_mantissa[src_fmt_q];
+ // --------------
+ // Normalization
+ // --------------
+ logic signed [INT_EXP_WIDTH-1:0] src_bias; // src format bias
+ logic signed [INT_EXP_WIDTH-1:0] src_exp; // src format exponent (biased)
+ logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal (0 or 1, added to the exponent below)
+ logic signed [INT_EXP_WIDTH-1:0] src_offset; // src offset within mantissa
+ assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q));
+ assign src_exp = fmt_exponent[src_fmt_q];
+ assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal});
+ assign src_offset = fmt_shift_compensation[src_fmt_q];
+ logic input_sign; // input sign
+ logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
+ logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
+ logic mant_is_zero; // for integer zeroes
+ logic signed [INT_EXP_WIDTH-1:0] fp_input_exp; // exponent candidate when the source is FP
+ logic signed [INT_EXP_WIDTH-1:0] int_input_exp; // exponent candidate when the source is an integer
+ // Input mantissa needs to be normalized
+ logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount
+ logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations -- NOTE(review): declared without `signed`, so expressions using it evaluate as unsigned; confirm the operand widths make that harmless
+ // Leading-zero counter is needed for renormalization -- NOTE(review): no width parameter is overridden here, so lzc uses its default input width; confirm it matches encoded_mant
+ lzc #(
+ .MODE ( 1 ) // MODE = 1 counts leading zeroes
+ ) i_lzc (
+ .in_i ( encoded_mant ),
+ .cnt_o ( renorm_shamt ),
+ .empty_o ( mant_is_zero )
+ );
+ assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt}); // one extra zero bit on top of the count
+ // Get the sign from the proper source
+ assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q];
+ // Realign input mantissa, append zeroes if destination is wider
+ assign input_mant = encoded_mant << renorm_shamt;
+ // Unbias exponent and compensate for shift: biased exp (+1 for subnormals) - bias - leading-zero count + per-format offset
+ assign fp_input_exp = signed'(src_exp + src_subnormal - src_bias -
+ renorm_shamt_sgn + src_offset); // compensate for shift
+ assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn); // integer source: INT_MAN_WIDTH - 1 minus the leading-zero count
+ assign input_exp = src_is_int ? int_input_exp : fp_input_exp;
+ logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
+ // Rebias the exponent with the bias of the destination format
+ assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q));
+ // ---------------
+ // Internal pipeline
+ // ---------------
+ // Pipeline output signals as non-arrays (taps of the last mid-pipeline stage, used by the casting logic)
+ logic input_sign_q;
+ logic signed [INT_EXP_WIDTH-1:0] input_exp_q;
+ logic [INT_MAN_WIDTH-1:0] input_mant_q;
+ logic signed [INT_EXP_WIDTH-1:0] destination_exp_q;
+ logic src_is_int_q;
+ logic dst_is_int_q;
+ fpnew_pkg::fp_info_t info_q;
+ logic mant_is_zero_q;
+ logic op_mod_q2;
+ fpnew_pkg::roundmode_e rnd_mode_q;
+ fpnew_pkg::fp_format_e src_fmt_q2;
+ fpnew_pkg::fp_format_e dst_fmt_q2;
+ fpnew_pkg::int_format_e int_fmt_q2;
+ // Internal pipeline signals, index i holds signal after i register stages (index 0 is the unregistered input)
+ logic [0:NUM_MID_REGS] mid_pipe_input_sign_q;
+ logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q;
+ logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q;
+ logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q;
+ logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q;
+ logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q;
+ fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q;
+ logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q;
+ logic [0:NUM_MID_REGS] mid_pipe_op_mod_q;
+ fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
+ fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q;
+ fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q;
+ fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q;
+ TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
+ AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
+ logic [0:NUM_MID_REGS] mid_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_MID_REGS] mid_pipe_ready;
+ // Input stage: First element of pipeline is taken from upstream logic (normalization results and last input-pipeline stage)
+ assign mid_pipe_input_sign_q[0] = input_sign;
+ assign mid_pipe_input_exp_q[0] = input_exp;
+ assign mid_pipe_input_mant_q[0] = input_mant;
+ assign mid_pipe_dest_exp_q[0] = destination_exp;
+ assign mid_pipe_src_is_int_q[0] = src_is_int;
+ assign mid_pipe_dst_is_int_q[0] = dst_is_int;
+ assign mid_pipe_info_q[0] = info[src_fmt_q]; // classification of the operand in its source format
+ assign mid_pipe_mant_zero_q[0] = mant_is_zero;
+ assign mid_pipe_op_mod_q[0] = op_mod_q;
+ assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+ assign mid_pipe_src_fmt_q[0] = src_fmt_q;
+ assign mid_pipe_dst_fmt_q[0] = dst_fmt_q;
+ assign mid_pipe_int_fmt_q[0] = int_fmt_q;
+ assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
+ assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
+ assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
+ // Input stage: Propagate pipeline ready signal to input pipe
+ assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+ // Generate the register stages: stage i feeds index i+1 of every mid_pipe_* array
+ for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers (last macro argument is the reset value)
+ `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0)
+ `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0)
+ `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0)
+ `FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0)
+ `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0)
+ `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0)
+ `FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0)
+ `FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0)
+ `FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0)
+ `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+ `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+ `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+ `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0))
+ `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: assign selected pipe outputs (index NUM_MID_REGS, i.e. after the last mid register) to signals for later use
+ assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS];
+ assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS];
+ assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS];
+ assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS];
+ assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS];
+ assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS];
+ assign info_q = mid_pipe_info_q[NUM_MID_REGS];
+ assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS];
+ assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS];
+ assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+ assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS];
+ assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+ assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS];
+ // --------
+ // Casting
+ // --------
+ logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
+ logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
+ logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
+ logic [SUPER_MAN_BITS-1:0] final_mant; // mantissa after adjustments
+ logic [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
+ logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization
+ logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
+ logic of_before_round, uf_before_round;
+ // Perform adjustments to mantissa and exponent: picks final_exp, preshift_mant and the right-shift amount, and flags over-/underflow seen before rounding
+ always_comb begin : cast_value
+ // Default assignment
+ final_exp = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits
+ preshift_mant = '0; // initialize mantissa container with zeroes
+ denorm_shamt = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa
+ of_before_round = 1'b0;
+ uf_before_round = 1'b0;
+ // Place mantissa to the left of the shifter
+ preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1);
+ // Handle INT casts
+ if (dst_is_int_q) begin
+ // By default right shift mantissa to be an integer
+ denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q);
+ // overflow: when converting to unsigned the range is larger by one
+ if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin
+ denorm_shamt = '0; // prevent shifting
+ of_before_round = 1'b1;
+ // underflow: exponent below -1
+ end else if (input_exp_q < -1) begin
+ denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
+ uf_before_round = 1'b1;
+ end
+ // Handle FP over-/underflows
+ end else begin
+ // Overflow or infinities (for proper rounding)
+ if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) ||
+ (~src_is_int_q && info_q.is_inf)) begin
+ final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value
+ preshift_mant = '1; // largest normal value and RS bits set
+ of_before_round = 1'b1;
+ // Denormalize underflowing values: re-biased exponent below 1 but not below -man_bits of the destination
+ end else if (destination_exp_q < 1 &&
+ destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+ final_exp = '0; // denormal result
+ denorm_shamt = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting
+ uf_before_round = 1'b1;
+ // Limit the shift to retain sticky bits
+ end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+ final_exp = '0; // denormal result
+ denorm_shamt = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky
+ uf_before_round = 1'b1;
+ end
+ end
+ end
+ localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. and R
+ localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R
+ // Mantissa adjustment shift (logical right shift by the amount chosen in cast_value)
+ assign destination_mant = preshift_mant >> denorm_shamt;
+ // Extract final mantissa and round bit, discard the normal bit (for FP): the slice starts one bit below the MSB
+ assign {final_mant, fp_round_sticky_bits[1]} =
+ destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1];
+ assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1]; // integer slice starts at the MSB
+ // Collapse sticky bits: OR-reduce everything below the round bit
+ assign fp_round_sticky_bits[0] = (| {destination_mant[NUM_FP_STICKY-1:0]});
+ assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]});
+ // select RS bits for destination operation
+ assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits;
+ // ----------------------------
+ // Rounding and classification
+ // ----------------------------
+ logic [WIDTH-1:0] pre_round_abs; // absolute value of result before rnd
+ logic of_after_round; // overflow
+ logic uf_after_round; // underflow
+ logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format
+ logic [NUM_FORMATS-1:0] fmt_of_after_round;
+ logic [NUM_FORMATS-1:0] fmt_uf_after_round;
+ logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format
+ logic rounded_sign;
+ logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding
+ logic result_true_zero;
+ logic [WIDTH-1:0] rounded_int_res; // after possible inversion
+ logic rounded_int_res_zero; // after rounding
+ // Pack exponent and mantissa into proper rounding form, once per FP format
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+ // Field widths of this format
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ if (FpFmtConfig[fmt]) begin : active_format
+ always_comb begin : assemble_result
+ fmt_pre_round_abs[fmt] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend: low bits of exponent and mantissa, no sign bit
+ end
+ end else begin : inactive_format
+ assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
+ end
+ end
+ // Sign-extend integer result: one WIDTH-wide copy of final_int per integer format
+ for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext
+ // Width of this integer format
+ localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+ if (IntFmtConfig[ifmt]) begin : active_format
+ always_comb begin : assemble_result
+ // sign-extend result: fill with bit INT_WIDTH-1, then overwrite the low INT_WIDTH bits
+ ifmt_pre_round_abs[ifmt] = '{default: final_int[INT_WIDTH-1]};
+ ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0];
+ end
+ end else begin : inactive_format
+ assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE};
+ end
+ end
+ // Select output with destination format and operation (integer lane vs. FP lane)
+ assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2];
+ fpnew_rounding #(
+ .AbsWidth ( WIDTH )
+ ) i_fpnew_rounding (
+ .abs_value_i ( pre_round_abs ),
+ .sign_i ( input_sign_q ), // source format
+ .round_sticky_bits_i ( round_sticky_bits ),
+ .rnd_mode_i ( rnd_mode_q ),
+ .effective_subtraction_i ( 1'b0 ), // no operation happened
+ .abs_rounded_o ( rounded_abs ),
+ .sign_o ( rounded_sign ),
+ .exact_zero_o ( result_true_zero )
+ );
+ logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
+ // Detect overflows and inject sign, once per FP format
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+ // Widths of this format
+ localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ if (FpFmtConfig[fmt]) begin : active_format
+ always_comb begin : post_process
+ // detect of / uf from the exponent field of the rounded value
+ fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+ fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+ // Assemble regular result, nan box short ones. Int zeroes need to be detected
+ fmt_result[fmt] = '1;
+ fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q
+ ? '0
+ : {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+ end
+ end else begin : inactive_format
+ assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+ assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+ assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+ end
+ end
+ // Classification after rounding select by destination format
+ assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+ assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+ // Negative integer result needs to be brought into two's complement
+ assign rounded_int_res = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs;
+ assign rounded_int_res_zero = (rounded_int_res == '0);
+ // -------------------------
+ // FP Special case handling
+ // -------------------------
+ logic [WIDTH-1:0] fp_special_result;
+ fpnew_pkg::status_t fp_special_status;
+ logic fp_result_is_special;
+ logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
+ // Special result construction: for every FP format, build the value returned when the
+ // regular datapath result must be bypassed. A zero input yields a zero carrying the input
+ // sign; every other special case yields the quiet NaN built from the constants below.
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+ // Set up some constants
+ localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; // all-ones exponent
+ localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); // only the mantissa MSB set
+ if (FpFmtConfig[fmt]) begin : active_format
+ always_comb begin : special_results
+ logic [FP_WIDTH-1:0] special_res;
+ // The conditional previously had no ':' arm and no terminating ';', leaving the statement
+ // incomplete and the QNAN_* constants unused; the NaN arm is restored here.
+ special_res = info_q.is_zero
+ ? input_sign_q << FP_WIDTH-1 // signed zero
+ : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+ // Initialize special result with ones (NaN-box)
+ fmt_special_result[fmt] = '1;
+ fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+ end
+ end else begin : inactive_format
+ assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+ end
+ end
+ // Detect special case from source format, I2F casts don't produce a special result (zero, NaN or not properly boxed FP inputs do)
+ assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero |
+ info_q.is_nan |
+ ~info_q.is_boxed);
+ // Signalling input NaNs raise invalid flag, otherwise no flags set
+ assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0};
+ // Assemble result according to destination format
+ assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format
+ // --------------------------
+ // INT Special case handling
+ // --------------------------
+ logic [WIDTH-1:0] int_special_result;
+ fpnew_pkg::status_t int_special_status;
+ logic int_result_is_special;
+ logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result;
+ // Special result construction: per integer format, the saturated value returned for invalid F2I casts
+ for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int
+ // Width of this integer format
+ localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+ if (IntFmtConfig[ifmt]) begin : active_format
+ always_comb begin : special_results
+ automatic logic [INT_WIDTH-1:0] special_res;
+ // Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1
+ special_res[INT_WIDTH-2:0] = '1; // alone yields 2**(INT_WIDTH-1)-1
+ special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1
+ // Negative special case (except for nans) tie to -max or 0 (bitwise inversion of the positive maximum)
+ if (input_sign_q && !info_q.is_nan)
+ special_res = ~special_res;
+ // Initialize special result with sign-extension, then overwrite the low INT_WIDTH bits
+ ifmt_special_result[ifmt] = '{default: special_res[INT_WIDTH-1]};
+ ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res;
+ end
+ end else begin : inactive_format
+ assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE};
+ end
+ end
+ // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned with a non-zero rounded result)
+ assign int_result_is_special = info_q.is_nan | info_q.is_inf |
+ of_before_round | ~info_q.is_boxed |
+ (input_sign_q & op_mod_q2 & ~rounded_int_res_zero);
+ // All integer special cases are invalid
+ assign int_special_status = '{NV: 1'b1, default: 1'b0};
+ // Assemble result according to destination format
+ assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format
+ // -----------------
+ // Result selection
+ // -----------------
+ fpnew_pkg::status_t int_regular_status, fp_regular_status;
+ logic [WIDTH-1:0] fp_result, int_result;
+ fpnew_pkg::status_t fp_status, int_status;
+ assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts
+ assign fp_regular_status.DZ = 1'b0; // no divisions
+ assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF
+ assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX; // underflow only flagged when the result is also inexact
+ assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f
+ : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round));
+ assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0}; // regular F2I casts can only raise inexact
+ assign fp_result = fp_result_is_special ? fp_special_result : fmt_result[dst_fmt_q2];
+ assign fp_status = fp_result_is_special ? fp_special_status : fp_regular_status;
+ assign int_result = int_result_is_special ? int_special_result : rounded_int_res;
+ assign int_status = int_result_is_special ? int_special_status : int_regular_status;
+ // Final results for output pipeline
+ logic [WIDTH-1:0] result_d;
+ fpnew_pkg::status_t status_d;
+ logic extension_bit;
+ // Select output depending on destination kind (integer vs. FP); special-case selection already happened above
+ assign result_d = dst_is_int_q ? int_result : fp_result;
+ assign status_d = dst_is_int_q ? int_status : fp_status;
+ // MSB of int result decides extension, otherwise NaN box
+ assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1;
+ // ----------------
+ // Output Pipeline
+ // ----------------
+ // Output pipeline signals, index i holds signal after i register stages (index 0 is the unregistered input)
+ logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+ fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+ logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q;
+ TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
+ AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
+ logic [0:NUM_OUT_REGS] out_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_OUT_REGS] out_pipe_ready;
+ // Input stage: First element of pipeline is taken from the result selection and the last mid-pipeline stage
+ assign out_pipe_result_q[0] = result_d;
+ assign out_pipe_status_q[0] = status_d;
+ assign out_pipe_ext_bit_q[0] = extension_bit;
+ assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
+ assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
+ assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
+ // Input stage: Propagate pipeline ready signal to inside pipe
+ assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+ // Generate the register stages: stage i feeds index i+1 of every out_pipe_* array
+ for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers (last macro argument is the reset value)
+ `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+ `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+ `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0)
+ `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+ assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+ // Output stage: assign module outputs from the last output-pipeline index
+ assign result_o = out_pipe_result_q[NUM_OUT_REGS];
+ assign status_o = out_pipe_status_q[NUM_OUT_REGS];
+ assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS];
+ assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
+ assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
+ assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
+ assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); // busy while any valid bit of the three pipelines is set
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..5e4fab9
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,72 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// fpnew_classifier: purely combinational classification of NumOperands floating-point
+// operands of format FpFormat. For each operand it splits the bits into sign / exponent /
+// mantissa and reports, in info_o[op], whether the value is normal, subnormal, zero,
+// infinite, NaN (signalling or quiet) and whether it was flagged as properly boxed.
+// An operand with is_boxed_i[op] == 0 is reported as a quiet NaN and nothing else.
+module fpnew_classifier #(
+ parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
+ parameter int unsigned NumOperands = 1,
+ // Do not change
+ localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat)
+) (
+ input logic [NumOperands-1:0][WIDTH-1:0] operands_i,
+ input logic [NumOperands-1:0] is_boxed_i,
+ output fpnew_pkg::fp_info_t [NumOperands-1:0] info_o
+);
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+ // Type definition: field layout of one operand, MSB first
+ typedef struct packed {
+ logic sign;
+ logic [EXP_BITS-1:0] exponent;
+ logic [MAN_BITS-1:0] mantissa;
+ } fp_t;
+ // Iterate through all operands
+ for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values
+ fp_t value;
+ logic is_boxed;
+ logic is_normal;
+ logic is_inf;
+ logic is_nan;
+ logic is_signalling;
+ logic is_quiet;
+ logic is_zero;
+ logic is_subnormal;
+ // ---------------
+ // Classify Input
+ // ---------------
+ always_comb begin : classify_input
+ value = operands_i[op];
+ is_boxed = is_boxed_i[op];
+ // Every class except NaN additionally requires the operand to be boxed
+ is_normal = is_boxed && (value.exponent != '0) && (value.exponent != '1); // exponent neither all-zeros nor all-ones
+ is_zero = is_boxed && (value.exponent == '0) && (value.mantissa == '0);
+ is_subnormal = is_boxed && (value.exponent == '0) && !is_zero; // zero exponent, non-zero mantissa
+ is_inf = is_boxed && ((value.exponent == '1) && (value.mantissa == '0));
+ is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); // unboxed operands count as NaN
+ is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); // mantissa MSB clear
+ is_quiet = is_nan && !is_signalling;
+ // Assign output for current input
+ info_o[op].is_normal = is_normal;
+ info_o[op].is_subnormal = is_subnormal;
+ info_o[op].is_zero = is_zero;
+ info_o[op].is_inf = is_inf;
+ info_o[op].is_nan = is_nan;
+ info_o[op].is_signalling = is_signalling;
+ info_o[op].is_quiet = is_quiet;
+ info_o[op].is_boxed = is_boxed;
+ end
+ end
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..937d345
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,339 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// fpnew_divsqrt_multi
+//
+// Handshaked wrapper around one div_sqrt_top_mvp instance. Data flows through
+//   input pipeline (NUM_INP_REGS stages) -> control FSM + divsqrt unit ->
+//   output pipeline (NUM_OUT_REGS stages).
+//
+// Behaviour visible in this module:
+//   - An accepted operation with op == fpnew_pkg::DIV starts a division; any
+//     other op value starts a square root.
+//   - FP8 operations (only when enabled in FpFmtConfig) are run with the unit's
+//     2'b10 format selection: operands are shifted left by 8 bits and the
+//     result is shifted right by 8 bits.
+//   - A three-state FSM (IDLE/BUSY/HOLD) sequences the unit; a finished result
+//     that downstream does not accept is captured in a hold register.
+//   - flush_i clears the pipeline valid bits, is forwarded to the unit as
+//     Kill_SI and forces the FSM back to IDLE.
+//   - extension_bit_o is tied high; busy_o is the OR of all pipeline valid
+//     bits and the FSM's unit_busy flag.
+//   - is_boxed_i is part of the port list but is not read inside this module.
+//
+// NOTE(review): the register macros `FF, `FFL, `FFLNR and `FFLARNC are not
+// defined in this file; presumably they come from a shared register-macro
+// header that must be included before this module - confirm in the build.
+module fpnew_divsqrt_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig = '1,
+  // FPU configuration
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::AFTER,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [1:0][WIDTH-1:0]       operands_i, // 2 operands
+  input  logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,
+  input  fpnew_pkg::fp_format_e       dst_fmt_i,
+  input  TagType                      tag_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  input  logic                        flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o,
+  output TagType                      tag_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+  // ----------
+  // Constants
+  // ----------
+  // Pipelines
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [1:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::roundmode_e rnd_mode_q;
+  fpnew_pkg::operation_e op_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+  logic                  in_valid_q;
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                 inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign op_q       = inp_pipe_op_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS];
+  // -----------------
+  // Input processing
+  // -----------------
+  logic [1:0]       divsqrt_fmt;
+  logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit
+  logic             input_is_fp8;
+  // Translate fpnew formats into divsqrt formats
+  always_comb begin : translate_fmt
+    unique case (dst_fmt_q)
+      fpnew_pkg::FP32:    divsqrt_fmt = 2'b00;
+      fpnew_pkg::FP64:    divsqrt_fmt = 2'b01;
+      fpnew_pkg::FP16:    divsqrt_fmt = 2'b10;
+      fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11;
+      default:            divsqrt_fmt = 2'b10; // maps also FP8 to FP16
+    endcase
+    // Only if FP8 is enabled
+    input_is_fp8 = FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8);
+    // If FP8 is supported, map it to an FP16 value
+    divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0];
+    divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1];
+  end
+  // ------------
+  // Control FSM
+  // ------------
+  logic in_ready;               // input handshake with upstream
+  logic div_valid, sqrt_valid;  // input signalling with unit
+  logic unit_ready, unit_done;  // status signals from unit instance
+  logic op_starting;            // high in the cycle a new operation starts
+  logic out_valid, out_ready;   // output handshake with downstream
+  logic hold_result;            // whether to put result into hold register
+  logic data_is_held;           // data in hold register is valid
+  logic unit_busy;              // valid data in flight
+  // FSM states
+  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
+  fsm_state_e state_q, state_d;
+  // Upstream ready comes from sanitization FSM
+  assign inp_pipe_ready[NUM_INP_REGS] = in_ready;
+  // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
+  assign div_valid   = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign sqrt_valid  = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign op_starting = div_valid | sqrt_valid;
+  // FSM to safely apply and receive data from DIVSQRT unit
+  always_comb begin : flag_fsm
+    // Default assignments
+    in_ready     = 1'b0;
+    out_valid    = 1'b0;
+    hold_result  = 1'b0;
+    data_is_held = 1'b0;
+    unit_busy    = 1'b0;
+    state_d      = state_q;
+    unique case (state_q)
+      // Waiting for work
+      IDLE: begin
+        in_ready = 1'b1; // we're ready
+        if (in_valid_q && unit_ready) begin // New work arrives
+          state_d = BUSY; // go into processing state
+        end
+      end
+      // Operation in progress
+      BUSY: begin
+        unit_busy = 1'b1; // data in flight
+        // If the unit is done with processing
+        if (unit_done) begin
+          out_valid = 1'b1; // try to commit result downstream
+          // If downstream accepts our result
+          if (out_ready) begin
+            state_d = IDLE; // we anticipate going back to idling..
+            if (in_valid_q && unit_ready) begin // ..unless new work comes in
+              in_ready = 1'b1; // we acknowledge the instruction
+              state_d  = BUSY; // and stay busy with it
+            end
+          // Otherwise if downstream is not ready for the result
+          end else begin
+            hold_result = 1'b1; // activate the hold register
+            state_d     = HOLD; // wait for the pipeline to take the data
+          end
+        end
+      end
+      // Waiting with valid result for downstream
+      HOLD: begin
+        unit_busy    = 1'b1; // data in flight
+        data_is_held = 1'b1; // data in hold register is valid
+        out_valid    = 1'b1; // try to commit result downstream
+        // If the result is accepted by downstream
+        if (out_ready) begin
+          state_d = IDLE; // go back to idle..
+          if (in_valid_q && unit_ready) begin // ..unless new work comes in
+            in_ready = 1'b1; // acknowledge the new transaction
+            state_d  = BUSY; // will be busy with the next instruction
+          end
+        end
+      end
+      // fall into idle state otherwise
+      default: state_d = IDLE;
+    endcase
+    // Flushing overrides the other actions
+    if (flush_i) begin
+      unit_busy = 1'b0; // data is invalidated
+      out_valid = 1'b0; // cancel any valid data
+      state_d   = IDLE; // go to default state
+    end
+  end
+  // FSM status register (asynch active low rst_ni)
+  `FF(state_q, state_d, IDLE)
+  // Hold additional information while the operation is in progress
+  logic   result_is_fp8_q;
+  TagType result_tag_q;
+  AuxType result_aux_q;
+  // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst)
+  `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0)
+  `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
+  `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
+  // -----------------
+  // DIVSQRT instance
+  // -----------------
+  logic [63:0]        unit_result;
+  logic [WIDTH-1:0]   adjusted_result, held_result_q;
+  fpnew_pkg::status_t unit_status, held_status_q;
+  div_sqrt_top_mvp i_divsqrt_lei (
+    .Clk_CI           ( clk_i               ),
+    .Rst_RBI          ( rst_ni              ),
+    .Div_start_SI     ( div_valid           ),
+    .Sqrt_start_SI    ( sqrt_valid          ),
+    .Operand_a_DI     ( divsqrt_operands[0] ),
+    .Operand_b_DI     ( divsqrt_operands[1] ),
+    .RM_SI            ( rnd_mode_q          ),
+    .Precision_ctl_SI ( '0                  ),
+    .Format_sel_SI    ( divsqrt_fmt         ),
+    .Kill_SI          ( flush_i             ),
+    .Result_DO        ( unit_result         ),
+    .Fflags_SO        ( unit_status         ),
+    .Ready_SO         ( unit_ready          ),
+    .Done_SO          ( unit_done           )
+  );
+  // Adjust result width and fix FP8
+  assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
+  // The Hold register (load, no rst_ni)
+  `FFLNR(held_result_q, adjusted_result, hold_result, clk_i)
+  `FFLNR(held_status_q, unit_status, hold_result, clk_i)
+  // --------------
+  // Output Select
+  // --------------
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  // Prioritize hold register data
+  assign result_d = data_is_held ? held_result_q : adjusted_result;
+  assign status_d = data_is_held ? held_status_q : unit_status;
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = result_tag_q;
+  assign out_pipe_aux_q[0]    = result_aux_q;
+  assign out_pipe_valid_q[0]  = out_valid;
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign out_ready = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  // Busy while any pipeline stage holds valid data or the FSM reports data in flight
+  assign busy_o          = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q});
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..420e793
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,671 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+module fpnew_fma #(
+ parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
+ parameter int unsigned NumPipeRegs = 0,
+ parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+ parameter type TagType = logic,
+ parameter type AuxType = logic,
+ localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ // Input signals
+ input logic [2:0][WIDTH-1:0] operands_i, // 3 operands
+ input logic [2:0] is_boxed_i, // 3 operands
+ input fpnew_pkg::roundmode_e rnd_mode_i,
+ input fpnew_pkg::operation_e op_i,
+ input logic op_mod_i,
+ input TagType tag_i,
+ input AuxType aux_i,
+ // Input Handshake
+ input logic in_valid_i,
+ output logic in_ready_o,
+ input logic flush_i,
+ // Output signals
+ output logic [WIDTH-1:0] result_o,
+ output fpnew_pkg::status_t status_o,
+ output logic extension_bit_o,
+ output TagType tag_o,
+ output AuxType aux_o,
+ // Output handshake
+ output logic out_valid_o,
+ input logic out_ready_i,
+ // Indication of valid data in flight
+ output logic busy_o
+ // ----------
+ // Constants
+ // ----------
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+ localparam int unsigned BIAS = fpnew_pkg::bias(FpFormat);
+ // Precision bits 'p' include the implicit bit
+ localparam int unsigned PRECISION_BITS = MAN_BITS + 1;
+ // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+ localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3;
+ localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+ // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+ // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+ // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+ localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH));
+ // Shift amount width: maximum internal mantissa size is 3p+3 bits
+ localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3);
+ // Pipelines
+ localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+ : 0); // no regs here otherwise
+ localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+ : 0); // no regs here otherwise
+ localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? (NumPipeRegs / 3) // Last to get distributed regs
+ : 0); // no regs here otherwise
+ // ----------------
+ // Type definition
+ // ----------------
+ typedef struct packed {
+ logic sign;
+ logic [EXP_BITS-1:0] exponent;
+ logic [MAN_BITS-1:0] mantissa;
+ } fp_t;
+ // ---------------
+ // Input pipeline
+ // ---------------
+ // Input pipeline signals, index i holds signal after i register stages
+ logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
+ logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q;
+ fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
+ fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
+ logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
+ TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
+ AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
+ logic [0:NUM_INP_REGS] inp_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_INP_REGS] inp_pipe_ready;
+ // Input stage: First element of pipeline is taken from inputs
+ assign inp_pipe_operands_q[0] = operands_i;
+ assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+ assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+ assign inp_pipe_op_q[0] = op_i;
+ assign inp_pipe_op_mod_q[0] = op_mod_i;
+ assign inp_pipe_tag_q[0] = tag_i;
+ assign inp_pipe_aux_q[0] = aux_i;
+ assign inp_pipe_valid_q[0] = in_valid_i;
+ // Input stage: Propagate pipeline ready signal to upstream circuitry
+ assign in_ready_o = inp_pipe_ready[0];
+ // Generate the register stages
+ for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers
+ `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+ `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+ `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+ `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
+ `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
+ `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // -----------------
+ // Input processing
+ // -----------------
+ fpnew_pkg::fp_info_t [2:0] info_q;
+ // Classify input
+ fpnew_classifier #(
+ .FpFormat ( FpFormat ),
+ .NumOperands ( 3 )
+ ) i_class_inputs (
+ .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+ .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+ .info_o ( info_q )
+ );
+ fp_t operand_a, operand_b, operand_c;
+ fpnew_pkg::fp_info_t info_a, info_b, info_c;
+ // Operation selection and operand adjustment
+ // | \c op_q | \c op_mod_q | Operation Adjustment
+ // |:--------:|:-----------:|---------------------
+ // | FMADD | \c 0 | FMADD: none
+ // | FMADD | \c 1 | FMSUB: Invert sign of operand C
+ // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A
+ // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C
+ // | ADD | \c 0 | ADD: Set operand A to +1.0
+ // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C
+ // | MUL | \c 0 | MUL: Set operand C to -0.0
+ // | *others* | \c - | *invalid*
+ // \note \c op_mod_q always inverts the sign of the addend.
+ always_comb begin : op_select
+ // Default assignments - packing-order-agnostic
+ operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+ operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+ operand_c = inp_pipe_operands_q[NUM_INP_REGS][2];
+ info_a = info_q[0];
+ info_b = info_q[1];
+ info_c = info_q[2];
+ // op_mod_q inverts sign of operand C
+ operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+ unique case (inp_pipe_op_q[NUM_INP_REGS])
+ fpnew_pkg::FMADD: ; // do nothing
+ fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+ fpnew_pkg::ADD: begin // Set multiplicand to +1
+ operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0};
+ info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
+ end
+ fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN)
+ operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+ info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+ end
+ default: begin // propagate don't cares
+ operand_a = '{default: fpnew_pkg::DONT_CARE};
+ operand_b = '{default: fpnew_pkg::DONT_CARE};
+ operand_c = '{default: fpnew_pkg::DONT_CARE};
+ info_a = '{default: fpnew_pkg::DONT_CARE};
+ info_b = '{default: fpnew_pkg::DONT_CARE};
+ info_c = '{default: fpnew_pkg::DONT_CARE};
+ end
+ endcase
+ end
+ // ---------------------
+ // Input classification
+ // ---------------------
+ logic any_operand_inf;
+ logic any_operand_nan;
+ logic signalling_nan;
+ logic effective_subtraction;
+ logic tentative_sign;
+ // Reduction for special case handling
+ assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf});
+ assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan});
+ assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+ // Effective subtraction in FMA occurs when product and addend signs differ
+ assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+ // The tentative sign of the FMA shall be the sign of the product
+ assign tentative_sign = operand_a.sign ^ operand_b.sign;
+ // ----------------------
+ // Special case handling
+ // ----------------------
+ fp_t special_result;
+ fpnew_pkg::status_t special_status;
+ logic result_is_special;
+ always_comb begin : special_cases
+ // Default assignments
+ special_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+ special_status = '0;
+ result_is_special = 1'b0;
+ // Handle potentially mixed nan & infinity input => important for the case where infinity and
+ // zero are multiplied and added to a qnan.
+ // RISC-V mandates raising the NV exception in these cases:
+ // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+ if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+ result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN
+ special_status.NV = 1'b1; // invalid operation
+ // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
+ end else if (any_operand_nan) begin
+ result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN
+ special_status.NV = signalling_nan; // raise the invalid operation flag if signalling
+ // Special cases involving infinity
+ end else if (any_operand_inf) begin
+ result_is_special = 1'b1; // bypass FMA
+ // Effective addition of opposite infinities (±inf - ±inf) is invalid!
+ if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+ special_status.NV = 1'b1; // invalid operation
+ // Handle cases where output will be inf because of inf product input
+ else if (info_a.is_inf || info_b.is_inf) begin
+ // Result is infinity with the sign of the product
+ special_result = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0};
+ // Handle cases where the addend is inf
+ end else if (info_c.is_inf) begin
+ // Result is infinity with sign of the addend (= operand_c)
+ special_result = '{sign: operand_c.sign, exponent: '1, mantissa: '0};
+ end
+ end
+ end
+ // ---------------------------
+ // Initial exponent data path
+ // ---------------------------
+ logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+ logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+ logic signed [EXP_WIDTH-1:0] tentative_exponent;
+ // Zero-extend exponents into signed container - implicit width extension
+ assign exponent_a = signed'({1'b0, operand_a.exponent});
+ assign exponent_b = signed'({1'b0, operand_b.exponent});
+ assign exponent_c = signed'({1'b0, operand_c.exponent});
+ // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+ // with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased.
+ assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+ // Biased product exponent is the sum of encoded exponents minus the bias.
+ assign exponent_product = (info_a.is_zero || info_b.is_zero)
+ ? 2 - signed'(BIAS) // in case the product is zero, set minimum exp.
+ : signed'(exponent_a + info_a.is_subnormal
+ + exponent_b + info_b.is_subnormal
+ - signed'(BIAS));
+ // Exponent difference is the addend exponent minus the product exponent
+ assign exponent_difference = exponent_addend - exponent_product;
+ // The tentative exponent will be the larger of the product or addend exponent
+ assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+ // Shift amount for addend based on exponents (unsigned as only right shifts)
+ logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+ always_comb begin : addend_shift_amount
+ // Product-anchored case, saturated shift (addend is only in the sticky bit)
+ if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+ addend_shamt = 3 * PRECISION_BITS + 4;
+ // Addend and product will have mutual bits to add
+ else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+ addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+ // Addend-anchored case, saturated shift (product is only in the sticky bit)
+ else
+ addend_shamt = 0;
+ end
+ // ------------------
+ // Product data path
+ // ------------------
+ logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c;
+ logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide
+ logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R)
+ // Add implicit bits to mantissae
+ assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+ assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+ assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+ // Mantissa multiplier (a*b)
+ assign product = mantissa_a * mantissa_b;
+ // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+ // | 000...000 | product | RS |
+ // <- p+2 -> <- 2p -> < 2>
+ assign product_shifted = product << 2; // constant shift
+ // -----------------
+ // Addend data path
+ // -----------------
+ logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on
+ logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky
+ logic sticky_before_add; // they are compressed into a single sticky bit
+ logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R)
+ logic inject_carry_in; // inject carry for subtractions if needed
+ // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits
+ // are shifted out and compressed into a sticky bit.
+ // | mantissa_c | 000..000 |
+ // <- p -> <- 3p+4 ->
+ // | 000..........000 | mantissa_c | 000...............0GR | sticky bits |
+ // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p ->
+ assign {addend_after_shift, addend_sticky_bits} =
+ (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+ assign sticky_before_add = (| addend_sticky_bits);
+ // assign addend_after_shift[0] = sticky_before_add;
+ // In case of a subtraction, the addend is inverted
+ assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+ assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+ // ------
+ // Adder
+ // ------
+ logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry
+ logic sum_carry; // observe carry bit from sum for sign fixing
+ logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow
+ logic final_sign;
+ //Mantissa adder (ab+c). In normal addition, it cannot overflow.
+ assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+ assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+ // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+ assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+ // In case of a mispredicted subtraction result, do a sign flip
+ assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+ ? 1'b1
+ : (effective_subtraction ? 1'b0 : tentative_sign);
+ // ---------------
+ // Internal pipeline
+ // ---------------
+ // Pipeline output signals as non-arrays
+ logic effective_subtraction_q;
+ logic signed [EXP_WIDTH-1:0] exponent_product_q;
+ logic signed [EXP_WIDTH-1:0] exponent_difference_q;
+ logic signed [EXP_WIDTH-1:0] tentative_exponent_q;
+ logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+ logic sticky_before_add_q;
+ logic [3*PRECISION_BITS+3:0] sum_q;
+ logic final_sign_q;
+ fpnew_pkg::roundmode_e rnd_mode_q;
+ logic result_is_special_q;
+ fp_t special_result_q;
+ fpnew_pkg::status_t special_status_q;
+ // Internal pipeline signals, index i holds signal after i register stages
+ logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q;
+ logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q;
+ logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q;
+ logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q;
+ logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+ logic [0:NUM_MID_REGS] mid_pipe_sticky_q;
+ logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q;
+ logic [0:NUM_MID_REGS] mid_pipe_final_sign_q;
+ fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
+ logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q;
+ fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q;
+ fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q;
+ TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
+ AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
+ logic [0:NUM_MID_REGS] mid_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_MID_REGS] mid_pipe_ready;
+ // Input stage: First element of pipeline is taken from upstream logic
+ assign mid_pipe_eff_sub_q[0] = effective_subtraction;
+ assign mid_pipe_exp_prod_q[0] = exponent_product;
+ assign mid_pipe_exp_diff_q[0] = exponent_difference;
+ assign mid_pipe_tent_exp_q[0] = tentative_exponent;
+ assign mid_pipe_add_shamt_q[0] = addend_shamt;
+ assign mid_pipe_sticky_q[0] = sticky_before_add;
+ assign mid_pipe_sum_q[0] = sum;
+ assign mid_pipe_final_sign_q[0] = final_sign;
+ assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+ assign mid_pipe_res_is_spec_q[0] = result_is_special;
+ assign mid_pipe_spec_res_q[0] = special_result;
+ assign mid_pipe_spec_stat_q[0] = special_status;
+ assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
+ assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
+ assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
+ // Input stage: Propagate pipeline ready signal to input pipe
+ assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+ // Generate the register stages
+ for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+ // Each iteration builds one register stage of the internal pipeline: slot i of every mid_pipe_*
+ // array is registered into slot i+1. Slot 0 is driven combinationally above this loop and slot
+ // NUM_MID_REGS feeds the *_q signals below it; with NUM_MID_REGS == 0 no stage is generated.
+ // NOTE(review): `FFLARNC and `FFL are defined outside this chunk - presumably register macros
+ // taking (q, d, load-enable, ..., reset value); confirm against the macro header.
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present, so bubbles do not
+ // overwrite the data registers
+ assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers.
+ // The last macro argument is the value each register takes on reset.
+ `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0)
+ `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0)
+ `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0)
+ `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0)
+ `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0)
+ `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0)
+ `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0)
+ `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0)
+ `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+ `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+ `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0)
+ `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0)
+ `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: assign selected pipe outputs to signals for later use
+ assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+ assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS];
+ assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS];
+ assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS];
+ assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS];
+ assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS];
+ assign sum_q = mid_pipe_sum_q[NUM_MID_REGS];
+ assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS];
+ assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+ assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+ assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS];
+ assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS];
+ // --------------
+ // Normalization
+ // --------------
+ logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched
+ logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes
+ logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count
+ logic lzc_zeroes; // in case only zeroes found
+ logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+ logic signed [EXP_WIDTH-1:0] normalized_exponent;
+ logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift
+ logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit
+ logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization
+ logic sticky_after_norm; // sticky bit after normalization
+ logic signed [EXP_WIDTH-1:0] final_exponent;
+ assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+ // Leading zero counter for cancellations
+ lzc #(
+ .MODE ( 1 ) // MODE = 1 counts leading zeroes
+ ) i_lzc (
+ .in_i ( sum_lower ),
+ .cnt_o ( leading_zero_count ),
+ .empty_o ( lzc_zeroes )
+ );
+ assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
+ // Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
+ always_comb begin : norm_shift_amount
+ // Start from the addend-anchored choice: revert the alignment shift that was applied to the
+ // addend and keep the tentative exponent.
+ norm_shamt = addend_shamt_q;
+ normalized_exponent = tentative_exponent_q;
+ // Product-anchored results and cancellations override the default using the leading-zero count
+ if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+ if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+ // Normal result (biased exponent >= 0 and sum not all zeroes): undo the initial product
+ // shift and strip the counted leading zeroes, adjusting the exponent for that shift
+ norm_shamt = PRECISION_BITS + 2 + leading_zero_count;
+ normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1;
+ end else begin
+ // Subnormal result: limit the shift so the mantissa lines up with the minimum exponent;
+ // subnormals are encoded with exponent 0
+ norm_shamt = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q);
+ normalized_exponent = 0;
+ end
+ end
+ end
+ // Do the large normalization shift
+ assign sum_shifted = sum_q << norm_shamt;
+ // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+ // or right of the (non-carry) MSB of the sum.
+ always_comb begin : small_norm
+ // Final one-bit correction of the shifted sum. Every branch drives the mantissa/sticky split
+ // and the exponent explicitly; the branches are checked in priority order.
+ if (sum_shifted[3*PRECISION_BITS+4]) begin
+ // Carry bit set: the sum is one position too far left, shift right and raise the exponent
+ {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+ final_exponent = normalized_exponent + 1;
+ end else if (sum_shifted[3*PRECISION_BITS+3]) begin
+ // Sum MSB set: already normalized, only the (zero) carry bit is discarded
+ {final_mantissa, sum_sticky_bits} = sum_shifted;
+ final_exponent = normalized_exponent;
+ end else if (normalized_exponent > 1) begin
+ // Leading one is one position too low and the exponent can still be decremented without
+ // reaching the subnormal encoding: shift left and lower the exponent
+ {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+ final_exponent = normalized_exponent - 1;
+ end else begin
+ // No exponent headroom left: keep the unshifted bits, result is denormal (exponent 0)
+ {final_mantissa, sum_sticky_bits} = sum_shifted;
+ final_exponent = '0;
+ end
+ end
+ // Update the sticky bit with the shifted-out bits
+ assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+ // ----------------------------
+ // Rounding and classification
+ // ----------------------------
+ logic pre_round_sign;
+ logic [EXP_BITS-1:0] pre_round_exponent;
+ logic [MAN_BITS-1:0] pre_round_mantissa;
+ logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+ logic [1:0] round_sticky_bits;
+ logic of_before_round, of_after_round; // overflow
+ logic uf_before_round, uf_after_round; // underflow
+ logic result_zero;
+ logic rounded_sign;
+ logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+ // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+ assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones
+ assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0
+ // Assemble result before rounding. In case of overflow, the largest normal value is set.
+ assign pre_round_sign = final_sign_q;
+ assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]);
+ assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit
+ assign pre_round_abs = {pre_round_exponent, pre_round_mantissa};
+ // In case of overflow, the round and sticky bits are set for proper rounding
+ assign round_sticky_bits = (of_before_round) ? 2'b11 : {final_mantissa[0], sticky_after_norm};
+ // Perform the rounding
+ fpnew_rounding #(
+ .AbsWidth ( EXP_BITS + MAN_BITS )
+ ) i_fpnew_rounding (
+ .abs_value_i ( pre_round_abs ),
+ .sign_i ( pre_round_sign ),
+ .round_sticky_bits_i ( round_sticky_bits ),
+ .rnd_mode_i ( rnd_mode_q ),
+ .effective_subtraction_i ( effective_subtraction_q ),
+ .abs_rounded_o ( rounded_abs ),
+ .sign_o ( rounded_sign ),
+ .exact_zero_o ( result_zero )
+ );
+ // Classification after rounding
+ assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0
+ assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones
+ // -----------------
+ // Result selection
+ // -----------------
+ logic [WIDTH-1:0] regular_result;
+ fpnew_pkg::status_t regular_status;
+ // Assemble regular result
+ assign regular_result = {rounded_sign, rounded_abs};
+ assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+ assign regular_status.DZ = 1'b0; // no divisions
+ assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow
+ assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+ assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+ // Final results for output pipeline
+ fp_t result_d;
+ fpnew_pkg::status_t status_d;
+ // Select output depending on special case detection
+ assign result_d = result_is_special_q ? special_result_q : regular_result;
+ assign status_d = result_is_special_q ? special_status_q : regular_status;
+ // ----------------
+ // Output Pipeline
+ // ----------------
+ // Output pipeline signals, index i holds signal after i register stages
+ fp_t [0:NUM_OUT_REGS] out_pipe_result_q;
+ fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+ TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
+ AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
+ logic [0:NUM_OUT_REGS] out_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_OUT_REGS] out_pipe_ready;
+ // Input stage: First element of pipeline is taken from inputs
+ assign out_pipe_result_q[0] = result_d;
+ assign out_pipe_status_q[0] = status_d;
+ assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
+ assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
+ assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
+ // Input stage: Propagate pipeline ready signal to inside pipe
+ assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+ // Generate the register stages
+ for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+ // Each iteration builds one register stage of the output pipeline: slot i of every out_pipe_*
+ // array is registered into slot i+1. Slot 0 carries the selected result/status and slot
+ // NUM_OUT_REGS drives the module outputs; out_pipe_ready[NUM_OUT_REGS] comes from out_ready_i.
+ // NOTE(review): `FFLARNC and `FFL are defined outside this chunk - presumably register macros
+ // taking (q, d, load-enable, ..., reset value); confirm against the macro header.
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present, so bubbles do not
+ // overwrite the data registers
+ assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers.
+ // The last macro argument is the value each register takes on reset.
+ `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+ `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+ `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+ assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+ // Output stage: assign module outputs
+ assign result_o = out_pipe_result_q[NUM_OUT_REGS];
+ assign status_o = out_pipe_status_q[NUM_OUT_REGS];
+ assign extension_bit_o = 1'b1; // always NaN-Box result
+ assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
+ assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
+ assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
+ assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..840b889
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,820 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+module fpnew_fma_multi #(
+ parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
+ parameter int unsigned NumPipeRegs = 0,
+ parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+ parameter type TagType = logic,
+ parameter type AuxType = logic,
+ // Do not change
+ localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig),
+ localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ // Input signals
+ input logic [2:0][WIDTH-1:0] operands_i, // 3 operands
+ input logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands
+ input fpnew_pkg::roundmode_e rnd_mode_i,
+ input fpnew_pkg::operation_e op_i,
+ input logic op_mod_i,
+ input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands
+ input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result
+ input TagType tag_i,
+ input AuxType aux_i,
+ // Input Handshake
+ input logic in_valid_i,
+ output logic in_ready_o,
+ input logic flush_i,
+ // Output signals
+ output logic [WIDTH-1:0] result_o,
+ output fpnew_pkg::status_t status_o,
+ output logic extension_bit_o,
+ output TagType tag_o,
+ output AuxType aux_o,
+ // Output handshake
+ output logic out_valid_o,
+ input logic out_ready_i,
+ // Indication of valid data in flight
+ output logic busy_o
+ // ----------
+ // Constants
+ // ----------
+ // The super-format that can hold all formats
+ localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+ localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+ localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+ // Precision bits 'p' include the implicit bit
+ localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1;
+ // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+ localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3;
+ localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+ // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+ // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+ // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+ localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH);
+ // Shift amount width: the largest shift distance used is 3p+4 (the saturated addend shift), so
+ // the field must be able to represent the values 0..3p+4, i.e. 3p+5 distinct values.
+ // Sizing it with $clog2(3p+3) is one bit short whenever 3p+4 is a power of two (e.g. p = 4 or
+ // p = 20): the saturated shift amount would then truncate to 0. For the precisions
+ // p = 3, 8, 11, 24 and 53 both expressions yield the same width.
+ localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
+ // Pipelines
+ localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+ : 0); // no regs here otherwise
+ localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+ : 0); // no regs here otherwise
+ localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? (NumPipeRegs / 3) // Last to get distributed regs
+ : 0); // no regs here otherwise
+ // ----------------
+ // Type definition
+ // ----------------
+ typedef struct packed {
+ logic sign;
+ logic [SUPER_EXP_BITS-1:0] exponent;
+ logic [SUPER_MAN_BITS-1:0] mantissa;
+ } fp_t;
+ // ---------------
+ // Input pipeline
+ // ---------------
+ // Selected pipeline output signals as non-arrays
+ logic [2:0][WIDTH-1:0] operands_q;
+ fpnew_pkg::fp_format_e src_fmt_q;
+ fpnew_pkg::fp_format_e dst_fmt_q;
+ // Input pipeline signals, index i holds signal after i register stages
+ logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
+ logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q;
+ fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
+ fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
+ logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
+ fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q;
+ fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
+ TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
+ AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
+ logic [0:NUM_INP_REGS] inp_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_INP_REGS] inp_pipe_ready;
+ // Input stage: First element of pipeline is taken from inputs
+ assign inp_pipe_operands_q[0] = operands_i;
+ assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+ assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+ assign inp_pipe_op_q[0] = op_i;
+ assign inp_pipe_op_mod_q[0] = op_mod_i;
+ assign inp_pipe_src_fmt_q[0] = src_fmt_i;
+ assign inp_pipe_dst_fmt_q[0] = dst_fmt_i;
+ assign inp_pipe_tag_q[0] = tag_i;
+ assign inp_pipe_aux_q[0] = aux_i;
+ assign inp_pipe_valid_q[0] = in_valid_i;
+ // Input stage: Propagate pipeline ready signal to upstream circuitry
+ assign in_ready_o = inp_pipe_ready[0];
+ // Generate the register stages
+ for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+ // Each iteration builds one register stage of the input pipeline: slot i of every inp_pipe_*
+ // array is registered into slot i+1. Slot 0 is wired to the module inputs above this loop and
+ // slot NUM_INP_REGS is what the datapath consumes; with NUM_INP_REGS == 0 no stage is generated.
+ // NOTE(review): `FFLARNC and `FFL are defined outside this chunk - presumably register macros
+ // taking (q, d, load-enable, ..., reset value); confirm against the macro header.
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present, so bubbles do not
+ // overwrite the data registers
+ assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers.
+ // The last macro argument is the value each register takes on reset; enum-typed registers use
+ // an explicit enum value (RNE, FMADD, format 0) instead of '0.
+ `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+ `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+ `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+ `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
+ `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
+ `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+ `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+ `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: assign selected pipe outputs to signals for later use
+ assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+ assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS];
+ assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+ // -----------------
+ // Input processing
+ // -----------------
+ logic [NUM_FORMATS-1:0][2:0] fmt_sign;
+ logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent;
+ logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa;
+ fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q;
+ // FP Input initialization
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+ // For every FP format, split the three registered operands into sign / exponent / mantissa as
+ // interpreted in that format and classify them. Formats disabled in FpFmtConfig get don't-care
+ // values instead of logic.
+ // Set up some constants
+ localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ if (FpFmtConfig[fmt]) begin : active_format
+ // Operands cut down to the width of this format (lower FP_WIDTH bits)
+ logic [2:0][FP_WIDTH-1:0] trimmed_ops;
+ // Classify input
+ fpnew_classifier #(
+ .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
+ .NumOperands ( 3 )
+ ) i_fpnew_classifier (
+ .operands_i ( trimmed_ops ),
+ .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ),
+ .info_o ( info_q[fmt] )
+ );
+ for (genvar op = 0; op < 3; op++) begin : gen_operands
+ assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0];
+ // Sign is the top bit of the format-sized value
+ assign fmt_sign[fmt][op] = operands_q[op][FP_WIDTH-1];
+ // Exponent field sits directly above the mantissa field; it is zero-extended
+ assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]});
+ // Left-align the mantissa field inside the SUPER_MAN_BITS-wide container.
+ // NOTE(review): the prepended is_normal bit appears to land above bit SUPER_MAN_BITS-1 after
+ // the shift and be dropped by the assignment; the implicit bit is re-attached from the info
+ // struct when mantissa_a/b/c are formed - confirm.
+ assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} <<
+ (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa
+ end
+ end else begin : inactive_format
+ assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled
+ assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+ end
+ end
+ fp_t operand_a, operand_b, operand_c;
+ fpnew_pkg::fp_info_t info_a, info_b, info_c;
+ // Operation selection and operand adjustment
+ // | \c op_q | \c op_mod_q | Operation Adjustment
+ // |:--------:|:-----------:|---------------------
+ // | FMADD | \c 0 | FMADD: none
+ // | FMADD | \c 1 | FMSUB: Invert sign of operand C
+ // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A
+ // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C
+ // | ADD | \c 0 | ADD: Set operand A to +1.0
+ // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C
+ // | MUL | \c 0 | MUL: Set operand C to -0.0
+ // | *others* | \c - | *invalid*
+ // \note \c op_mod_q always inverts the sign of the addend.
+ always_comb begin : op_select
+ // Builds the three FMA operands (a * b + c) and their classification info for the selected
+ // operation. Multiplicands a and b are read in the source format, addend c in the destination
+ // format; the operation then patches signs or replaces an operand with a constant.
+ // Default assignments - packing-order-agnostic
+ operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]};
+ operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]};
+ operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]};
+ info_a = info_q[src_fmt_q][0];
+ info_b = info_q[src_fmt_q][1];
+ info_c = info_q[dst_fmt_q][2];
+ // op_mod_q inverts sign of operand C (applied before the case below, so MUL's constant addend
+ // overrides it)
+ operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+ unique case (inp_pipe_op_q[NUM_INP_REGS])
+ fpnew_pkg::FMADD: ; // do nothing
+ fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+ fpnew_pkg::ADD: begin // Set multiplicand to +1
+ // Exponent equal to the source-format bias with a zero mantissa; operand B is left
+ // untouched, so the datapath computes 1.0 * b + c
+ operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0};
+ info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
+ end
+ fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN)
+ operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+ info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+ end
+ default: begin // propagate don't cares
+ operand_a = '{default: fpnew_pkg::DONT_CARE};
+ operand_b = '{default: fpnew_pkg::DONT_CARE};
+ operand_c = '{default: fpnew_pkg::DONT_CARE};
+ info_a = '{default: fpnew_pkg::DONT_CARE};
+ info_b = '{default: fpnew_pkg::DONT_CARE};
+ info_c = '{default: fpnew_pkg::DONT_CARE};
+ end
+ endcase
+ end
+ // ---------------------
+ // Input classification
+ // ---------------------
+ logic any_operand_inf;
+ logic any_operand_nan;
+ logic signalling_nan;
+ logic effective_subtraction;
+ logic tentative_sign;
+ // Reduction for special case handling
+ assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf});
+ assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan});
+ assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+ // Effective subtraction in FMA occurs when product and addend signs differ
+ assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+ // The tentative sign of the FMA shall be the sign of the product
+ assign tentative_sign = operand_a.sign ^ operand_b.sign;
+ // ----------------------
+ // Special case handling
+ // ----------------------
+ logic [WIDTH-1:0] special_result;
+ fpnew_pkg::status_t special_status;
+ logic result_is_special;
+ logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
+ fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status;
+ logic [NUM_FORMATS-1:0] fmt_result_is_special;
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+ // For every FP format, compute the result, status flags and "is special" indicator for inputs
+ // that bypass the regular datapath (invalid operations, NaN inputs, infinities). The result is
+ // encoded in this format's layout and NaN-boxed to WIDTH bits.
+ // Set up some constants
+ localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ // All-ones exponent is shared by NaN and infinity encodings
+ localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
+ // Only the mantissa MSB set
+ localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
+ localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0;
+ if (FpFmtConfig[fmt]) begin : active_format
+ always_comb begin : special_results
+ logic [FP_WIDTH-1:0] special_res;
+ // Default assignment
+ special_res = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+ fmt_special_status[fmt] = '0;
+ fmt_result_is_special[fmt] = 1'b0;
+ // Handle potentially mixed nan & infinity input => important for the case where infinity and
+ // zero are multiplied and added to a qnan.
+ // RISC-V mandates raising the NV exception in these cases:
+ // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+ if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+ fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
+ fmt_special_status[fmt].NV = 1'b1; // invalid operation
+ // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
+ end else if (any_operand_nan) begin
+ fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
+ fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling
+ // Special cases involving infinity
+ end else if (any_operand_inf) begin
+ fmt_result_is_special[fmt] = 1'b1; // bypass FMA
+ // Effective addition of opposite infinities (±inf - ±inf) is invalid!
+ // special_res keeps its default here, so the output is the qNaN
+ if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+ fmt_special_status[fmt].NV = 1'b1; // invalid operation
+ // Handle cases where output will be inf because of inf product input
+ else if (info_a.is_inf || info_b.is_inf) begin
+ // Result is infinity with the sign of the product
+ special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+ // Handle cases where the addend is inf
+ end else if (info_c.is_inf) begin
+ // Result is infinity with sign of the addend (= operand_c)
+ special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+ end
+ end
+ // Initialize special result with ones (NaN-box), then place the format-sized value in the
+ // lower FP_WIDTH bits
+ fmt_special_result[fmt] = '1;
+ fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+ end
+ end else begin : inactive_format
+ // Disabled format: never flags a special case, result bits are don't-care
+ assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+ assign fmt_special_status[fmt] = '0;
+ assign fmt_result_is_special[fmt] = 1'b0;
+ end
+ end
+ // Detect special case; the per-format flags are indexed by the destination format
+ assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same
+ // Signalling input NaNs raise invalid flag, otherwise no flags set
+ assign special_status = fmt_special_status[dst_fmt_q];
+ // Assemble result according to destination format
+ assign special_result = fmt_special_result[dst_fmt_q]; // destination format
+ // ---------------------------
+ // Initial exponent data path
+ // ---------------------------
+ logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+ logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+ logic signed [EXP_WIDTH-1:0] tentative_exponent;
+ // Zero-extend exponents into signed container - implicit width extension
+ assign exponent_a = signed'({1'b0, operand_a.exponent});
+ assign exponent_b = signed'({1'b0, operand_b.exponent});
+ assign exponent_c = signed'({1'b0, operand_c.exponent});
+ // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+ // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt.
+ assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+ // Biased product exponent is the sum of encoded exponents minus the bias.
+ assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp.
+ ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q))
+ : signed'(exponent_a + info_a.is_subnormal
+ + exponent_b + info_b.is_subnormal
+ - 2*signed'(fpnew_pkg::bias(src_fmt_q))
+ + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt
+ // Exponent difference is the addend exponent minus the product exponent
+ assign exponent_difference = exponent_addend - exponent_product;
+ // The tentative exponent will be the larger of the product or addend exponent
+ assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+ // Shift amount for addend based on exponents (unsigned as only right shifts)
+ logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+ always_comb begin : addend_shift_amount
+ // Default is the addend-anchored case: the addend stays in place (saturated shift of zero) and
+ // the product only contributes to the sticky bit.
+ addend_shamt = 0;
+ if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) begin
+ // Product-anchored case: saturate the shift so the addend only ends up in the sticky bit
+ addend_shamt = 3 * PRECISION_BITS + 4;
+ end else if (exponent_difference <= signed'(PRECISION_BITS + 2)) begin
+ // Addend and product overlap: shift distance follows the exponent difference
+ addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+ end
+ end
+ // ------------------
+ // Product data path
+ // ------------------
+ logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c;
+ logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide
+ logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R)
+ // Add implicit bits to mantissae
+ assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+ assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+ assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+ // Mantissa multiplier (a*b)
+ assign product = mantissa_a * mantissa_b;
+ // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+ // | 000...000 | product | RS |
+ // <- p+2 -> <- 2p -> < 2>
+ assign product_shifted = product << 2; // constant shift
+ // -----------------
+ // Addend data path
+ // -----------------
+ logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on
+ logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky
+ logic sticky_before_add; // they are compressed into a single sticky bit
+ logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R)
+ logic inject_carry_in; // inject carry for subtractions if needed
+ // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are
+ // shifted out and compressed into a sticky bit.
+ // | mantissa_c | 000..000 |
+ // <- p -> <- 3p+4 ->
+ // | 000..........000 | mantissa_c | 000...............0GR | sticky bits |
+ // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p ->
+ assign {addend_after_shift, addend_sticky_bits} =
+ (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+ assign sticky_before_add = (| addend_sticky_bits);
+ // In case of a subtraction, the addend is inverted
+ assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+ assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+ // ------
+ // Adder
+ // ------
+ logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry
+ logic sum_carry; // observe carry bit from sum for sign fixing
+ logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow
+ logic final_sign;
+ //Mantissa adder (ab+c). In normal addition, it cannot overflow.
+ assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+ assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+ // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+ assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+ // In case of a mispredicted subtraction result, do a sign flip
+ assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+ ? 1'b1
+ : (effective_subtraction ? 1'b0 : tentative_sign);
+ // ---------------
+ // Internal pipeline
+ // ---------------
+ // Pipeline output signals as non-arrays
+ logic effective_subtraction_q;
+ logic signed [EXP_WIDTH-1:0] exponent_product_q;
+ logic signed [EXP_WIDTH-1:0] exponent_difference_q;
+ logic signed [EXP_WIDTH-1:0] tentative_exponent_q;
+ logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+ logic sticky_before_add_q;
+ logic [3*PRECISION_BITS+3:0] sum_q;
+ logic final_sign_q;
+ fpnew_pkg::fp_format_e dst_fmt_q2;
+ fpnew_pkg::roundmode_e rnd_mode_q;
+ logic result_is_special_q;
+ fp_t special_result_q;
+ fpnew_pkg::status_t special_status_q;
+ // Internal pipeline signals, index i holds signal after i register stages
+ logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q;
+ logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q;
+ logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q;
+ logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q;
+ logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+ logic [0:NUM_MID_REGS] mid_pipe_sticky_q;
+ logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q;
+ logic [0:NUM_MID_REGS] mid_pipe_final_sign_q;
+ fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
+ fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q;
+ logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q;
+ fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q;
+ fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q;
+ TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
+ AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
+ logic [0:NUM_MID_REGS] mid_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_MID_REGS] mid_pipe_ready;
+ // Input stage: First element of pipeline is taken from upstream logic
+ assign mid_pipe_eff_sub_q[0] = effective_subtraction;
+ assign mid_pipe_exp_prod_q[0] = exponent_product;
+ assign mid_pipe_exp_diff_q[0] = exponent_difference;
+ assign mid_pipe_tent_exp_q[0] = tentative_exponent;
+ assign mid_pipe_add_shamt_q[0] = addend_shamt;
+ assign mid_pipe_sticky_q[0] = sticky_before_add;
+ assign mid_pipe_sum_q[0] = sum;
+ assign mid_pipe_final_sign_q[0] = final_sign;
+ assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+ assign mid_pipe_dst_fmt_q[0] = dst_fmt_q;
+ assign mid_pipe_res_is_spec_q[0] = result_is_special;
+ assign mid_pipe_spec_res_q[0] = special_result;
+ assign mid_pipe_spec_stat_q[0] = special_status;
+ assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
+ assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
+ assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
+ // Input stage: Propagate pipeline ready signal to input pipe
+ assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+ // Generate the NUM_MID_REGS register stages that sit between the mantissa adder
+ // and the normalization logic. Index [i] of every mid_pipe_* vector is the input
+ // of stage i, index [i+1] is its registered output.
+ for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ // NOTE(review): FFLARNC and FFL are macros defined outside this chunk; the argument
+ // roles assumed in these comments (q, d, enable, ..., reset value) should be
+ // confirmed against the macro definitions.
+ `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers.
+ // All data registers share reg_ena, so they are only written when a valid item
+ // actually advances into this stage.
+ `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0)
+ `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0)
+ `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0)
+ `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0)
+ `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0)
+ `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0)
+ `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0)
+ `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0)
+ `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+ `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+ `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+ `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0)
+ `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0)
+ `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: assign selected pipe outputs to signals for later use
+ assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+ assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS];
+ assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS];
+ assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS];
+ assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS];
+ assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS];
+ assign sum_q = mid_pipe_sum_q[NUM_MID_REGS];
+ assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS];
+ assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+ assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+ assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+ assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS];
+ assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS];
+ // --------------
+ // Normalization
+ // --------------
+ logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched
+ logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes
+ logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count
+ logic lzc_zeroes; // in case only zeroes found
+ logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+ logic signed [EXP_WIDTH-1:0] normalized_exponent;
+ logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift
+ logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit
+ logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization
+ logic sticky_after_norm; // sticky bit after normalization
+ logic signed [EXP_WIDTH-1:0] final_exponent;
+ assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+ // Leading zero counter for cancellations
+ lzc #(
+ .MODE ( 1 ) // MODE = 1 counts leading zeroes
+ ) i_lzc (
+ .in_i ( sum_lower ),
+ .cnt_o ( leading_zero_count ),
+ .empty_o ( lzc_zeroes )
+ );
+ assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
+ // Normalization shift amount based on exponents and LZC (unsigned as only left shifts).
+ // Produces both the left-shift distance applied to sum_q (norm_shamt) and the
+ // exponent that corresponds to the shifted sum (normalized_exponent).
+ always_comb begin : norm_shift_amount
+ // Product-anchored case or cancellations require LZC
+ if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+ // Normal result (biased exponent > 0 and not a zero)
+ if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+ // Undo initial product shift, remove the counted zeroes
+ norm_shamt = PRECISION_BITS + 2 + leading_zero_count;
+ normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+ // Subnormal result (this branch is also taken when the LZC found only zeroes)
+ end else begin
+ // Cap the shift distance to align mantissa with minimum exponent
+ norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
+ normalized_exponent = 0; // subnormals encoded as 0
+ end
+ // Addend-anchored case
+ end else begin
+ norm_shamt = addend_shamt_q; // Undo the initial shift
+ normalized_exponent = tentative_exponent_q;
+ end
+ end
+ // Do the large normalization shift
+ assign sum_shifted = sum_q << norm_shamt;
+ // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+ // or right of the (non-carry) MSB of the sum.
+ // Splits the shifted sum into final_mantissa (upper bits) and sum_sticky_bits (lower bits)
+ // and corrects the exponent for the optional 1-bit shift.
+ always_comb begin : small_norm
+ // Default assignment, discarding carry bit
+ {final_mantissa, sum_sticky_bits} = sum_shifted;
+ final_exponent = normalized_exponent;
+ // The normalized sum has overflown, align right and fix exponent
+ if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+ {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+ final_exponent = normalized_exponent + 1;
+ // The normalized sum is normal, nothing to do
+ end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+ // do nothing
+ // The normalized sum is still denormal, align left - unless the result is already subnormal
+ // (the left shift is only applied while normalized_exponent > 1)
+ end else if (normalized_exponent > 1) begin
+ {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+ final_exponent = normalized_exponent - 1;
+ // Otherwise we're denormal: keep the unshifted mantissa and force the exponent to 0
+ end else begin
+ final_exponent = '0;
+ end
+ end
+ // Update the sticky bit with the shifted-out bits
+ assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+ // ----------------------------
+ // Rounding and classification
+ // ----------------------------
+ logic pre_round_sign;
+ logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+ logic [1:0] round_sticky_bits;
+ logic of_before_round, of_after_round; // overflow
+ logic uf_before_round, uf_after_round; // underflow
+ logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format
+ logic [NUM_FORMATS-1:0][1:0] fmt_round_sticky_bits;
+ logic [NUM_FORMATS-1:0] fmt_of_after_round;
+ logic [NUM_FORMATS-1:0] fmt_uf_after_round;
+ logic rounded_sign;
+ logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+ logic result_zero;
+ // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+ assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones
+ assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0
+ // Pack exponent and mantissa into proper rounding form.
+ // One candidate {exponent, mantissa} value and one {round, sticky} pair is built per
+ // format; the entry that matches the destination format is selected after this loop.
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+ // Set up some constants
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ logic [EXP_BITS-1:0] pre_round_exponent;
+ logic [MAN_BITS-1:0] pre_round_mantissa;
+ if (FpFmtConfig[fmt]) begin : active_format
+ // On overflow the exponent is set to 2**EXP_BITS-2 and the mantissa to all ones;
+ // otherwise the low EXP_BITS of the exponent and the MAN_BITS bits of final_mantissa
+ // counted downward from index SUPER_MAN_BITS are used.
+ assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0];
+ assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS];
+ // Assemble result before rounding. In case of overflow, the largest normal value is set.
+ assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend
+ // Round bit is after mantissa (1 in case of overflow for rounding)
+ assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] |
+ of_before_round;
+ // remaining bits in mantissa to sticky (1 in case of overflow for rounding)
+ // The slice below only exists when this format is narrower than the widest one,
+ // hence the generate-if around it.
+ if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky
+ assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) |
+ sticky_after_norm | of_before_round;
+ end else begin : normal_sticky
+ assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round;
+ end
+ end else begin : inactive_format
+ // Formats disabled in FpFmtConfig only drive don't-care values
+ assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
+ assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE};
+ end
+ end
+ // Assemble result before rounding. In case of overflow, the largest normal value is set.
+ assign pre_round_sign = final_sign_q;
+ assign pre_round_abs = fmt_pre_round_abs[dst_fmt_q2];
+ // In case of overflow, the round and sticky bits are set for proper rounding
+ assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2];
+ // Perform the rounding
+ fpnew_rounding #(
+ ) i_fpnew_rounding (
+ .abs_value_i ( pre_round_abs ),
+ .sign_i ( pre_round_sign ),
+ .round_sticky_bits_i ( round_sticky_bits ),
+ .rnd_mode_i ( rnd_mode_q ),
+ .effective_subtraction_i ( effective_subtraction_q ),
+ .abs_rounded_o ( rounded_abs ),
+ .sign_o ( rounded_sign ),
+ .exact_zero_o ( result_zero )
+ );
+ logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
+ // Per-format post-processing of the rounded value: classify the exponent field of
+ // rounded_abs and assemble the WIDTH-bit result word for each format.
+ for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+ // Set up some constants
+ localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+ if (FpFmtConfig[fmt]) begin : active_format
+ always_comb begin : post_process
+ // detect of / uf
+ fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+ fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+ // Assemble regular result, nan box short ones.
+ // All bits are first set to 1, then the low FP_WIDTH bits are overwritten with
+ // {sign, exponent, mantissa}; bits above FP_WIDTH therefore stay 1.
+ fmt_result[fmt] = '1;
+ fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+ end
+ end else begin : inactive_format
+ // Formats disabled in FpFmtConfig only drive don't-care values
+ assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+ assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+ assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+ end
+ end
+ // Classification after rounding select by destination format
+ assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+ assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+ // -----------------
+ // Result selection
+ // -----------------
+ logic [WIDTH-1:0] regular_result;
+ fpnew_pkg::status_t regular_status;
+ // Assemble regular result
+ assign regular_result = fmt_result[dst_fmt_q2];
+ assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+ assign regular_status.DZ = 1'b0; // no divisions
+ assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow
+ assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+ assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+ // Final results for output pipeline
+ logic [WIDTH-1:0] result_d;
+ fpnew_pkg::status_t status_d;
+ // Select output depending on special case detection
+ assign result_d = result_is_special_q ? special_result_q : regular_result;
+ assign status_d = result_is_special_q ? special_status_q : regular_status;
+ // ----------------
+ // Output Pipeline
+ // ----------------
+ // Output pipeline signals, index i holds signal after i register stages
+ logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+ fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+ TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
+ AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
+ logic [0:NUM_OUT_REGS] out_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_OUT_REGS] out_pipe_ready;
+ // Input stage: First element of pipeline is taken from inputs
+ assign out_pipe_result_q[0] = result_d;
+ assign out_pipe_status_q[0] = status_d;
+ assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
+ assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
+ assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
+ // Input stage: Propagate pipeline ready signal to inside pipe
+ assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+ // Generate the NUM_OUT_REGS register stages behind the result selection.
+ // Index [i] of every out_pipe_* vector is the input of stage i, index [i+1] is its
+ // registered output.
+ for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ // NOTE(review): FFLARNC and FFL are macros defined outside this chunk - confirm the
+ // argument roles against their definitions.
+ `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers
+ `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+ `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+ `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+ assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+ // Output stage: assign module outputs
+ assign result_o = out_pipe_result_q[NUM_OUT_REGS];
+ assign status_o = out_pipe_status_q[NUM_OUT_REGS];
+ assign extension_bit_o = 1'b1; // always NaN-Box result
+ assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
+ assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
+ assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
+ assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..acddd48
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,403 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// Non-computational floating-point unit for a single format (FpFormat).
+// op_i selects one of the operation groups handled in this module (SGNJ, MINMAX, CMP,
+// CLASSIFY). Within a group the concrete operation is encoded in rnd_mode_i; op_mod_i
+// inverts comparison results and selects the extension bit of sign injections.
+// NumPipeRegs register stages are placed in front of and/or behind the logic according
+// to PipeConfig. Data enters and leaves through valid/ready handshakes.
+module fpnew_noncomp #(
+ parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
+ parameter int unsigned NumPipeRegs = 0,
+ parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+ parameter type TagType = logic,
+ parameter type AuxType = logic,
+ localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ // Input signals
+ input logic [1:0][WIDTH-1:0] operands_i, // 2 operands
+ input logic [1:0] is_boxed_i, // 2 operands
+ input fpnew_pkg::roundmode_e rnd_mode_i, // selects the operation within the group
+ input fpnew_pkg::operation_e op_i,
+ input logic op_mod_i,
+ input TagType tag_i, // passed through the pipeline unchanged
+ input AuxType aux_i, // passed through the pipeline unchanged
+ // Input Handshake
+ input logic in_valid_i,
+ output logic in_ready_o,
+ input logic flush_i, // clears the valid bits of all pipeline stages
+ // Output signals
+ output logic [WIDTH-1:0] result_o,
+ output fpnew_pkg::status_t status_o,
+ output logic extension_bit_o,
+ output fpnew_pkg::classmask_e class_mask_o, // classification of operand 0, always driven
+ output logic is_class_o, // set when the operation was CLASSIFY
+ output TagType tag_o,
+ output AuxType aux_o,
+ // Output handshake
+ output logic out_valid_o,
+ input logic out_ready_i,
+ // Indication of valid data in flight
+ output logic busy_o
+ // ----------
+ // Constants
+ // ----------
+ localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+ localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+ // Pipelines
+ // This unit has no internal register position, so BEFORE and INSIDE both place all
+ // NumPipeRegs stages at the input. DISTRIBUTED gives the input side the rounded-up
+ // half and the output side the rounded-down half; AFTER places all stages at the output.
+ localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+ : 0); // no regs here otherwise
+ localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+ ? NumPipeRegs
+ : (PipeConfig == fpnew_pkg::DISTRIBUTED
+ ? (NumPipeRegs / 2) // Last to get distributed regs
+ : 0); // no regs here otherwise
+ // ----------------
+ // Type definition
+ // ----------------
+ // Packed view of one operand: sign in the MSB, then exponent, then mantissa.
+ typedef struct packed {
+ logic sign;
+ logic [EXP_BITS-1:0] exponent;
+ logic [MAN_BITS-1:0] mantissa;
+ } fp_t;
+ // ---------------
+ // Input pipeline
+ // ---------------
+ // Input pipeline signals, index i holds signal after i register stages
+ logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+ logic [0:NUM_INP_REGS][1:0] inp_pipe_is_boxed_q;
+ fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
+ fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
+ logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
+ TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
+ AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
+ logic [0:NUM_INP_REGS] inp_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_INP_REGS] inp_pipe_ready;
+ // Input stage: First element of pipeline is taken from inputs
+ assign inp_pipe_operands_q[0] = operands_i;
+ assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+ assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+ assign inp_pipe_op_q[0] = op_i;
+ assign inp_pipe_op_mod_q[0] = op_mod_i;
+ assign inp_pipe_tag_q[0] = tag_i;
+ assign inp_pipe_aux_q[0] = aux_i;
+ assign inp_pipe_valid_q[0] = in_valid_i;
+ // Input stage: Propagate pipeline ready signal to upstream circuitry
+ assign in_ready_o = inp_pipe_ready[0];
+ // Generate the register stages (none are generated when NUM_INP_REGS is 0, in which
+ // case entry [0] feeds the logic directly)
+ for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ // NOTE(review): FFLARNC and FFL are macros defined outside this chunk - confirm the
+ // argument roles against their definitions.
+ `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers
+ `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+ `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+ `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+ `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
+ `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
+ `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // ---------------------
+ // Input classification
+ // ---------------------
+ fpnew_pkg::fp_info_t [1:0] info_q;
+ // Classify input: both operands at the output of the input pipeline are classified
+ // at once, info_q[n] belongs to operand n.
+ fpnew_classifier #(
+ .FpFormat ( FpFormat ),
+ .NumOperands ( 2 )
+ ) i_class_a (
+ .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+ .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+ .info_o ( info_q )
+ );
+ fp_t operand_a, operand_b;
+ fpnew_pkg::fp_info_t info_a, info_b;
+ // Packing-order-agnostic assignments
+ assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+ assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+ assign info_a = info_q[0];
+ assign info_b = info_q[1];
+ logic any_operand_inf;
+ logic any_operand_nan;
+ logic signalling_nan;
+ // Reduction for special case handling
+ assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf});
+ assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan});
+ assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling});
+ logic operands_equal, operand_a_smaller;
+ // Equality checks for zeroes too: identical bit patterns are equal, and any two zeroes
+ // are equal regardless of their sign bits.
+ assign operands_equal = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
+ // Invert result if non-zero signs involved (unsigned comparison).
+ // The raw compare treats the packed {sign, exponent, mantissa} bits as an unsigned
+ // number; it is flipped whenever at least one sign bit is set. The flag is not
+ // meaningful on its own for equal operands - the strict less-than comparison below
+ // additionally masks it with operands_equal.
+ assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);
+ // ---------------
+ // Sign Injection
+ // ---------------
+ fp_t sgnj_result;
+ fpnew_pkg::status_t sgnj_status;
+ logic sgnj_extension_bit;
+ // Sign Injection - operation is encoded in rnd_mode_q:
+ // RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check)
+ // Exponent and mantissa of the result come from operand a; only the sign is replaced.
+ always_comb begin : sign_injections
+ logic sign_a, sign_b; // internal signs
+ // Default assignment
+ sgnj_result = operand_a; // result based on operand a
+ // NaN-boxing check will treat invalid inputs as canonical NaNs
+ // (sign 0, exponent all ones, only the mantissa MSB set)
+ if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};
+ // Internal signs are treated as positive in case of non-NaN-boxed values
+ sign_a = operand_a.sign & info_a.is_boxed;
+ sign_b = operand_b.sign & info_b.is_boxed;
+ // Do the sign injection based on rm field.
+ // The first three cases overwrite only the sign field, so the canonical NaN
+ // substituted above is kept; the passthrough case overwrites the whole result with
+ // operand a and thereby bypasses the NaN-box substitution.
+ unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+ fpnew_pkg::RNE: sgnj_result.sign = sign_b; // SGNJ
+ fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b; // SGNJN
+ fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX
+ fpnew_pkg::RUP: sgnj_result = operand_a; // passthrough
+ default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+ endcase
+ end
+ assign sgnj_status = '0; // sign injections never raise exceptions
+ // op_mod_q enables integer sign-extension of result (for storing to integer regfile):
+ // the extension bit is the result sign when op_mod is set, otherwise 1.
+ assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1;
+ // ------------------
+ // Minimum / Maximum
+ // ------------------
+ fp_t minmax_result;
+ fpnew_pkg::status_t minmax_status;
+ logic minmax_extension_bit;
+ // Minimum/Maximum - operation is encoded in rnd_mode_q:
+ // RNE = MIN, RTZ = MAX
+ // NaN handling takes priority over the MIN/MAX selection.
+ always_comb begin : min_max
+ // Default assignment
+ minmax_status = '0;
+ // Min/Max use quiet comparisons - only sNaN are invalid
+ minmax_status.NV = signalling_nan;
+ // Both NaN inputs cause a NaN output
+ if (info_a.is_nan && info_b.is_nan)
+ minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+ // If one operand is NaN, the non-NaN operand is returned
+ else if (info_a.is_nan) minmax_result = operand_b;
+ else if (info_b.is_nan) minmax_result = operand_a;
+ // Otherwise decide according to the operation
+ else begin
+ // When operand_a_smaller is not set, MIN returns operand b and MAX returns operand a.
+ unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+ fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN
+ fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? operand_b : operand_a; // MAX
+ default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+ endcase
+ end
+ end
+ assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value
+ // ------------
+ // Comparisons
+ // ------------
+ fp_t cmp_result;
+ fpnew_pkg::status_t cmp_status;
+ logic cmp_extension_bit;
+ // Comparisons - operation is encoded in rnd_mode_q:
+ // RNE = LE, RTZ = LT, RDN = EQ
+ // op_mod_q inverts boolean outputs
+ // The boolean is a 1-bit expression assigned to the fp_t-wide cmp_result, so it ends
+ // up in the least significant bit with all other bits zero.
+ always_comb begin : comparisons
+ // Default assignment
+ cmp_result = '0; // false
+ cmp_status = '0; // no flags
+ // Signalling NaNs always compare as false and are illegal
+ // NOTE(review): in this branch the result stays 0 even when op_mod requests an
+ // inverted output - confirm that this is the intended behaviour.
+ if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation
+ // Otherwise do comparisons
+ else begin
+ unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+ fpnew_pkg::RNE: begin // Less than or equal
+ if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+ else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+ end
+ fpnew_pkg::RTZ: begin // Less than
+ if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+ else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+ end
+ fpnew_pkg::RDN: begin // Equal
+ // Quiet comparison: a NaN operand raises no flag here
+ if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal
+ else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+ end
+ default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+ endcase
+ end
+ end
+ assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers
+ // ---------------
+ // Classification
+ // ---------------
+ fpnew_pkg::status_t class_status;
+ logic class_extension_bit;
+ fpnew_pkg::classmask_e class_mask_d; // the result is actually here
+ // Classification - always return the classification mask on the dedicated port.
+ // Only operand a is classified. The checks form a priority chain over the info_a
+ // flags; the sign bit picks the negative or positive variant of each class.
+ always_comb begin : classify
+ if (info_a.is_normal) begin
+ class_mask_d = operand_a.sign ? fpnew_pkg::NEGNORM : fpnew_pkg::POSNORM;
+ end else if (info_a.is_subnormal) begin
+ class_mask_d = operand_a.sign ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM;
+ end else if (info_a.is_zero) begin
+ class_mask_d = operand_a.sign ? fpnew_pkg::NEGZERO : fpnew_pkg::POSZERO;
+ end else if (info_a.is_inf) begin
+ class_mask_d = operand_a.sign ? fpnew_pkg::NEGINF : fpnew_pkg::POSINF;
+ end else if (info_a.is_nan) begin
+ // NaNs are split by the signalling flag, not by sign
+ class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN : fpnew_pkg::QNAN;
+ end else begin
+ // Taken only if none of the info_a flags above is set
+ class_mask_d = fpnew_pkg::QNAN; // default value
+ end
+ end
+ assign class_status = '0; // classification does not set flags
+ assign class_extension_bit = 1'b0; // classification always produces results in integer registers
+ // -----------------
+ // Result selection
+ // -----------------
+ fp_t result_d;
+ fpnew_pkg::status_t status_d;
+ logic extension_bit_d;
+ logic is_class_d;
+ // Select result: all four operation groups are computed in parallel above; the
+ // operation at the output of the input pipeline picks which one is forwarded.
+ always_comb begin : select_result
+ unique case (inp_pipe_op_q[NUM_INP_REGS])
+ fpnew_pkg::SGNJ: begin
+ result_d = sgnj_result;
+ status_d = sgnj_status;
+ extension_bit_d = sgnj_extension_bit;
+ end
+ fpnew_pkg::MINMAX: begin
+ result_d = minmax_result;
+ status_d = minmax_status;
+ extension_bit_d = minmax_extension_bit;
+ end
+ fpnew_pkg::CMP: begin
+ result_d = cmp_result;
+ status_d = cmp_status;
+ extension_bit_d = cmp_extension_bit;
+ end
+ fpnew_pkg::CLASSIFY: begin
+ // The classification mask travels on its own signal (class_mask_d), so the
+ // regular result carries no information for this operation.
+ result_d = '{default: fpnew_pkg::DONT_CARE}; // unused
+ status_d = class_status;
+ extension_bit_d = class_extension_bit;
+ end
+ default: begin
+ // Any operation not handled by this unit
+ result_d = '{default: fpnew_pkg::DONT_CARE}; // dont care
+ status_d = '{default: fpnew_pkg::DONT_CARE}; // dont care
+ extension_bit_d = fpnew_pkg::DONT_CARE; // dont care
+ end
+ endcase
+ end
+ // Flags a classification so that the consumer knows to use the class mask
+ assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY);
+ // ----------------
+ // Output Pipeline
+ // ----------------
+ // Output pipeline signals, index i holds signal after i register stages
+ fp_t [0:NUM_OUT_REGS] out_pipe_result_q;
+ fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+ logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q;
+ fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q;
+ logic [0:NUM_OUT_REGS] out_pipe_is_class_q;
+ TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
+ AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
+ logic [0:NUM_OUT_REGS] out_pipe_valid_q;
+ // Ready signal is combinatorial for all stages
+ logic [0:NUM_OUT_REGS] out_pipe_ready;
+ // Input stage: First element of pipeline is taken from inputs
+ assign out_pipe_result_q[0] = result_d;
+ assign out_pipe_status_q[0] = status_d;
+ assign out_pipe_extension_bit_q[0] = extension_bit_d;
+ assign out_pipe_class_mask_q[0] = class_mask_d;
+ assign out_pipe_is_class_q[0] = is_class_d;
+ assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
+ assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
+ assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
+ // Input stage: Propagate pipeline ready signal to inside pipe
+ assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0];
+ // Generate the register stages (none are generated when NUM_OUT_REGS is 0)
+ for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+ // Internal register enable for this stage
+ logic reg_ena;
+ // Determine the ready signal of the current stage - advance the pipeline:
+ // 1. if the next stage is ready for our data
+ // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+ assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+ // Valid: enabled by ready signal, synchronous clear with the flush signal
+ // NOTE(review): FFLARNC and FFL are macros defined outside this chunk - confirm the
+ // argument roles against their definitions.
+ `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+ // Enable register if pipeline ready and a valid data item is present
+ assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+ // Generate the pipeline registers within the stages, use enable-registers
+ `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+ `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+ `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0)
+ `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN)
+ `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0)
+ `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
+ `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
+ end
+ // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+ assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+ // Output stage: assign module outputs
+ assign result_o = out_pipe_result_q[NUM_OUT_REGS];
+ assign status_o = out_pipe_status_q[NUM_OUT_REGS];
+ assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS];
+ assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS];
+ assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS];
+ assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
+ assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
+ assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
+ // Busy whenever any valid bit is set. The vectors include their entry [0], so the
+ // not-yet-registered in_valid_i contributes combinationally as well.
+ assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q});
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..e3be31d
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,230 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// fpnew_opgroup_block
+//
+// Wraps all format slices of one operation group (OpGroup):
+//  - one fpnew_opgroup_fmt_slice per format that is enabled in FpFmtMask and configured as
+//    fpnew_pkg::PARALLEL in FmtUnitTypes,
+//  - one fpnew_opgroup_multifmt_slice when fpnew_pkg::any_enabled_multi(...) reports a merged
+//    configuration; it is attached to the handshake/output slot of the first merged format,
+//  - tie-offs for every remaining (unused merged or disabled) format slot.
+// The per-format outputs are then funnelled through rr_arb_tree onto the single output port.
+//
+// Ports use a valid/ready handshake on both the input side (in_valid_i/in_ready_o) and the
+// output side (out_valid_o/out_ready_i); busy_o is the OR of all per-format busy flags.
+module fpnew_opgroup_block #(
+  parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL,
+  // FPU configuration
+  parameter int unsigned Width = 32,
+  parameter logic EnableVectors = 1'b1,
+  parameter fpnew_pkg::fmt_logic_t FpFmtMask = '1,
+  parameter fpnew_pkg::ifmt_logic_t IntFmtMask = '1,
+  parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs = '{default: 0},
+  parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL},
+  parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+  parameter type TagType = logic,
+  // Do not change
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS,
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input fpnew_pkg::operation_e op_i,
+  input logic op_mod_i,
+  input fpnew_pkg::fp_format_e src_fmt_i,
+  input fpnew_pkg::fp_format_e dst_fmt_i,
+  input fpnew_pkg::int_format_e int_fmt_i,
+  input logic vectorial_op_i,
+  input TagType tag_i,
+  // Input Handshake
+  input logic in_valid_i,
+  output logic in_ready_o,
+  input logic flush_i,
+  // Output signals
+  output logic [Width-1:0] result_o,
+  output fpnew_pkg::status_t status_o,
+  output logic extension_bit_o,
+  output TagType tag_o,
+  // Output handshake
+  output logic out_valid_o,
+  input logic out_ready_i,
+  // Indication of valid data in flight
+  output logic busy_o
+);
+
+  // ----------------
+  // Type Definition
+  // ----------------
+  // Bundle of everything a slice produces, so the arbiter can move it as one data word
+  typedef struct packed {
+    logic [Width-1:0] result;
+    fpnew_pkg::status_t status;
+    logic ext_bit;
+    TagType tag;
+  } output_t;
+
+  // Handshake signals for the slices (one bit per format slot)
+  logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
+  output_t [NUM_FORMATS-1:0] fmt_outputs;
+
+  // -----------
+  // Input Side
+  // -----------
+  // Ready is given by the slot of the selected destination format, gated with the incoming valid
+  assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i];
+
+  // -------------------------
+  // Generate Parallel Slices
+  // -------------------------
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
+    // Some constants for this format
+    localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam logic IS_FIRST_MERGED =
+        fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);
+
+    // Generate slice only if format enabled
+    if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format
+      logic in_valid;
+      assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format
+
+      fpnew_opgroup_fmt_slice #(
+        .OpGroup ( OpGroup ),
+        .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
+        .Width ( Width ),
+        .EnableVectors ( EnableVectors ),
+        .NumPipeRegs ( FmtPipeRegs[fmt] ),
+        .PipeConfig ( PipeConfig ),
+        .TagType ( TagType )
+      ) i_fmt_slice (
+        .clk_i,
+        .rst_ni,
+        .operands_i ( operands_i ),
+        .is_boxed_i ( is_boxed_i[fmt] ),
+        .rnd_mode_i,
+        .op_i,
+        .op_mod_i,
+        .vectorial_op_i,
+        .tag_i,
+        .in_valid_i ( in_valid ),
+        .in_ready_o ( fmt_in_ready[fmt] ),
+        .flush_i,
+        .result_o ( fmt_outputs[fmt].result ),
+        .status_o ( fmt_outputs[fmt].status ),
+        .extension_bit_o( fmt_outputs[fmt].ext_bit ),
+        .tag_o ( fmt_outputs[fmt].tag ),
+        .out_valid_o ( fmt_out_valid[fmt] ),
+        .out_ready_i ( fmt_out_ready[fmt] ),
+        .busy_o ( fmt_busy[fmt] )
+      );
+    // If the format wants to use merged ops, tie off the dangling ones not used here
+    end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused
+      localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+      // Ready is split up into formats: mirror the ready of the slot the merged slice drives
+      assign fmt_in_ready[fmt] = fmt_in_ready[int'(FMT)];
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt] = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE);
+    // Tie off disabled formats
+    end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
+      assign fmt_in_ready[fmt] = 1'b0; // don't accept operations
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt] = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE);
+    end
+  end
+
+  // ----------------------
+  // Generate Merged Slice
+  // ----------------------
+  if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice
+    // Slot index the merged slice is attached to, and its pipeline depth
+    localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask);
+
+    logic in_valid;
+    // The merged slice takes every operation whose destination format is configured as MERGED
+    assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED);
+
+    fpnew_opgroup_multifmt_slice #(
+      .OpGroup ( OpGroup ),
+      .Width ( Width ),
+      .FpFmtConfig ( FpFmtMask ),
+      .IntFmtConfig ( IntFmtMask ),
+      .EnableVectors ( EnableVectors ),
+      .NumPipeRegs ( REG ),
+      .PipeConfig ( PipeConfig ),
+      .TagType ( TagType )
+    ) i_multifmt_slice (
+      .clk_i,
+      .rst_ni,
+      .operands_i,
+      .is_boxed_i,
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .in_valid_i ( in_valid ),
+      .in_ready_o ( fmt_in_ready[FMT] ),
+      .flush_i,
+      .result_o ( fmt_outputs[FMT].result ),
+      .status_o ( fmt_outputs[FMT].status ),
+      .extension_bit_o ( fmt_outputs[FMT].ext_bit ),
+      .tag_o ( fmt_outputs[FMT].tag ),
+      .out_valid_o ( fmt_out_valid[FMT] ),
+      .out_ready_i ( fmt_out_ready[FMT] ),
+      .busy_o ( fmt_busy[FMT] )
+    );
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn ( NUM_FORMATS ),
+    .DataType ( output_t ),
+    .AxiVldRdy ( 1'b1 )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i ( '0 ),
+    .req_i ( fmt_out_valid ),
+    .gnt_o ( fmt_out_ready ),
+    .data_i ( fmt_outputs ),
+    .gnt_i ( out_ready_i ),
+    .req_o ( out_valid_o ),
+    .data_o ( arbiter_output ),
+    .idx_o ( /* unused */ )
+  );
+
+  // Unpack output
+  assign result_o = arbiter_output.result;
+  assign status_o = arbiter_output.status;
+  assign extension_bit_o = arbiter_output.ext_bit;
+  assign tag_o = arbiter_output.tag;
+
+  // Busy as soon as any slice reports data in flight
+  assign busy_o = (| fmt_busy);
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..fda2a57
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,276 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// fpnew_opgroup_fmt_slice
+//
+// Single-format slice of an operation group. The Width-bit operands are cut into NUM_LANES
+// pieces of FP_WIDTH bits; each active lane instantiates the unit selected by OpGroup
+// (fpnew_fma for ADDMUL, fpnew_noncomp for NONCOMP). Lane 0 is always generated and provides
+// the handshake, tag and extension bit of the slice; upper lanes take part only in vectorial
+// operations. Lane results are concatenated and extended with extension_bit_o to Width bits,
+// or replaced by classification data when lane 0 reports a class result.
+//
+// NOTE(review): the DIVSQRT unit instantiation is commented out in this design, so for
+// OpGroup == fpnew_pkg::DIVSQRT nothing in this module drives the lane unit outputs
+// (lane_in_ready, out_valid, op_result, ...). Confirm this opgroup is never configured as
+// PARALLEL before relying on it.
+module fpnew_opgroup_fmt_slice #(
+  parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL,
+  parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
+  // FPU configuration
+  parameter int unsigned Width = 32,
+  parameter logic EnableVectors = 1'b1,
+  parameter int unsigned NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+  parameter type TagType = logic,
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input fpnew_pkg::operation_e op_i,
+  input logic op_mod_i,
+  input logic vectorial_op_i,
+  input TagType tag_i,
+  // Input Handshake
+  input logic in_valid_i,
+  output logic in_ready_o,
+  input logic flush_i,
+  // Output signals
+  output logic [Width-1:0] result_o,
+  output fpnew_pkg::status_t status_o,
+  output logic extension_bit_o,
+  output TagType tag_o,
+  // Output handshake
+  output logic out_valid_o,
+  input logic out_ready_i,
+  // Indication of valid data in flight
+  output logic busy_o
+);
+
+  localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat);
+  localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors);
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic vectorial_op;
+  logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
+  logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result;
+  fpnew_pkg::status_t [NUM_LANES-1:0] lane_status;
+  logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
+  TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // ditto
+  logic result_is_vector, result_is_class;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    logic [FP_WIDTH-1:0] local_result; // lane-local results
+    logic local_sign;
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+      logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
+      logic [FP_WIDTH-1:0] op_result; // lane-local results
+      fpnew_pkg::status_t op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+
+      // Slice out the operands for this lane
+      always_comb begin : prepare_input
+        for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
+          local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma #(
+          .FpFormat ( FpFormat ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic )
+        ) i_fma (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands ),
+          .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i ( vectorial_op ), // Remember whether operation was vectorial
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_vectorial[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+        // The FMA never produces classification results; drive the class signals to constants
+        assign lane_is_class[lane] = 1'b0;
+        assign lane_class_mask[lane] = fpnew_pkg::NEGINF;
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
+        // NOTE(review): unit intentionally left commented out in this design; see module header.
+        // fpnew_divsqrt #(
+        // .FpFormat (FpFormat),
+        // .NumPipeRegs(NumPipeRegs),
+        // .PipeConfig (PipeConfig),
+        // .TagType (TagType),
+        // .AuxType (logic)
+        // ) i_divsqrt (
+        // .clk_i,
+        // .rst_ni,
+        // .operands_i ( local_operands ),
+        // .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
+        // .rnd_mode_i,
+        // .op_i,
+        // .op_mod_i,
+        // .tag_i,
+        // .aux_i ( vectorial_op ), // Remember whether operation was vectorial
+        // .in_valid_i ( in_valid ),
+        // .in_ready_o ( lane_in_ready[lane] ),
+        // .flush_i,
+        // .result_o ( op_result ),
+        // .status_o ( op_status ),
+        // .extension_bit_o ( lane_ext_bit[lane] ),
+        // .tag_o ( lane_tags[lane] ),
+        // .aux_o ( lane_vectorial[lane] ),
+        // .out_valid_o ( out_valid ),
+        // .out_ready_i ( out_ready ),
+        // .busy_o ( lane_busy[lane] )
+        // );
+        // assign lane_is_class[lane] = 1'b0;
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        fpnew_noncomp #(
+          .FpFormat (FpFormat),
+          .NumPipeRegs(NumPipeRegs),
+          .PipeConfig (PipeConfig),
+          .TagType (TagType),
+          .AuxType (logic)
+        ) i_noncomp (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands ),
+          .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i ( vectorial_op ), // Remember whether operation was vectorial
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .class_mask_o ( lane_class_mask[lane] ),
+          .is_class_o ( lane_is_class[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_vectorial[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // The handshake/result glue below is common to every opgroup, so it must live outside the
+      // lane_instance if/else-if chain closed just above.
+      // Handshakes are only done if the lane is actually used
+      assign out_ready = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane] = 1'b0; // unused lane
+      assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane] = '0;
+      assign lane_busy[lane] = 1'b0;
+      assign lane_is_class[lane] = 1'b0;
+    end
+
+    // Insert lane result into slice result
+    assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;
+
+    // Create Classification results
+    if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
+      // Sign is set for every negative class
+      assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGZERO);
+      // Write the current block segment
+      assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
+        local_sign, // BIT 7
+        ~local_sign, // BIT 6
+        lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
+        lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
+        lane_class_mask[lane] == fpnew_pkg::POSZERO
+            || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
+        lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
+        lane_class_mask[lane] == fpnew_pkg::POSNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
+        lane_class_mask[lane] == fpnew_pkg::POSINF
+            || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
+      };
+    end
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  // Lane 0 carries the per-operation flags for the whole slice
+  assign result_is_vector = lane_vectorial[0];
+  assign result_is_class = lane_is_class[0];
+
+  // Extend the concatenated lane results to Width bits using the extension bit
+  assign slice_regular_result = $signed({extension_bit_o, slice_result});
+
+  localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8;
+
+  // Pad out unused vec_class bits
+  if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
+    assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
+  end
+
+  // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;
+
+  // Scalar classification returns the class mask of lane 0, vectorial the packed 8-bit blocks
+  assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0];
+
+  // Select the proper result
+  assign result_o = result_is_class ? slice_class_result : slice_regular_result;
+
+  assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused
+  assign tag_o = lane_tags[0]; // upper lanes unused
+  assign busy_o = (| lane_busy);
+  assign out_valid_o = lane_out_valid[0]; // upper lanes unused
+
+  // Collapse the lane status
+  always_comb begin : output_processing
+    // OR together the status flags of all lanes
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..4f139e9
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,424 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// fpnew_opgroup_multifmt_slice
+//
+// Multi-format (merged) slice of an operation group. Every lane instantiates one multi-format
+// unit selected by OpGroup (fpnew_fma_multi, fpnew_divsqrt_multi or fpnew_cast_multi); the
+// NONCOMP branch is empty. The destination format, the vectorial flag and the "result is
+// integer" flag travel with the operation through the unit's aux port (aux_data) and are
+// decoded from lane 0 on the output side to pick the matching packed result.
+// For CONV, one operand and three aux bits are additionally carried through a bypass pipeline
+// of NumPipeRegs stages built from the FFL / FFLARNC register macros.
+module fpnew_opgroup_multifmt_slice #(
+  parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV,
+  parameter int unsigned Width = 64,
+  // FPU configuration
+  parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
+  parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1,
+  parameter logic EnableVectors = 1'b1,
+  parameter int unsigned NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+  parameter type TagType = logic,
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input fpnew_pkg::operation_e op_i,
+  input logic op_mod_i,
+  input fpnew_pkg::fp_format_e src_fmt_i,
+  input fpnew_pkg::fp_format_e dst_fmt_i,
+  input fpnew_pkg::int_format_e int_fmt_i,
+  input logic vectorial_op_i,
+  input TagType tag_i,
+  // Input Handshake
+  input logic in_valid_i,
+  output logic in_ready_o,
+  input logic flush_i,
+  // Output signals
+  output logic [Width-1:0] result_o,
+  output fpnew_pkg::status_t status_o,
+  output logic extension_bit_o,
+  output TagType tag_o,
+  // Output handshake
+  output logic out_valid_o,
+  input logic out_ready_i,
+  // Indication of valid data in flight
+  output logic busy_o
+);
+
+  localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig);
+  localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig);
+  localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  // We will send the format information along with the data
+  localparam int unsigned FMT_BITS =
+      fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
+  localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic vectorial_op;
+  logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation
+  logic [AUX_BITS-1:0] aux_data;
+
+  // additional flags for CONV
+  logic dst_fmt_is_int, dst_is_cpk;
+  logic [1:0] dst_vec_op; // info for vectorial results (for packing)
+  logic [2:0] target_aux_d, target_aux_q;
+  logic is_up_cast, is_down_cast;
+
+  logic [NUM_FORMATS-1:0][Width-1:0] fmt_slice_result;
+  logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
+  logic [Width-1:0] conv_slice_result;
+
+  logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register
+
+  fpnew_pkg::status_t [NUM_LANES-1:0] lane_status;
+  logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
+  logic [NUM_LANES-1:0] lane_busy; // ditto
+
+  logic result_is_vector;
+  logic [FMT_BITS-1:0] result_fmt;
+  logic result_fmt_is_int, result_is_cpk;
+  logic [1:0] result_vec_op; // info for vectorial results (for packing)
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // Cast-and-Pack ops are encoded in operation and modifier
+  assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
+  assign dst_is_cpk = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
+                                                      op_i == fpnew_pkg::CPKCD);
+  // The CONV qualifier is replicated to both bits: a bare 1-bit comparison would be
+  // zero-extended to 2 bits by the '&' and always clear the CPKCD bit.
+  assign dst_vec_op = {2{OpGroup == fpnew_pkg::CONV}} & {(op_i == fpnew_pkg::CPKCD), op_mod_i};
+
+  assign is_up_cast = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
+  assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));
+
+  // The destination format is the int format for F2I casts
+  assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;
+
+  // The data sent along consists of the vectorial flag and format bits
+  assign aux_data = {dst_fmt_is_int, vectorial_op, dst_fmt};
+  assign target_aux_d = {dst_vec_op, dst_is_cpk};
+
+  // CONV passes one operand for assembly after the unit: opC for cpk, opB for others
+  if (OpGroup == fpnew_pkg::CONV) begin : conv_target
+    assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
+  end
+
+  // For 2-operand units, prepare boxing info
+  logic [NUM_FORMATS-1:0] is_boxed_1op;
+  logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;
+
+  always_comb begin : boxed_2op
+    for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin
+      is_boxed_1op[fmt] = is_boxed_i[fmt][0];
+      is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
+    end
+  end
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
+    // Get a mask of active formats for this lane
+    localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
+        fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
+        fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);
+
+    // Cast-specific parameters
+    localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
+        fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
+        fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);
+
+    // Lane parameters from Opgroup: CONV uses the conversion-specific format set and width
+    localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
+                                                     ? CONV_FORMATS : ACTIVE_FORMATS;
+    localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;
+
+    logic [LANE_WIDTH-1:0] local_result; // lane-local results
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+      logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local operands
+      logic [LANE_WIDTH-1:0] op_result; // lane-local results
+      fpnew_pkg::status_t op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+
+      // Slice out the operands for this lane, upper bits are ignored in the unit
+      always_comb begin : prepare_input
+        for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
+          local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
+        end
+
+        // override operand 0 for some conversions
+        if (OpGroup == fpnew_pkg::CONV) begin
+          // Source is an integer
+          if (op_i == fpnew_pkg::I2F) begin
+            local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
+          // vectorial F2F up casts
+          end else if (op_i == fpnew_pkg::F2F) begin
+            if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
+              // Shift by the lane offset plus half of the widest format ('+' binds tighter
+              // than '>>'; the parentheses only make that explicit)
+              local_operands[0] = operands_i[0] >> (LANE*fpnew_pkg::fp_width(src_fmt_i) +
+                                                    MAX_FP_WIDTH/2);
+            end
+          // CPK
+          end else if (dst_is_cpk) begin
+            if (lane == 1) begin
+              local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
+            end
+          end
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma_multi #(
+          .FpFmtConfig ( LANE_FORMATS ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_fma_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands ),
+          .is_boxed_i,
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i ( aux_data ),
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_aux[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
+        fpnew_divsqrt_multi #(
+          .FpFmtConfig ( LANE_FORMATS ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_divsqrt_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands[1:0] ), // 2 operands
+          .is_boxed_i ( is_boxed_2op ), // 2 operands
+          .rnd_mode_i,
+          .op_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i ( aux_data ),
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_aux[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        // No merged unit is instantiated for NONCOMP
+      end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
+        fpnew_cast_multi #(
+          .FpFmtConfig ( LANE_FORMATS ),
+          .IntFmtConfig ( CONV_INT_FORMATS ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_cast_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands[0] ),
+          .is_boxed_i ( is_boxed_1op ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .int_fmt_i,
+          .tag_i,
+          .aux_i ( aux_data ),
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_aux[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // The handshake/result glue below is common to every opgroup, so it must live outside the
+      // lane_instance if/else-if chain closed just above.
+      // Handshakes are only done if the lane is actually used
+      assign out_ready = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin : inactive_lane
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane] = 1'b0; // unused lane
+      assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane] = '0;
+      assign lane_busy[lane] = 1'b0;
+    end
+
+    // Generate result packing depending on float format
+    for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
+      // Set up some constants
+      localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+      // only for active formats within the lane
+      if (ACTIVE_FORMATS[fmt]) begin
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            local_result[FP_WIDTH-1:0];
+      // inactive format whose lane segment still fits completely: fill with the extension bit
+      end else if ((LANE+1)*FP_WIDTH <= Width) begin
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            '{default: lane_ext_bit[LANE]};
+      // inactive format whose lane segment is cut off by Width: fill the remaining bits
+      end else if (LANE*FP_WIDTH < Width) begin
+        assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] =
+            '{default: lane_ext_bit[LANE]};
+      end
+    end
+
+    // Generate result packing depending on integer format
+    if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
+      for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
+        // Set up some constants
+        localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+        if (ACTIVE_INT_FORMATS[ifmt]) begin
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
+              local_result[INT_WIDTH-1:0];
+        end else if ((LANE+1)*INT_WIDTH <= Width) begin
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0;
+        end else if (LANE*INT_WIDTH < Width) begin
+          assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0;
+        end
+      end
+    end
+  end
+
+  // Extend slice result if needed
+  for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    if (NUM_LANES*FP_WIDTH < Width)
+      assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
+  end
+
+  // Mute int results if unused
+  for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
+    if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
+      assign ifmt_slice_result[ifmt] = '0;
+    end
+  end
+
+  // Bypass lanes with target operand for vectorial casts
+  if (OpGroup == fpnew_pkg::CONV) begin : target_regs
+    // Bypass pipeline signals, index i holds signal after i register stages
+    logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
+    logic [0:NumPipeRegs][2:0] byp_pipe_aux_q;
+    logic [0:NumPipeRegs] byp_pipe_valid_q;
+    // Ready signal is combinatorial for all stages
+    logic [0:NumPipeRegs] byp_pipe_ready;
+
+    // Input stage: First element of pipeline is taken from inputs
+    assign byp_pipe_target_q[0] = conv_target_d;
+    assign byp_pipe_aux_q[0] = target_aux_d;
+    assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op;
+
+    // Generate the register stages
+    for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
+      // Internal register enable for this stage
+      logic reg_ena;
+      // Determine the ready signal of the current stage - advance the pipeline:
+      // 1. if the next stage is ready for our data
+      // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+      assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
+      // Valid: enabled by ready signal, synchronous clear with the flush signal
+      `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+      // Enable register if pipeline ready and a valid data item is present
+      assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i];
+      // Generate the pipeline registers within the stages, use enable-registers
+      `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0)
+      `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0)
+    end
+
+    // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+    assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
+    // Output stage: assign module outputs
+    assign conv_target_q = byp_pipe_target_q[NumPipeRegs];
+    // decode the aux data
+    assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
+  end else begin : no_conv
+    assign {result_vec_op, result_is_cpk} = '0;
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  // Lane 0 returns the aux word that was sent along with the operation
+  assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];
+
+  // Pick the packed result that matches the destination format of the finished operation
+  assign result_o = result_fmt_is_int
+                    ? ifmt_slice_result[result_fmt]
+                    : fmt_slice_result[result_fmt];
+
+  assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
+  assign tag_o = lane_tags[0]; // don't care about upper ones
+  assign busy_o = (| lane_busy);
+  assign out_valid_o = lane_out_valid[0]; // don't care about upper ones
+
+  // Collapse the status
+  always_comb begin : output_processing
+    // OR together the status flags of all lanes
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..2d258cf
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,491 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+package fpnew_pkg;
+ // -----------
+ // FP formats
+ // -----------
+ // | Enumerator | Format | Width | EXP_BITS | MAN_BITS
+ // |:----------:|------------------|-------:|:--------:|:--------:
+ // | FP32 | IEEE binary32 | 32 bit | 8 | 23
+ // | FP64 | IEEE binary64 | 64 bit | 11 | 52
+ // | FP16 | IEEE binary16 | 16 bit | 5 | 10
+ // | FP8 | binary8 | 8 bit | 5 | 2
+ // | FP16ALT | binary16alt | 16 bit | 8 | 7
+ // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+ // Encoding for a format
+ typedef struct packed {
+ int unsigned exp_bits;
+ int unsigned man_bits;
+ } fp_encoding_t;
+ localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
+ localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS);
+ // FP formats
+ typedef enum logic [FP_FORMAT_BITS-1:0] {
+ FP32 = 'd0,
+ FP64 = 'd1,
+ FP16 = 'd2,
+ FP8 = 'd3,
+ FP16ALT = 'd4
+ // add new formats here
+ } fp_format_e;
+ // Encodings for supported FP formats
+ localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS = '{
+ '{8, 23}, // IEEE binary32 (single)
+ '{11, 52}, // IEEE binary64 (double)
+ '{5, 10}, // IEEE binary16 (half)
+ '{5, 2}, // custom binary8
+ '{8, 7} // custom binary16alt
+ // add new formats here
+ };
+ typedef logic [0:NUM_FP_FORMATS-1] fmt_logic_t; // Logic indexed by FP format (for masks)
+ typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format
+ localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only
+ // ------------
+ // INT formats
+ // ------------
+ // | Enumerator | Width |
+ // |:----------:|-------:|
+ // | INT8 | 8 bit |
+ // | INT16 | 16 bit |
+ // | INT32 | 32 bit |
+ // | INT64 | 64 bit |
+ // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+ localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats
+ localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS);
+ // Int formats
+ typedef enum logic [INT_FORMAT_BITS-1:0] {
+ INT8,
+ INT16,
+ INT32,
+ INT64
+ // add new formats here
+ } int_format_e;
+ // Maps an integer format enumerator to its bit width (8, 16, 32 or 64).
+ // There is deliberately no default branch: the four enumerators exhaust the
+ // 2-bit encoding of int_format_e, so every encodable value is listed explicitly.
+ function automatic int unsigned int_width(int_format_e ifmt);
+   unique case (ifmt)
+     INT64: return 64;
+     INT32: return 32;
+     INT16: return 16;
+     INT8:  return 8;
+   endcase
+ endfunction
+ typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks)
+ // --------------
+ // FP operations
+ // --------------
+ localparam int unsigned NUM_OPGROUPS = 4;
+ // Each FP operation belongs to an operation group.
+ // The enumerator order is significant: it is the index into the per-opgroup
+ // configuration arrays (PipeRegs / UnitTypes), whose entries are listed in the
+ // order ADDMUL, DIVSQRT, NONCOMP, CONV. The four members match NUM_OPGROUPS.
+ typedef enum logic [1:0] {
+   ADDMUL,
+   DIVSQRT,
+   NONCOMP,
+   CONV
+ } opgroup_e;
+ localparam int unsigned OP_BITS = 4;
+ typedef enum logic [OP_BITS-1:0] {
+ FMADD, FNMSUB, ADD, MUL, // ADDMUL operation group
+ DIV, SQRT, // DIVSQRT operation group
+ SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group
+ F2F, F2I, I2F, CPKAB, CPKCD // CONV operation group
+ } operation_e;
+ // ------------------------------------------
+ // Rounding, status and classification types
+ // ------------------------------------------
+ // Rounding modes
+ typedef enum logic [2:0] {
+ RNE = 3'b000,
+ RTZ = 3'b001,
+ RDN = 3'b010,
+ RUP = 3'b011,
+ RMM = 3'b100,
+ DYN = 3'b111
+ } roundmode_e;
+ // Status flags
+ typedef struct packed {
+ logic NV; // Invalid
+ logic DZ; // Divide by zero
+ logic OF; // Overflow
+ logic UF; // Underflow
+ logic NX; // Inexact
+ } status_t;
+ // Information about a floating point value
+ typedef struct packed {
+ logic is_normal; // is the value normal
+ logic is_subnormal; // is the value subnormal
+ logic is_zero; // is the value zero
+ logic is_inf; // is the value infinity
+ logic is_nan; // is the value NaN
+ logic is_signalling; // is the value a signalling NaN
+ logic is_quiet; // is the value a quiet NaN
+ logic is_boxed; // is the value properly NaN-boxed (RISC-V specific)
+ } fp_info_t;
+ // Classification mask
+ typedef enum logic [9:0] {
+ NEGINF = 10'b00_0000_0001,
+ NEGNORM = 10'b00_0000_0010,
+ NEGSUBNORM = 10'b00_0000_0100,
+ NEGZERO = 10'b00_0000_1000,
+ POSZERO = 10'b00_0001_0000,
+ POSSUBNORM = 10'b00_0010_0000,
+ POSNORM = 10'b00_0100_0000,
+ POSINF = 10'b00_1000_0000,
+ SNAN = 10'b01_0000_0000,
+ QNAN = 10'b10_0000_0000
+ } classmask_e;
+ // ------------------
+ // FPU configuration
+ // ------------------
+ // Pipelining registers can be inserted (at elaboration time) into operational units
+ typedef enum logic [1:0] {
+ BEFORE, // registers are inserted at the inputs of the unit
+ AFTER, // registers are inserted at the outputs of the unit
+ INSIDE, // registers are inserted at predetermined (suboptimal) locations in the unit
+ DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE
+ } pipe_config_t;
+ // Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all.
+ typedef enum logic [1:0] {
+ DISABLED, // arithmetic units are not generated
+ PARALLEL, // arithmetic units are generated in parallel slices, one for each format
+ MERGED // arithmetic units are contained within a merged unit holding multiple formats
+ } unit_type_t;
+ // Array of unit types indexed by format
+ typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t;
+ // Array of format-specific unit types by opgroup
+ typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t;
+ // same with unsigned
+ typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t;
+ // FPU configuration: features
+ typedef struct packed {
+ int unsigned Width;
+ logic EnableVectors;
+ logic EnableNanBox;
+ fmt_logic_t FpFmtMask;
+ ifmt_logic_t IntFmtMask;
+ } fpu_features_t;
+ localparam fpu_features_t RV64D = '{
+ Width: 64,
+ EnableVectors: 1'b0,
+ EnableNanBox: 1'b1,
+ FpFmtMask: 5'b11000,
+ IntFmtMask: 4'b0011
+ };
+ localparam fpu_features_t RV32D = '{
+ Width: 64,
+ EnableVectors: 1'b1,
+ EnableNanBox: 1'b1,
+ FpFmtMask: 5'b11000,
+ IntFmtMask: 4'b0010
+ };
+ localparam fpu_features_t RV32F = '{
+ Width: 32,
+ EnableVectors: 1'b0,
+ EnableNanBox: 1'b1,
+ FpFmtMask: 5'b10000,
+ IntFmtMask: 4'b0010
+ };
+ localparam fpu_features_t RV64D_Xsflt = '{
+ Width: 64,
+ EnableVectors: 1'b1,
+ EnableNanBox: 1'b1,
+ FpFmtMask: 5'b11111,
+ IntFmtMask: 4'b1111
+ };
+ localparam fpu_features_t RV32F_Xsflt = '{
+ Width: 32,
+ EnableVectors: 1'b1,
+ EnableNanBox: 1'b1,
+ FpFmtMask: 5'b10111,
+ IntFmtMask: 4'b1110
+ };
+ localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{
+ Width: 32,
+ EnableVectors: 1'b1,
+ EnableNanBox: 1'b1,
+ FpFmtMask: 5'b10001,
+ IntFmtMask: 4'b0110
+ };
+ // FPU configuration: implementation
+ typedef struct packed {
+ opgrp_fmt_unsigned_t PipeRegs;
+ opgrp_fmt_unit_types_t UnitTypes;
+ pipe_config_t PipeConfig;
+ } fpu_implementation_t;
+ localparam fpu_implementation_t DEFAULT_NOREGS = '{
+ PipeRegs: '{default: 0},
+ UnitTypes: '{'{default: PARALLEL}, // ADDMUL
+ '{default: MERGED}, // DIVSQRT
+ '{default: PARALLEL}, // NONCOMP
+ '{default: MERGED}}, // CONV
+ PipeConfig: BEFORE
+ };
+ localparam fpu_implementation_t DEFAULT_SNITCH = '{
+ PipeRegs: '{default: 1},
+ UnitTypes: '{'{default: PARALLEL}, // ADDMUL
+ '{default: DISABLED}, // DIVSQRT
+ '{default: PARALLEL}, // NONCOMP
+ '{default: MERGED}}, // CONV
+ PipeConfig: BEFORE
+ };
+ // -----------------------
+ // Synthesis optimization
+ // -----------------------
+ localparam logic DONT_CARE = 1'b1; // the value to assign as don't care
+ // -------------------------
+ // General helper functions
+ // -------------------------
+ // Returns the smaller of two signed integers (b when they are equal)
+ function automatic int minimum(int a, int b);
+   if (a < b) begin
+     return a;
+   end
+   return b;
+ endfunction
+ // Returns the larger of two signed integers (b when they are equal)
+ function automatic int maximum(int a, int b);
+   if (a > b) begin
+     return a;
+   end
+   return b;
+ endfunction
+ // -------------------------------------------
+ // Helper functions for FP formats and values
+ // -------------------------------------------
+ // Total width of an FP format: exponent bits + mantissa bits + 1 (the sign bit)
+ function automatic int unsigned fp_width(fp_format_e fmt);
+   automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
+   return enc.exp_bits + enc.man_bits + 1;
+ endfunction
+ // Width of the widest FP format enabled in cfg (0 when no format is enabled)
+ function automatic int unsigned max_fp_width(fmt_logic_t cfg);
+   automatic int unsigned widest = 0;
+   for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+     if (cfg[idx]) begin
+       widest = unsigned'(maximum(widest, fp_width(fp_format_e'(idx))));
+     end
+   end
+   return widest;
+ endfunction
+ // Width of the narrowest FP format enabled in cfg.
+ // The search starts from the widest enabled width so that disabled formats never win.
+ function automatic int unsigned min_fp_width(fmt_logic_t cfg);
+   automatic int unsigned narrowest = max_fp_width(cfg);
+   for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+     if (cfg[idx]) begin
+       narrowest = unsigned'(minimum(narrowest, fp_width(fp_format_e'(idx))));
+     end
+   end
+   return narrowest;
+ endfunction
+ // Returns the number of exponent bits for a format
+ function automatic int unsigned exp_bits(fp_format_e fmt);
+   automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
+   return enc.exp_bits;
+ endfunction
+ // Returns the number of mantissa bits for a format
+ function automatic int unsigned man_bits(fp_format_e fmt);
+   automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
+   return enc.man_bits;
+ endfunction
+ // Exponent bias of a format: 2^(exp_bits-1) - 1 (symmetrical bias, as per IEEE 754-2008)
+ function automatic int unsigned bias(fp_format_e fmt);
+   automatic int unsigned e_bits = FP_ENCODINGS[fmt].exp_bits;
+   return unsigned'(2**(e_bits-1)-1);
+ endfunction
+ // Builds an encoding whose exponent width and mantissa width are each the
+ // maximum over all formats enabled in cfg (all-zero when none is enabled)
+ function automatic fp_encoding_t super_format(fmt_logic_t cfg);
+   automatic fp_encoding_t merged;
+   merged = '0;
+   for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+     if (cfg[f]) begin // only active formats contribute
+       merged.exp_bits = unsigned'(maximum(merged.exp_bits, exp_bits(fp_format_e'(f))));
+       merged.man_bits = unsigned'(maximum(merged.man_bits, man_bits(fp_format_e'(f))));
+     end
+   end
+   return merged;
+ endfunction
+ // -------------------------------------------
+ // Helper functions for INT formats and values
+ // -------------------------------------------
+ // Width of the widest INT format enabled in cfg (0 when no format is enabled)
+ function automatic int unsigned max_int_width(ifmt_logic_t cfg);
+   automatic int unsigned widest = 0;
+   for (int idx = 0; idx < NUM_INT_FORMATS; idx++) begin
+     if (cfg[idx]) begin
+       widest = maximum(widest, int_width(int_format_e'(idx)));
+     end
+   end
+   return widest;
+ endfunction
+ // --------------------------------------------------
+ // Helper functions for operations and FPU structure
+ // --------------------------------------------------
+ // Returns the operation group of the given operation.
+ // Every operation_e member is listed explicitly, following the grouping given
+ // in the operation_e declaration. In particular FMADD/FNMSUB/ADD/MUL must map
+ // to ADDMUL; leaving them to the default branch would route them to NONCOMP.
+ // The default branch only covers encodings that are not operation_e members.
+ function automatic opgroup_e get_opgroup(operation_e op);
+   unique case (op)
+     FMADD, FNMSUB, ADD, MUL:     return ADDMUL;
+     DIV, SQRT:                   return DIVSQRT;
+     SGNJ, MINMAX, CMP, CLASSIFY: return NONCOMP;
+     F2F, F2I, I2F, CPKAB, CPKCD: return CONV;
+     default:                     return NONCOMP;
+   endcase
+ endfunction
+ // Number of operands consumed by the units of an operation group
+ function automatic int unsigned num_operands(opgroup_e grp);
+   unique case (grp)
+     ADDMUL, CONV:     return 3; // CONV: vectorial casts use 3 operands
+     DIVSQRT, NONCOMP: return 2;
+     default:          return 0;
+   endcase
+ endfunction
+ // Returns the number of lanes according to width, format and vectors
+ // With vec set, the result is the integer quotient width / fp_width(fmt), i.e.
+ // how many whole elements of format fmt fit into width bits; otherwise it is 1.
+ function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec);
+ return vec ? width / fp_width(fmt) : 1; // if no vectors, only one lane
+ endfunction
+ // Returns the maximum number of lanes in the FPU according to width, format config and vectors
+ // With vec set, the result is width divided by the narrowest enabled format width
+ // (min_fp_width(cfg)); otherwise it is 1.
+ // NOTE(review): min_fp_width returns 0 when cfg enables no format, so vec=1 with an
+ // empty cfg divides by zero here - confirm callers never pass that combination.
+ function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec);
+ return vec ? width / min_fp_width(cfg) : 1; // if no vectors, only one lane
+ endfunction
+ // Mask of the FP formats from cfg that are present in lane lane_no of a multiformat slice
+ function automatic fmt_logic_t get_lane_formats(int unsigned width,
+                                                 fmt_logic_t cfg,
+                                                 int unsigned lane_no);
+   automatic fmt_logic_t lane_mask;
+   for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+     // A format is present in this lane only if more than lane_no of its elements fit into width
+     lane_mask[f] = cfg[f] & ((width / fp_width(fp_format_e'(f))) > lane_no);
+   end
+   return lane_mask;
+ endfunction
+ // Mask of the INT formats from icfg that are present in lane lane_no of a multiformat slice.
+ // An INT format is present when it is enabled and an FP format of identical width is
+ // present in that lane.
+ function automatic ifmt_logic_t get_lane_int_formats(int unsigned width,
+                                                      fmt_logic_t cfg,
+                                                      ifmt_logic_t icfg,
+                                                      int unsigned lane_no);
+   automatic ifmt_logic_t int_mask;
+   automatic fmt_logic_t  fp_mask;
+   int_mask = '0;
+   fp_mask  = get_lane_formats(width, cfg, lane_no);
+   for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
+     for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+       // Only equal-width FP/INT pairs contribute to the mask
+       int_mask[i] |= icfg[i] && fp_mask[f] &&
+                      (fp_width(fp_format_e'(f)) == int_width(int_format_e'(i)));
+     end
+   end
+   return int_mask;
+ endfunction
+ // Mask of the FP formats from cfg that are present in lane lane_no of a CONV slice.
+ // In addition to the formats that fit by width, formats flagged in CPK_FORMATS are
+ // always present in lanes 0 and 1.
+ function automatic fmt_logic_t get_conv_lane_formats(int unsigned width,
+                                                      fmt_logic_t cfg,
+                                                      int unsigned lane_no);
+   automatic fmt_logic_t lane_mask;
+   for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+     lane_mask[f] = cfg[f] && ((CPK_FORMATS[f] && (lane_no < 2)) ||
+                               (width / fp_width(fp_format_e'(f)) > lane_no));
+   end
+   return lane_mask;
+ endfunction
+ // Mask of the INT formats from icfg that are present in lane lane_no of a CONV slice.
+ // An INT format is present when it is enabled and an FP format of identical width is
+ // present in that CONV lane.
+ function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width,
+                                                           fmt_logic_t cfg,
+                                                           ifmt_logic_t icfg,
+                                                           int unsigned lane_no);
+   automatic ifmt_logic_t int_mask;
+   automatic fmt_logic_t  fp_mask;
+   int_mask = '0;
+   fp_mask  = get_conv_lane_formats(width, cfg, lane_no);
+   for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
+     for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+       // Only equal-width FP/INT pairs contribute to the mask
+       if (fp_width(fp_format_e'(f)) == int_width(int_format_e'(i))) begin
+         int_mask[i] |= icfg[i] && fp_mask[f];
+       end
+     end
+   end
+   return int_mask;
+ endfunction
+ // Returns 1 if at least one format enabled in cfg has unit type MERGED, else 0
+ function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+   for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+     if (cfg[idx] && types[idx] == MERGED) begin
+       return 1'b1;
+     end
+   end
+   return 1'b0;
+ endfunction
+ // Returns 1 if fmt is the lowest-indexed format that is both enabled in cfg and MERGED.
+ // Returns 0 if another format comes first or if no enabled format is MERGED.
+ function automatic logic is_first_enabled_multi(fp_format_e fmt,
+                                                 fmt_unit_types_t types,
+                                                 fmt_logic_t cfg);
+   for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+     if (cfg[idx] && types[idx] == MERGED) begin
+       return (fmt == fp_format_e'(idx));
+     end
+   end
+   return 1'b0;
+ endfunction
+ // Returns the lowest-indexed format that is both enabled in cfg and MERGED.
+ // Falls back to format index 0 when there is no such format.
+ function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+   for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+     if (cfg[idx] && types[idx] == MERGED) begin
+       return fp_format_e'(idx);
+     end
+   end
+   return fp_format_e'(0);
+ endfunction
+ // Returns the largest regs[] entry among the formats that are enabled in cfg and MERGED
+ // (0 when there is no such format)
+ function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs,
+                                                    fmt_unit_types_t types,
+                                                    fmt_logic_t cfg);
+   automatic int unsigned most_regs = 0;
+   for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+     if (cfg[idx] && types[idx] == MERGED) begin
+       most_regs = maximum(most_regs, regs[idx]);
+     end
+   end
+   return most_regs;
+ endfunction
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..4e4b7c7
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,72 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// Combinational rounding stage.
+// From the magnitude abs_value_i, the {round, sticky} bits and the rounding mode, it
+// decides whether the magnitude is incremented by one, and derives the result sign
+// and an exact-zero indication.
+module fpnew_rounding #(
+  parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit
+) (
+  // Input value
+  input  logic [AbsWidth-1:0]   abs_value_i,             // absolute value without sign
+  input  logic                  sign_i,
+  // Rounding information
+  input  logic [1:0]            round_sticky_bits_i,     // round and sticky bits {RS}
+  input  fpnew_pkg::roundmode_e rnd_mode_i,
+  input  logic                  effective_subtraction_i, // sign of inputs affects rounding of zeroes
+  // Output value
+  output logic [AbsWidth-1:0]   abs_rounded_o,           // absolute value without sign
+  output logic                  sign_o,
+  // Output classification
+  output logic                  exact_zero_o             // output is an exact zero
+);
+  logic round_up; // Rounding decision
+  // Take the rounding decision according to RISC-V spec
+  // RoundMode | Mnemonic | Meaning
+  // :--------:|:--------:|:-------
+  //    000    |   RNE    | Round to Nearest, ties to Even
+  //    001    |   RTZ    | Round towards Zero
+  //    010    |   RDN    | Round Down (towards -\infty)
+  //    011    |   RUP    | Round Up (towards \infty)
+  //    100    |   RMM    | Round to Nearest, ties to Max Magnitude
+  //  others   |          | *invalid*
+  always_comb begin : rounding_decision
+    unique case (rnd_mode_i)
+      fpnew_pkg::RNE: // Decide according to round/sticky bits
+        // All four 2-bit values are listed, so no default branch is present here
+        unique case (round_sticky_bits_i)
+          2'b00,
+          2'b01: round_up = 1'b0;           // < ulp/2 away, round down
+          2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result
+          2'b11: round_up = 1'b1;           // > ulp/2 away, round up
+          //default: round_up = fpnew_pkg::DONT_CARE;
+        endcase
+      fpnew_pkg::RTZ: round_up = 1'b0; // always round down
+      fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i  : 1'b0; // to 0 if +, away if -
+      fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ? ~sign_i : 1'b0; // to 0 if -, away if +
+      fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
+      default: round_up = fpnew_pkg::DONT_CARE; // invalid rounding mode: drive the don't-care value
+    endcase
+  end
+  // Perform the rounding, exponent change and overflow to inf happens automagically
+  assign abs_rounded_o = abs_value_i + round_up;
+  // True zero result is a zero result without dirty round/sticky bits
+  assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0);
+  // In case of effective subtraction (thus signs of addition operands must have differed) and a
+  // true zero result, the result sign is '-' in case of RDN and '+' for other modes.
+  assign sign_o = (exact_zero_o && effective_subtraction_i)
+                  ? (rnd_mode_i == fpnew_pkg::RDN)
+                  : sign_i;
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..5b37edd
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,172 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Stefan Mach <>
+// FPU top level.
+// Each incoming operation is steered to the operation-group block selected by
+// fpnew_pkg::get_opgroup(op_i); the results of all blocks are arbitrated onto a
+// single output port by a round-robin arbiter.
+module fpnew_top #(
+  // FPU configuration
+  parameter fpnew_pkg::fpu_features_t       Features       = fpnew_pkg::RV64D_Xsflt,
+  parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS,
+  parameter type                            TagType        = logic,
+  // Do not change
+  localparam int unsigned WIDTH        = Features.Width,
+  localparam int unsigned NUM_OPERANDS = 3
+) (
+  input  logic                              clk_i,
+  input  logic                              rst_ni,
+  // Input signals
+  input  logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
+  input  fpnew_pkg::roundmode_e             rnd_mode_i,
+  input  fpnew_pkg::operation_e             op_i,
+  input  logic                              op_mod_i,
+  input  fpnew_pkg::fp_format_e             src_fmt_i,
+  input  fpnew_pkg::fp_format_e             dst_fmt_i,
+  input  fpnew_pkg::int_format_e            int_fmt_i,
+  input  logic                              vectorial_op_i,
+  input  TagType                            tag_i,
+  // Input Handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output TagType                            tag_o,
+  // Output handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // Indication of valid data in flight
+  output logic                              busy_o
+);
+  localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS;
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS;
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed {
+    logic [WIDTH-1:0]   result;
+    fpnew_pkg::status_t status;
+    TagType             tag;
+  } output_t;
+  // Handshake signals for the blocks
+  logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy;
+  output_t [NUM_OPGROUPS-1:0] opgrp_outputs;
+  logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed;
+  // -----------
+  // Input Side
+  // -----------
+  // Ready is only reported while a request is presented, and reflects the readiness
+  // of the operation group that the current op_i belongs to
+  assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)];
+  // NaN-boxing check
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    // NaN boxing is only generated if it's enabled and needed
+    if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check
+      for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands
+        // Scalar operands are boxed when all bits above FP_WIDTH are ones;
+        // vectorial operands are always treated as boxed
+        assign is_boxed[fmt][op] = (!vectorial_op_i)
+                                   ? operands_i[op][WIDTH-1:FP_WIDTH] == '1
+                                   : 1'b1;
+      end
+    end else begin : no_check
+      assign is_boxed[fmt] = '1;
+    end
+  end
+  // -------------------------
+  // Generate Operation Blocks
+  // -------------------------
+  for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups
+    localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp));
+    logic in_valid;
+    logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed;
+    // Only the block whose group matches the current operation sees the request
+    assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp));
+    // slice out input boxing
+    always_comb begin : slice_inputs
+      for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++)
+        input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0];
+    end
+    fpnew_opgroup_block #(
+      .OpGroup       ( fpnew_pkg::opgroup_e'(opgrp)    ),
+      .Width         ( WIDTH                           ),
+      .EnableVectors ( Features.EnableVectors          ),
+      .FpFmtMask     ( Features.FpFmtMask              ),
+      .IntFmtMask    ( Features.IntFmtMask             ),
+      .FmtPipeRegs   ( Implementation.PipeRegs[opgrp]  ),
+      .FmtUnitTypes  ( Implementation.UnitTypes[opgrp] ),
+      .PipeConfig    ( Implementation.PipeConfig       ),
+      .TagType       ( TagType                         )
+    ) i_opgroup_block (
+      .clk_i,
+      .rst_ni,
+      .operands_i      ( operands_i[NUM_OPS-1:0]      ),
+      .is_boxed_i      ( input_boxed                  ),
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .in_valid_i      ( in_valid                     ),
+      .in_ready_o      ( opgrp_in_ready[opgrp]        ),
+      .flush_i,
+      .result_o        ( opgrp_outputs[opgrp].result  ),
+      .status_o        ( opgrp_outputs[opgrp].status  ),
+      .extension_bit_o ( opgrp_ext[opgrp]             ),
+      .tag_o           ( opgrp_outputs[opgrp].tag     ),
+      .out_valid_o     ( opgrp_out_valid[opgrp]       ),
+      .out_ready_i     ( opgrp_out_ready[opgrp]       ),
+      .busy_o          ( opgrp_busy[opgrp]            )
+    );
+  end
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn     ( NUM_OPGROUPS ),
+    .DataType  ( output_t     ),
+    .AxiVldRdy ( 1'b1         )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i   ( '0              ),
+    .req_i  ( opgrp_out_valid ),
+    .gnt_o  ( opgrp_out_ready ),
+    .data_i ( opgrp_outputs   ),
+    .gnt_i  ( out_ready_i     ),
+    .req_o  ( out_valid_o     ),
+    .data_o ( arbiter_output  ),
+    .idx_o  ( /* unused */    )
+  );
+  // Unpack output
+  assign result_o = arbiter_output.result;
+  assign status_o = arbiter_output.status;
+  assign tag_o    = arbiter_output.tag;
+  // Busy while any operation-group block reports busy
+  assign busy_o   = (| opgrp_busy);
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..762553c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,144 @@
+// General Purpose Input/Output module
+// 32 pins with optional input filtering, direct and masked (16-bit half-word)
+// writes to the output and output-enable registers, and per-pin interrupts on
+// rising edge, falling edge, high level and low level of the (filtered) input.
+module gpio (
+  input clk_i,
+  input rst_ni,
+  // Below Register interface can be changed
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  input        [31:0] cio_gpio_i,
+  output logic [31:0] cio_gpio_o,
+  output logic [31:0] cio_gpio_en_o,
+  output logic [31:0] intr_gpio_o
+);
+  import gpio_reg_pkg::* ;
+  gpio_reg2hw_t reg2hw;
+  gpio_hw2reg_t hw2reg;
+  logic [31:0] cio_gpio_q;
+  logic [31:0] cio_gpio_en_q;
+  // possibly filter the input based upon register configuration
+  logic [31:0] data_in_d;
+  for (genvar i = 0 ; i < 32 ; i++) begin : gen_filter
+    prim_filter_ctr #(.Cycles(16)) filter (
+      .clk_i,
+      .rst_ni,
+      .enable_i(reg2hw.ctrl_en_input_filter.q[i]),
+      .filter_i(cio_gpio_i[i]),
+      .filter_o(data_in_d[i])
+    );
+  end
+  // GPIO_IN: the DATA_IN register is updated with the filtered input every cycle
+  assign hw2reg.data_in.de = 1'b1;
+  assign hw2reg.data_in.d  = data_in_d;
+  // GPIO_OUT
+  assign cio_gpio_o    = cio_gpio_q;
+  assign cio_gpio_en_o = cio_gpio_en_q;
+  // Read-back values: data fields mirror the output register, mask fields read as zero
+  assign hw2reg.direct_out.d            = cio_gpio_q;
+  assign hw2reg.masked_out_upper.data.d = cio_gpio_q[31:16];
+  assign hw2reg.masked_out_upper.mask.d = 16'h 0;
+  assign hw2reg.masked_out_lower.data.d = cio_gpio_q[15:0];
+  assign hw2reg.masked_out_lower.mask.d = 16'h 0;
+  // Output register: a direct write replaces all 32 bits; a masked write updates only
+  // the bits of one half-word whose mask bit is set and keeps the others
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      cio_gpio_q <= '0;
+    end else if (reg2hw.direct_out.qe) begin
+      cio_gpio_q <= reg2hw.direct_out.q;
+    end else if (reg2hw.masked_out_upper.data.qe) begin
+      cio_gpio_q[31:16] <=
+        ( reg2hw.masked_out_upper.mask.q & reg2hw.masked_out_upper.data.q) |
+        (~reg2hw.masked_out_upper.mask.q & cio_gpio_q[31:16]);
+    end else if (reg2hw.masked_out_lower.data.qe) begin
+      cio_gpio_q[15:0] <=
+        ( reg2hw.masked_out_lower.mask.q & reg2hw.masked_out_lower.data.q) |
+        (~reg2hw.masked_out_lower.mask.q & cio_gpio_q[15:0]);
+    end
+  end
+  // GPIO OE
+  assign hw2reg.direct_oe.d            = cio_gpio_en_q;
+  assign hw2reg.masked_oe_upper.data.d = cio_gpio_en_q[31:16];
+  assign hw2reg.masked_oe_upper.mask.d = 16'h 0;
+  assign hw2reg.masked_oe_lower.data.d = cio_gpio_en_q[15:0];
+  assign hw2reg.masked_oe_lower.mask.d = 16'h 0;
+  // Output-enable register: same direct / masked write scheme as the output register
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      cio_gpio_en_q <= '0;
+    end else if (reg2hw.direct_oe.qe) begin
+      cio_gpio_en_q <= reg2hw.direct_oe.q;
+    end else if (reg2hw.masked_oe_upper.data.qe) begin
+      cio_gpio_en_q[31:16] <=
+        ( reg2hw.masked_oe_upper.mask.q & reg2hw.masked_oe_upper.data.q) |
+        (~reg2hw.masked_oe_upper.mask.q & cio_gpio_en_q[31:16]);
+    end else if (reg2hw.masked_oe_lower.data.qe) begin
+      cio_gpio_en_q[15:0] <=
+        ( reg2hw.masked_oe_lower.mask.q & reg2hw.masked_oe_lower.data.q) |
+        (~reg2hw.masked_oe_lower.mask.q & cio_gpio_en_q[15:0]);
+    end
+  end
+  // Previous-cycle copy of the filtered input, used for edge detection (not reset)
+  logic [31:0] data_in_q;
+  always_ff @(posedge clk_i) begin
+    data_in_q <= data_in_d;
+  end
+  logic [31:0] event_intr_rise, event_intr_fall, event_intr_actlow, event_intr_acthigh;
+  logic [31:0] event_intr_combined;
+  // instantiate interrupt hardware primitive
+  prim_intr_hw #(.Width(32)) intr_hw (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_intr_combined),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.d),
+    .intr_o                 (intr_gpio_o)
+  );
+  // detect four possible individual interrupts
+  assign event_intr_rise    = (~data_in_q &  data_in_d) & reg2hw.intr_ctrl_en_rising.q;
+  assign event_intr_fall    = ( data_in_q & ~data_in_d) & reg2hw.intr_ctrl_en_falling.q;
+  assign event_intr_acthigh =                data_in_d  & reg2hw.intr_ctrl_en_lvlhigh.q;
+  assign event_intr_actlow  =               ~data_in_d  & reg2hw.intr_ctrl_en_lvllow.q;
+  assign event_intr_combined = event_intr_rise   |
+                               event_intr_fall   |
+                               event_intr_actlow |
+                               event_intr_acthigh;
+  // Register module
+  gpio_reg_top u_reg (
+    .clk_i     (clk_i),
+    .rst_ni    (rst_ni),
+    .tl_i      (tl_i),
+    .tl_o      (tl_o),
+    .reg2hw    (reg2hw),
+    .hw2reg    (hw2reg),
+    .devmode_i (1'b1)
+  );
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..b85347a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,248 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Package auto-generated by `reggen` containing data structure
+package gpio_reg_pkg;
+ // Address width within the block
+ parameter int BlockAw = 6;
+ ////////////////////////////
+ // Typedefs for registers //
+ ////////////////////////////
+ typedef struct packed {
+ logic [31:0] q;
+ } gpio_reg2hw_intr_state_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } gpio_reg2hw_intr_enable_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ logic qe;
+ } gpio_reg2hw_intr_test_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ logic qe;
+ } gpio_reg2hw_direct_out_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } data;
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } mask;
+ } gpio_reg2hw_masked_out_lower_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } data;
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } mask;
+ } gpio_reg2hw_masked_out_upper_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ logic qe;
+ } gpio_reg2hw_direct_oe_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } data;
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } mask;
+ } gpio_reg2hw_masked_oe_lower_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } data;
+ struct packed {
+ logic [15:0] q;
+ logic qe;
+ } mask;
+ } gpio_reg2hw_masked_oe_upper_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } gpio_reg2hw_intr_ctrl_en_rising_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } gpio_reg2hw_intr_ctrl_en_falling_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } gpio_reg2hw_intr_ctrl_en_lvlhigh_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } gpio_reg2hw_intr_ctrl_en_lvllow_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } gpio_reg2hw_ctrl_en_input_filter_reg_t;
+ typedef struct packed {
+ logic [31:0] d;
+ logic de;
+ } gpio_hw2reg_intr_state_reg_t;
+ typedef struct packed {
+ logic [31:0] d;
+ logic de;
+ } gpio_hw2reg_data_in_reg_t;
+ typedef struct packed {
+ logic [31:0] d;
+ } gpio_hw2reg_direct_out_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] d;
+ } data;
+ struct packed {
+ logic [15:0] d;
+ } mask;
+ } gpio_hw2reg_masked_out_lower_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] d;
+ } data;
+ struct packed {
+ logic [15:0] d;
+ } mask;
+ } gpio_hw2reg_masked_out_upper_reg_t;
+ typedef struct packed {
+ logic [31:0] d;
+ } gpio_hw2reg_direct_oe_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] d;
+ } data;
+ struct packed {
+ logic [15:0] d;
+ } mask;
+ } gpio_hw2reg_masked_oe_lower_reg_t;
+ typedef struct packed {
+ struct packed {
+ logic [15:0] d;
+ } data;
+ struct packed {
+ logic [15:0] d;
+ } mask;
+ } gpio_hw2reg_masked_oe_upper_reg_t;
+ ///////////////////////////////////////
+ // Register to internal design logic //
+ ///////////////////////////////////////
+ typedef struct packed {
+ gpio_reg2hw_intr_state_reg_t intr_state; // [458:427]
+ gpio_reg2hw_intr_enable_reg_t intr_enable; // [426:395]
+ gpio_reg2hw_intr_test_reg_t intr_test; // [394:362]
+ gpio_reg2hw_direct_out_reg_t direct_out; // [361:329]
+ gpio_reg2hw_masked_out_lower_reg_t masked_out_lower; // [328:295]
+ gpio_reg2hw_masked_out_upper_reg_t masked_out_upper; // [294:261]
+ gpio_reg2hw_direct_oe_reg_t direct_oe; // [260:228]
+ gpio_reg2hw_masked_oe_lower_reg_t masked_oe_lower; // [227:194]
+ gpio_reg2hw_masked_oe_upper_reg_t masked_oe_upper; // [193:160]
+ gpio_reg2hw_intr_ctrl_en_rising_reg_t intr_ctrl_en_rising; // [159:128]
+ gpio_reg2hw_intr_ctrl_en_falling_reg_t intr_ctrl_en_falling; // [127:96]
+ gpio_reg2hw_intr_ctrl_en_lvlhigh_reg_t intr_ctrl_en_lvlhigh; // [95:64]
+ gpio_reg2hw_intr_ctrl_en_lvllow_reg_t intr_ctrl_en_lvllow; // [63:32]
+ gpio_reg2hw_ctrl_en_input_filter_reg_t ctrl_en_input_filter; // [31:0]
+ } gpio_reg2hw_t;
+ ///////////////////////////////////////
+ // Internal design logic to register //
+ ///////////////////////////////////////
+ typedef struct packed {
+ gpio_hw2reg_intr_state_reg_t intr_state; // [257:225]
+ gpio_hw2reg_data_in_reg_t data_in; // [224:192]
+ gpio_hw2reg_direct_out_reg_t direct_out; // [191:160]
+ gpio_hw2reg_masked_out_lower_reg_t masked_out_lower; // [159:128]
+ gpio_hw2reg_masked_out_upper_reg_t masked_out_upper; // [127:96]
+ gpio_hw2reg_direct_oe_reg_t direct_oe; // [95:64]
+ gpio_hw2reg_masked_oe_lower_reg_t masked_oe_lower; // [63:32]
+ gpio_hw2reg_masked_oe_upper_reg_t masked_oe_upper; // [31:0]
+ } gpio_hw2reg_t;
+ // Register Address
+ parameter logic [BlockAw-1:0] GPIO_INTR_STATE_OFFSET = 6'h 0;
+ parameter logic [BlockAw-1:0] GPIO_INTR_ENABLE_OFFSET = 6'h 4;
+ parameter logic [BlockAw-1:0] GPIO_INTR_TEST_OFFSET = 6'h 8;
+ parameter logic [BlockAw-1:0] GPIO_DATA_IN_OFFSET = 6'h c;
+ parameter logic [BlockAw-1:0] GPIO_DIRECT_OUT_OFFSET = 6'h 10;
+ parameter logic [BlockAw-1:0] GPIO_MASKED_OUT_LOWER_OFFSET = 6'h 14;
+ parameter logic [BlockAw-1:0] GPIO_MASKED_OUT_UPPER_OFFSET = 6'h 18;
+ parameter logic [BlockAw-1:0] GPIO_DIRECT_OE_OFFSET = 6'h 1c;
+ parameter logic [BlockAw-1:0] GPIO_MASKED_OE_LOWER_OFFSET = 6'h 20;
+ parameter logic [BlockAw-1:0] GPIO_MASKED_OE_UPPER_OFFSET = 6'h 24;
+ parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_RISING_OFFSET = 6'h 28;
+ parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_FALLING_OFFSET = 6'h 2c;
+ parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_LVLHIGH_OFFSET = 6'h 30;
+ parameter logic [BlockAw-1:0] GPIO_INTR_CTRL_EN_LVLLOW_OFFSET = 6'h 34;
+ parameter logic [BlockAw-1:0] GPIO_CTRL_EN_INPUT_FILTER_OFFSET = 6'h 38;
+ // Register Index
+ // One enumerator per register, in address order; the implicit values 0..14
+ // match the index[] annotations of GPIO_PERMIT.
+ typedef enum int {
+   GPIO_INTR_STATE,
+   GPIO_INTR_ENABLE,
+   GPIO_INTR_TEST,
+   GPIO_DATA_IN,
+   GPIO_DIRECT_OUT,
+   GPIO_MASKED_OUT_LOWER,
+   GPIO_MASKED_OUT_UPPER,
+   GPIO_DIRECT_OE,
+   GPIO_MASKED_OE_LOWER,
+   GPIO_MASKED_OE_UPPER,
+   GPIO_INTR_CTRL_EN_RISING,
+   GPIO_INTR_CTRL_EN_FALLING,
+   GPIO_INTR_CTRL_EN_LVLHIGH,
+   GPIO_INTR_CTRL_EN_LVLLOW,
+   GPIO_CTRL_EN_INPUT_FILTER
+ } gpio_id_e;
+ // Register width information to check illegal writes
+ parameter logic [3:0] GPIO_PERMIT [15] = '{
+ 4'b 1111, // index[ 0] GPIO_INTR_STATE
+ 4'b 1111, // index[ 1] GPIO_INTR_ENABLE
+ 4'b 1111, // index[ 2] GPIO_INTR_TEST
+ 4'b 1111, // index[ 3] GPIO_DATA_IN
+ 4'b 1111, // index[ 4] GPIO_DIRECT_OUT
+ 4'b 1111, // index[ 5] GPIO_MASKED_OUT_LOWER
+ 4'b 1111, // index[ 6] GPIO_MASKED_OUT_UPPER
+ 4'b 1111, // index[ 7] GPIO_DIRECT_OE
+ 4'b 1111, // index[ 8] GPIO_MASKED_OE_LOWER
+ 4'b 1111, // index[ 9] GPIO_MASKED_OE_UPPER
+ 4'b 1111, // index[10] GPIO_INTR_CTRL_EN_RISING
+ 4'b 1111, // index[11] GPIO_INTR_CTRL_EN_FALLING
+ 4'b 1111, // index[12] GPIO_INTR_CTRL_EN_LVLHIGH
+ 4'b 1111, // index[13] GPIO_INTR_CTRL_EN_LVLLOW
+ 4'b 1111 // index[14] GPIO_CTRL_EN_INPUT_FILTER
+ };
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..1c2ba13
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,706 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Top module auto-generated by `reggen`
+module gpio_reg_top (
+  input clk_i,
+  input rst_ni,
+  // Below Register interface can be changed
+  input tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  // To HW
+  output gpio_reg_pkg::gpio_reg2hw_t reg2hw, // Write
+  input gpio_reg_pkg::gpio_hw2reg_t hw2reg, // Read
+  // Config
+  input devmode_i // If 1, explicit error return for unmapped register access
+);
+  // Bring the register offsets and GPIO_PERMIT table into scope.
+  import gpio_reg_pkg::* ;
+ localparam int AW = 6;
+ localparam int DW = 32;
+ localparam int DBW = DW/8; // Byte Width
+ // register signals
+ logic reg_we;
+ logic reg_re;
+ logic [AW-1:0] reg_addr;
+ logic [DW-1:0] reg_wdata;
+ logic [DBW-1:0] reg_be;
+ logic [DW-1:0] reg_rdata;
+ logic reg_error;
+ logic addrmiss, wr_err;
+ logic [DW-1:0] reg_rdata_next;
+ tlul_pkg::tl_h2d_t tl_reg_h2d;
+ tlul_pkg::tl_d2h_t tl_reg_d2h;
+ assign tl_reg_h2d = tl_i;
+ assign tl_o = tl_reg_d2h;
+ tlul_adapter_reg #(
+ .RegAw(AW),
+ .RegDw(DW)
+ ) u_reg_if (
+ .clk_i,
+ .rst_ni,
+ .tl_i (tl_reg_h2d),
+ .tl_o (tl_reg_d2h),
+ .we_o (reg_we),
+ .re_o (reg_re),
+ .addr_o (reg_addr),
+ .wdata_o (reg_wdata),
+ .be_o (reg_be),
+ .rdata_i (reg_rdata),
+ .error_i (reg_error)
+ );
+ assign reg_rdata = reg_rdata_next ;
+ assign reg_error = (devmode_i & addrmiss) | wr_err ;
+ // Define SW related signals
+ // Format: <reg>_<field>_{wd|we|qs}
+ // or <reg>_{wd|we|qs} if field == 1 or 0
+ logic [31:0] intr_state_qs;
+ logic [31:0] intr_state_wd;
+ logic intr_state_we;
+ logic [31:0] intr_enable_qs;
+ logic [31:0] intr_enable_wd;
+ logic intr_enable_we;
+ logic [31:0] intr_test_wd;
+ logic intr_test_we;
+ logic [31:0] data_in_qs;
+ logic [31:0] direct_out_qs;
+ logic [31:0] direct_out_wd;
+ logic direct_out_we;
+ logic direct_out_re;
+ logic [15:0] masked_out_lower_data_qs;
+ logic [15:0] masked_out_lower_data_wd;
+ logic masked_out_lower_data_we;
+ logic masked_out_lower_data_re;
+ logic [15:0] masked_out_lower_mask_wd;
+ logic masked_out_lower_mask_we;
+ logic [15:0] masked_out_upper_data_qs;
+ logic [15:0] masked_out_upper_data_wd;
+ logic masked_out_upper_data_we;
+ logic masked_out_upper_data_re;
+ logic [15:0] masked_out_upper_mask_wd;
+ logic masked_out_upper_mask_we;
+ logic [31:0] direct_oe_qs;
+ logic [31:0] direct_oe_wd;
+ logic direct_oe_we;
+ logic direct_oe_re;
+ logic [15:0] masked_oe_lower_data_qs;
+ logic [15:0] masked_oe_lower_data_wd;
+ logic masked_oe_lower_data_we;
+ logic masked_oe_lower_data_re;
+ logic [15:0] masked_oe_lower_mask_qs;
+ logic [15:0] masked_oe_lower_mask_wd;
+ logic masked_oe_lower_mask_we;
+ logic masked_oe_lower_mask_re;
+ logic [15:0] masked_oe_upper_data_qs;
+ logic [15:0] masked_oe_upper_data_wd;
+ logic masked_oe_upper_data_we;
+ logic masked_oe_upper_data_re;
+ logic [15:0] masked_oe_upper_mask_qs;
+ logic [15:0] masked_oe_upper_mask_wd;
+ logic masked_oe_upper_mask_we;
+ logic masked_oe_upper_mask_re;
+ logic [31:0] intr_ctrl_en_rising_qs;
+ logic [31:0] intr_ctrl_en_rising_wd;
+ logic intr_ctrl_en_rising_we;
+ logic [31:0] intr_ctrl_en_falling_qs;
+ logic [31:0] intr_ctrl_en_falling_wd;
+ logic intr_ctrl_en_falling_we;
+ logic [31:0] intr_ctrl_en_lvlhigh_qs;
+ logic [31:0] intr_ctrl_en_lvlhigh_wd;
+ logic intr_ctrl_en_lvlhigh_we;
+ logic [31:0] intr_ctrl_en_lvllow_qs;
+ logic [31:0] intr_ctrl_en_lvllow_wd;
+ logic intr_ctrl_en_lvllow_we;
+ logic [31:0] ctrl_en_input_filter_qs;
+ logic [31:0] ctrl_en_input_filter_wd;
+ logic ctrl_en_input_filter_we;
+ // Register instances
+ // R[intr_state]: V(False)
+ prim_subreg #(
+   .DW (32),
+   .RESVAL (32'h0)
+ ) u_intr_state (
+   .clk_i (clk_i ),
+   .rst_ni (rst_ni ),
+   // from register interface
+   .we (intr_state_we),
+   .wd (intr_state_wd),
+   // from internal hardware: this register is hardware-updatable, so the
+   // data-enable must come from hw2reg alongside the data itself.
+   .de (hw2reg.intr_state.de),
+   .d (hw2reg.intr_state.d ),
+   // to internal hardware
+   .qe (),
+   .q (reg2hw.intr_state.q ),
+   // to register interface (read)
+   .qs (intr_state_qs)
+ );
+ // R[intr_enable]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_intr_enable (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_enable_we),
+ .wd (intr_enable_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_enable.q ),
+ // to register interface (read)
+ .qs (intr_enable_qs)
+ );
+ // R[intr_test]: V(True)
+ prim_subreg_ext #(
+ .DW (32)
+ ) u_intr_test (
+ .re (1'b0),
+ .we (intr_test_we),
+ .wd (intr_test_wd),
+ .d ('0),
+ .qre (),
+ .qe (reg2hw.intr_test.qe),
+ .q (reg2hw.intr_test.q ),
+ .qs ()
+ );
+ // R[data_in]: V(False)
+ prim_subreg #(
+   .DW (32),
+   .RESVAL (32'h0)
+ ) u_data_in (
+   .clk_i (clk_i ),
+   .rst_ni (rst_ni ),
+   // Software write path is tied off: we is constant 0.
+   .we (1'b0),
+   .wd ('0 ),
+   // from internal hardware: value and its update strobe both come from hw2reg
+   .de (hw2reg.data_in.de),
+   .d (hw2reg.data_in.d ),
+   // to internal hardware (unused, the value is only read back by software)
+   .qe (),
+   .q (),
+   // to register interface (read)
+   .qs (data_in_qs)
+ );
+ // R[direct_out]: V(True)
+ prim_subreg_ext #(
+ .DW (32)
+ ) u_direct_out (
+ .re (direct_out_re),
+ .we (direct_out_we),
+ .wd (direct_out_wd),
+ .d (hw2reg.direct_out.d),
+ .qre (),
+ .qe (reg2hw.direct_out.qe),
+ .q (reg2hw.direct_out.q ),
+ .qs (direct_out_qs)
+ );
+ // R[masked_out_lower]: V(True)
+ // F[data]: 15:0
+ prim_subreg_ext #(
+   .DW (16)
+ ) u_masked_out_lower_data (
+   .re (masked_out_lower_data_re),
+   .we (masked_out_lower_data_we),
+   .wd (masked_out_lower_data_wd),
+   // Hardware-side connections mirror the sibling mask field of the same
+   // register (hw2reg/reg2hw.masked_out_lower.<field>.*).
+   .d (hw2reg.masked_out_lower.data.d),
+   .qre (),
+   .qe (reg2hw.masked_out_lower.data.qe),
+   .q (reg2hw.masked_out_lower.data.q ),
+   .qs (masked_out_lower_data_qs)
+ );
+ // F[mask]: 31:16
+ prim_subreg_ext #(
+ .DW (16)
+ ) u_masked_out_lower_mask (
+ .re (1'b0),
+ .we (masked_out_lower_mask_we),
+ .wd (masked_out_lower_mask_wd),
+ .d (hw2reg.masked_out_lower.mask.d),
+ .qre (),
+ .qe (reg2hw.masked_out_lower.mask.qe),
+ .q (reg2hw.masked_out_lower.mask.q ),
+ .qs ()
+ );
+ // R[masked_out_upper]: V(True)
+ // F[data]: 15:0
+ prim_subreg_ext #(
+   .DW (16)
+ ) u_masked_out_upper_data (
+   .re (masked_out_upper_data_re),
+   .we (masked_out_upper_data_we),
+   .wd (masked_out_upper_data_wd),
+   // Hardware-side connections mirror the sibling mask field of the same
+   // register (hw2reg/reg2hw.masked_out_upper.<field>.*).
+   .d (hw2reg.masked_out_upper.data.d),
+   .qre (),
+   .qe (reg2hw.masked_out_upper.data.qe),
+   .q (reg2hw.masked_out_upper.data.q ),
+   .qs (masked_out_upper_data_qs)
+ );
+ // F[mask]: 31:16
+ prim_subreg_ext #(
+ .DW (16)
+ ) u_masked_out_upper_mask (
+ .re (1'b0),
+ .we (masked_out_upper_mask_we),
+ .wd (masked_out_upper_mask_wd),
+ .d (hw2reg.masked_out_upper.mask.d),
+ .qre (),
+ .qe (reg2hw.masked_out_upper.mask.qe),
+ .q (reg2hw.masked_out_upper.mask.q ),
+ .qs ()
+ );
+ // R[direct_oe]: V(True)
+ prim_subreg_ext #(
+ .DW (32)
+ ) u_direct_oe (
+ .re (direct_oe_re),
+ .we (direct_oe_we),
+ .wd (direct_oe_wd),
+ .d (hw2reg.direct_oe.d),
+ .qre (),
+ .qe (reg2hw.direct_oe.qe),
+ .q (reg2hw.direct_oe.q ),
+ .qs (direct_oe_qs)
+ );
+ // R[masked_oe_lower]: V(True)
+ // F[data]: 15:0
+ prim_subreg_ext #(
+   .DW (16)
+ ) u_masked_oe_lower_data (
+   .re (masked_oe_lower_data_re),
+   .we (masked_oe_lower_data_we),
+   .wd (masked_oe_lower_data_wd),
+   // Hardware-side connections mirror the sibling mask field of the same
+   // register (hw2reg/reg2hw.masked_oe_lower.<field>.*).
+   .d (hw2reg.masked_oe_lower.data.d),
+   .qre (),
+   .qe (reg2hw.masked_oe_lower.data.qe),
+   .q (reg2hw.masked_oe_lower.data.q ),
+   .qs (masked_oe_lower_data_qs)
+ );
+ // F[mask]: 31:16
+ prim_subreg_ext #(
+ .DW (16)
+ ) u_masked_oe_lower_mask (
+ .re (masked_oe_lower_mask_re),
+ .we (masked_oe_lower_mask_we),
+ .wd (masked_oe_lower_mask_wd),
+ .d (hw2reg.masked_oe_lower.mask.d),
+ .qre (),
+ .qe (reg2hw.masked_oe_lower.mask.qe),
+ .q (reg2hw.masked_oe_lower.mask.q ),
+ .qs (masked_oe_lower_mask_qs)
+ );
+ // R[masked_oe_upper]: V(True)
+ // F[data]: 15:0
+ prim_subreg_ext #(
+   .DW (16)
+ ) u_masked_oe_upper_data (
+   .re (masked_oe_upper_data_re),
+   .we (masked_oe_upper_data_we),
+   .wd (masked_oe_upper_data_wd),
+   // Hardware-side connections mirror the sibling mask field of the same
+   // register (hw2reg/reg2hw.masked_oe_upper.<field>.*).
+   .d (hw2reg.masked_oe_upper.data.d),
+   .qre (),
+   .qe (reg2hw.masked_oe_upper.data.qe),
+   .q (reg2hw.masked_oe_upper.data.q ),
+   .qs (masked_oe_upper_data_qs)
+ );
+ // F[mask]: 31:16
+ prim_subreg_ext #(
+ .DW (16)
+ ) u_masked_oe_upper_mask (
+ .re (masked_oe_upper_mask_re),
+ .we (masked_oe_upper_mask_we),
+ .wd (masked_oe_upper_mask_wd),
+ .d (hw2reg.masked_oe_upper.mask.d),
+ .qre (),
+ .qe (reg2hw.masked_oe_upper.mask.qe),
+ .q (reg2hw.masked_oe_upper.mask.q ),
+ .qs (masked_oe_upper_mask_qs)
+ );
+ // R[intr_ctrl_en_rising]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_intr_ctrl_en_rising (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_ctrl_en_rising_we),
+ .wd (intr_ctrl_en_rising_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_ctrl_en_rising.q ),
+ // to register interface (read)
+ .qs (intr_ctrl_en_rising_qs)
+ );
+ // R[intr_ctrl_en_falling]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_intr_ctrl_en_falling (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_ctrl_en_falling_we),
+ .wd (intr_ctrl_en_falling_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_ctrl_en_falling.q ),
+ // to register interface (read)
+ .qs (intr_ctrl_en_falling_qs)
+ );
+ // R[intr_ctrl_en_lvlhigh]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_intr_ctrl_en_lvlhigh (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_ctrl_en_lvlhigh_we),
+ .wd (intr_ctrl_en_lvlhigh_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_ctrl_en_lvlhigh.q ),
+ // to register interface (read)
+ .qs (intr_ctrl_en_lvlhigh_qs)
+ );
+ // R[intr_ctrl_en_lvllow]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_intr_ctrl_en_lvllow (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_ctrl_en_lvllow_we),
+ .wd (intr_ctrl_en_lvllow_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_ctrl_en_lvllow.q ),
+ // to register interface (read)
+ .qs (intr_ctrl_en_lvllow_qs)
+ );
+ // R[ctrl_en_input_filter]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_ctrl_en_input_filter (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ctrl_en_input_filter_we),
+ .wd (ctrl_en_input_filter_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.ctrl_en_input_filter.q ),
+ // to register interface (read)
+ .qs (ctrl_en_input_filter_qs)
+ );
+ // Address decode: bit i of addr_hit is set when reg_addr equals the offset
+ // of register i (same ordering as the GPIO_PERMIT table).
+ logic [14:0] addr_hit;
+ assign addr_hit[ 0] = (reg_addr == GPIO_INTR_STATE_OFFSET);
+ assign addr_hit[ 1] = (reg_addr == GPIO_INTR_ENABLE_OFFSET);
+ assign addr_hit[ 2] = (reg_addr == GPIO_INTR_TEST_OFFSET);
+ assign addr_hit[ 3] = (reg_addr == GPIO_DATA_IN_OFFSET);
+ assign addr_hit[ 4] = (reg_addr == GPIO_DIRECT_OUT_OFFSET);
+ assign addr_hit[ 5] = (reg_addr == GPIO_MASKED_OUT_LOWER_OFFSET);
+ assign addr_hit[ 6] = (reg_addr == GPIO_MASKED_OUT_UPPER_OFFSET);
+ assign addr_hit[ 7] = (reg_addr == GPIO_DIRECT_OE_OFFSET);
+ assign addr_hit[ 8] = (reg_addr == GPIO_MASKED_OE_LOWER_OFFSET);
+ assign addr_hit[ 9] = (reg_addr == GPIO_MASKED_OE_UPPER_OFFSET);
+ assign addr_hit[10] = (reg_addr == GPIO_INTR_CTRL_EN_RISING_OFFSET);
+ assign addr_hit[11] = (reg_addr == GPIO_INTR_CTRL_EN_FALLING_OFFSET);
+ assign addr_hit[12] = (reg_addr == GPIO_INTR_CTRL_EN_LVLHIGH_OFFSET);
+ assign addr_hit[13] = (reg_addr == GPIO_INTR_CTRL_EN_LVLLOW_OFFSET);
+ assign addr_hit[14] = (reg_addr == GPIO_CTRL_EN_INPUT_FILTER_OFFSET);
+ // An access (read or write) that matches no register is an address miss.
+ assign addrmiss = (reg_re || reg_we) ? ~|addr_hit : 1'b0 ;
+ // Check sub-word write is permitted
+ always_comb begin
+   // A write is rejected when the byte enables do not cover every byte lane
+   // that GPIO_PERMIT marks as required for the addressed register.
+   wr_err = 1'b0;
+   for (int idx = 0; idx < 15; idx++) begin
+     if (addr_hit[idx] && reg_we && (GPIO_PERMIT[idx] != (GPIO_PERMIT[idx] & reg_be))) begin
+       wr_err = 1'b1 ;
+     end
+   end
+ end
+ assign intr_state_we = addr_hit[0] & reg_we & ~wr_err;
+ assign intr_state_wd = reg_wdata[31:0];
+ assign intr_enable_we = addr_hit[1] & reg_we & ~wr_err;
+ assign intr_enable_wd = reg_wdata[31:0];
+ assign intr_test_we = addr_hit[2] & reg_we & ~wr_err;
+ assign intr_test_wd = reg_wdata[31:0];
+ assign direct_out_we = addr_hit[4] & reg_we & ~wr_err;
+ assign direct_out_wd = reg_wdata[31:0];
+ assign direct_out_re = addr_hit[4] && reg_re;
+ assign masked_out_lower_data_we = addr_hit[5] & reg_we & ~wr_err;
+ assign masked_out_lower_data_wd = reg_wdata[15:0];
+ assign masked_out_lower_data_re = addr_hit[5] && reg_re;
+ assign masked_out_lower_mask_we = addr_hit[5] & reg_we & ~wr_err;
+ assign masked_out_lower_mask_wd = reg_wdata[31:16];
+ assign masked_out_upper_data_we = addr_hit[6] & reg_we & ~wr_err;
+ assign masked_out_upper_data_wd = reg_wdata[15:0];
+ assign masked_out_upper_data_re = addr_hit[6] && reg_re;
+ assign masked_out_upper_mask_we = addr_hit[6] & reg_we & ~wr_err;
+ assign masked_out_upper_mask_wd = reg_wdata[31:16];
+ assign direct_oe_we = addr_hit[7] & reg_we & ~wr_err;
+ assign direct_oe_wd = reg_wdata[31:0];
+ assign direct_oe_re = addr_hit[7] && reg_re;
+ assign masked_oe_lower_data_we = addr_hit[8] & reg_we & ~wr_err;
+ assign masked_oe_lower_data_wd = reg_wdata[15:0];
+ assign masked_oe_lower_data_re = addr_hit[8] && reg_re;
+ assign masked_oe_lower_mask_we = addr_hit[8] & reg_we & ~wr_err;
+ assign masked_oe_lower_mask_wd = reg_wdata[31:16];
+ assign masked_oe_lower_mask_re = addr_hit[8] && reg_re;
+ assign masked_oe_upper_data_we = addr_hit[9] & reg_we & ~wr_err;
+ assign masked_oe_upper_data_wd = reg_wdata[15:0];
+ assign masked_oe_upper_data_re = addr_hit[9] && reg_re;
+ assign masked_oe_upper_mask_we = addr_hit[9] & reg_we & ~wr_err;
+ assign masked_oe_upper_mask_wd = reg_wdata[31:16];
+ assign masked_oe_upper_mask_re = addr_hit[9] && reg_re;
+ assign intr_ctrl_en_rising_we = addr_hit[10] & reg_we & ~wr_err;
+ assign intr_ctrl_en_rising_wd = reg_wdata[31:0];
+ assign intr_ctrl_en_falling_we = addr_hit[11] & reg_we & ~wr_err;
+ assign intr_ctrl_en_falling_wd = reg_wdata[31:0];
+ assign intr_ctrl_en_lvlhigh_we = addr_hit[12] & reg_we & ~wr_err;
+ assign intr_ctrl_en_lvlhigh_wd = reg_wdata[31:0];
+ assign intr_ctrl_en_lvllow_we = addr_hit[13] & reg_we & ~wr_err;
+ assign intr_ctrl_en_lvllow_wd = reg_wdata[31:0];
+ assign ctrl_en_input_filter_we = addr_hit[14] & reg_we & ~wr_err;
+ assign ctrl_en_input_filter_wd = reg_wdata[31:0];
+ // Read data return
+ always_comb begin
+   // Read-data mux: selects the software-visible value of the addressed
+   // register. Fields without a read path return 0 in their bit positions.
+   reg_rdata_next = '0;
+   unique case (1'b1)
+     addr_hit[0]: begin
+       reg_rdata_next[31:0] = intr_state_qs;
+     end
+     addr_hit[1]: begin
+       reg_rdata_next[31:0] = intr_enable_qs;
+     end
+     addr_hit[2]: begin
+       // intr_test has no read-back value (its qs port is unconnected).
+       reg_rdata_next[31:0] = '0;
+     end
+     addr_hit[3]: begin
+       reg_rdata_next[31:0] = data_in_qs;
+     end
+     addr_hit[4]: begin
+       reg_rdata_next[31:0] = direct_out_qs;
+     end
+     addr_hit[5]: begin
+       // Only the data half is readable; the mask half reads as 0.
+       reg_rdata_next[15:0] = masked_out_lower_data_qs;
+       reg_rdata_next[31:16] = '0;
+     end
+     addr_hit[6]: begin
+       reg_rdata_next[15:0] = masked_out_upper_data_qs;
+       reg_rdata_next[31:16] = '0;
+     end
+     addr_hit[7]: begin
+       reg_rdata_next[31:0] = direct_oe_qs;
+     end
+     addr_hit[8]: begin
+       reg_rdata_next[15:0] = masked_oe_lower_data_qs;
+       reg_rdata_next[31:16] = masked_oe_lower_mask_qs;
+     end
+     addr_hit[9]: begin
+       reg_rdata_next[15:0] = masked_oe_upper_data_qs;
+       reg_rdata_next[31:16] = masked_oe_upper_mask_qs;
+     end
+     addr_hit[10]: begin
+       reg_rdata_next[31:0] = intr_ctrl_en_rising_qs;
+     end
+     addr_hit[11]: begin
+       reg_rdata_next[31:0] = intr_ctrl_en_falling_qs;
+     end
+     addr_hit[12]: begin
+       reg_rdata_next[31:0] = intr_ctrl_en_lvlhigh_qs;
+     end
+     addr_hit[13]: begin
+       reg_rdata_next[31:0] = intr_ctrl_en_lvllow_qs;
+     end
+     addr_hit[14]: begin
+       reg_rdata_next[31:0] = ctrl_en_input_filter_qs;
+     end
+     default: begin
+       // No register matched: return all ones.
+       reg_rdata_next = '1;
+     end
+   endcase
+ end
+endmodule
diff --git a/verilog/rtl/iccm_controller.v b/verilog/rtl/iccm_controller.v
new file mode 100644
index 0000000..99a58ef
--- /dev/null
+++ b/verilog/rtl/iccm_controller.v
@@ -0,0 +1,135 @@
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// iccm_controller: assembles bytes arriving on rx_byte_i into 32-bit words and
+// emits a write strobe (we_o), word address (addr_o) and data (wdata_o).
+// The first byte stored for a word ends up in wdata_o[31:24]. A word equal to
+// 32'h00000fff seen in the DONE state raises reset_o.
+// NOTE(review): reset_o polarity as seen by the consumer is not visible here;
+// it resets to 1, is cleared by prog_i, and is set again on the end marker.
+module iccm_controller (
+  clk_i,
+  rst_ni,
+  prog_i,
+  rx_dv_i,
+  rx_byte_i,
+  we_o,
+  addr_o,
+  wdata_o,
+  reset_o
+);
+  input wire clk_i;
+  input wire rst_ni;
+  input wire prog_i;
+  input wire rx_dv_i;
+  input wire [7:0] rx_byte_i;
+  output wire we_o;
+  output wire [11:0] addr_o;
+  output wire [31:0] wdata_o;
+  output wire reset_o;
+  reg [1:0] ctrl_fsm_cs;
+  reg [1:0] ctrl_fsm_ns;
+  wire [7:0] rx_byte_d;
+  reg [7:0] rx_byte_q0;
+  reg [7:0] rx_byte_q1;
+  reg [7:0] rx_byte_q2;
+  reg [7:0] rx_byte_q3;
+  reg we_q;
+  reg we_d;
+  reg [11:0] addr_q;
+  reg [11:0] addr_d;
+  reg reset_q;
+  reg reset_d;
+  reg [1:0] byte_count;
+  localparam [1:0] DONE = 3;
+  localparam [1:0] LOAD = 1;
+  localparam [1:0] PROG = 2;
+  localparam [1:0] RESET = 0;
+  // Next-state and output decode.
+  always @(*) begin
+    we_d = we_q;
+    addr_d = addr_q;
+    reset_d = reset_q;
+    ctrl_fsm_ns = ctrl_fsm_cs;
+    case (ctrl_fsm_cs)
+      // Entered on prog_i; waits for the first valid byte.
+      RESET: begin
+        we_d = 1'b0;
+        reset_d = 1'b0;
+        if (rx_dv_i)
+          ctrl_fsm_ns = LOAD;
+        else
+          ctrl_fsm_ns = RESET;
+      end
+      // One byte is captured per visit (see the clocked block). When the
+      // fourth byte is being captured and the word is not the end marker,
+      // request a write; otherwise go back to DONE to wait for more data.
+      LOAD: begin
+        if (((byte_count == 2'b11) && (rx_byte_q2 != 8'h0f)) && (rx_byte_d != 8'hff)) begin
+          we_d = 1'b1;
+          ctrl_fsm_ns = PROG;
+        end
+        else
+          ctrl_fsm_ns = DONE;
+      end
+      // Write strobe is active for this state only; address advances here.
+      PROG: begin
+        we_d = 1'b0;
+        ctrl_fsm_ns = DONE;
+      end
+      // Idle between bytes; also detects the end-of-program marker.
+      DONE: begin
+        if (wdata_o == 32'h00000fff || (!rst_ni)) begin
+          ctrl_fsm_ns = DONE;
+          reset_d = 1'b1;
+        end
+        else if (rx_dv_i)
+          ctrl_fsm_ns = LOAD;
+        else
+          ctrl_fsm_ns = DONE;
+      end
+      // default: ctrl_fsm_ns = RESET;
+    endcase
+  end
+  assign rx_byte_d = rx_byte_i;
+  assign we_o = we_q;
+  assign addr_o = addr_q;
+  assign wdata_o = {rx_byte_q0, rx_byte_q1, rx_byte_q2, rx_byte_q3};
+  assign reset_o = reset_q;
+  // State registers. Asynchronous reset parks the FSM in DONE with reset_q
+  // set; prog_i synchronously restarts it in RESET with reset_q cleared.
+  always @(posedge clk_i or negedge rst_ni)
+    if (!rst_ni) begin
+      we_q <= 1'b0;
+      addr_q <= 12'b000000000000;
+      rx_byte_q0 <= 8'b00000000;
+      rx_byte_q1 <= 8'b00000000;
+      rx_byte_q2 <= 8'b00000000;
+      rx_byte_q3 <= 8'b00000000;
+      reset_q <= 1'b1;
+      byte_count <= 2'b00;
+      ctrl_fsm_cs <= DONE;
+    end
+    else if (prog_i) begin
+      we_q <= 1'b0;
+      addr_q <= 12'b000000000000;
+      rx_byte_q0 <= 8'b00000000;
+      rx_byte_q1 <= 8'b00000000;
+      rx_byte_q2 <= 8'b00000000;
+      rx_byte_q3 <= 8'b00000000;
+      reset_q <= 1'b0;
+      byte_count <= 2'b00;
+      ctrl_fsm_cs <= RESET;
+    end
+    else begin
+      we_q <= we_d;
+      if (ctrl_fsm_cs == LOAD) begin
+        // byte_count selects which byte lane the incoming byte fills.
+        if (byte_count == 2'b00) begin
+          rx_byte_q0 <= rx_byte_d;
+          byte_count <= 2'b01;
+        end
+        else if (byte_count == 2'b01) begin
+          rx_byte_q1 <= rx_byte_d;
+          byte_count <= 2'b10;
+        end
+        else if (byte_count == 2'b10) begin
+          rx_byte_q2 <= rx_byte_d;
+          byte_count <= 2'b11;
+        end
+        else begin
+          rx_byte_q3 <= rx_byte_d;
+          byte_count <= 2'b00;
+        end
+        addr_q <= addr_d;
+      end
+      // Advance to the next word address once the write has been issued.
+      if (ctrl_fsm_cs == PROG)
+        addr_q <= addr_d + 1'b1;
+      reset_q <= reset_d;
+      ctrl_fsm_cs <= ctrl_fsm_ns;
+    end
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..9619890
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,82 @@
+// instr_mem_top: muxes an SRAM port between the TL-UL bus (via
+// tlul_sram_adapter) and the ICCM controller. prog_rst_ni = 1 selects the
+// TL-UL side, prog_rst_ni = 0 selects the ICCM controller side.
+module instr_mem_top
+(
+  input clk_i,
+  input rst_ni,
+  input tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+// iccm controller interface
+  input [11:0] iccm_ctrl_addr,
+  input [31:0] iccm_ctrl_wdata,
+  input iccm_ctrl_we,
+  input prog_rst_ni,
+// sram interface
+  output logic csb,
+  output logic [11:0] addr_o,
+  output logic [31:0] wdata_o,
+  output logic [3:0] wmask_o,
+  output logic we_o,
+  input logic [31:0] rdata_i
+);
+logic rvalid;
+logic tl_we;
+logic [31:0] tl_wmask;
+logic [31:0] tl_wdata;
+logic [11:0] tl_addr;
+logic tl_req;
+logic [3:0] mask_sel;
+// Collapse the adapter's per-bit write mask into one enable per byte lane.
+assign mask_sel[0] = (tl_wmask[7:0] != 8'b0) ? 1'b1: 1'b0;
+assign mask_sel[1] = (tl_wmask[15:8] != 8'b0) ? 1'b1: 1'b0;
+assign mask_sel[2] = (tl_wmask[23:16] != 8'b0) ? 1'b1: 1'b0;
+assign mask_sel[3] = (tl_wmask[31:24] != 8'b0) ? 1'b1: 1'b0;
+// csb and we_o are driven inverted relative to the internal request/write
+// signals (presumably active-low SRAM controls; confirm against the macro).
+assign csb = ~(tl_req | iccm_ctrl_we);
+assign addr_o = (prog_rst_ni) ? tl_addr : iccm_ctrl_addr;
+assign wdata_o = (prog_rst_ni) ? tl_wdata : iccm_ctrl_wdata;
+assign we_o = ~((prog_rst_ni) ? tl_we : iccm_ctrl_we);
+// ICCM controller writes always update all four bytes.
+assign wmask_o = (prog_rst_ni) ? mask_sel : 4'b1111;
+ tlul_sram_adapter #(
+  .SramAw (12),
+  .SramDw (32),
+  .Outstanding (2),
+  .ByteAccess (1),
+  .ErrOnWrite (0), // 1: Writes not allowed, automatically error
+  .ErrOnRead (0) // 1: Reads not allowed, automatically error
+) inst_mem (
+  .clk_i (clk_i),
+  .rst_ni (rst_ni),
+  .tl_i (tl_i),
+  .tl_o (tl_o),
+  .req_o (tl_req),
+  .gnt_i (1'b1),
+  .we_o (tl_we),
+  .addr_o (tl_addr),
+  .wdata_o (tl_wdata),
+  .wmask_o (tl_wmask),
+  .rdata_i ((rst_ni) ? rdata_i: '0),
+  .rvalid_i (rvalid),
+  .rerror_i (2'b0)
+);
+ // Read-valid is the request delayed by one clock, suppressed after writes
+ // from either source. Reset is sampled synchronously here.
+ always_ff @(posedge clk_i) begin
+  if (!rst_ni) begin
+   rvalid <= 1'b0;
+  end else if (iccm_ctrl_we | tl_we) begin
+   rvalid <= 1'b0;
+  end else begin
+   rvalid <= tl_req;
+  end
+ end
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..0c645e6
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Company: IIS @ ETHZ - Federal Institute of Technology //
+// //
+// Engineers: Lei Li //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 12/01/2017 //
+// Design Name: FPU //
+// Module Name: iteration_div_sqrt_mvp //
+// Project Name: Private FPU //
+// Language: SystemVerilog //
+// //
+// Description: iteration unit for div and sqrt //
+// //
+// //
+// Revision: 03/14/2018 //
+// For div_sqrt_mvp //
+// One iteration cell for div/sqrt: a WIDTH-bit adder with a mode-dependent
+// carry-in, plus a small combinational transform of the 2-bit D input.
+module iteration_div_sqrt_mvp
+#(
+  parameter WIDTH=25
+)
+  (//Input
+  input logic [WIDTH-1:0] A_DI,
+  input logic [WIDTH-1:0] B_DI,
+  input logic Div_enable_SI,
+  input logic Div_start_dly_SI,
+  input logic Sqrt_enable_SI,
+  input logic [1:0] D_DI,
+  output logic [1:0] D_DO,
+  output logic [WIDTH-1:0] Sum_DO,
+  output logic Carry_out_DO
+  );
+  logic D_carry_D;
+  logic Sqrt_cin_D;
+  logic Cin_D;
+  assign D_DO[0]=~D_DI[0];
+  assign D_DO[1]=~(D_DI[1] ^ D_DI[0]);
+  assign D_carry_D=D_DI[1] | D_DI[0];
+  // Carry-in is forced to 0 in divide mode; in sqrt mode it is set when
+  // either D_DI bit is set. Div_start_dly_SI is not used inside this cell.
+  assign Sqrt_cin_D=Sqrt_enable_SI&&D_carry_D;
+  assign Cin_D=Div_enable_SI?1'b0:Sqrt_cin_D;
+  assign {Carry_out_DO,Sum_DO}=A_DI+B_DI+Cin_D;
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..2a67ee0
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,24 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Shared JTAG request/response bundle types.
+package jtag_pkg;
+ // Request bundle. Field names suggest the usual JTAG TAP inputs
+ // (clock, mode select, reset, data in) - confirm direction against the user.
+ typedef struct packed {
+ logic tck;
+ logic tms;
+ logic trst_n;
+ logic tdi;
+ } jtag_req_t;
+ // Default request: every field 0.
+ // NOTE(review): this also drives trst_n to 0; if trst_n is an active-low
+ // reset, the default holds the TAP in reset - confirm this is intended.
+ parameter jtag_req_t JTAG_REQ_DEFAULT = '0;
+ // Response bundle: data out plus a tdo_oe field (presumably output enable).
+ typedef struct packed {
+ logic tdo;
+ logic tdo_oe;
+ } jtag_rsp_t;
+ // Default response: every field 0.
+ parameter jtag_rsp_t JTAG_RSP_DEFAULT = '0;
+endpackage : jtag_pkg
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..424eb2e
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (, under the copyright of ETH Zurich and the
+// University of Bologna.
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB)
+/// If the input does not contain a one (i.e. all bits are zero), `empty_o` is asserted.
+/// Additionally `cnt_o` contains the maximum number of zeros - 1. For example:
+/// in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+/// in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+/// in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only).
+/// This speeds up simulation significantly.
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  /// Mode selection: 0 -> trailing zero, 1 -> leading zero
+  parameter bit MODE = 1'b0,
+  /// Dependent parameter. Do **not** change!
+  ///
+  /// Width of the output signal with the zero count.
+  parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH)
+) (
+  /// Input vector to be counted.
+  input logic [WIDTH-1:0] in_i,
+  /// Count of the leading / trailing zeros.
+  output logic [CNT_WIDTH-1:0] cnt_o,
+  /// Counter is empty: Asserted if all bits in in_i are zero.
+  output logic empty_o
+);
+  if (WIDTH == 1) begin : gen_degenerate_lzc
+    // Single-bit input: the count and the empty flag are both !in_i[0].
+    assign cnt_o[0] = !in_i[0];
+    assign empty_o = !in_i[0];
+  end else begin : gen_lzc
+    localparam int unsigned NumLevels = $clog2(WIDTH);
+    // pragma translate_off
+    initial begin
+      assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+    end
+    // pragma translate_on
+    // index_lut[j] holds the constant j; sel_nodes/index_nodes form a binary
+    // tree stored as a flat array (node 0 is the root, level L starts at 2**L-1).
+    logic [WIDTH-1:0][NumLevels-1:0] index_lut;
+    logic [2**NumLevels-1:0] sel_nodes;
+    logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes;
+    logic [WIDTH-1:0] in_tmp;
+    // reverse vector if required
+    always_comb begin : flip_vector
+      for (int unsigned i = 0; i < WIDTH; i++) begin
+        in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+      end
+    end
+    for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+      assign index_lut[j] = (NumLevels)'(unsigned'(j));
+    end
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels
+      if (unsigned'(level) == NumLevels - 1) begin : g_last_level
+        // Leaf level: each node looks at a pair of adjacent input bits and
+        // prefers the lower-indexed bit when it is set.
+        for (genvar k = 0; k < 2 ** level; k++) begin : g_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1];
+            assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1)
+              ? index_lut[k * 2] :
+                index_lut[k * 2 + 1];
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2];
+            assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2];
+          end
+          // if index is out of range
+          if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range
+            assign sel_nodes[2 ** level - 1 + k] = 1'b0;
+            assign index_nodes[2 ** level - 1 + k] = '0;
+          end
+        end
+      end else begin : g_not_last_level
+        // Inner levels: OR the children's select flags and forward the index
+        // of the first child when that child reports a set bit.
+        for (genvar l = 0; l < 2 ** level; l++) begin : g_level
+          assign sel_nodes[2 ** level - 1 + l] =
+              sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+          assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1)
+            ? index_nodes[2 ** (level + 1) - 1 + l * 2] :
+              index_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+        end
+      end
+    end
+    // Root of the tree: index of the selected bit, and whether any bit was set.
+    assign cnt_o = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}};
+    assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i);
+  end : gen_lzc
+endmodule : lzc
diff --git a/verilog/rtl/minus_one.v b/verilog/rtl/minus_one.v
new file mode 100644
index 0000000..f3575c9
--- /dev/null
+++ b/verilog/rtl/minus_one.v
@@ -0,0 +1,122 @@
+/*Author: Zhuxu
+Use parallel prefix tree structure to reduce a 16-bit number by one.
+stage 0: number of generation=16; number of logic operation=16; G_0[xx]=~i_operand[xx];
+stage 1: NOG=16; NOO=8; G_1[2n-1]=G_0[2n-1]&&G_0[2n-2]; n=8:1
+stage 2: NOG=16; NOO=7; G_2[2n-1]=G_1[2n-1]&&G_1[2n-3]; n=8:2
+stage 3: NOG=16; NOO=6; G_3[2n-1]=G_2[2n-1]&&G_2[2n-5]; n=8:3
+stage 4: NOG=16; NOO=4; G_4[2n-1]=G_3[2n-1]&&G_3[2n-9]; n=8:5
+stage 5: NOG=16; NOO=7; G_5[2n]=G_4[2n]&&G_4[2n-1]; n=7:1
+After stage 5, G_5[k] is the AND of ~i_operand[k:0], i.e. "bits k..0 are all
+zero", which is the borrow out of bit k when subtracting one.
+*/
+module minus_one(
+input [15:0]i_operand,
+output [15:0]o_result,
+output o_borrow
+);
+//stage 0
+wire [15:0]G_0;
+assign G_0=~i_operand;
+//stage 1: odd positions combine with their even neighbour (span 2)
+wire [15:0]G_1;
+assign G_1[1]=G_0[1]&G_0[0];
+assign G_1[3]=G_0[3]&G_0[2];
+assign G_1[5]=G_0[5]&G_0[4];
+assign G_1[7]=G_0[7]&G_0[6];
+assign G_1[9]=G_0[9]&G_0[8];
+assign G_1[11]=G_0[11]&G_0[10];
+assign G_1[13]=G_0[13]&G_0[12];
+assign G_1[15]=G_0[15]&G_0[14];
+assign G_1[0]=G_0[0];
+assign G_1[2]=G_0[2];
+assign G_1[4]=G_0[4];
+assign G_1[6]=G_0[6];
+assign G_1[8]=G_0[8];
+assign G_1[10]=G_0[10];
+assign G_1[12]=G_0[12];
+assign G_1[14]=G_0[14];
+//stage 2: odd positions extend their span to 4 bits
+wire [15:0]G_2;
+assign G_2[3]=G_1[3]&G_1[1];
+assign G_2[5]=G_1[5]&G_1[3];
+assign G_2[7]=G_1[7]&G_1[5];
+assign G_2[9]=G_1[9]&G_1[7];
+assign G_2[11]=G_1[11]&G_1[9];
+assign G_2[13]=G_1[13]&G_1[11];
+assign G_2[15]=G_1[15]&G_1[13];
+assign G_2[0]=G_1[0];
+assign G_2[2]=G_1[2];
+assign G_2[1]=G_1[1];
+assign G_2[4]=G_1[4];
+assign G_2[6]=G_1[6];
+assign G_2[8]=G_1[8];
+assign G_2[10]=G_1[10];
+assign G_2[12]=G_1[12];
+assign G_2[14]=G_1[14];
+//stage 3: odd positions extend their span to 8 bits
+wire [15:0]G_3;
+assign G_3[5]=G_2[5]&G_2[1];
+assign G_3[7]=G_2[7]&G_2[3];
+assign G_3[9]=G_2[9]&G_2[5];
+assign G_3[11]=G_2[11]&G_2[7];
+assign G_3[13]=G_2[13]&G_2[9];
+assign G_3[15]=G_2[15]&G_2[11];
+assign G_3[0]=G_2[0];
+assign G_3[2]=G_2[2];
+assign G_3[1]=G_2[1];
+assign G_3[4]=G_2[4];
+assign G_3[3]=G_2[3];
+assign G_3[6]=G_2[6];
+assign G_3[8]=G_2[8];
+assign G_3[10]=G_2[10];
+assign G_3[12]=G_2[12];
+assign G_3[14]=G_2[14];
+//stage 4: odd positions now cover everything down to bit 0
+wire [15:0]G_4;
+assign G_4[9]=G_3[9]&G_3[1];
+assign G_4[11]=G_3[11]&G_3[3];
+assign G_4[13]=G_3[13]&G_3[5];
+assign G_4[15]=G_3[15]&G_3[7];
+assign G_4[0]=G_3[0];
+assign G_4[2]=G_3[2];
+assign G_4[1]=G_3[1];
+assign G_4[4]=G_3[4];
+assign G_4[3]=G_3[3];
+assign G_4[6]=G_3[6];
+assign G_4[5]=G_3[5];
+assign G_4[8]=G_3[8];
+assign G_4[7]=G_3[7];
+assign G_4[10]=G_3[10];
+assign G_4[12]=G_3[12];
+assign G_4[14]=G_3[14];
+//stage 5: even positions pick up the full prefix from the odd bit below
+wire [15:0]G_5;
+assign G_5[2]=G_4[2]&G_4[1];
+assign G_5[4]=G_4[4]&G_4[3];
+assign G_5[6]=G_4[6]&G_4[5];
+assign G_5[8]=G_4[8]&G_4[7];
+assign G_5[10]=G_4[10]&G_4[9];
+assign G_5[12]=G_4[12]&G_4[11];
+assign G_5[14]=G_4[14]&G_4[13];
+assign G_5[1]=G_4[1];
+assign G_5[3]=G_4[3];
+assign G_5[5]=G_4[5];
+assign G_5[7]=G_4[7];
+assign G_5[9]=G_4[9];
+assign G_5[11]=G_4[11];
+assign G_5[13]=G_4[13];
+assign G_5[15]=G_4[15];
+assign G_5[0]=G_4[0];
+//stage 6: bit 0 always toggles; bit i toggles when all lower bits are zero
+assign o_result[0]=~i_operand[0];
+assign o_result[15:1]=(G_5[14:0]&(~i_operand[15:1]))|((~G_5[14:0])&i_operand[15:1]);
+//borrow out is set only when the operand is zero
+assign o_borrow=G_5[15];
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..590abe9
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,470 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Company: IIS @ ETHZ - Federal Institute of Technology //
+// //
+// Engineers: Lei Li //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 09/03/2018 //
+// Design Name: FPU //
+// Module Name: //
+// Project Name: //
+// Language: SystemVerilog //
+// //
+// Description: Floating point Normalizer/Rounding unit //
+// Since this module is designed as combinational logic, //
+// arbitrary register stages can be added for different //
+// frequencies in the wrapper module. //
+// //
+// //
+// //
+// Revision Date: 12/04/2018 //
+// Lei Li //
+// To address some requirements by Stefan //
+// //
+// //
+// //
+// //
+// //
+// //
+import defs_div_sqrt_mvp::*;
+// Normalization / rounding stage of the div-sqrt unit.
+//
+// Purely combinational. From the pre-normalized mantissa, exponent and sign
+// plus the operand classification flags it
+//   1. selects either a special-case result (NaN, infinity, zero, ...) or the
+//      normalized mantissa/exponent,
+//   2. rounds the mantissa according to RM_SI when Full_precision_SI is set,
+//   3. packs sign/exponent/mantissa into Result_DO for the selected format;
+//      formats narrower than 64 bit get all-ones in the unused upper bits.
+// Fflags_SO is {NV, DZ, OF, UF, NX}.
+module norm_div_sqrt_mvp
+  (//Inputs
+   input  logic [C_MANT_FP64+4:0]           Mant_in_DI,   // Include the needed 4-bit for rounding and hidden bit
+   input  logic signed [C_EXP_FP64+1:0]     Exp_in_DI,
+   input  logic                             Sign_in_DI,
+   input  logic                             Div_enable_SI,
+   input  logic                             Sqrt_enable_SI,
+   input  logic                             Inf_a_SI,
+   input  logic                             Inf_b_SI,
+   input  logic                             Zero_a_SI,
+   input  logic                             Zero_b_SI,
+   input  logic                             NaN_a_SI,
+   input  logic                             NaN_b_SI,
+   input  logic                             SNaN_SI,
+   input  logic [C_RM-1:0]                  RM_SI,
+   input  logic                             Full_precision_SI,
+   input  logic                             FP32_SI,
+   input  logic                             FP64_SI,
+   input  logic                             FP16_SI,
+   input  logic                             FP16ALT_SI,
+   //Outputs
+   output logic [C_EXP_FP64+C_MANT_FP64:0]  Result_DO,
+   output logic [4:0]                       Fflags_SO     //{NV,DZ,OF,UF,NX}
+   );
+  logic                                     Sign_res_D;
+  logic                                     NV_OP_S;
+  logic                                     Exp_OF_S;
+  logic                                     Exp_UF_S;
+  logic                                     Div_Zero_S;
+  logic                                     In_Exact_S;
+  /////////////////////////////////////////////////////////////////////////////
+  // Normalization                                                           //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64:0]                     Mant_res_norm_D;
+  logic [C_EXP_FP64-1:0]                    Exp_res_norm_D;
+  /////////////////////////////////////////////////////////////////////////////
+  // Right shift operations for negative exponents                           //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_EXP_FP64+1:0]                    Exp_Max_RS_FP64_D;
+  logic [C_EXP_FP32+1:0]                    Exp_Max_RS_FP32_D;
+  logic [C_EXP_FP16+1:0]                    Exp_Max_RS_FP16_D;
+  logic [C_EXP_FP16ALT+1:0]                 Exp_Max_RS_FP16ALT_D;
+  // Exponent after a (C_MANT_<fmt>+1)-bit right shift, for negative Exp_in_DI.
+  // These four values are computed but not consumed inside this module.
+  assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1;
+  assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1;
+  assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1;
+  assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1;
+  // Number of right shifts needed to turn a negative-exponent result into a
+  // denormal: (-Exp_in_DI) + 1. Only meaningful when Exp_in_DI is negative.
+  logic [C_EXP_FP64+1:0]                    Num_RS_D;
+  assign Num_RS_D=~Exp_in_DI+1+1;
+  // Shifted mantissa; the bits shifted out are kept for rounding/sticky.
+  logic [C_MANT_FP64:0]                     Mant_RS_D;
+  logic [C_MANT_FP64+4:0]                   Mant_forsticky_D;
+  assign {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D);
+  logic [C_EXP_FP64+1:0]                    Exp_subOne_D;
+  assign Exp_subOne_D = Exp_in_DI -1;
+  //normalization
+  logic [1:0]                               Mant_lower_D;
+  logic                                     Mant_sticky_bit_D;
+  logic [C_MANT_FP64+4:0]                   Mant_forround_D;
+  // Priority-ordered result selection. The first matching condition wins.
+  always_comb
+    begin
+      // Defaults: signed zero, no flag raised. Every branch below overrides
+      // only the signals that differ from these values.
+      Div_Zero_S      = 1'b0;
+      Exp_OF_S        = 1'b0;
+      Exp_UF_S        = 1'b0;
+      NV_OP_S         = 1'b0;
+      Mant_res_norm_D = '0;
+      Exp_res_norm_D  = '0;
+      Mant_forround_D = '0;
+      Sign_res_D      = Sign_in_DI;
+      if(NaN_a_SI || NaN_b_SI)             // a or b is NaN: return NaN, invalid only for signalling NaN
+        begin
+          Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D  = '1;
+          Sign_res_D      = 1'b0;
+          NV_OP_S         = SNaN_SI;
+        end
+      else if(Inf_a_SI)
+        begin
+          if((Div_enable_SI&&Inf_b_SI) || (Sqrt_enable_SI&&Sign_in_DI))  // Inf/Inf or sqrt(-Inf): return NaN
+            begin
+              Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D  = '1;
+              Sign_res_D      = 1'b0;
+              NV_OP_S         = 1'b1;
+            end
+          else                               // Inf/x or sqrt(+Inf): infinity
+            begin
+              // NOTE(review): OF is raised for an exact infinity here, and
+              // for x/Inf below; kept exactly as in the original code.
+              Exp_OF_S        = 1'b1;
+              Exp_res_norm_D  = '1;
+            end
+        end
+      else if(Div_enable_SI&&Inf_b_SI)       // x/Inf: zero
+        begin
+          Exp_OF_S        = 1'b1;
+        end
+      else if(Zero_a_SI)
+        begin
+          if(Div_enable_SI&&Zero_b_SI)       // 0/0: return NaN
+            begin
+              Div_Zero_S      = 1'b1;
+              Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D  = '1;
+              Sign_res_D      = 1'b0;
+              NV_OP_S         = 1'b1;
+            end
+          // otherwise 0/x or sqrt(0): signed zero, i.e. the defaults
+        end
+      else if(Div_enable_SI&&(Zero_b_SI))    // x/0: infinity, divide-by-zero flag
+        begin
+          Div_Zero_S      = 1'b1;
+          Exp_res_norm_D  = '1;
+        end
+      else if(Sign_in_DI&&Sqrt_enable_SI)    // sqrt(-a): return NaN
+        begin
+          Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D  = '1;
+          Sign_res_D      = 1'b0;
+          NV_OP_S         = 1'b1;
+        end
+      else if((Exp_in_DI[C_EXP_FP64:0]=='0))
+        begin
+          if(Mant_in_DI!='0)                 // Exp=0, Mant!=0: denormal
+            begin
+              Exp_UF_S        = 1'b1;
+              Mant_res_norm_D = {1'b0,Mant_in_DI[C_MANT_FP64+4:5]};
+              Mant_forround_D = {Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} };
+            end
+          // otherwise the result is zero, i.e. the defaults
+        end
+      else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4]))  // Exp=1 without leading one: denormal
+        begin
+          Exp_UF_S        = 1'b1;
+          Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+4:4];
+          Mant_forround_D = {Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+        end
+      else if(Exp_in_DI[C_EXP_FP64+1])       // negative exponent: use the right-shifted mantissa
+        begin
+          Exp_UF_S        = 1'b1;
+          Mant_res_norm_D = {Mant_RS_D[C_MANT_FP64:0]};
+          Mant_forround_D = {Mant_forsticky_D[C_MANT_FP64+4:0]};
+        end
+      else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) )  // exponent exceeds the format: overflow
+        begin
+          Exp_OF_S        = 1'b1;
+          Exp_res_norm_D  = '1;
+        end
+      else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) | ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )  // exponent field all ones
+        begin
+          if(~Mant_in_DI[C_MANT_FP64+4])     // MSB=0: shifting left by one brings the exponent back in range
+            begin
+              Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+3:3];
+              Exp_res_norm_D  = Exp_subOne_D;
+              Mant_forround_D = {Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+            end
+          else                               // MSB=1: overflow to infinity
+            begin
+              Exp_OF_S        = 1'b1;
+              Exp_res_norm_D  = '1;
+            end
+        end
+      else if(Mant_in_DI[C_MANT_FP64+4])     // normal numbers with 1.XXX
+        begin
+          Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+4:4];
+          Exp_res_norm_D  = Exp_in_DI[C_EXP_FP64-1:0];
+          Mant_forround_D = {Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+        end
+      else                                   // normal numbers with 0.1XX
+        begin
+          Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+3:3];
+          Exp_res_norm_D  = Exp_subOne_D;
+          Mant_forround_D = {Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+        end
+    end
+  /////////////////////////////////////////////////////////////////////////////
+  // Rounding enable only for full precision (Full_precision_SI==1'b1)       //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64:0]                     Mant_upper_D;
+  logic [C_MANT_FP64+1:0]                   Mant_upperRounded_D;
+  logic                                     Mant_roundUp_S;
+  logic                                     Mant_rounded_S;
+  // Split the normalized mantissa per format into the kept part
+  // (Mant_upper_D), the two bits below it (Mant_lower_D) and a sticky bit.
+  always_comb
+    begin
+      if(FP32_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0];
+        end
+      else if(FP64_SI)
+        begin
+          Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0];
+          Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3];
+          Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0];
+        end
+      else if(FP16_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30];
+        end
+      else //FP16ALT
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30];
+        end
+    end
+  // Set when any bit below the kept mantissa is non-zero (result is inexact).
+  assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D;
+  // Decide whether to add one LSB. The C_RM_* rounding-mode constants are
+  // expected to come from the imported defs_div_sqrt_mvp package.
+  always_comb
+    begin
+      Mant_roundUp_S = 1'b0;
+      case (RM_SI)
+        C_RM_NEAREST :   // round to nearest, ties to even (LSB of the kept mantissa breaks the tie)
+          Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | (FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) );
+        C_RM_TRUNC   :   // round towards zero
+          Mant_roundUp_S = 0;
+        C_RM_PLUSINF :   // round towards +infinity
+          Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI;
+        C_RM_MINUSINF:   // round towards -infinity
+          Mant_roundUp_S = Mant_rounded_S & Sign_in_DI;
+        default      :
+          Mant_roundUp_S = 0;
+      endcase // case (RM_SI)
+    end // always_comb begin
+  logic                                     Mant_renorm_S;
+  logic [C_MANT_FP64:0]                     Mant_roundUp_Vector_S; // for all the formats
+  // One-hot increment placed at the LSB position of the selected format.
+  assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)};
+  assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S;
+  // Carry out of the rounding addition: mantissa needs one more right shift.
+  assign Mant_renorm_S = Mant_upperRounded_D[C_MANT_FP64+1];
+  /////////////////////////////////////////////////////////////////////////////
+  // Renormalization for Rounding                                            //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]                   Mant_res_round_D;
+  logic [C_EXP_FP64-1:0]                    Exp_res_round_D;
+  assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit
+  assign Exp_res_round_D = Exp_res_norm_D+Mant_renorm_S;
+  /////////////////////////////////////////////////////////////////////////////
+  // Output Assignments                                                      //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]                   Mant_before_format_ctl_D;
+  logic [C_EXP_FP64-1:0]                    Exp_before_format_ctl_D;
+  // Rounded values are used only in full-precision mode.
+  assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D;
+  assign Exp_before_format_ctl_D = Full_precision_SI ? Exp_res_round_D : Exp_res_norm_D;
+  always_comb //NaN Boxing
+    begin
+      if(FP32_SI)
+        begin
+          Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]};
+        end
+      else if(FP64_SI)
+        begin
+          Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]};
+        end
+      else if(FP16_SI)
+        begin
+          Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]};
+        end
+      else
+        begin
+          Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]};
+        end
+    end
+  // Inexact is always flagged when rounding is disabled (reduced precision).
+  assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S;
+  assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX}
+endmodule // norm_div_sqrt_mvp
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..62bd147
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,104 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Company: IIS @ ETHZ - Federal Institute of Technology //
+// //
+// Engineers: Lei Li //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 10/04/2018 //
+// Design Name: FPU //
+// Module Name: //
+// Project Name: Private FPU //
+// Language: SystemVerilog //
+// //
+// Description: non-restoring binary divider / square root //
+// //
+// Revision Date: 12/04/2018 //
+// Lei Li //
+// To address some requirements by Stefan and add low power //
+// control for special cases //
+// //
+import defs_div_sqrt_mvp::*;
+// Wrapper around control_mvp.
+//
+// Contains no logic of its own: it instantiates control_mvp and maps the
+// operand ports (Mant_a/Exp_a -> Numerator/Exp_num, Mant_b/Exp_b ->
+// Denominator/Exp_den) and the pre-normalization result ports
+// (Mant_result_prenorm/Exp_result_prenorm -> Mant_z/Exp_z). All other ports
+// are passed straight through.
+module nrbd_nrsc_mvp
+  (//Input
+   input  logic                      Clk_CI,
+   input  logic                      Rst_RBI,
+   input  logic                      Div_start_SI,
+   input  logic                      Sqrt_start_SI,
+   input  logic                      Start_SI,
+   input  logic                      Kill_SI,
+   input  logic                      Special_case_SBI,
+   input  logic                      Special_case_dly_SBI,
+   input  logic [C_PC-1:0]           Precision_ctl_SI,
+   input  logic [1:0]                Format_sel_SI,
+   input  logic [C_MANT_FP64:0]      Mant_a_DI,
+   input  logic [C_MANT_FP64:0]      Mant_b_DI,
+   input  logic [C_EXP_FP64:0]       Exp_a_DI,
+   input  logic [C_EXP_FP64:0]       Exp_b_DI,
+   //output
+   output logic                      Div_enable_SO,
+   output logic                      Sqrt_enable_SO,
+   output logic                      Full_precision_SO,
+   output logic                      FP32_SO,
+   output logic                      FP64_SO,
+   output logic                      FP16_SO,
+   output logic                      FP16ALT_SO,
+   output logic                      Ready_SO,
+   output logic                      Done_SO,
+   output logic [C_MANT_FP64+4:0]    Mant_z_DO,
+   output logic [C_EXP_FP64+1:0]     Exp_z_DO
+   );
+  // Delayed start strobes from control_mvp; connected but not used here.
+  logic Div_start_dly_S,Sqrt_start_dly_S;
+  control_mvp control_U0
+  ( .Clk_CI                  (Clk_CI                ),
+    .Rst_RBI                 (Rst_RBI               ),
+    .Div_start_SI            (Div_start_SI          ),
+    .Sqrt_start_SI           (Sqrt_start_SI         ),
+    .Start_SI                (Start_SI              ),
+    .Kill_SI                 (Kill_SI               ),
+    .Special_case_SBI        (Special_case_SBI      ),
+    .Special_case_dly_SBI    (Special_case_dly_SBI  ),
+    .Precision_ctl_SI        (Precision_ctl_SI      ),
+    .Format_sel_SI           (Format_sel_SI         ),
+    .Numerator_DI            (Mant_a_DI             ),
+    .Exp_num_DI              (Exp_a_DI              ),
+    .Denominator_DI          (Mant_b_DI             ),
+    .Exp_den_DI              (Exp_b_DI              ),
+    .Div_start_dly_SO        (Div_start_dly_S       ),
+    .Sqrt_start_dly_SO       (Sqrt_start_dly_S      ),
+    .Div_enable_SO           (Div_enable_SO         ),
+    .Sqrt_enable_SO          (Sqrt_enable_SO        ),
+    .Full_precision_SO       (Full_precision_SO     ),
+    .FP32_SO                 (FP32_SO               ),
+    .FP64_SO                 (FP64_SO               ),
+    .FP16_SO                 (FP16_SO               ),
+    .FP16ALT_SO              (FP16ALT_SO            ),
+    .Ready_SO                (Ready_SO              ),
+    .Done_SO                 (Done_SO               ),
+    .Mant_result_prenorm_DO  (Mant_z_DO             ),
+    .Exp_result_prenorm_DO   (Exp_z_DO              )
+  );
+endmodule // nrbd_nrsc_mvp
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..9e0d25f
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,425 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Company: IIS @ ETHZ - Federal Institute of Technology //
+// //
+// Engineers: Lei Li // //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 01/03/2018 //
+// Design Name: FPU //
+// Module Name: //
+// Project Name: Private FPU //
+// Language: SystemVerilog //
+// //
+// Description: decode and data preparation //
+// //
+// Revision Date: 12/04/2018 //
+// Lei Li //
+// To address some requirements by Stefan and add low power //
+// control for special cases //
+// //
+// //
+import defs_div_sqrt_mvp::*;
+// Operand decode and data preparation for the div/sqrt unit.
+//
+// On every accepted start (Div_start_SI or Sqrt_start_SI while Ready_SI) the
+// module
+//   - unpacks sign/exponent/mantissa of both operands for the format chosen
+//     by Format_sel_SI (00: FP32, 01: FP64, 10: FP16, 11: FP16ALT), widening
+//     them to the FP64 field widths,
+//   - classifies each operand as zero / infinity / NaN and detects a
+//     signalling NaN,
+//   - shifts each mantissa left by the count returned by lzc and corrects the
+//     exponent accordingly,
+//   - registers all of the above together with the result sign and the
+//     rounding mode.
+// Between starts every register holds its value. Reset is asynchronous,
+// active low (Rst_RBI).
+module preprocess_mvp
+  (
+   input  logic                   Clk_CI,
+   input  logic                   Rst_RBI,
+   input  logic                   Div_start_SI,
+   input  logic                   Sqrt_start_SI,
+   input  logic                   Ready_SI,
+   //Input Operands
+   input  logic [C_OP_FP64-1:0]   Operand_a_DI,
+   input  logic [C_OP_FP64-1:0]   Operand_b_DI,
+   input  logic [C_RM-1:0]        RM_SI,          //Rounding Mode
+   input  logic [C_FS-1:0]        Format_sel_SI,  // Format Selection
+   // to control
+   output logic                   Start_SO,
+   output logic [C_EXP_FP64:0]    Exp_a_DO_norm,
+   output logic [C_EXP_FP64:0]    Exp_b_DO_norm,
+   output logic [C_MANT_FP64:0]   Mant_a_DO_norm,
+   output logic [C_MANT_FP64:0]   Mant_b_DO_norm,
+   output logic [C_RM-1:0]        RM_dly_SO,
+   output logic                   Sign_z_DO,
+   output logic                   Inf_a_SO,
+   output logic                   Inf_b_SO,
+   output logic                   Zero_a_SO,
+   output logic                   Zero_b_SO,
+   output logic                   NaN_a_SO,
+   output logic                   NaN_b_SO,
+   output logic                   SNaN_SO,
+   output logic                   Special_case_SBO,
+   output logic                   Special_case_dly_SBO
+   );
+  //Hidden Bits
+  logic                          Hb_a_D;
+  logic                          Hb_b_D;
+  logic [C_EXP_FP64-1:0]         Exp_a_D;
+  logic [C_EXP_FP64-1:0]         Exp_b_D;
+  logic [C_MANT_FP64-1:0]        Mant_a_NonH_D;
+  logic [C_MANT_FP64-1:0]        Mant_b_NonH_D;
+  logic [C_MANT_FP64:0]          Mant_a_D;
+  logic [C_MANT_FP64:0]          Mant_b_D;
+  /////////////////////////////////////////////////////////////////////////////
+  // Disassemble operands
+  /////////////////////////////////////////////////////////////////////////////
+  logic                          Sign_a_D,Sign_b_D;
+  logic                          Start_S;
+  // Exponents are zero-extended at the top, mantissas are left-aligned and
+  // zero-padded at the bottom so that all formats share the FP64 layout.
+  always_comb
+    begin
+      case(Format_sel_SI)
+        2'b00:  // FP32
+          begin
+            Sign_a_D = Operand_a_DI[C_OP_FP32-1];
+            Sign_b_D = Operand_b_DI[C_OP_FP32-1];
+            Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]};
+            Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]};
+            Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0};
+            Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0};
+          end
+        2'b01:  // FP64
+          begin
+            Sign_a_D = Operand_a_DI[C_OP_FP64-1];
+            Sign_b_D = Operand_b_DI[C_OP_FP64-1];
+            Exp_a_D  = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64];
+            Exp_b_D  = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64];
+            Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0];
+            Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0];
+          end
+        2'b10:  // FP16
+          begin
+            Sign_a_D = Operand_a_DI[C_OP_FP16-1];
+            Sign_b_D = Operand_b_DI[C_OP_FP16-1];
+            Exp_a_D  = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]};
+            Exp_b_D  = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]};
+            Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0};
+            Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0};
+          end
+        2'b11:  // FP16ALT
+          begin
+            Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1];
+            Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1];
+            Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+            Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+            Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0};
+            Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0};
+          end
+      endcase
+    end
+  assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D};
+  assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D};
+  assign Hb_a_D = | Exp_a_D; // hidden bit: set for any non-zero exponent
+  assign Hb_b_D = | Exp_b_D; // hidden bit: set for any non-zero exponent
+  assign Start_S= Div_start_SI | Sqrt_start_SI;
+  /////////////////////////////////////////////////////////////////////////////
+  // preliminary checks for infinite/zero/NaN operands                       //
+  /////////////////////////////////////////////////////////////////////////////
+  logic                          Mant_a_prenorm_zero_S;
+  logic                          Mant_b_prenorm_zero_S;
+  logic                          Exp_a_prenorm_zero_S;
+  logic                          Exp_b_prenorm_zero_S;
+  assign Exp_a_prenorm_zero_S = ~Hb_a_D;
+  assign Exp_b_prenorm_zero_S = ~Hb_b_D;
+  logic                          Exp_a_prenorm_Inf_NaN_S;
+  logic                          Exp_b_prenorm_Inf_NaN_S;
+  logic                          Mant_a_prenorm_QNaN_S;
+  logic                          Mant_a_prenorm_SNaN_S;
+  logic                          Mant_b_prenorm_QNaN_S;
+  logic                          Mant_b_prenorm_SNaN_S;
+  // QNaN pattern: mantissa MSB set and all lower bits clear.
+  // SNaN pattern: mantissa MSB clear and at least one lower bit set.
+  // The QNaN signals are computed but not consumed inside this module.
+  assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+  assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+  assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+  assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+  // Per-format checks on the raw operand fields: mantissa all zero, and
+  // exponent equal to the format's Inf/NaN exponent constant.
+  always_comb
+    begin
+      case(Format_sel_SI)
+        2'b00:  // FP32
+          begin
+            Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+            Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+            Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+            Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+          end
+        2'b01:  // FP64
+          begin
+            Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+            Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+            Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+            Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+          end
+        2'b10:  // FP16
+          begin
+            Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+            Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+            Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+            Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+          end
+        2'b11:  // FP16ALT
+          begin
+            Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+            Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+            Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+            Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+          end
+      endcase
+    end
+  // Classification flags: *_SN is the next value, *_SP the registered value.
+  // New values are taken only when an operation is accepted.
+  logic                          Zero_a_SN,Zero_a_SP;
+  logic                          Zero_b_SN,Zero_b_SP;
+  logic                          Inf_a_SN,Inf_a_SP;
+  logic                          Inf_b_SN,Inf_b_SP;
+  logic                          NaN_a_SN,NaN_a_SP;
+  logic                          NaN_b_SN,NaN_b_SP;
+  logic                          SNaN_SN,SNaN_SP;
+  assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP;
+  assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP;
+  assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP;
+  assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP;
+  assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP;
+  assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP;
+  assign SNaN_SN = (Start_S&&Ready_SI) ? ((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP;
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          Zero_a_SP <='0;
+          Zero_b_SP <='0;
+          Inf_a_SP <='0;
+          Inf_b_SP <='0;
+          NaN_a_SP <='0;
+          NaN_b_SP <='0;
+          SNaN_SP <= '0;
+        end
+      else
+        begin
+          Inf_a_SP <=Inf_a_SN;
+          Inf_b_SP <=Inf_b_SN;
+          Zero_a_SP <=Zero_a_SN;
+          Zero_b_SP <=Zero_b_SN;
+          NaN_a_SP <=NaN_a_SN;
+          NaN_b_SP <=NaN_b_SN;
+          SNaN_SP <= SNaN_SN;
+        end
+    end
+  /////////////////////////////////////////////////////////////////////////////
+  // Low power control
+  /////////////////////////////////////////////////////////////////////////////
+  // High in the cycle an operation is accepted and no operand is special.
+  // Division looks at both operands; square root looks at operand a only and
+  // additionally treats a negative operand as special.
+  assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN |  Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI);
+  // Registered copy of Special_case_SBO: loaded on an accepted start and held
+  // otherwise (the last two branches together keep the current value).
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          Special_case_dly_SBO <= '0;
+        end
+      else if((Start_S&&Ready_SI))
+        begin
+          Special_case_dly_SBO <= Special_case_SBO;
+        end
+      else if(Special_case_dly_SBO)
+        begin
+          Special_case_dly_SBO <= 1'b1;
+        end
+      else
+        begin
+          Special_case_dly_SBO <= '0;
+        end
+    end
+  /////////////////////////////////////////////////////////////////////////////
+  // Delay sign for normalization and round                                  //
+  /////////////////////////////////////////////////////////////////////////////
+  logic                          Sign_z_DN;
+  logic                          Sign_z_DP;
+  // Division: XOR of the operand signs. Square root: sign of operand a.
+  always_comb
+    begin
+      if(Div_start_SI&&Ready_SI)
+        Sign_z_DN = Sign_a_D ^ Sign_b_D;
+      else if(Sqrt_start_SI&&Ready_SI)
+        Sign_z_DN = Sign_a_D;
+      else
+        Sign_z_DN = Sign_z_DP;
+    end
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          Sign_z_DP <= '0;
+        end
+      else
+        begin
+          Sign_z_DP <= Sign_z_DN;
+        end
+    end
+  // Rounding mode captured on an accepted start.
+  logic [C_RM-1:0]               RM_DN;
+  logic [C_RM-1:0]               RM_DP;
+  always_comb
+    begin
+      if(Start_S&&Ready_SI)
+        RM_DN = RM_SI;
+      else
+        RM_DN = RM_DP;
+    end
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          RM_DP <= '0;
+        end
+      else
+        begin
+          RM_DP <= RM_DN;
+        end
+    end
+  assign RM_dly_SO = RM_DP;
+  // Mantissa normalization. cnt_o of lzc is used as the left-shift amount,
+  // so it is presumably the number of leading zeros (MODE=1) - confirm
+  // against the lzc module. empty_o is connected but not used here.
+  logic [5:0]                    Mant_leadingOne_a, Mant_leadingOne_b;
+  logic                          Mant_zero_S_a,Mant_zero_S_b;
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1 )
+  ) LOD_Ua (
+    .in_i    ( Mant_a_D ),
+    .cnt_o   ( Mant_leadingOne_a ),
+    .empty_o ( Mant_zero_S_a )
+  );
+  logic [C_MANT_FP64:0]          Mant_a_norm_DN,Mant_a_norm_DP;
+  assign Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP;
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          Mant_a_norm_DP <= '0;
+        end
+      else
+        begin
+          Mant_a_norm_DP<=Mant_a_norm_DN;
+        end
+    end
+  // Exponent is reduced by the shift amount; one is added back whenever a
+  // shift took place (covers the process of denormal numbers).
+  logic [C_EXP_FP64:0]           Exp_a_norm_DN,Exp_a_norm_DP;
+  assign Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP;
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          Exp_a_norm_DP <= '0;
+        end
+      else
+        begin
+          Exp_a_norm_DP<=Exp_a_norm_DN;
+        end
+    end
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1 )
+  ) LOD_Ub (
+    .in_i    ( Mant_b_D ),
+    .cnt_o   ( Mant_leadingOne_b ),
+    .empty_o ( Mant_zero_S_b )
+  );
+  logic [C_MANT_FP64:0]          Mant_b_norm_DN,Mant_b_norm_DP;
+  assign Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP;
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          Mant_b_norm_DP <= '0;
+        end
+      else
+        begin
+          Mant_b_norm_DP<=Mant_b_norm_DN;
+        end
+    end
+  logic [C_EXP_FP64:0]           Exp_b_norm_DN,Exp_b_norm_DP;
+  assign Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP;
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if(~Rst_RBI)
+        begin
+          Exp_b_norm_DP <= '0;
+        end
+      else
+        begin
+          Exp_b_norm_DP<=Exp_b_norm_DN;
+        end
+    end
+  /////////////////////////////////////////////////////////////////////////////
+  // Output assignments                                                      //
+  /////////////////////////////////////////////////////////////////////////////
+  assign Start_SO=Start_S;
+  assign Exp_a_DO_norm=Exp_a_norm_DP;
+  assign Exp_b_DO_norm=Exp_b_norm_DP;
+  assign Mant_a_DO_norm=Mant_a_norm_DP;
+  assign Mant_b_DO_norm=Mant_b_norm_DP;
+  assign Sign_z_DO=Sign_z_DP;
+  assign Inf_a_SO=Inf_a_SP;
+  assign Inf_b_SO=Inf_b_SP;
+  assign Zero_a_SO=Zero_a_SP;
+  assign Zero_b_SO=Zero_b_SP;
+  assign NaN_a_SO=NaN_a_SP;
+  assign NaN_b_SO=NaN_b_SP;
+  assign SNaN_SO=SNaN_SP;
+endmodule // preprocess_mvp
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..e379f8a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,127 @@
+// N:1 arbiter module
+// Verilog parameter
+// N: Number of request ports
+// DW: Data width
+// DataPort: Set to 1 to enable the data port. Otherwise that port will be ignored.
+// EnReqStabA: Checks whether requests remain asserted until granted
+// This is the original implementation of the arbiter which relies on parallel prefix computing
+// optimization to optimize the request / arbiter tree. Not all synthesis tools may support this.
+// Note that the currently winning request is held if the data sink is not ready. This behavior is
+// required by some interconnect protocols (AXI, TL). The module contains an assertion that checks
+// this behavior.
+// Also, this module contains a request stability assertion that checks that requests stay asserted
+// until they have been served. This assertion can be optionally disabled by setting EnReqStabA to
+// zero. This is a non-functional parameter and does not affect the design's behavior.
+// See also: prim_arbiter_tree
+// N:1 arbiter built on a prefix-OR of the request vector.
+//
+// The lowest-indexed request in arb_req wins. arb_req is the request vector
+// restricted to the positions allowed by `mask`, or the full request vector
+// when no masked request is pending. After an accepted grant the mask is
+// moved past the winner; while the sink is not ready the mask is set so that
+// the current winner keeps winning.
+//
+// Ports:
+//   req_i   - one request bit per requester
+//   data_i  - one DW-bit word per requester (used only if EnDataPort == 1)
+//   gnt_o   - one-hot grant, driven only while ready_i is high
+//   idx_o   - index of the winning requester
+//   valid_o - high when any request is pending
+//   data_o  - data word of the winning requester
+//   ready_i - sink is ready to accept the winning request
+module prim_arbiter_ppc #(
+  parameter int unsigned N  = 8,
+  parameter int unsigned DW = 32,
+  // Configurations
+  // EnDataPort: {0, 1}, if 0, input data will be ignored
+  parameter bit EnDataPort = 1,
+  // Non-functional parameter to switch on the request stability assertion
+  parameter bit EnReqStabA = 1,
+  // Derived parameters
+  localparam int IdxW = $clog2(N)
+) (
+  input                    clk_i,
+  input                    rst_ni,
+  input        [ N-1:0]    req_i,
+  input        [DW-1:0]    data_i [N],
+  output logic [ N-1:0]    gnt_o,
+  output logic [IdxW-1:0]  idx_o,
+  output logic             valid_o,
+  output logic [DW-1:0]    data_o,
+  input                    ready_i
+);
+  // this case is basically just a bypass
+  if (N == 1) begin : gen_degenerate_case
+    assign valid_o  = req_i[0];
+    assign data_o   = data_i[0];
+    assign gnt_o[0] = valid_o & ready_i;
+    assign idx_o    = '0;
+  end else begin : gen_normal_case
+    logic [N-1:0] masked_req;
+    logic [N-1:0] ppc_out;
+    logic [N-1:0] arb_req;
+    logic [N-1:0] mask, mask_next;
+    logic [N-1:0] winner;
+    // Prefer requests inside the mask; fall back to all requests otherwise.
+    assign masked_req = mask & req_i;
+    assign arb_req = (|masked_req) ? masked_req : req_i;
+    // PPC
+    //   Even below code looks O(n) but DC optimizes it to O(log(N))
+    //   Using Parallel Prefix Computation
+    // ppc_out[i] is set when any of arb_req[i:0] is set.
+    always_comb begin
+      ppc_out[0] = arb_req[0];
+      for (int i = 1 ; i < N ; i++) begin
+        ppc_out[i] = ppc_out[i-1] | arb_req[i];
+      end
+    end
+    // Grant Generation: the 0->1 step of ppc_out marks the lowest set bit of
+    // arb_req.
+    assign winner = ppc_out ^ {ppc_out[N-2:0], 1'b0};
+    assign gnt_o    = (ready_i) ? winner : '0;
+    assign valid_o  = |req_i;
+    // Mask Generation: all positions strictly above the current winner.
+    assign mask_next = {ppc_out[N-2:0], 1'b0};
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        mask <= '0;
+      end else if (valid_o && ready_i) begin
+        // Latch only when requests accepted
+        mask <= mask_next;
+      end else if (valid_o && !ready_i) begin
+        // Downstream isn't yet ready so, keep current request alive. (First come first serve)
+        mask <= ppc_out;
+      end
+    end
+    if (EnDataPort == 1) begin: gen_datapath
+      // Select the data word of the (one-hot) winner; zero when there is none.
+      always_comb begin
+        data_o = '0;
+        for (int i = 0 ; i < N ; i++) begin
+          if (winner[i]) begin
+            data_o = data_i[i];
+          end
+        end
+      end
+    end else begin: gen_nodatapath
+      assign data_o = '1;
+      // TODO: waive data_i from NOT_READ error
+    end
+    // Encode the one-hot winner as an index; zero when there is none.
+    always_comb begin
+      idx_o = '0;
+      for (int unsigned i = 0 ; i < N ; i++) begin
+        if (winner[i]) begin
+          idx_o = i[IdxW-1:0];
+        end
+      end
+    end
+  end
+endmodule : prim_arbiter_ppc
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..177d2c0
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,12 @@
+// Clock-gating wrapper around the sky130_fd_sc_hd__dlclkp_1 library cell.
+// The cell's GATE input is driven with (en_i | test_en_i), so asserting
+// test_en_i requests the clock regardless of the functional enable.
+module prim_clock_gating (
+  input        clk_i,      // clock to be gated
+  input        en_i,       // functional enable
+  input        test_en_i,  // test enable, ORed with en_i
+  output logic clk_o       // gated clock, driven by the library cell
+);
+  sky130_fd_sc_hd__dlclkp_1 CG (
+    .CLK  (clk_i),
+    .GCLK (clk_o),
+    .GATE (en_i | test_en_i)
+  );
+endmodule : prim_clock_gating
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..ec635b8
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,63 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Primitive counter-based input filter, with enable.
+// Configurable number of cycles. Cheaper version of filter for
+// large values of #Cycles
+// when in reset, stored value is zero
+// when enable is false, output is input
+// when enable is true, output is stored value,
+// new input must be opposite value from stored value for
+// #Cycles before switching to new value.
+module prim_filter_ctr #(parameter int unsigned Cycles = 4) (
+  input  clk_i,
+  input  rst_ni,    // asynchronous reset, active low
+  input  enable_i,  // 1: output the filtered value, 0: bypass the filter
+  input  filter_i,  // raw input
+  output filter_o   // filtered (or bypassed) output
+);
+  // The counter saturates at Cycles-1.
+  localparam int unsigned CTR_WIDTH = $clog2(Cycles);
+  localparam logic [CTR_WIDTH-1:0] CYCLESM1 = (CTR_WIDTH)'(Cycles-1);
+  logic [CTR_WIDTH-1:0] diff_ctr_q, diff_ctr_d;
+  logic filter_q, stored_value_q, update_stored_value;
+  // One-cycle delayed copy of the input, used to detect input changes.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      filter_q <= 1'b0;
+    end else begin
+      filter_q <= filter_i;
+    end
+  end
+  // Filtered value: only updated once the input has been stable long enough.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      stored_value_q <= 1'b0;
+    end else if (update_stored_value) begin
+      stored_value_q <= filter_i;
+    end
+  end
+  // Stability counter register.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      diff_ctr_q <= {CTR_WIDTH{1'b0}};
+    end else begin
+      diff_ctr_q <= diff_ctr_d;
+    end
+  end
+  // always look for differences, even if not filter enabled
+  assign diff_ctr_d =
+      (filter_i != filter_q)   ? '0       : // restart
+      (diff_ctr_q == CYCLESM1) ? CYCLESM1 : // saturate
+      (diff_ctr_q + 1'b1);                  // count up
+  // Accept the new input value in the cycle the counter reaches saturation.
+  assign update_stored_value = (diff_ctr_d == CYCLESM1);
+  assign filter_o = enable_i ? stored_value_q : filter_i;
+endmodule : prim_filter_ctr
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..ae8f6b6
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,29 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Clock inverter
+// Varies on the process
+module prim_generic_clock_inv #(
+  // When set, a clock mux is instantiated so scan mode can bypass the inversion.
+  parameter bit HasScanMode = 1'b1
+) (
+  input        clk_i,
+  input        scanmode_i,  // selects the non-inverted clock when HasScanMode is set
+  output logic clk_no       // Inverted
+);
+  if (HasScanMode) begin : gen_scan
+    // sel_i == 0: inverted clock; sel_i == 1: clk_i passed through unchanged.
+    prim_generic_clock_mux2 i_dft_tck_mux (
+      .clk0_i ( ~clk_i     ),
+      .clk1_i ( clk_i      ), // bypass the inverted clock for testing
+      .sel_i  ( scanmode_i ),
+      .clk_o  ( clk_no     )
+    );
+  end else begin : gen_noscan
+    // scanmode_i is deliberately consumed so that it is not reported as unused.
+    logic unused_scanmode;
+    assign unused_scanmode = scanmode_i;
+    assign clk_no = ~clk_i;
+  end
+endmodule : prim_generic_clock_inv
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..8f296e6
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,22 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Generic two-input clock multiplexer: clk_o is clk1_i when sel_i is high,
+// clk0_i otherwise.
+module prim_generic_clock_mux2 #(
+  parameter bit NoFpgaBufG = 1'b0 // this parameter serves no function in the generic model
+) (
+  input        clk0_i,  // selected when sel_i == 0
+  input        clk1_i,  // selected when sel_i == 1
+  input        sel_i,
+  output logic clk_o
+);
+  assign clk_o = (sel_i) ? clk1_i : clk0_i;
+  // NOTE(review): the comments that used to follow described an assertion
+  // checking that sel_i is never X (delayed by ##1 to cope with inverted clocks
+  // that start with a rising edge). No such assertion is present in this file.
+endmodule : prim_generic_clock_mux2
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..8eacf01
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,26 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// `include ""
+// Generic Width-bit register with asynchronous active-low reset.
+module prim_generic_flop # (
+  parameter int Width = 1,
+  // Helper used only to size ResetValue.
+  localparam int WidthSubOne = Width-1,
+  parameter logic [WidthSubOne:0] ResetValue = 0
+) (
+  input                    clk_i,
+  input                    rst_ni,  // asynchronous reset, active low
+  input        [Width-1:0] d_i,
+  output logic [Width-1:0] q_o      // ResetValue while in reset, else d_i delayed one cycle
+);
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      q_o <= ResetValue;
+    end else begin
+      q_o <= d_i;
+    end
+  end
+endmodule // prim_generic_flop
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..fdd1358
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,43 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Generic double-synchronizer flop
+// This may need to be moved to prim_generic if libraries have a specific cell
+// for synchronization
+module prim_generic_flop_2sync #(
+  parameter int Width = 16,
+  localparam int WidthSubOne = Width-1, // temp work around #2679
+  parameter logic [WidthSubOne:0] ResetValue = '0
+) (
+  input                    clk_i, // receive clock
+  input                    rst_ni,
+  input        [Width-1:0] d_i,
+  output logic [Width-1:0] q_o   // d_i delayed by two prim_generic_flop stages
+);
+  // Output of the first stage, input of the second.
+  logic [Width-1:0] intq;
+  // First synchronizer stage: samples d_i.
+  prim_generic_flop #(
+    .Width(Width),
+    .ResetValue(ResetValue)
+  ) u_sync_1 (
+    .clk_i,
+    .rst_ni,
+    .d_i,
+    .q_o(intq)
+  );
+  // Second synchronizer stage: re-samples the first stage's output.
+  prim_generic_flop #(
+    .Width(Width),
+    .ResetValue(ResetValue)
+  ) u_sync_2 (
+    .clk_i,
+    .rst_ni,
+    .d_i(intq),
+    .q_o
+  );
+endmodule : prim_generic_flop_2sync
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..c84e389
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,51 @@
+// Interrupt helper: merges hardware events and software test writes into the
+// interrupt state register update, and derives the outgoing interrupt from
+// (state & enable).
+module prim_intr_hw # (
+  parameter int unsigned Width = 1,
+  // 1: register intr_o, 0: drive intr_o combinationally.
+  parameter bit FlopOutput = 1
+) (
+  // event
+  input                    clk_i,
+  input                    rst_ni,
+  input  [Width-1:0]       event_intr_i,
+  // register interface
+  input  [Width-1:0]       reg2hw_intr_enable_q_i,
+  input  [Width-1:0]       reg2hw_intr_test_q_i,
+  input                    reg2hw_intr_test_qe_i,
+  input  [Width-1:0]       reg2hw_intr_state_q_i,
+  output                   hw2reg_intr_state_de_o,
+  output [Width-1:0]       hw2reg_intr_state_d_o,
+  // outgoing interrupt
+  output logic [Width-1:0] intr_o
+);
+  // Bits that become pending this cycle: real events, plus test bits while the
+  // test register's qe strobe is high.
+  logic [Width-1:0] new_event;
+  assign new_event =
+      (({Width{reg2hw_intr_test_qe_i}} & reg2hw_intr_test_q_i) | event_intr_i);
+  assign hw2reg_intr_state_de_o = |new_event;
+  // for scalar interrupts, this resolves to '1' with new event
+  // for vector interrupts, new events are OR'd in to existing interrupt state
+  assign hw2reg_intr_state_d_o = new_event | reg2hw_intr_state_q_i;
+  if (FlopOutput == 1) begin : gen_flop_intr_output
+    // flop the interrupt output
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        // '0 clears all Width bits (same value as the former 1'b0 literal).
+        intr_o <= '0;
+      end else begin
+        intr_o <= reg2hw_intr_state_q_i & reg2hw_intr_enable_q_i;
+      end
+    end
+  end else begin : gen_intr_passthrough_output
+    // Clock and reset are consumed so they are not reported as unused.
+    logic unused_clk;
+    logic unused_rst_n;
+    assign unused_clk = clk_i;
+    assign unused_rst_n = rst_ni;
+    assign intr_o = reg2hw_intr_state_q_i & reg2hw_intr_enable_q_i;
+  end
+endmodule : prim_intr_hw
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..ebe38d1
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,18 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Constants for use in primitives
+// This file is a stop-gap until the DV file list is generated by FuseSoC.
+// Its contents are taken from the file which would be generated by FuseSoC.
+package prim_pkg;
+  // Enumerates the implementation targets a primitive can be specialized for.
+  // The values are spelled out; they match the implicit enum numbering.
+  typedef enum integer {
+    ImplGeneric = 0,
+    ImplXilinx  = 1
+  } impl_e;
+endpackage : prim_pkg
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..33d4290
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,60 @@
+// Register slice with software and hardware write ports. The write enable and
+// write data are resolved by prim_subreg_arb according to SWACCESS.
+module prim_subreg #(
+  parameter int            DW       = 32  ,
+  parameter                SWACCESS = "RW",  // {RW, RO, WO, W1C, W1S, W0C, RC}
+  parameter logic [DW-1:0] RESVAL   = '0     // Reset value
+) (
+  input clk_i,
+  input rst_ni,
+  // From SW: valid for RW, WO, W1C, W1S, W0C, RC
+  // In case of RC, Top connects Read Pulse to we
+  input          we,
+  input [DW-1:0] wd,
+  // From HW: valid for HRW, HWO
+  input          de,
+  input [DW-1:0] d,
+  // output to HW and Reg Read
+  output logic          qe,
+  output logic [DW-1:0] q,
+  output logic [DW-1:0] qs
+);
+  logic          wr_en;
+  logic [DW-1:0] wr_data;
+  // SWACCESS must be forwarded: without it the arbiter falls back to its "RW"
+  // default and W1C/W1S/W0C/RC/RO registers would behave like plain RW ones.
+  prim_subreg_arb #(
+    .DW       ( DW       ),
+    .SWACCESS ( SWACCESS )
+  ) wr_en_data_arb (
+    .we,
+    .wd,
+    .de,
+    .d,
+    .q,
+    .wr_en,
+    .wr_data
+  );
+  // qe is the software write strobe delayed by one cycle.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      qe <= 1'b0;
+    end else begin
+      qe <= we;
+    end
+  end
+  // Storage: takes the arbitrated data whenever the arbitrated enable is set.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      q <= RESVAL;
+    end else if (wr_en) begin
+      q <= wr_data;
+    end
+  end
+  // Software read-back is the stored value.
+  assign qs = q;
+endmodule : prim_subreg
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..adc144f
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,75 @@
+// Combinational arbiter that resolves software (we/wd) and hardware (de/d)
+// writes into a single write enable / write data pair, according to SWACCESS.
+module prim_subreg_arb #(
+  parameter int DW       = 32  ,
+  parameter     SWACCESS = "RW"  // {RW, RO, WO, W1C, W1S, W0C, RC}
+) (
+  // From SW: valid for RW, WO, W1C, W1S, W0C, RC.
+  // In case of RC, top connects read pulse to we.
+  input          we,
+  input [DW-1:0] wd,
+  // From HW: valid for HRW, HWO.
+  input          de,
+  input [DW-1:0] d,
+  // From register: actual reg value.
+  input [DW-1:0] q,
+  // To register: actual write enable and write data.
+  output logic          wr_en,
+  output logic [DW-1:0] wr_data
+);
+  if ((SWACCESS == "RW") || (SWACCESS == "WO")) begin : gen_w
+    assign wr_en   = we | de;
+    assign wr_data = (we == 1'b1) ? wd : d; // SW higher priority
+    // Unused q - Prevent lint errors.
+    logic [DW-1:0] unused_q;
+    assign unused_q = q;
+  end else if (SWACCESS == "RO") begin : gen_ro
+    assign wr_en   = de;
+    assign wr_data = d;
+    // Unused we, wd, q - Prevent lint errors.
+    logic          unused_we;
+    logic [DW-1:0] unused_wd;
+    logic [DW-1:0] unused_q;
+    assign unused_we = we;
+    assign unused_wd = wd;
+    assign unused_q  = q;
+  end else if (SWACCESS == "W1S") begin : gen_w1s
+    // If SWACCESS is W1S, then assume hw tries to clear.
+    // So, give a chance HW to clear when SW tries to set.
+    // If both try to set/clr at the same bit pos, SW wins.
+    assign wr_en   = we | de;
+    assign wr_data = (de ? d : q) | (we ? wd : '0);
+  end else if (SWACCESS == "W1C") begin : gen_w1c
+    // If SWACCESS is W1C, then assume hw tries to set.
+    // So, give a chance HW to set when SW tries to clear.
+    // If both try to set/clr at the same bit pos, SW wins.
+    assign wr_en   = we | de;
+    assign wr_data = (de ? d : q) & (we ? ~wd : '1);
+  end else if (SWACCESS == "W0C") begin : gen_w0c
+    // Bits written with 0 by SW are cleared; bits written with 1 are kept.
+    assign wr_en   = we | de;
+    assign wr_data = (de ? d : q) & (we ? wd : '1);
+  end else if (SWACCESS == "RC") begin : gen_rc
+    // This swtype is not recommended but exists for compatibility.
+    // WARN: we signal is actually read signal not write enable.
+    assign wr_en   = we | de;
+    assign wr_data = (de ? d : q) & (we ? '0 : '1);
+    // Unused wd - Prevent lint errors.
+    logic [DW-1:0] unused_wd;
+    assign unused_wd = wd;
+  end else begin : gen_hw
+    // Any other SWACCESS string: only hardware can update the register.
+    assign wr_en   = de;
+    assign wr_data = d;
+    // Unused we, wd, q - Prevent lint errors.
+    logic          unused_we;
+    logic [DW-1:0] unused_wd;
+    logic [DW-1:0] unused_q;
+    assign unused_we = we;
+    assign unused_wd = wd;
+    assign unused_q  = q;
+  end
+endmodule : prim_subreg_arb
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..00d7e9c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,24 @@
+// Storage-less register stub: all signals are passed straight through between
+// the software side and the hardware side.
+module prim_subreg_ext #(
+  parameter int unsigned DW = 32
+) (
+  input          re,  // software read strobe
+  input          we,  // software write strobe
+  input [DW-1:0] wd,  // software write data
+  input [DW-1:0] d,   // value supplied by hardware
+  // output to HW and Reg Read
+  output logic          qe,   // copy of we
+  output logic          qre,  // copy of re
+  output logic [DW-1:0] q,    // copy of wd
+  output logic [DW-1:0] qs    // copy of d (software read data)
+);
+  assign qs  = d;
+  assign q   = wd;
+  assign qe  = we;
+  assign qre = re;
+endmodule : prim_subreg_ext
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..54d8a37
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,89 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Utility functions
+ */
+package prim_util_pkg;
+  /**
+   * Math function: $clog2 as specified in Verilog-2005
+   *
+   * Do not use this function if $clog2() is available.
+   *
+   * clog2 =          0        for value == 0
+   *         ceil(log2(value)) for value >= 1
+   *
+   * This implementation is a synthesizable variant of the $clog2 function as
+   * specified in the Verilog-2005 standard (IEEE 1364-2005).
+   *
+   * To quote the standard:
+   *   The system function $clog2 shall return the ceiling of the log
+   *   base 2 of the argument (the log rounded up to an integer
+   *   value). The argument can be an integer or an arbitrary sized
+   *   vector value. The argument shall be treated as an unsigned
+   *   value, and an argument value of 0 shall produce a result of 0.
+   */
+  function automatic integer _clog2(integer value);
+    integer result;
+    // Use an intermediate value to avoid assigning to an input port, which produces a warning in
+    // Synopsys DC.
+    integer v = value;
+    v = v - 1;
+    // Count how many right shifts are needed until (value - 1) becomes zero.
+    for (result = 0; v > 0; result++) begin
+      v = v >> 1;
+    end
+    return result;
+  endfunction
+  /**
+   * Math function: Number of bits needed to address |value| items.
+   *
+   *                  0        for value == 0
+   * vbits =          1        for value == 1
+   *         ceil(log2(value)) for value > 1
+   *
+   *
+   * The primary use case for this function is the definition of registers/arrays
+   * which are wide enough to contain |value| items.
+   *
+   * This function is identical to $clog2() for all input values except the value 1;
+   * it could be considered an "enhanced" $clog2() function.
+   *
+   *
+   * Example 1:
+   *   parameter Items = 1;
+   *   localparam ItemsWidth = vbits(Items); // 1
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [0:0]
+   *
+   * Example 2:
+   *   parameter Items = 64;
+   *   localparam ItemsWidth = vbits(Items); // 6
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [5:0]
+   *
+   * Note: If you want to store the number "value" inside a register, you need
+   * a register with size vbits(value + 1), since you also need to store
+   * the number 0.
+   *
+   * Example 3:
+   *   logic [vbits(64)-1:0]     store_64_logic_values; // width is [5:0]
+   *   logic [vbits(64 + 1)-1:0] store_number_64;       // width is [6:0]
+   */
+  function automatic integer vbits(integer value);
+`ifdef XCELIUM
+    // The use of system functions was not allowed here in Verilog-2001, but is
+    // valid since (System)Verilog-2005, which is also when $clog2() first
+    // appeared.
+    // Xcelium < 19.10 does not yet support the use of $clog2() here, fall back
+    // to an implementation without a system function. Remove this workaround
+    // if we require a newer Xcelium version.
+    // See #2579 and #2597.
+    return (value == 1) ? 1 : _clog2(value);
+`else
+    return (value == 1) ? 1 : $clog2(value);
+`endif
+  endfunction
+endpackage : prim_util_pkg
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..b159a93
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,75 @@
+// TileLink-UL wrapper for the PWM core: tlul_adapter_reg converts the TL-UL
+// port into a simple register interface that drives the PWM module.
+module pwm_top (
+  input                     clk_i,
+  input                     rst_ni,
+  input  tlul_pkg::tl_h2d_t tl_i,     // TL-UL request (host to device)
+  output tlul_pkg::tl_d2h_t tl_o,     // TL-UL response (device to host)
+  output                    pwm_o,    // PWM channel 1 output
+  output                    pwm_o_2,  // PWM channel 2 output
+  output                    pwm1_oe,  // output enable for channel 1
+  output                    pwm2_oe   // output enable for channel 2
+);
+  localparam int AW  = 8;
+  localparam int DW  = 32;
+  localparam int DBW = DW/8;
+  // Register interface between the TL-UL adapter and the PWM core.
+  logic        re;
+  logic        we;
+  logic [7:0]  addr;
+  logic [31:0] wdata;
+  logic [3:0]  be;
+  logic [31:0] rdata;
+  // The core's error output is commented out, so err is currently unconnected.
+  logic        err;
+  //assign err = '0;
+  PWM pwm_core (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .re_i       (re),
+    .we_i       (we),
+    .addr_i     (addr),
+    .wdata_i    (wdata),
+    .be_i       (be),
+    .rdata_o    (rdata),
+    //.error_o  (err),
+    // External clock and external duty-cycle inputs are tied off to zero.
+    .i_extclk   ('0),
+    .i_DC       ('0),
+    .i_valid_DC ('0),
+    .o_pwm      (pwm_o),
+    .o_pwm_2    (pwm_o_2),
+    .oe_pwm1    (pwm1_oe),
+    .oe_pwm2    (pwm2_oe)
+  );
+  tlul_adapter_reg #(
+    .RegAw(AW),
+    .RegDw(DW)
+  ) u_reg_if (
+    .clk_i,
+    .rst_ni,
+    .tl_i    (tl_i),
+    .tl_o    (tl_o),
+    .we_o    (we),
+    .re_o    (re),
+    .addr_o  (addr),
+    .wdata_o (wdata),
+    .be_o    (be),
+    .rdata_i (rdata),
+    // The adapter is never told about a register-side error.
+    .error_i (1'b0)
+  );
+endmodule : pwm_top
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..9013a64
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,343 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Michael Schaffner <>, ETH Zurich
+// Wolfgang Roenninger <>, ETH Zurich
+// Date: 02.04.2019
+// Description: logarithmic arbitration tree with round robin arbitration scheme.
+/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
+/// rotate each cycle.
+/// ## Fair vs. unfair Arbitration
+/// This refers to fair throughput distribution when not all inputs have active requests.
+/// This module has an internal state `rr_q` which defines the highest priority input. (When
+/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
+/// choose the input with the same index as currently defined by the state if it has an active
+/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
+/// to distinguish between two methods of calculating the next state.
+/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
+/// state being calculated without the context of the active request. Leading to an
+/// unfair throughput distribution if not all inputs have active requests.
+/// * `1'b1`: The next state jumps to the next unserved request with higher index.
+/// This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
+/// `req_i` signal with all indices which will have a higher priority in the next state.
+/// The trailing zero count defines the input index with the next highest priority after
+/// the current one is served. When the upper is empty the lower `lzc` provides the
+/// wrapped index if there are outstanding requests with lower or same priority.
+/// The implication of throughput fairness on the module timing are:
+/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
+/// that in this module the input to register path scales with Log(Log(`NumIn`)).
+/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
+/// timing path of this module also scales with Log(`NumIn`).
+/// This implies that in this module the input to output path is always longer than the input to
+/// register path. As the output data usually also terminates in a register the parameter `FairArb`
+/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
+/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
+/// However these are small with respect to the data multiplexers needed, as the width of the `req_i`
+/// signal is usually less than `DataWidth`.
+module rr_arb_tree #(
+  /// Number of inputs to be arbitrated.
+  parameter int unsigned NumIn = 64,
+  /// Data width of the payload in bits. Not needed if `DataType` is overwritten.
+  parameter int unsigned DataWidth = 32,
+  /// Data type of the payload, can be overwritten with custom type. Only use of `DataWidth`.
+  parameter type DataType = logic [DataWidth-1:0],
+  /// The `ExtPrio` option allows to override the internal round robin counter via the
+  /// `rr_i` signal. This can be useful in case multiple arbiters need to have
+  /// rotating priorities that are operating in lock-step. If static priority arbitration
+  /// is needed, just connect `rr_i` to '0.
+  ///
+  /// Set to 1'b1 to enable.
+  parameter bit ExtPrio = 1'b0,
+  /// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
+  /// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
+  /// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
+  /// delay and area.
+  ///
+  /// Set to `1'b1` to treat req/gnt as vld/rdy.
+  parameter bit AxiVldRdy = 1'b0,
+  /// The `LockIn` option prevents the arbiter from changing the arbitration
+  /// decision when the arbiter is disabled. I.e., the index of the first request
+  /// that wins the arbitration will be locked in case the destination is not
+  /// able to grant the request in the same cycle.
+  ///
+  /// Set to `1'b1` to enable.
+  parameter bit LockIn = 1'b0,
+  /// When set, ensures that throughput gets distributed evenly between all inputs.
+  ///
+  /// Set to `1'b0` to disable.
+  parameter bit FairArb = 1'b1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Width of the arbitration priority signal and the arbitrated index.
+  parameter int unsigned IdxWidth = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Type for defining the arbitration priority and arbitrated index signal.
+  parameter type idx_t = logic [IdxWidth-1:0]
+) (
+  /// clk_i, positive edge triggered.
+  input logic clk_i,
+  /// Asynchronous rst_ni, active low.
+  input logic rst_ni,
+  /// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
+  input logic flush_i,
+  /// External round-robin priority. Only used if `ExtPrio` is `1'b1.`
+  input idx_t rr_i,
+  /// Input requests arbitration.
+  input logic [NumIn-1:0] req_i,
+  /* verilator lint_off UNOPTFLAT */
+  /// Input request is granted.
+  output logic [NumIn-1:0] gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  /// Input data for arbitration.
+  input DataType [NumIn-1:0] data_i,
+  /// Output request is valid.
+  output logic req_o,
+  /// Output request is granted.
+  input logic gnt_i,
+  /// Output data.
+  output DataType data_o,
+  /// Index from which input the data came from.
+  output idx_t idx_o
+);
+  // pragma translate_off
+  `ifndef VERILATOR
+  // Default SVA rst_ni
+  default disable iff (!rst_ni || flush_i);
+  `endif
+  // pragma translate_on
+  // just pass through in this corner case
+  if (NumIn == unsigned'(1)) begin : gen_pass_through
+    assign req_o = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o = data_i[0];
+    assign idx_o = '0;
+  // non-degenerate cases
+  end else begin : gen_arbiter
+    localparam int unsigned NumLevels = unsigned'($clog2(NumIn));
+    // The tree is stored as a flat array: node 0 is the root and the two
+    // children of the l-th node of a level sit at 2**(level+1)-1+2*l and the
+    // slot after it (see Idx0/Idx1 below).
+    /* verilator lint_off UNOPTFLAT */
+    idx_t [2**NumLevels-2:0] index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0] data_nodes; // used to propagate the data
+    logic [2**NumLevels-2:0] gnt_nodes; // used to propagate the grant to masters
+    logic [2**NumLevels-2:0] req_nodes; // used to propagate the requests to slave
+    /* lint_off */
+    idx_t rr_q;
+    logic [NumIn-1:0] req_d;
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o = req_nodes[0];
+    assign data_o = data_nodes[0];
+    assign idx_o = index_nodes[0];
+    if (ExtPrio) begin : gen_ext_rr
+      assign rr_q = rr_i;
+      assign req_d = req_i;
+    end else begin : gen_int_rr
+      idx_t rr_d;
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic lock_d, lock_q;
+        logic [NumIn-1:0] req_q;
+        assign lock_d = req_o & ~gnt_i;
+        // While locked, arbitrate on the request vector captured last cycle.
+        assign req_d = (lock_q) ? req_q : req_i;
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+        // pragma translate_off
+        `ifndef VERILATOR
+        // The continuation lines of the two message strings below are kept
+        // exactly as found, since their leading whitespace is part of the string.
+        lock: assert property(
+          @(posedge clk_i) LockIn |-> req_o && !gnt_i |=> idx_o == $past(idx_o)) else
+          $fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
+ ready.");
+        logic [NumIn-1:0] req_tmp;
+        assign req_tmp = req_q & req_i;
+        lock_req: assume property(
+          @(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
+          $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
+ enabled.");
+        `endif
+        // pragma translate_on
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q <= '0;
+            end else begin
+              req_q <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+      if (FairArb) begin : gen_fair_arb
+        logic [NumIn-1:0] upper_mask, lower_mask;
+        idx_t upper_idx, lower_idx, next_idx;
+        logic upper_empty, lower_empty;
+        // Split the requests into those above rr_q and those at or below it.
+        for (genvar i = 0; i < NumIn; i++) begin : gen_mask
+          assign upper_mask[i] = (i > rr_q) ? req_d[i] : 1'b0;
+          assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0;
+        end
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE ( 1'b0 )
+        ) i_lzc_upper (
+          .in_i ( upper_mask ),
+          .cnt_o ( upper_idx ),
+          .empty_o ( upper_empty )
+        );
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE ( 1'b0 )
+        ) i_lzc_lower (
+          .in_i ( lower_mask ),
+          .cnt_o ( lower_idx ),
+          .empty_o ( /*unused*/ )
+        );
+        // Prefer a request above rr_q; wrap around to the lower part otherwise.
+        assign next_idx = upper_empty ? lower_idx : upper_idx;
+        assign rr_d = (gnt_i && req_o) ? next_idx : rr_q;
+      end else begin : gen_unfair_arb
+        // Advance by one with wrap-around on every accepted request.
+        assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+      end
+      // this holds the highest priority
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q <= '0;
+          end else begin
+            rr_q <= rr_d;
+          end
+        end
+      end
+    end
+    assign gnt_nodes[0] = gnt_i;
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned Idx0 = 2**level-1+l;// current node
+        localparam int unsigned Idx1 = 2**(level+1)-1+l*2;
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
+            assign req_nodes[Idx0] = req_d[l*2] | req_d[l*2+1];
+            // arbitration: round robin
+            assign sel = ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+            assign index_nodes[Idx0] = idx_t'(sel);
+            assign data_nodes[Idx0] = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]) & ~sel;
+            assign gnt_o[l*2+1] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
+            assign req_nodes[Idx0] = req_d[l*2];
+            assign index_nodes[Idx0] = '0;// always zero in this case
+            assign data_nodes[Idx0] = data_i[l*2];
+            assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
+            assign req_nodes[Idx0] = 1'b0;
+            assign index_nodes[Idx0] = idx_t'('0);
+            assign data_nodes[Idx0] = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[Idx0] = req_nodes[Idx1] | req_nodes[Idx1+1];
+          // arbitration: round robin
+          assign sel = ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
+          assign index_nodes[Idx0] = (sel) ?
+              idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+              idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
+          assign data_nodes[Idx0] = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
+          assign gnt_nodes[Idx1] = gnt_nodes[Idx0] & ~sel;
+          assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial begin : p_assert
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+    hot_one : assert property(
+      @(posedge clk_i) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+    gnt0 : assert property(
+      @(posedge clk_i) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+    gnt1 : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+    gnt_idx : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+    req0 : assert property(
+      @(posedge clk_i) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+    req1 : assert property(
+      @(posedge clk_i) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    // pragma translate_on
+  end
+endmodule : rr_arb_tree
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..fca2e70
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,42 @@
+// basic reset management logic for azadi
+// Combines the reset sources into one active-low system reset and passes the
+// result through two flip-flops clocked by clk_i, so sys_rst_ni changes two
+// clock edges after the combined request changes.
+module rstmgr(
+  input        clk_i,       //system clock
+  input        rst_ni,      // system reset
+  input        prog_rst_ni, // additional active-low reset source
+  input  logic ndmreset,    // non-debug module reset
+  output logic sys_rst_ni   // reset for system except debug module
+);
+  logic rst_d, rst_q;
+  logic rst_fd, rst_fq; // follower flip flop
+  // Reset is requested (rst_d low) when any source is asserted:
+  // rst_ni low, ndmreset high, or prog_rst_ni low.
+  always_comb begin
+    if (!rst_ni || ndmreset || !prog_rst_ni) begin
+      rst_d = 1'b0;
+    end else begin
+      rst_d = 1'b1;
+    end
+  end
+  // First stage. NOTE(review): these flops have no reset of their own, so
+  // sys_rst_ni is undefined until clk_i has toggled twice - confirm this is
+  // acceptable at power-up.
+  always_ff @(posedge clk_i) begin
+    rst_q <= rst_d;
+  end
+  assign rst_fd = rst_q;
+  // Second (follower) stage.
+  always_ff @(posedge clk_i) begin
+    rst_fq <= rst_fd;
+  end
+  assign sys_rst_ni = rst_fq;
+endmodule : rstmgr
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..398652b
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,319 @@
+// RISC-V debug module wrapper.
+//
+// Ties together the debug CSRs (dm_csrs), the system bus access unit (dm_sba),
+// the debug memory (dm_mem) and the JTAG DMI bridge (dmi_jtag), and exposes
+// them through one TL-UL device port and one TL-UL host port.
+//
+// Parameters:
+//   NrHarts      - number of harts served by this debug module
+//   IdcodeValue  - value forwarded to the dmi_jtag instance
+//   DirectDmiTap - not referenced by any active code in this module (the only
+//                  use is a commented-out generate condition)
+module rv_dm #(
+  parameter int          NrHarts      = 1,
+  parameter logic [31:0] IdcodeValue  = 32'h 0000_0001,
+  parameter logic        DirectDmiTap = 1'b1
+) (
+  input  logic               clk_i,         // clock
+  input  logic               rst_ni,        // asynchronous reset active low, connect PoR
+                                            // here, not the system reset
+  input  logic               testmode_i,
+  output logic               ndmreset_o,    // non-debug module reset
+  output logic               dmactive_o,    // debug module is active
+  output logic [NrHarts-1:0] debug_req_o,   // async debug request
+  input  logic [NrHarts-1:0] unavailable_i, // communicate whether the hart is unavailable
+                                            // (e.g.: power down)
+  // bus device with debug memory, for an execution based technique
+  input  tlul_pkg::tl_h2d_t  tl_d_i,
+  output tlul_pkg::tl_d2h_t  tl_d_o,
+  // bus host, for system bus accesses
+  output tlul_pkg::tl_h2d_t  tl_h_o,
+  input  tlul_pkg::tl_d2h_t  tl_h_i,
+  input  jtag_pkg::jtag_req_t jtag_req_i,
+  output jtag_pkg::jtag_rsp_t jtag_rsp_o
+);
+ // Currently only 32 bit busses are supported by our TL-UL IP
+ localparam int BusWidth = 32;
+ // all harts have contiguous IDs
+ localparam logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}};
+ // Debug CSRs
+ dm::hartinfo_t [NrHarts-1:0] hartinfo;
+ logic [NrHarts-1:0] halted;
+ // logic [NrHarts-1:0] running;
+ logic [NrHarts-1:0] resumeack;
+ logic [NrHarts-1:0] haltreq;
+ logic [NrHarts-1:0] resumereq;
+ logic clear_resumeack;
+ logic cmd_valid;
+ dm::command_t cmd;
+ logic cmderror_valid;
+ dm::cmderr_e cmderror;
+ logic cmdbusy;
+ logic [dm::ProgBufSize-1:0][31:0] progbuf;
+ logic [dm::DataCount-1:0][31:0] data_csrs_mem;
+ logic [dm::DataCount-1:0][31:0] data_mem_csrs;
+ logic data_valid;
+ logic [19:0] hartsel;
+ // System Bus Access Module
+ logic [BusWidth-1:0] sbaddress_csrs_sba;
+ logic [BusWidth-1:0] sbaddress_sba_csrs;
+ logic sbaddress_write_valid;
+ logic sbreadonaddr;
+ logic sbautoincrement;
+ logic [2:0] sbaccess;
+ logic sbreadondata;
+ logic [BusWidth-1:0] sbdata_write;
+ logic sbdata_read_valid;
+ logic sbdata_write_valid;
+ logic [BusWidth-1:0] sbdata_read;
+ logic sbdata_valid;
+ logic sbbusy;
+ logic sberror_valid;
+ logic [2:0] sberror;
+ dm::dmi_req_t dmi_req;
+ dm::dmi_resp_t dmi_rsp;
+ logic dmi_req_valid, dmi_req_ready;
+ logic dmi_rsp_valid, dmi_rsp_ready;
+ logic dmi_rst_n;
+ // static debug hartinfo
+ localparam dm::hartinfo_t DebugHartInfo = '{
+ zero1: '0,
+ nscratch: 2, // Debug module needs at least two scratch regs
+ zero0: 0,
+ dataaccess: 1'b1, // data registers are memory mapped in the debugger
+ datasize: dm::DataCount,
+ dataaddr: dm::DataAddr
+ };
+  // Every hart reports the same static hartinfo record.
+  for (genvar hart = 0; hart < NrHarts; hart++) begin : gen_dm_hart_ctrl
+    assign hartinfo[hart] = DebugHartInfo;
+  end
+ dm_csrs #(
+ .NrHarts(NrHarts),
+ .BusWidth(BusWidth),
+ .SelectableHarts(SelectableHarts)
+ ) i_dm_csrs (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .testmode_i ( testmode_i ),
+ .dmi_rst_ni ( dmi_rst_n ),
+ .dmi_req_valid_i ( dmi_req_valid ),
+ .dmi_req_ready_o ( dmi_req_ready ),
+ .dmi_req_i ( dmi_req ),
+ .dmi_resp_valid_o ( dmi_rsp_valid ),
+ .dmi_resp_ready_i ( dmi_rsp_ready ),
+ .dmi_resp_o ( dmi_rsp ),
+ .ndmreset_o ( ndmreset_o ),
+ .dmactive_o ( dmactive_o ),
+ .hartsel_o ( hartsel ),
+ .hartinfo_i ( hartinfo ),
+ .halted_i ( halted ),
+ .unavailable_i,
+ .resumeack_i ( resumeack ),
+ .haltreq_o ( haltreq ),
+ .resumereq_o ( resumereq ),
+ .clear_resumeack_o ( clear_resumeack ),
+ .cmd_valid_o ( cmd_valid ),
+ .cmd_o ( cmd ),
+ .cmderror_valid_i ( cmderror_valid ),
+ .cmderror_i ( cmderror ),
+ .cmdbusy_i ( cmdbusy ),
+ .progbuf_o ( progbuf ),
+ .data_i ( data_mem_csrs ),
+ .data_valid_i ( data_valid ),
+ .data_o ( data_csrs_mem ),
+ .sbaddress_o ( sbaddress_csrs_sba ),
+ .sbaddress_i ( sbaddress_sba_csrs ),
+ .sbaddress_write_valid_o ( sbaddress_write_valid ),
+ .sbreadonaddr_o ( sbreadonaddr ),
+ .sbautoincrement_o ( sbautoincrement ),
+ .sbaccess_o ( sbaccess ),
+ .sbreadondata_o ( sbreadondata ),
+ .sbdata_o ( sbdata_write ),
+ .sbdata_read_valid_o ( sbdata_read_valid ),
+ .sbdata_write_valid_o ( sbdata_write_valid ),
+ .sbdata_i ( sbdata_read ),
+ .sbdata_valid_i ( sbdata_valid ),
+ .sbbusy_i ( sbbusy ),
+ .sberror_valid_i ( sberror_valid ),
+ .sberror_i ( sberror )
+ );
+ logic host_req;
+ logic [BusWidth-1:0] host_add;
+ logic host_we;
+ logic [BusWidth-1:0] host_wdata;
+ logic [BusWidth/8-1:0] host_be;
+ logic host_gnt;
+ logic host_r_valid;
+ logic [BusWidth-1:0] host_r_rdata;
+ logic host_r_err;
+ dm_sba #(
+ .BusWidth(BusWidth)
+ ) i_dm_sba (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .master_req_o ( host_req ),
+ .master_add_o ( host_add ),
+ .master_we_o ( host_we ),
+ .master_wdata_o ( host_wdata ),
+ .master_be_o ( host_be ),
+ .master_gnt_i ( host_gnt ),
+ .master_r_valid_i ( host_r_valid ),
+ .master_r_rdata_i ( host_r_rdata ),
+ .dmactive_i ( dmactive_o ),
+ .sbaddress_i ( sbaddress_csrs_sba ),
+ .sbaddress_o ( sbaddress_sba_csrs ),
+ .sbaddress_write_valid_i ( sbaddress_write_valid ),
+ .sbreadonaddr_i ( sbreadonaddr ),
+ .sbautoincrement_i ( sbautoincrement ),
+ .sbaccess_i ( sbaccess ),
+ .sbreadondata_i ( sbreadondata ),
+ .sbdata_i ( sbdata_write ),
+ .sbdata_read_valid_i ( sbdata_read_valid ),
+ .sbdata_write_valid_i ( sbdata_write_valid ),
+ .sbdata_o ( sbdata_read ),
+ .sbdata_valid_o ( sbdata_valid ),
+ .sbbusy_o ( sbbusy ),
+ .sberror_valid_o ( sberror_valid ),
+ .sberror_o ( sberror )
+ );
+ tlul_host_adapter #(
+ .MAX_REQS(1)
+ ) tl_adapter_host_sba (
+ .clk_i(clk_i),
+ .rst_ni(rst_ni),
+ .req_i (host_req),
+ .gnt_o (host_gnt),
+ .addr_i (host_add),
+ .we_i (host_we),
+ .wdata_i (host_wdata),
+ .be_i (host_be),
+ .valid_o (host_r_valid),
+ .rdata_o (host_r_rdata),
+ .err_o (host_r_err),
+ .tl_h_c_a (tl_h_o),
+ .tl_h_c_d (tl_h_i)
+ );
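+ // The debug module does not handle bus error responses.
+ // NOTE(review): no assertion follows this comment and host_r_err is not read
+ // anywhere in this module - confirm whether an error check was dropped.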
+ localparam int unsigned AddressWidthWords = BusWidth - $clog2(BusWidth/8);
+ logic req;
+ logic we;
+ logic [BusWidth/8-1:0] be;
+ logic [BusWidth-1:0] wdata;
+ logic [BusWidth-1:0] rdata;
+ logic rvalid;
+ logic [BusWidth-1:0] addr_b;
+ logic [AddressWidthWords-1:0] addr_w;
+ // TODO: The tlul_adapter_sram give us a bitwise write mask currently,
+ // but dm_mem only supports byte write masks. Disable sub-word access in the
+ // adapter for now until we figure out a good strategy to deal with this.
+ assign be = {BusWidth/8{1'b1}};
+ assign addr_b = {addr_w, {$clog2(BusWidth/8){1'b0}}};
+ dm_mem #(
+ .NrHarts(NrHarts),
+ .BusWidth(BusWidth),
+ .SelectableHarts(SelectableHarts),
+ // The debug module provides a simplified ROM for systems that map the debug ROM to offset 0x0
+ // on the system bus. In that case, only one scratch register has to be implemented in the core.
+ // However, we require that the DM can be placed at arbitrary offsets in the system, which
+ // requires the generalized debug ROM implementation and two scratch registers. We hence set
+ // this parameter to a non-zero value (inside dm_mem, this just feeds into a comparison with 0).
+ .DmBaseAddress(1)
+ ) i_dm_mem (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .debug_req_o ( debug_req_o ),
+ .hartsel_i ( hartsel ),
+ .haltreq_i ( haltreq ),
+ .resumereq_i ( resumereq ),
+ .clear_resumeack_i ( clear_resumeack ),
+ .halted_o ( halted ),
+ .resuming_o ( resumeack ),
+ .cmd_valid_i ( cmd_valid ),
+ .cmd_i ( cmd ),
+ .cmderror_valid_o ( cmderror_valid ),
+ .cmderror_o ( cmderror ),
+ .cmdbusy_o ( cmdbusy ),
+ .progbuf_i ( progbuf ),
+ .data_i ( data_csrs_mem ),
+ .data_o ( data_mem_csrs ),
+ .data_valid_o ( data_valid ),
+ .req_i ( req ),
+ .we_i ( we ),
+ .addr_i ( addr_b ),
+ .wdata_i ( wdata ),
+ .be_i ( be ),
+ .rdata_o ( rdata )
+ );
+ // JTAG TAP to DMI bridge.
+ // The DirectDmiTap guard below is commented out, so this dmi_jtag instance is
+ // created unconditionally and the DirectDmiTap parameter has no effect here.
+ // NOTE(review): the earlier comment spoke of a bound-in DPI module replacing
+ // the TAP; no such alternative is present in this module - confirm intent.
+//if (DirectDmiTap) begin
+ dmi_jtag #(
+ .IdcodeValue (IdcodeValue)
+ ) dap (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .testmode_i (testmode_i),
+ // DMI request/response handshake towards dm_csrs
+ .dmi_rst_no (dmi_rst_n),
+ .dmi_req_o (dmi_req),
+ .dmi_req_valid_o (dmi_req_valid),
+ .dmi_req_ready_i (dmi_req_ready),
+ .dmi_resp_i (dmi_rsp ),
+ .dmi_resp_ready_o (dmi_rsp_ready),
+ .dmi_resp_valid_i (dmi_rsp_valid),
+ // JTAG pins, taken from / returned through the jtag_req_i / jtag_rsp_o ports
+ .tck_i (jtag_req_i.tck),
+ .tms_i (jtag_req_i.tms),
+ .trst_ni (jtag_req_i.trst_n),
+ .td_i (jtag_req_i.tdi),
+ .td_o (jtag_rsp_o.tdo),
+ .tdo_oe_o (jtag_rsp_o.tdo_oe)
+ );
+ tlul_sram_adapter #(
+ .SramAw(AddressWidthWords),
+ .SramDw(BusWidth),
+ .Outstanding(1),
+ .ByteAccess(0)
+ ) tl_adapter_device_mem (
+ .clk_i(clk_i),
+ .rst_ni(rst_ni),
+ .req_o (req),
+ .gnt_i (1'b1),
+ .we_o (we),
+ .addr_o (addr_w),
+ .wdata_o (wdata),
+ .wmask_o (),
+ .rdata_i (rdata),
+ .rvalid_i (rvalid),
+ .rerror_i (2'b00),
+ .tl_o (tl_d_o),
+ .tl_i (tl_d_i)
+ );
+  // Read-valid for the TL-UL SRAM adapter: registered copy of "read request",
+  // i.e. rvalid is high in the cycle after req was high with we low.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rvalid <= '0;
+    end else begin
+      rvalid <= req & ~we;
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..5d77a3b
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,241 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// RISC-V Platform-Level Interrupt Controller compliant INTC
+// Current version doesn't support MSI interrupt but it is easy to add
+// the feature. Create one external register and connect qe signal to the
+// gateway module (as edge-triggered)
+// Consider to set MAX_PRIO as small number as possible. It is main factor
+// of area increase if edge-triggered counter isn't implemented.
+// Verilog parameter
+// MAX_PRIO: Maximum value of interrupt priority
+// Platform-level interrupt controller top level.
+// NumSrc and NumTarget come from rv_plic_reg_pkg (imported in the header).
+module rv_plic import rv_plic_reg_pkg::*; #(
+  // derived parameter
+  localparam int SRCW = $clog2(NumSrc)
+) (
+  input clk_i,
+  input rst_ni,
+  // Bus Interface (device)
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+  // Interrupt Sources
+  input  [NumSrc-1:0] intr_src_i,
+  // Interrupt notification to targets
+  output [NumTarget-1:0] irq_o,
+  output logic [NumTarget-1:0] msip_o
+);
+ rv_plic_reg2hw_t reg2hw;
+ rv_plic_hw2reg_t hw2reg;
+ localparam int MAX_PRIO = 3;
+ localparam int PRIOW = $clog2(MAX_PRIO+1);
+ logic [SRCW:0] irq_id_o [NumTarget];
+ logic [NumSrc-1:0] le; // 0:level 1:edge
+ logic [NumSrc-1:0] ip;
+ logic [NumSrc-1:0] ie [NumTarget];
+ logic [NumTarget-1:0] claim_re; // Target read indicator
+ logic [SRCW-1:0] claim_id [NumTarget];
+ logic [NumSrc-1:0] claim; // Converted from claim_re/claim_id
+ logic [NumTarget-1:0] complete_we; // Target write indicator
+ logic [SRCW-1:0] complete_id [NumTarget];
+ logic [NumSrc-1:0] complete; // Converted from complete_re/complete_id
+ logic [SRCW:0] cc_id [NumTarget]; // Write ID
+ logic [PRIOW-1:0] prio [NumSrc];
+ logic [PRIOW-1:0] threshold [NumTarget];
+ // Glue logic between rv_plic_reg_top and others
+ assign cc_id = irq_id_o;
+  // Convert the per-target claim read strobes and IDs into a per-source
+  // bit vector for the gateway.
+  always_comb begin
+    // Default first: every bit is driven on every evaluation (no inferred
+    // latch) and a target without a claim no longer wipes bits set by an
+    // earlier target in the loop.
+    claim = '0;
+    for (int i = 0 ; i < NumTarget ; i++) begin
+      if (claim_re[i]) begin
+        claim[claim_id[i]] = 1'b1;
+      end
+    end
+  end
+  // Convert the per-target completion write strobes and IDs into a
+  // per-source bit vector for the gateway.
+  always_comb begin
+    // Default first: every bit is driven on every evaluation (no inferred
+    // latch) and a target without a completion no longer wipes bits set by
+    // an earlier target in the loop.
+    complete = '0;
+    for (int i = 0 ; i < NumTarget ; i++) begin
+      if (complete_we[i]) begin
+        complete[complete_id[i]] = 1'b1;
+      end
+    end
+  end
+ //`ASSERT_PULSE(claimPulse, claim_re[i])
+ //`ASSERT_PULSE(completePulse, complete_we[i])
+ //////////////
+ // Priority //
+ //////////////
+ assign prio[0] = reg2hw.prio0.q;
+ assign prio[1] = reg2hw.prio1.q;
+ assign prio[2] = reg2hw.prio2.q;
+ assign prio[3] = reg2hw.prio3.q;
+ assign prio[4] = reg2hw.prio4.q;
+ assign prio[5] = reg2hw.prio5.q;
+ assign prio[6] = reg2hw.prio6.q;
+ assign prio[7] = reg2hw.prio7.q;
+ assign prio[8] = reg2hw.prio8.q;
+ assign prio[9] = reg2hw.prio9.q;
+ assign prio[10] = reg2hw.prio10.q;
+ assign prio[11] = reg2hw.prio11.q;
+ assign prio[12] = reg2hw.prio12.q;
+ assign prio[13] = reg2hw.prio13.q;
+ assign prio[14] = reg2hw.prio14.q;
+ assign prio[15] = reg2hw.prio15.q;
+ assign prio[16] = reg2hw.prio16.q;
+ assign prio[17] = reg2hw.prio17.q;
+ assign prio[18] = reg2hw.prio18.q;
+ assign prio[19] = reg2hw.prio19.q;
+ assign prio[20] = reg2hw.prio20.q;
+ assign prio[21] = reg2hw.prio21.q;
+ assign prio[22] = reg2hw.prio22.q;
+ assign prio[23] = reg2hw.prio23.q;
+ assign prio[24] = reg2hw.prio24.q;
+ assign prio[25] = reg2hw.prio25.q;
+ assign prio[26] = reg2hw.prio26.q;
+ assign prio[27] = reg2hw.prio27.q;
+ assign prio[28] = reg2hw.prio28.q;
+ assign prio[29] = reg2hw.prio29.q;
+ assign prio[30] = reg2hw.prio30.q;
+ assign prio[31] = reg2hw.prio31.q;
+ assign prio[32] = reg2hw.prio32.q;
+ assign prio[33] = reg2hw.prio33.q;
+ assign prio[34] = reg2hw.prio34.q;
+ assign prio[35] = reg2hw.prio35.q;
+ assign prio[36] = reg2hw.prio36.q;
+ assign prio[37] = reg2hw.prio37.q;
+ assign prio[38] = reg2hw.prio38.q;
+ assign prio[39] = reg2hw.prio39.q;
+ assign prio[40] = reg2hw.prio40.q;
+ assign prio[41] = reg2hw.prio41.q;
+ assign prio[42] = reg2hw.prio42.q;
+ assign prio[43] = reg2hw.prio43.q;
+ //////////////////////
+ // Interrupt Enable //
+ //////////////////////
+  // Per-source interrupt enable bits for target 0.
+  // Loop bound uses the package parameter NumSrc instead of a literal 44.
+  for (genvar s = 0; s < NumSrc; s++) begin : gen_ie0
+    assign ie[0][s] = reg2hw.ie0[s].q;
+  end
+ ////////////////////////
+ // THRESHOLD register //
+ ////////////////////////
+ assign threshold[0] = reg2hw.threshold0.q;
+ /////////////////
+ // CC register //
+ /////////////////
+  // Claim: a read of the CC0 register (re strobe) claims the interrupt whose
+  // ID is currently presented by target 0.
+  assign claim_re[0]    = reg2hw.cc0.re;
+  assign claim_id[0]    = irq_id_o[0];
+  // Complete: a write to CC0 (qe strobe) completes the written interrupt ID.
+  assign complete_we[0] = reg2hw.cc0.qe;
+  assign complete_id[0] = reg2hw.cc0.q;
+  // Value returned when software reads CC0.
+  assign hw2reg.cc0.d   = cc_id[0];
+ ///////////////////
+ // MSIP register //
+ ///////////////////
+ assign msip_o[0] = reg2hw.msip0.q;
+ ////////
+ // IP //
+ ////////
+  // Mirror the gateway's pending bits into the IP register every cycle.
+  // Loop bound uses the package parameter NumSrc instead of a literal 44.
+  for (genvar s = 0; s < NumSrc; s++) begin : gen_ip
+    assign hw2reg.ip[s].de = 1'b1; // Always write
+    assign hw2reg.ip[s].d  = ip[s];
+  end
+ ///////////////////////////////////
+ // Detection:: 0: Level, 1: Edge //
+ ///////////////////////////////////
+  // Per-source trigger mode (0: level, 1: edge) from the LE register.
+  // Loop bound uses the package parameter NumSrc instead of a literal 44.
+  for (genvar s = 0; s < NumSrc; s++) begin : gen_le
+    assign le[s] = reg2hw.le[s].q;
+  end
+ //////////////
+ // Gateways //
+ //////////////
+ rv_plic_gateway #(
+ .N_SOURCE (NumSrc)
+ ) u_gateway (
+ .clk_i,
+ .rst_ni,
+ .src_i (intr_src_i),
+ .le_i (le),
+ .claim_i (claim),
+ .complete_i (complete),
+ .ip_o (ip)
+ );
+ ///////////////////////////////////
+ // Target interrupt notification //
+ ///////////////////////////////////
+  // One rv_plic_target instance per interrupt target. Each receives the
+  // shared pending vector and priorities plus its own enables and threshold.
+  for (genvar i = 0 ; i < NumTarget ; i++) begin : gen_target
+    rv_plic_target #(
+      .N_SOURCE (NumSrc),
+      // The parameter list previously ended in a dangling comma (syntax
+      // error). NOTE(review): assumes rv_plic_target has a MAX_PRIO parameter
+      // that sizes prio_i / threshold_i - confirm against that module.
+      .MAX_PRIO (MAX_PRIO)
+    ) u_target (
+      .clk_i,
+      .rst_ni,
+      .ip_i        (ip),
+      .ie_i        (ie[i]),
+      .prio_i      (prio),
+      .threshold_i (threshold[i]),
+      .irq_o       (irq_o[i]),
+      .irq_id_o    (irq_id_o[i])
+    );
+  end
+ ////////////////////////
+ // Register interface //
+ ////////////////////////
+ // Limitation of register tool prevents the module from having flexibility to parameters
+ // So, signals are manually tied at the top.
+  // Register file: TL-UL device port on one side, reg2hw / hw2reg structs on
+  // the other. devmode_i is tied high.
+  rv_plic_reg_top u_reg (
+    .clk_i,
+    .rst_ni,
+    .tl_i,
+    .tl_o,
+    .reg2hw,
+    .hw2reg,
+    .devmode_i (1'b1)
+  );
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..c81810b
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,62 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// RISC-V Platform-Level Interrupt Gateways module
+// Interrupt gateway: turns raw interrupt sources into pending bits (ip_o).
+//
+// For each source, `set` is either the level of src_i (le_i = 0) or a
+// one-cycle pulse on its rising edge (le_i = 1). A pending bit is set by
+// `set` while the source is neither pending nor active, and cleared by
+// claim_i. The active bit (ia) blocks new pending requests until complete_i
+// arrives.
+module rv_plic_gateway #(
+  parameter int N_SOURCE = 32
+) (
+  input clk_i,
+  input rst_ni,
+  input [N_SOURCE-1:0] src_i,
+  input [N_SOURCE-1:0] le_i,       // Level0 Edge1
+  input [N_SOURCE-1:0] claim_i,    // $onehot0(claim_i)
+  input [N_SOURCE-1:0] complete_i, // $onehot0(complete_i)
+  output logic [N_SOURCE-1:0] ip_o
+);
+
+  logic [N_SOURCE-1:0] ia;    // Interrupt Active
+  logic [N_SOURCE-1:0] set;   // Set: (le_i) ? src_i & ~src_q : src_i ;
+  logic [N_SOURCE-1:0] src_q; // src_i delayed by one cycle, for edge detection
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) src_q <= '0;
+    else         src_q <= src_i;
+  end
+
+  // Edge mode: rising edge of src_i. Level mode: src_i itself.
+  always_comb begin
+    for (int i = 0 ; i < N_SOURCE; i++) begin
+      set[i] = (le_i[i]) ? src_i[i] & ~src_q[i] : src_i[i] ;
+    end
+  end
+
+  // Interrupt pending is set by source (depends on le_i), cleared by claim_i.
+  // While an interrupt is active (ia set), `set` does not affect ip_o.
+  // RISC-V PLIC spec mentions it can have a counter for edge triggered
+  // sources, but the feature is skipped as a counter consumes substantial
+  // logic.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ip_o <= '0;
+    end else begin
+      ip_o <= (ip_o | (set & ~ia & ~ip_o)) & (~(ip_o & claim_i));
+    end
+  end
+
+  // Interrupt active controls ip_o: once set, the source cannot raise ip_o
+  // again until the target completes it, even though claim_i may clear ip_o.
+  // ia is cleared by complete_i only when it is set and ip_o is already low.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ia <= '0;
+    end else begin
+      ia <= (ia | (set & ~ia)) & (~(ia & complete_i & ~ip_o));
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..ca7182f
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,462 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Package auto-generated by `reggen` containing data structure
+package rv_plic_reg_pkg;
+ // Param list
+ parameter int NumSrc = 44;
+ parameter int NumTarget = 1;
+ parameter int PrioWidth = 2;
+ // Address width within the block
+ parameter int BlockAw = 10;
+ ////////////////////////////
+ // Typedefs for registers //
+ ////////////////////////////
+ typedef struct packed {
+ logic q;
+ } rv_plic_reg2hw_le_mreg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio0_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio1_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio2_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio3_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio4_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio5_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio6_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio7_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio8_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio9_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio10_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio11_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio12_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio13_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio14_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio15_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio16_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio17_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio18_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio19_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio20_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio21_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio22_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio23_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio24_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio25_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio26_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio27_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio28_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio29_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio30_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio31_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio32_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio33_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio34_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio35_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio36_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio37_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio38_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio39_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio40_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio41_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio42_reg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_prio43_reg_t;
+ typedef struct packed {
+ logic q;
+ } rv_plic_reg2hw_ie0_mreg_t;
+ typedef struct packed {
+ logic [1:0] q;
+ } rv_plic_reg2hw_threshold0_reg_t;
+ typedef struct packed {
+ logic [7:0] q;
+ logic qe;
+ logic re;
+ } rv_plic_reg2hw_cc0_reg_t;
+ typedef struct packed {
+ logic q;
+ } rv_plic_reg2hw_msip0_reg_t;
+ typedef struct packed {
+ logic d;
+ logic de;
+ } rv_plic_hw2reg_ip_mreg_t;
+ typedef struct packed {
+ logic [7:0] d;
+ } rv_plic_hw2reg_cc0_reg_t;
+ ///////////////////////////////////////
+ // Register to internal design logic //
+ ///////////////////////////////////////
+ // Register-to-hardware bundle. Packed struct, so the first member occupies
+ // the most significant bits; the ranges below are the bit positions inside
+ // the 189-bit struct (44 le + 44x2 prio + 44 ie0 + 2 threshold + 10 cc0 + 1 msip).
+ typedef struct packed {
+ rv_plic_reg2hw_le_mreg_t [43:0] le; // [188:145]
+ rv_plic_reg2hw_prio0_reg_t prio0; // [144:143]
+ rv_plic_reg2hw_prio1_reg_t prio1; // [142:141]
+ rv_plic_reg2hw_prio2_reg_t prio2; // [140:139]
+ rv_plic_reg2hw_prio3_reg_t prio3; // [138:137]
+ rv_plic_reg2hw_prio4_reg_t prio4; // [136:135]
+ rv_plic_reg2hw_prio5_reg_t prio5; // [134:133]
+ rv_plic_reg2hw_prio6_reg_t prio6; // [132:131]
+ rv_plic_reg2hw_prio7_reg_t prio7; // [130:129]
+ rv_plic_reg2hw_prio8_reg_t prio8; // [128:127]
+ rv_plic_reg2hw_prio9_reg_t prio9; // [126:125]
+ rv_plic_reg2hw_prio10_reg_t prio10; // [124:123]
+ rv_plic_reg2hw_prio11_reg_t prio11; // [122:121]
+ rv_plic_reg2hw_prio12_reg_t prio12; // [120:119]
+ rv_plic_reg2hw_prio13_reg_t prio13; // [118:117]
+ rv_plic_reg2hw_prio14_reg_t prio14; // [116:115]
+ rv_plic_reg2hw_prio15_reg_t prio15; // [114:113]
+ rv_plic_reg2hw_prio16_reg_t prio16; // [112:111]
+ rv_plic_reg2hw_prio17_reg_t prio17; // [110:109]
+ rv_plic_reg2hw_prio18_reg_t prio18; // [108:107]
+ rv_plic_reg2hw_prio19_reg_t prio19; // [106:105]
+ rv_plic_reg2hw_prio20_reg_t prio20; // [104:103]
+ rv_plic_reg2hw_prio21_reg_t prio21; // [102:101]
+ rv_plic_reg2hw_prio22_reg_t prio22; // [100:99]
+ rv_plic_reg2hw_prio23_reg_t prio23; // [98:97]
+ rv_plic_reg2hw_prio24_reg_t prio24; // [96:95]
+ rv_plic_reg2hw_prio25_reg_t prio25; // [94:93]
+ rv_plic_reg2hw_prio26_reg_t prio26; // [92:91]
+ rv_plic_reg2hw_prio27_reg_t prio27; // [90:89]
+ rv_plic_reg2hw_prio28_reg_t prio28; // [88:87]
+ rv_plic_reg2hw_prio29_reg_t prio29; // [86:85]
+ rv_plic_reg2hw_prio30_reg_t prio30; // [84:83]
+ rv_plic_reg2hw_prio31_reg_t prio31; // [82:81]
+ rv_plic_reg2hw_prio32_reg_t prio32; // [80:79]
+ rv_plic_reg2hw_prio33_reg_t prio33; // [78:77]
+ rv_plic_reg2hw_prio34_reg_t prio34; // [76:75]
+ rv_plic_reg2hw_prio35_reg_t prio35; // [74:73]
+ rv_plic_reg2hw_prio36_reg_t prio36; // [72:71]
+ rv_plic_reg2hw_prio37_reg_t prio37; // [70:69]
+ rv_plic_reg2hw_prio38_reg_t prio38; // [68:67]
+ rv_plic_reg2hw_prio39_reg_t prio39; // [66:65]
+ rv_plic_reg2hw_prio40_reg_t prio40; // [64:63]
+ rv_plic_reg2hw_prio41_reg_t prio41; // [62:61]
+ rv_plic_reg2hw_prio42_reg_t prio42; // [60:59]
+ rv_plic_reg2hw_prio43_reg_t prio43; // [58:57]
+ rv_plic_reg2hw_ie0_mreg_t [43:0] ie0; // [56:13]
+ rv_plic_reg2hw_threshold0_reg_t threshold0; // [12:11]
+ rv_plic_reg2hw_cc0_reg_t cc0; // [10:1]
+ rv_plic_reg2hw_msip0_reg_t msip0; // [0:0]
+ } rv_plic_reg2hw_t;
+ ///////////////////////////////////////
+ // Internal design logic to register //
+ ///////////////////////////////////////
+ // Hardware-to-register bundle: 44 two-bit {d, de} pending entries followed
+ // by the 8-bit CC0 read value (96 bits in total).
+ typedef struct packed {
+ rv_plic_hw2reg_ip_mreg_t [43:0] ip; // [95:8]
+ rv_plic_hw2reg_cc0_reg_t cc0; // [7:0]
+ } rv_plic_hw2reg_t;
+ // Register Address
+ parameter logic [BlockAw-1:0] RV_PLIC_IP_0_OFFSET = 10'h 0;
+ parameter logic [BlockAw-1:0] RV_PLIC_IP_1_OFFSET = 10'h 4;
+ parameter logic [BlockAw-1:0] RV_PLIC_LE_0_OFFSET = 10'h 8;
+ parameter logic [BlockAw-1:0] RV_PLIC_LE_1_OFFSET = 10'h c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO0_OFFSET = 10'h 10;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO1_OFFSET = 10'h 14;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO2_OFFSET = 10'h 18;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO3_OFFSET = 10'h 1c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO4_OFFSET = 10'h 20;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO5_OFFSET = 10'h 24;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO6_OFFSET = 10'h 28;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO7_OFFSET = 10'h 2c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO8_OFFSET = 10'h 30;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO9_OFFSET = 10'h 34;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO10_OFFSET = 10'h 38;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO11_OFFSET = 10'h 3c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO12_OFFSET = 10'h 40;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO13_OFFSET = 10'h 44;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO14_OFFSET = 10'h 48;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO15_OFFSET = 10'h 4c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO16_OFFSET = 10'h 50;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO17_OFFSET = 10'h 54;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO18_OFFSET = 10'h 58;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO19_OFFSET = 10'h 5c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO20_OFFSET = 10'h 60;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO21_OFFSET = 10'h 64;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO22_OFFSET = 10'h 68;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO23_OFFSET = 10'h 6c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO24_OFFSET = 10'h 70;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO25_OFFSET = 10'h 74;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO26_OFFSET = 10'h 78;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO27_OFFSET = 10'h 7c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO28_OFFSET = 10'h 80;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO29_OFFSET = 10'h 84;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO30_OFFSET = 10'h 88;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO31_OFFSET = 10'h 8c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO32_OFFSET = 10'h 90;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO33_OFFSET = 10'h 94;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO34_OFFSET = 10'h 98;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO35_OFFSET = 10'h 9c;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO36_OFFSET = 10'h a0;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO37_OFFSET = 10'h a4;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO38_OFFSET = 10'h a8;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO39_OFFSET = 10'h ac;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO40_OFFSET = 10'h b0;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO41_OFFSET = 10'h b4;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO42_OFFSET = 10'h b8;
+ parameter logic [BlockAw-1:0] RV_PLIC_PRIO43_OFFSET = 10'h bc;
+ parameter logic [BlockAw-1:0] RV_PLIC_IE0_0_OFFSET = 10'h c0;
+ parameter logic [BlockAw-1:0] RV_PLIC_IE0_1_OFFSET = 10'h c4;
+ parameter logic [BlockAw-1:0] RV_PLIC_THRESHOLD0_OFFSET = 10'h c8;
+ parameter logic [BlockAw-1:0] RV_PLIC_CC0_OFFSET = 10'h cc;
+ parameter logic [BlockAw-1:0] RV_PLIC_MSIP0_OFFSET = 10'h d0;
+ // Register Index
+  // One enumerator per register, in address order, so each value is the
+  // register's position in the offset list and in RV_PLIC_PERMIT (0..52).
+  // The list previously held only the two IE0 entries followed by a trailing
+  // comma, which is not legal in an enum declaration.
+  typedef enum int {
+    RV_PLIC_IP_0,
+    RV_PLIC_IP_1,
+    RV_PLIC_LE_0,
+    RV_PLIC_LE_1,
+    RV_PLIC_PRIO0,
+    RV_PLIC_PRIO1,
+    RV_PLIC_PRIO2,
+    RV_PLIC_PRIO3,
+    RV_PLIC_PRIO4,
+    RV_PLIC_PRIO5,
+    RV_PLIC_PRIO6,
+    RV_PLIC_PRIO7,
+    RV_PLIC_PRIO8,
+    RV_PLIC_PRIO9,
+    RV_PLIC_PRIO10,
+    RV_PLIC_PRIO11,
+    RV_PLIC_PRIO12,
+    RV_PLIC_PRIO13,
+    RV_PLIC_PRIO14,
+    RV_PLIC_PRIO15,
+    RV_PLIC_PRIO16,
+    RV_PLIC_PRIO17,
+    RV_PLIC_PRIO18,
+    RV_PLIC_PRIO19,
+    RV_PLIC_PRIO20,
+    RV_PLIC_PRIO21,
+    RV_PLIC_PRIO22,
+    RV_PLIC_PRIO23,
+    RV_PLIC_PRIO24,
+    RV_PLIC_PRIO25,
+    RV_PLIC_PRIO26,
+    RV_PLIC_PRIO27,
+    RV_PLIC_PRIO28,
+    RV_PLIC_PRIO29,
+    RV_PLIC_PRIO30,
+    RV_PLIC_PRIO31,
+    RV_PLIC_PRIO32,
+    RV_PLIC_PRIO33,
+    RV_PLIC_PRIO34,
+    RV_PLIC_PRIO35,
+    RV_PLIC_PRIO36,
+    RV_PLIC_PRIO37,
+    RV_PLIC_PRIO38,
+    RV_PLIC_PRIO39,
+    RV_PLIC_PRIO40,
+    RV_PLIC_PRIO41,
+    RV_PLIC_PRIO42,
+    RV_PLIC_PRIO43,
+    RV_PLIC_IE0_0,
+    RV_PLIC_IE0_1,
+    RV_PLIC_THRESHOLD0,
+    RV_PLIC_CC0,
+    RV_PLIC_MSIP0
+  } rv_plic_id_e;
+ // Register width information to check illegal writes
+  // Per-register byte mask, one entry per register in address order.
+  // The index comments give each entry's actual position in this 53-entry
+  // array (they previously carried non-contiguous numbers that did not match).
+  parameter logic [3:0] RV_PLIC_PERMIT [53] = '{
+    4'b 1111, // index[ 0] RV_PLIC_IP_0
+    4'b 1111, // index[ 1] RV_PLIC_IP_1
+    4'b 1111, // index[ 2] RV_PLIC_LE_0
+    4'b 1111, // index[ 3] RV_PLIC_LE_1
+    4'b 0001, // index[ 4] RV_PLIC_PRIO0
+    4'b 0001, // index[ 5] RV_PLIC_PRIO1
+    4'b 0001, // index[ 6] RV_PLIC_PRIO2
+    4'b 0001, // index[ 7] RV_PLIC_PRIO3
+    4'b 0001, // index[ 8] RV_PLIC_PRIO4
+    4'b 0001, // index[ 9] RV_PLIC_PRIO5
+    4'b 0001, // index[10] RV_PLIC_PRIO6
+    4'b 0001, // index[11] RV_PLIC_PRIO7
+    4'b 0001, // index[12] RV_PLIC_PRIO8
+    4'b 0001, // index[13] RV_PLIC_PRIO9
+    4'b 0001, // index[14] RV_PLIC_PRIO10
+    4'b 0001, // index[15] RV_PLIC_PRIO11
+    4'b 0001, // index[16] RV_PLIC_PRIO12
+    4'b 0001, // index[17] RV_PLIC_PRIO13
+    4'b 0001, // index[18] RV_PLIC_PRIO14
+    4'b 0001, // index[19] RV_PLIC_PRIO15
+    4'b 0001, // index[20] RV_PLIC_PRIO16
+    4'b 0001, // index[21] RV_PLIC_PRIO17
+    4'b 0001, // index[22] RV_PLIC_PRIO18
+    4'b 0001, // index[23] RV_PLIC_PRIO19
+    4'b 0001, // index[24] RV_PLIC_PRIO20
+    4'b 0001, // index[25] RV_PLIC_PRIO21
+    4'b 0001, // index[26] RV_PLIC_PRIO22
+    4'b 0001, // index[27] RV_PLIC_PRIO23
+    4'b 0001, // index[28] RV_PLIC_PRIO24
+    4'b 0001, // index[29] RV_PLIC_PRIO25
+    4'b 0001, // index[30] RV_PLIC_PRIO26
+    4'b 0001, // index[31] RV_PLIC_PRIO27
+    4'b 0001, // index[32] RV_PLIC_PRIO28
+    4'b 0001, // index[33] RV_PLIC_PRIO29
+    4'b 0001, // index[34] RV_PLIC_PRIO30
+    4'b 0001, // index[35] RV_PLIC_PRIO31
+    4'b 0001, // index[36] RV_PLIC_PRIO32
+    4'b 0001, // index[37] RV_PLIC_PRIO33
+    4'b 0001, // index[38] RV_PLIC_PRIO34
+    4'b 0001, // index[39] RV_PLIC_PRIO35
+    4'b 0001, // index[40] RV_PLIC_PRIO36
+    4'b 0001, // index[41] RV_PLIC_PRIO37
+    4'b 0001, // index[42] RV_PLIC_PRIO38
+    4'b 0001, // index[43] RV_PLIC_PRIO39
+    4'b 0001, // index[44] RV_PLIC_PRIO40
+    4'b 0001, // index[45] RV_PLIC_PRIO41
+    4'b 0001, // index[46] RV_PLIC_PRIO42
+    4'b 0001, // index[47] RV_PLIC_PRIO43
+    4'b 1111, // index[48] RV_PLIC_IE0_0
+    4'b 1111, // index[49] RV_PLIC_IE0_1
+    4'b 0001, // index[50] RV_PLIC_THRESHOLD0
+    4'b 0001, // index[51] RV_PLIC_CC0
+    4'b 0001  // index[52] RV_PLIC_MSIP0
+  };
+
+// Closes package rv_plic_reg_pkg, which previously had no terminator.
+endpackage
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..d7c1e35
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,6065 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Top module auto-generated by `reggen`
+// Register-file top level for rv_plic (generated by reggen).
+// Bridges the tl_i/tl_o bus port to the individual sub-registers and exposes
+// their values to the core logic through reg2hw / hw2reg.
+module rv_plic_reg_top (
+ input clk_i, // clock, forwarded to the bus adapter and every sub-register
+ input rst_ni, // reset, forwarded to the bus adapter and every sub-register
+ // Register interface below can be changed
+ input tlul_pkg::tl_h2d_t tl_i, // bus input (tl_h2d_t)
+ output tlul_pkg::tl_d2h_t tl_o, // bus output (tl_d2h_t)
+ // To HW
+ output rv_plic_reg_pkg::rv_plic_reg2hw_t reg2hw, // Write
+ input rv_plic_reg_pkg::rv_plic_hw2reg_t hw2reg, // Read
+ // Config
+ input devmode_i // If 1, explicit error return for unmapped register access
+);
+ import rv_plic_reg_pkg::* ;
+ // Register-bus geometry: address width, data width, and number of byte lanes.
+ localparam int AW = 10, DW = 32;
+ localparam int DBW = DW / 8;
+ // Register-bus signals, grouped by width.
+ logic reg_we, reg_re;
+ logic [AW-1:0] reg_addr;
+ logic [DW-1:0] reg_wdata, reg_rdata, reg_rdata_next;
+ logic [DBW-1:0] reg_be;
+ logic reg_error, addrmiss, wr_err;
+ // Local copies of the bus port; both are plain pass-throughs of tl_i / tl_o.
+ tlul_pkg::tl_h2d_t tl_reg_h2d;
+ tlul_pkg::tl_d2h_t tl_reg_d2h;
+ assign tl_o = tl_reg_d2h;
+ assign tl_reg_h2d = tl_i;
+ // Bus adapter: produces the reg_* request signals and consumes read data / error.
+ tlul_adapter_reg #(.RegAw(AW), .RegDw(DW)) u_reg_if (
+   .clk_i   (clk_i),
+   .rst_ni  (rst_ni),
+   .tl_i    (tl_reg_h2d),
+   .tl_o    (tl_reg_d2h),
+   .we_o    (reg_we),
+   .re_o    (reg_re),
+   .addr_o  (reg_addr),
+   .wdata_o (reg_wdata),
+   .be_o    (reg_be),
+   .rdata_i (reg_rdata),
+   .error_i (reg_error)
+ );
+ // reg_rdata simply mirrors reg_rdata_next.
+ assign reg_rdata = reg_rdata_next;
+ // A write error always raises reg_error; an address miss does so only when devmode_i is set.
+ assign reg_error = wr_err | (addrmiss & devmode_i);
+ // Define SW related signals
+ // Format: <reg>_<field>_{wd|we|qs}
+ // or <reg>_{wd|we|qs} if field == 1 or 0
+ // Read-side (qs) values of fields p_0..p_31 of ip_0.
+ logic ip_0_p_0_qs, ip_0_p_1_qs, ip_0_p_2_qs, ip_0_p_3_qs,
+       ip_0_p_4_qs, ip_0_p_5_qs, ip_0_p_6_qs, ip_0_p_7_qs,
+       ip_0_p_8_qs, ip_0_p_9_qs, ip_0_p_10_qs, ip_0_p_11_qs,
+       ip_0_p_12_qs, ip_0_p_13_qs, ip_0_p_14_qs, ip_0_p_15_qs,
+       ip_0_p_16_qs, ip_0_p_17_qs, ip_0_p_18_qs, ip_0_p_19_qs,
+       ip_0_p_20_qs, ip_0_p_21_qs, ip_0_p_22_qs, ip_0_p_23_qs,
+       ip_0_p_24_qs, ip_0_p_25_qs, ip_0_p_26_qs, ip_0_p_27_qs,
+       ip_0_p_28_qs, ip_0_p_29_qs, ip_0_p_30_qs, ip_0_p_31_qs;
+ // Read-side (qs) values of fields p_32..p_43 of ip_1.
+ logic ip_1_p_32_qs, ip_1_p_33_qs, ip_1_p_34_qs, ip_1_p_35_qs,
+       ip_1_p_36_qs, ip_1_p_37_qs, ip_1_p_38_qs, ip_1_p_39_qs,
+       ip_1_p_40_qs, ip_1_p_41_qs, ip_1_p_42_qs, ip_1_p_43_qs;
+ // One line per le field: read value (qs), write data (wd), write enable (we).
+ logic le_0_le_0_qs, le_0_le_0_wd, le_0_le_0_we;
+ logic le_0_le_1_qs, le_0_le_1_wd, le_0_le_1_we;
+ logic le_0_le_2_qs, le_0_le_2_wd, le_0_le_2_we;
+ logic le_0_le_3_qs, le_0_le_3_wd, le_0_le_3_we;
+ logic le_0_le_4_qs, le_0_le_4_wd, le_0_le_4_we;
+ logic le_0_le_5_qs, le_0_le_5_wd, le_0_le_5_we;
+ logic le_0_le_6_qs, le_0_le_6_wd, le_0_le_6_we;
+ logic le_0_le_7_qs, le_0_le_7_wd, le_0_le_7_we;
+ logic le_0_le_8_qs, le_0_le_8_wd, le_0_le_8_we;
+ logic le_0_le_9_qs, le_0_le_9_wd, le_0_le_9_we;
+ logic le_0_le_10_qs, le_0_le_10_wd, le_0_le_10_we;
+ logic le_0_le_11_qs, le_0_le_11_wd, le_0_le_11_we;
+ logic le_0_le_12_qs, le_0_le_12_wd, le_0_le_12_we;
+ logic le_0_le_13_qs, le_0_le_13_wd, le_0_le_13_we;
+ logic le_0_le_14_qs, le_0_le_14_wd, le_0_le_14_we;
+ logic le_0_le_15_qs, le_0_le_15_wd, le_0_le_15_we;
+ logic le_0_le_16_qs, le_0_le_16_wd, le_0_le_16_we;
+ logic le_0_le_17_qs, le_0_le_17_wd, le_0_le_17_we;
+ logic le_0_le_18_qs, le_0_le_18_wd, le_0_le_18_we;
+ logic le_0_le_19_qs, le_0_le_19_wd, le_0_le_19_we;
+ logic le_0_le_20_qs, le_0_le_20_wd, le_0_le_20_we;
+ logic le_0_le_21_qs, le_0_le_21_wd, le_0_le_21_we;
+ logic le_0_le_22_qs, le_0_le_22_wd, le_0_le_22_we;
+ logic le_0_le_23_qs, le_0_le_23_wd, le_0_le_23_we;
+ logic le_0_le_24_qs, le_0_le_24_wd, le_0_le_24_we;
+ logic le_0_le_25_qs, le_0_le_25_wd, le_0_le_25_we;
+ logic le_0_le_26_qs, le_0_le_26_wd, le_0_le_26_we;
+ logic le_0_le_27_qs, le_0_le_27_wd, le_0_le_27_we;
+ logic le_0_le_28_qs, le_0_le_28_wd, le_0_le_28_we;
+ logic le_0_le_29_qs, le_0_le_29_wd, le_0_le_29_we;
+ logic le_0_le_30_qs, le_0_le_30_wd, le_0_le_30_we;
+ logic le_0_le_31_qs, le_0_le_31_wd, le_0_le_31_we;
+ logic le_1_le_32_qs, le_1_le_32_wd, le_1_le_32_we;
+ logic le_1_le_33_qs, le_1_le_33_wd, le_1_le_33_we;
+ logic le_1_le_34_qs, le_1_le_34_wd, le_1_le_34_we;
+ logic le_1_le_35_qs, le_1_le_35_wd, le_1_le_35_we;
+ logic le_1_le_36_qs, le_1_le_36_wd, le_1_le_36_we;
+ logic le_1_le_37_qs, le_1_le_37_wd, le_1_le_37_we;
+ logic le_1_le_38_qs, le_1_le_38_wd, le_1_le_38_we;
+ logic le_1_le_39_qs, le_1_le_39_wd, le_1_le_39_we;
+ logic le_1_le_40_qs, le_1_le_40_wd, le_1_le_40_we;
+ logic le_1_le_41_qs, le_1_le_41_wd, le_1_le_41_we;
+ logic le_1_le_42_qs, le_1_le_42_wd, le_1_le_42_we;
+ logic le_1_le_43_qs, le_1_le_43_wd, le_1_le_43_we;
+ // One line per prio register: 2-bit read value (qs) and write data (wd), 1-bit write enable (we).
+ logic [1:0] prio0_qs, prio0_wd; logic prio0_we;
+ logic [1:0] prio1_qs, prio1_wd; logic prio1_we;
+ logic [1:0] prio2_qs, prio2_wd; logic prio2_we;
+ logic [1:0] prio3_qs, prio3_wd; logic prio3_we;
+ logic [1:0] prio4_qs, prio4_wd; logic prio4_we;
+ logic [1:0] prio5_qs, prio5_wd; logic prio5_we;
+ logic [1:0] prio6_qs, prio6_wd; logic prio6_we;
+ logic [1:0] prio7_qs, prio7_wd; logic prio7_we;
+ logic [1:0] prio8_qs, prio8_wd; logic prio8_we;
+ logic [1:0] prio9_qs, prio9_wd; logic prio9_we;
+ logic [1:0] prio10_qs, prio10_wd; logic prio10_we;
+ logic [1:0] prio11_qs, prio11_wd; logic prio11_we;
+ logic [1:0] prio12_qs, prio12_wd; logic prio12_we;
+ logic [1:0] prio13_qs, prio13_wd; logic prio13_we;
+ logic [1:0] prio14_qs, prio14_wd; logic prio14_we;
+ logic [1:0] prio15_qs, prio15_wd; logic prio15_we;
+ logic [1:0] prio16_qs, prio16_wd; logic prio16_we;
+ logic [1:0] prio17_qs, prio17_wd; logic prio17_we;
+ logic [1:0] prio18_qs, prio18_wd; logic prio18_we;
+ logic [1:0] prio19_qs, prio19_wd; logic prio19_we;
+ logic [1:0] prio20_qs, prio20_wd; logic prio20_we;
+ logic [1:0] prio21_qs, prio21_wd; logic prio21_we;
+ logic [1:0] prio22_qs, prio22_wd; logic prio22_we;
+ logic [1:0] prio23_qs, prio23_wd; logic prio23_we;
+ logic [1:0] prio24_qs, prio24_wd; logic prio24_we;
+ logic [1:0] prio25_qs, prio25_wd; logic prio25_we;
+ logic [1:0] prio26_qs, prio26_wd; logic prio26_we;
+ logic [1:0] prio27_qs, prio27_wd; logic prio27_we;
+ logic [1:0] prio28_qs, prio28_wd; logic prio28_we;
+ logic [1:0] prio29_qs, prio29_wd; logic prio29_we;
+ logic [1:0] prio30_qs, prio30_wd; logic prio30_we;
+ logic [1:0] prio31_qs, prio31_wd; logic prio31_we;
+ logic [1:0] prio32_qs, prio32_wd; logic prio32_we;
+ logic [1:0] prio33_qs, prio33_wd; logic prio33_we;
+ logic [1:0] prio34_qs, prio34_wd; logic prio34_we;
+ logic [1:0] prio35_qs, prio35_wd; logic prio35_we;
+ logic [1:0] prio36_qs, prio36_wd; logic prio36_we;
+ logic [1:0] prio37_qs, prio37_wd; logic prio37_we;
+ logic [1:0] prio38_qs, prio38_wd; logic prio38_we;
+ logic [1:0] prio39_qs, prio39_wd; logic prio39_we;
+ logic [1:0] prio40_qs, prio40_wd; logic prio40_we;
+ logic [1:0] prio41_qs, prio41_wd; logic prio41_we;
+ logic [1:0] prio42_qs, prio42_wd; logic prio42_we;
+ logic [1:0] prio43_qs, prio43_wd; logic prio43_we;
+ // One line per ie0 field: read value (qs), write data (wd), write enable (we).
+ logic ie0_0_e_0_qs, ie0_0_e_0_wd, ie0_0_e_0_we;
+ logic ie0_0_e_1_qs, ie0_0_e_1_wd, ie0_0_e_1_we;
+ logic ie0_0_e_2_qs, ie0_0_e_2_wd, ie0_0_e_2_we;
+ logic ie0_0_e_3_qs, ie0_0_e_3_wd, ie0_0_e_3_we;
+ logic ie0_0_e_4_qs, ie0_0_e_4_wd, ie0_0_e_4_we;
+ logic ie0_0_e_5_qs, ie0_0_e_5_wd, ie0_0_e_5_we;
+ logic ie0_0_e_6_qs, ie0_0_e_6_wd, ie0_0_e_6_we;
+ logic ie0_0_e_7_qs, ie0_0_e_7_wd, ie0_0_e_7_we;
+ logic ie0_0_e_8_qs, ie0_0_e_8_wd, ie0_0_e_8_we;
+ logic ie0_0_e_9_qs, ie0_0_e_9_wd, ie0_0_e_9_we;
+ logic ie0_0_e_10_qs, ie0_0_e_10_wd, ie0_0_e_10_we;
+ logic ie0_0_e_11_qs, ie0_0_e_11_wd, ie0_0_e_11_we;
+ logic ie0_0_e_12_qs, ie0_0_e_12_wd, ie0_0_e_12_we;
+ logic ie0_0_e_13_qs, ie0_0_e_13_wd, ie0_0_e_13_we;
+ logic ie0_0_e_14_qs, ie0_0_e_14_wd, ie0_0_e_14_we;
+ logic ie0_0_e_15_qs, ie0_0_e_15_wd, ie0_0_e_15_we;
+ logic ie0_0_e_16_qs, ie0_0_e_16_wd, ie0_0_e_16_we;
+ logic ie0_0_e_17_qs, ie0_0_e_17_wd, ie0_0_e_17_we;
+ logic ie0_0_e_18_qs, ie0_0_e_18_wd, ie0_0_e_18_we;
+ logic ie0_0_e_19_qs, ie0_0_e_19_wd, ie0_0_e_19_we;
+ logic ie0_0_e_20_qs, ie0_0_e_20_wd, ie0_0_e_20_we;
+ logic ie0_0_e_21_qs, ie0_0_e_21_wd, ie0_0_e_21_we;
+ logic ie0_0_e_22_qs, ie0_0_e_22_wd, ie0_0_e_22_we;
+ logic ie0_0_e_23_qs, ie0_0_e_23_wd, ie0_0_e_23_we;
+ logic ie0_0_e_24_qs, ie0_0_e_24_wd, ie0_0_e_24_we;
+ logic ie0_0_e_25_qs, ie0_0_e_25_wd, ie0_0_e_25_we;
+ logic ie0_0_e_26_qs, ie0_0_e_26_wd, ie0_0_e_26_we;
+ logic ie0_0_e_27_qs, ie0_0_e_27_wd, ie0_0_e_27_we;
+ logic ie0_0_e_28_qs, ie0_0_e_28_wd, ie0_0_e_28_we;
+ logic ie0_0_e_29_qs, ie0_0_e_29_wd, ie0_0_e_29_we;
+ logic ie0_0_e_30_qs, ie0_0_e_30_wd, ie0_0_e_30_we;
+ logic ie0_0_e_31_qs, ie0_0_e_31_wd, ie0_0_e_31_we;
+ logic ie0_1_e_32_qs, ie0_1_e_32_wd, ie0_1_e_32_we;
+ logic ie0_1_e_33_qs, ie0_1_e_33_wd, ie0_1_e_33_we;
+ logic ie0_1_e_34_qs, ie0_1_e_34_wd, ie0_1_e_34_we;
+ logic ie0_1_e_35_qs, ie0_1_e_35_wd, ie0_1_e_35_we;
+ logic ie0_1_e_36_qs, ie0_1_e_36_wd, ie0_1_e_36_we;
+ logic ie0_1_e_37_qs, ie0_1_e_37_wd, ie0_1_e_37_we;
+ logic ie0_1_e_38_qs, ie0_1_e_38_wd, ie0_1_e_38_we;
+ logic ie0_1_e_39_qs, ie0_1_e_39_wd, ie0_1_e_39_we;
+ logic ie0_1_e_40_qs, ie0_1_e_40_wd, ie0_1_e_40_we;
+ logic ie0_1_e_41_qs, ie0_1_e_41_wd, ie0_1_e_41_we;
+ logic ie0_1_e_42_qs, ie0_1_e_42_wd, ie0_1_e_42_we;
+ logic ie0_1_e_43_qs, ie0_1_e_43_wd, ie0_1_e_43_we;
+ // threshold0: 2-bit value; cc0: 8-bit value with an additional read strobe (re); msip0: single bit.
+ logic [1:0] threshold0_qs, threshold0_wd;
+ logic threshold0_we;
+ logic [7:0] cc0_qs, cc0_wd;
+ logic cc0_we, cc0_re;
+ logic msip0_qs, msip0_wd, msip0_we;
+ // Register instances
+ // Subregister 0 of Multireg ip
+ // R[ip_0]: V(False)
+ // F[p_0]: 0:0
+ // ip[0]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_0 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[0].de), .d(hw2reg.ip[0].d),
+   .qe(), .q(),
+   .qs(ip_0_p_0_qs)
+ );
+ // F[p_1]: 1:1
+ // ip[1]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_1 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[1].de), .d(hw2reg.ip[1].d),
+   .qe(), .q(),
+   .qs(ip_0_p_1_qs)
+ );
+ // F[p_2]: 2:2
+ // ip[2]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_2 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[2].de), .d(hw2reg.ip[2].d),
+   .qe(), .q(),
+   .qs(ip_0_p_2_qs)
+ );
+ // F[p_3]: 3:3
+ // ip[3]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_3 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[3].de), .d(hw2reg.ip[3].d),
+   .qe(), .q(),
+   .qs(ip_0_p_3_qs)
+ );
+ // F[p_4]: 4:4
+ // ip[4]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_4 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[4].de), .d(hw2reg.ip[4].d),
+   .qe(), .q(),
+   .qs(ip_0_p_4_qs)
+ );
+ // F[p_5]: 5:5
+ // ip[5]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_5 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[5].de), .d(hw2reg.ip[5].d),
+   .qe(), .q(),
+   .qs(ip_0_p_5_qs)
+ );
+ // F[p_6]: 6:6
+ // ip[6]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_6 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[6].de), .d(hw2reg.ip[6].d),
+   .qe(), .q(),
+   .qs(ip_0_p_6_qs)
+ );
+ // F[p_7]: 7:7
+ // ip[7]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_7 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[7].de), .d(hw2reg.ip[7].d),
+   .qe(), .q(),
+   .qs(ip_0_p_7_qs)
+ );
+ // F[p_8]: 8:8
+ // ip[8]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_8 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[8].de), .d(hw2reg.ip[8].d),
+   .qe(), .q(),
+   .qs(ip_0_p_8_qs)
+ );
+ // F[p_9]: 9:9
+ // ip[9]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_9 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[9].de), .d(hw2reg.ip[9].d),
+   .qe(), .q(),
+   .qs(ip_0_p_9_qs)
+ );
+ // F[p_10]: 10:10
+ // ip[10]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_10 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[10].de), .d(hw2reg.ip[10].d),
+   .qe(), .q(),
+   .qs(ip_0_p_10_qs)
+ );
+ // F[p_11]: 11:11
+ // ip[11]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_11 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[11].de), .d(hw2reg.ip[11].d),
+   .qe(), .q(),
+   .qs(ip_0_p_11_qs)
+ );
+ // F[p_12]: 12:12
+ // ip[12]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_12 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[12].de), .d(hw2reg.ip[12].d),
+   .qe(), .q(),
+   .qs(ip_0_p_12_qs)
+ );
+ // F[p_13]: 13:13
+ // ip[13]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_13 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[13].de), .d(hw2reg.ip[13].d),
+   .qe(), .q(),
+   .qs(ip_0_p_13_qs)
+ );
+ // F[p_14]: 14:14
+ // ip[14]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_14 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[14].de), .d(hw2reg.ip[14].d),
+   .qe(), .q(),
+   .qs(ip_0_p_14_qs)
+ );
+ // F[p_15]: 15:15
+ // ip[15]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_15 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[15].de), .d(hw2reg.ip[15].d),
+   .qe(), .q(),
+   .qs(ip_0_p_15_qs)
+ );
+ // F[p_16]: 16:16
+ // ip[16]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_16 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[16].de), .d(hw2reg.ip[16].d),
+   .qe(), .q(),
+   .qs(ip_0_p_16_qs)
+ );
+ // F[p_17]: 17:17
+ // ip[17]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_17 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[17].de), .d(hw2reg.ip[17].d),
+   .qe(), .q(),
+   .qs(ip_0_p_17_qs)
+ );
+ // F[p_18]: 18:18
+ // ip[18]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_18 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[18].de), .d(hw2reg.ip[18].d),
+   .qe(), .q(),
+   .qs(ip_0_p_18_qs)
+ );
+ // F[p_19]: 19:19
+ // ip[19]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_19 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[19].de), .d(hw2reg.ip[19].d),
+   .qe(), .q(),
+   .qs(ip_0_p_19_qs)
+ );
+ // F[p_20]: 20:20
+ // ip[20]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_20 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[20].de), .d(hw2reg.ip[20].d),
+   .qe(), .q(),
+   .qs(ip_0_p_20_qs)
+ );
+ // F[p_21]: 21:21
+ // ip[21]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_21 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[21].de), .d(hw2reg.ip[21].d),
+   .qe(), .q(),
+   .qs(ip_0_p_21_qs)
+ );
+ // F[p_22]: 22:22
+ // ip[22]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_22 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[22].de), .d(hw2reg.ip[22].d),
+   .qe(), .q(),
+   .qs(ip_0_p_22_qs)
+ );
+ // F[p_23]: 23:23
+ // ip[23]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_23 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[23].de), .d(hw2reg.ip[23].d),
+   .qe(), .q(),
+   .qs(ip_0_p_23_qs)
+ );
+ // F[p_24]: 24:24
+ // ip[24]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_24 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[24].de), .d(hw2reg.ip[24].d),
+   .qe(), .q(),
+   .qs(ip_0_p_24_qs)
+ );
+ // F[p_25]: 25:25
+ // ip[25]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_25 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[25].de), .d(hw2reg.ip[25].d),
+   .qe(), .q(),
+   .qs(ip_0_p_25_qs)
+ );
+ // F[p_26]: 26:26
+ // ip[26]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_26 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[26].de), .d(hw2reg.ip[26].d),
+   .qe(), .q(),
+   .qs(ip_0_p_26_qs)
+ );
+ // F[p_27]: 27:27
+ // ip[27]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_27 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[27].de), .d(hw2reg.ip[27].d),
+   .qe(), .q(),
+   .qs(ip_0_p_27_qs)
+ );
+ // F[p_28]: 28:28
+ // ip[28]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_28 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[28].de), .d(hw2reg.ip[28].d),
+   .qe(), .q(),
+   .qs(ip_0_p_28_qs)
+ );
+ // F[p_29]: 29:29
+ // ip[29]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_29 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[29].de), .d(hw2reg.ip[29].d),
+   .qe(), .q(),
+   .qs(ip_0_p_29_qs)
+ );
+ // F[p_30]: 30:30
+ // ip[30]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_30 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[30].de), .d(hw2reg.ip[30].d),
+   .qe(), .q(),
+   .qs(ip_0_p_30_qs)
+ );
+ // F[p_31]: 31:31
+ // ip[31]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_0_p_31 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[31].de), .d(hw2reg.ip[31].d),
+   .qe(), .q(),
+   .qs(ip_0_p_31_qs)
+ );
+ // Subregister 32 of Multireg ip
+ // R[ip_1]: V(False)
+ // F[p_32]: 0:0
+ // ip[32]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_32 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[32].de), .d(hw2reg.ip[32].d),
+   .qe(), .q(),
+   .qs(ip_1_p_32_qs)
+ );
+ // F[p_33]: 1:1
+ // ip[33]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_33 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[33].de), .d(hw2reg.ip[33].d),
+   .qe(), .q(),
+   .qs(ip_1_p_33_qs)
+ );
+ // F[p_34]: 2:2
+ // ip[34]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_34 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[34].de), .d(hw2reg.ip[34].d),
+   .qe(), .q(),
+   .qs(ip_1_p_34_qs)
+ );
+ // F[p_35]: 3:3
+ // ip[35]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_35 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[35].de), .d(hw2reg.ip[35].d),
+   .qe(), .q(),
+   .qs(ip_1_p_35_qs)
+ );
+ // F[p_36]: 4:4
+ // ip[36]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_36 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[36].de), .d(hw2reg.ip[36].d),
+   .qe(), .q(),
+   .qs(ip_1_p_36_qs)
+ );
+ // F[p_37]: 5:5
+ // ip[37]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_37 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[37].de), .d(hw2reg.ip[37].d),
+   .qe(), .q(),
+   .qs(ip_1_p_37_qs)
+ );
+ // F[p_38]: 6:6
+ // ip[38]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_38 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[38].de), .d(hw2reg.ip[38].d),
+   .qe(), .q(),
+   .qs(ip_1_p_38_qs)
+ );
+ // F[p_39]: 7:7
+ // ip[39]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_39 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[39].de), .d(hw2reg.ip[39].d),
+   .qe(), .q(),
+   .qs(ip_1_p_39_qs)
+ );
+ // F[p_40]: 8:8
+ // ip[40]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_40 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[40].de), .d(hw2reg.ip[40].d),
+   .qe(), .q(),
+   .qs(ip_1_p_40_qs)
+ );
+ // F[p_41]: 9:9
+ // ip[41]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_41 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[41].de), .d(hw2reg.ip[41].d),
+   .qe(), .q(),
+   .qs(ip_1_p_41_qs)
+ );
+ // F[p_42]: 10:10
+ // ip[42]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_42 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[42].de), .d(hw2reg.ip[42].d),
+   .qe(), .q(),
+   .qs(ip_1_p_42_qs)
+ );
+ // F[p_43]: 11:11
+ // ip[43]: updated only through hw2reg; software write port tied off, q/qe unused.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ip_1_p_43 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(1'b0), .wd('0),
+   .de(hw2reg.ip[43].de), .d(hw2reg.ip[43].d),
+   .qe(), .q(),
+   .qs(ip_1_p_43_qs)
+ );
+ // Subregister 0 of Multireg le
+ // R[le_0]: V(False)
+ // F[le_0]: 0:0
+ // le[0]: written from the register interface, exported on reg2hw.le[0].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_0 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_0_we), .wd(le_0_le_0_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[0].q),
+   .qs(le_0_le_0_qs)
+ );
+ // F[le_1]: 1:1
+ // le[1]: written from the register interface, exported on reg2hw.le[1].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_1 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_1_we), .wd(le_0_le_1_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[1].q),
+   .qs(le_0_le_1_qs)
+ );
+ // F[le_2]: 2:2
+ // le[2]: written from the register interface, exported on reg2hw.le[2].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_2 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_2_we), .wd(le_0_le_2_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[2].q),
+   .qs(le_0_le_2_qs)
+ );
+ // F[le_3]: 3:3
+ // le[3]: written from the register interface, exported on reg2hw.le[3].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_3 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_3_we), .wd(le_0_le_3_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[3].q),
+   .qs(le_0_le_3_qs)
+ );
+ // F[le_4]: 4:4
+ // le[4]: written from the register interface, exported on reg2hw.le[4].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_4 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_4_we), .wd(le_0_le_4_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[4].q),
+   .qs(le_0_le_4_qs)
+ );
+ // F[le_5]: 5:5
+ // le[5]: written from the register interface, exported on reg2hw.le[5].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_5 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_5_we), .wd(le_0_le_5_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[5].q),
+   .qs(le_0_le_5_qs)
+ );
+ // F[le_6]: 6:6
+ // le[6]: written from the register interface, exported on reg2hw.le[6].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_6 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_6_we), .wd(le_0_le_6_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[6].q),
+   .qs(le_0_le_6_qs)
+ );
+ // F[le_7]: 7:7
+ // le[7]: written from the register interface, exported on reg2hw.le[7].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_7 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_7_we), .wd(le_0_le_7_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[7].q),
+   .qs(le_0_le_7_qs)
+ );
+ // F[le_8]: 8:8
+ // le[8]: written from the register interface, exported on reg2hw.le[8].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_8 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_8_we), .wd(le_0_le_8_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[8].q),
+   .qs(le_0_le_8_qs)
+ );
+ // F[le_9]: 9:9
+ // le[9]: written from the register interface, exported on reg2hw.le[9].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_9 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_9_we), .wd(le_0_le_9_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[9].q),
+   .qs(le_0_le_9_qs)
+ );
+ // F[le_10]: 10:10
+ // le[10]: written from the register interface, exported on reg2hw.le[10].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_10 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_10_we), .wd(le_0_le_10_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[10].q),
+   .qs(le_0_le_10_qs)
+ );
+ // F[le_11]: 11:11
+ // le[11]: written from the register interface, exported on reg2hw.le[11].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_11 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_11_we), .wd(le_0_le_11_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[11].q),
+   .qs(le_0_le_11_qs)
+ );
+ // F[le_12]: 12:12
+ // le[12]: written from the register interface, exported on reg2hw.le[12].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_12 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_12_we), .wd(le_0_le_12_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[12].q),
+   .qs(le_0_le_12_qs)
+ );
+ // F[le_13]: 13:13
+ // le[13]: written from the register interface, exported on reg2hw.le[13].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_13 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_13_we), .wd(le_0_le_13_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[13].q),
+   .qs(le_0_le_13_qs)
+ );
+ // F[le_14]: 14:14
+ // le[14]: written from the register interface, exported on reg2hw.le[14].q; hardware update port tied off.
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_le_0_le_14 (
+   .clk_i(clk_i), .rst_ni(rst_ni),
+   .we(le_0_le_14_we), .wd(le_0_le_14_wd),
+   .de(1'b0), .d('0),
+   .qe(), .q(reg2hw.le[14].q),
+   .qs(le_0_le_14_qs)
+ );
+ // F[le_15]: 15:15 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[15].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_15 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_15_we),
+ .wd (le_0_le_15_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[15].q ),
+ // to register interface (read)
+ .qs (le_0_le_15_qs)
+ );
+ // F[le_16]: 16:16 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[16].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_16 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_16_we),
+ .wd (le_0_le_16_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[16].q ),
+ // to register interface (read)
+ .qs (le_0_le_16_qs)
+ );
+ // F[le_17]: 17:17 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[17].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_17 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_17_we),
+ .wd (le_0_le_17_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[17].q ),
+ // to register interface (read)
+ .qs (le_0_le_17_qs)
+ );
+ // F[le_18]: 18:18 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[18].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_18 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_18_we),
+ .wd (le_0_le_18_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[18].q ),
+ // to register interface (read)
+ .qs (le_0_le_18_qs)
+ );
+ // F[le_19]: 19:19 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[19].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_19 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_19_we),
+ .wd (le_0_le_19_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[19].q ),
+ // to register interface (read)
+ .qs (le_0_le_19_qs)
+ );
+ // F[le_20]: 20:20 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[20].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_20 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_20_we),
+ .wd (le_0_le_20_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[20].q ),
+ // to register interface (read)
+ .qs (le_0_le_20_qs)
+ );
+ // F[le_21]: 21:21 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[21].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_21 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_21_we),
+ .wd (le_0_le_21_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[21].q ),
+ // to register interface (read)
+ .qs (le_0_le_21_qs)
+ );
+ // F[le_22]: 22:22 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[22].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_22 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_22_we),
+ .wd (le_0_le_22_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[22].q ),
+ // to register interface (read)
+ .qs (le_0_le_22_qs)
+ );
+ // F[le_23]: 23:23 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[23].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_23 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_23_we),
+ .wd (le_0_le_23_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[23].q ),
+ // to register interface (read)
+ .qs (le_0_le_23_qs)
+ );
+ // F[le_24]: 24:24 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[24].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_24 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_24_we),
+ .wd (le_0_le_24_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[24].q ),
+ // to register interface (read)
+ .qs (le_0_le_24_qs)
+ );
+ // F[le_25]: 25:25 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[25].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_25 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_25_we),
+ .wd (le_0_le_25_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[25].q ),
+ // to register interface (read)
+ .qs (le_0_le_25_qs)
+ );
+ // F[le_26]: 26:26 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[26].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_26 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_26_we),
+ .wd (le_0_le_26_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[26].q ),
+ // to register interface (read)
+ .qs (le_0_le_26_qs)
+ );
+ // F[le_27]: 27:27 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[27].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_27 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_27_we),
+ .wd (le_0_le_27_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[27].q ),
+ // to register interface (read)
+ .qs (le_0_le_27_qs)
+ );
+ // F[le_28]: 28:28 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[28].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_28 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_28_we),
+ .wd (le_0_le_28_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[28].q ),
+ // to register interface (read)
+ .qs (le_0_le_28_qs)
+ );
+ // F[le_29]: 29:29 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[29].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_29 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_29_we),
+ .wd (le_0_le_29_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[29].q ),
+ // to register interface (read)
+ .qs (le_0_le_29_qs)
+ );
+ // F[le_30]: 30:30 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[30].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_30 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_30_we),
+ .wd (le_0_le_30_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[30].q ),
+ // to register interface (read)
+ .qs (le_0_le_30_qs)
+ );
+ // F[le_31]: 31:31 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[31].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_0_le_31 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_0_le_31_we),
+ .wd (le_0_le_31_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[31].q ),
+ // to register interface (read)
+ .qs (le_0_le_31_qs)
+ );
+ // Subregister 32 of Multireg le (first field instantiated under the le_1 prefix)
+ // R[le_1]: V(False)
+ // F[le_32]: 0:0 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[32].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_32 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_32_we),
+ .wd (le_1_le_32_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[32].q ),
+ // to register interface (read)
+ .qs (le_1_le_32_qs)
+ );
+ // F[le_33]: 1:1 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[33].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_33 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_33_we),
+ .wd (le_1_le_33_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[33].q ),
+ // to register interface (read)
+ .qs (le_1_le_33_qs)
+ );
+ // F[le_34]: 2:2 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[34].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_34 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_34_we),
+ .wd (le_1_le_34_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[34].q ),
+ // to register interface (read)
+ .qs (le_1_le_34_qs)
+ );
+ // F[le_35]: 3:3 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[35].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_35 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_35_we),
+ .wd (le_1_le_35_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[35].q ),
+ // to register interface (read)
+ .qs (le_1_le_35_qs)
+ );
+ // F[le_36]: 4:4 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[36].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_36 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_36_we),
+ .wd (le_1_le_36_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[36].q ),
+ // to register interface (read)
+ .qs (le_1_le_36_qs)
+ );
+ // F[le_37]: 5:5 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[37].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_37 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_37_we),
+ .wd (le_1_le_37_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[37].q ),
+ // to register interface (read)
+ .qs (le_1_le_37_qs)
+ );
+ // F[le_38]: 6:6 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[38].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_38 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_38_we),
+ .wd (le_1_le_38_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[38].q ),
+ // to register interface (read)
+ .qs (le_1_le_38_qs)
+ );
+ // F[le_39]: 7:7 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[39].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_39 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_39_we),
+ .wd (le_1_le_39_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[39].q ),
+ // to register interface (read)
+ .qs (le_1_le_39_qs)
+ );
+ // F[le_40]: 8:8 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[40].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_40 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_40_we),
+ .wd (le_1_le_40_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[40].q ),
+ // to register interface (read)
+ .qs (le_1_le_40_qs)
+ );
+ // F[le_41]: 9:9 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[41].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_41 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_41_we),
+ .wd (le_1_le_41_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[41].q ),
+ // to register interface (read)
+ .qs (le_1_le_41_qs)
+ );
+ // F[le_42]: 10:10 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[42].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_42 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_42_we),
+ .wd (le_1_le_42_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[42].q ),
+ // to register interface (read)
+ .qs (le_1_le_42_qs)
+ );
+ // F[le_43]: 11:11 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.le[43].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_le_1_le_43 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (le_1_le_43_we),
+ .wd (le_1_le_43_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.le[43].q ),
+ // to register interface (read)
+ .qs (le_1_le_43_qs)
+ );
+ // R[prio0]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio0.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio0_we),
+ .wd (prio0_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio0.q ),
+ // to register interface (read)
+ .qs (prio0_qs)
+ );
+ // R[prio1]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio1.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio1 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio1_we),
+ .wd (prio1_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio1.q ),
+ // to register interface (read)
+ .qs (prio1_qs)
+ );
+ // R[prio2]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio2.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio2 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio2_we),
+ .wd (prio2_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio2.q ),
+ // to register interface (read)
+ .qs (prio2_qs)
+ );
+ // R[prio3]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio3.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio3 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio3_we),
+ .wd (prio3_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio3.q ),
+ // to register interface (read)
+ .qs (prio3_qs)
+ );
+ // R[prio4]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio4.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio4 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio4_we),
+ .wd (prio4_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio4.q ),
+ // to register interface (read)
+ .qs (prio4_qs)
+ );
+ // R[prio5]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio5.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio5 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio5_we),
+ .wd (prio5_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio5.q ),
+ // to register interface (read)
+ .qs (prio5_qs)
+ );
+ // R[prio6]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio6.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio6 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio6_we),
+ .wd (prio6_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio6.q ),
+ // to register interface (read)
+ .qs (prio6_qs)
+ );
+ // R[prio7]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio7.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio7 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio7_we),
+ .wd (prio7_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio7.q ),
+ // to register interface (read)
+ .qs (prio7_qs)
+ );
+ // R[prio8]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio8.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio8 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio8_we),
+ .wd (prio8_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio8.q ),
+ // to register interface (read)
+ .qs (prio8_qs)
+ );
+ // R[prio9]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio9.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio9 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio9_we),
+ .wd (prio9_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio9.q ),
+ // to register interface (read)
+ .qs (prio9_qs)
+ );
+ // R[prio10]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio10.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio10 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio10_we),
+ .wd (prio10_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio10.q ),
+ // to register interface (read)
+ .qs (prio10_qs)
+ );
+ // R[prio11]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio11.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio11 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio11_we),
+ .wd (prio11_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio11.q ),
+ // to register interface (read)
+ .qs (prio11_qs)
+ );
+ // R[prio12]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio12.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio12 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio12_we),
+ .wd (prio12_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio12.q ),
+ // to register interface (read)
+ .qs (prio12_qs)
+ );
+ // R[prio13]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio13.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio13 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio13_we),
+ .wd (prio13_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio13.q ),
+ // to register interface (read)
+ .qs (prio13_qs)
+ );
+ // R[prio14]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio14.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio14 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio14_we),
+ .wd (prio14_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio14.q ),
+ // to register interface (read)
+ .qs (prio14_qs)
+ );
+ // R[prio15]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio15.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio15 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio15_we),
+ .wd (prio15_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio15.q ),
+ // to register interface (read)
+ .qs (prio15_qs)
+ );
+ // R[prio16]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio16.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio16 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio16_we),
+ .wd (prio16_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio16.q ),
+ // to register interface (read)
+ .qs (prio16_qs)
+ );
+ // R[prio17]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio17.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio17 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio17_we),
+ .wd (prio17_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio17.q ),
+ // to register interface (read)
+ .qs (prio17_qs)
+ );
+ // R[prio18]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio18.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio18 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio18_we),
+ .wd (prio18_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio18.q ),
+ // to register interface (read)
+ .qs (prio18_qs)
+ );
+ // R[prio19]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio19.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio19 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio19_we),
+ .wd (prio19_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio19.q ),
+ // to register interface (read)
+ .qs (prio19_qs)
+ );
+ // R[prio20]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio20.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio20 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio20_we),
+ .wd (prio20_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio20.q ),
+ // to register interface (read)
+ .qs (prio20_qs)
+ );
+ // R[prio21]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio21.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio21 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio21_we),
+ .wd (prio21_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio21.q ),
+ // to register interface (read)
+ .qs (prio21_qs)
+ );
+ // R[prio22]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio22.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio22 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio22_we),
+ .wd (prio22_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio22.q ),
+ // to register interface (read)
+ .qs (prio22_qs)
+ );
+ // R[prio23]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio23.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio23 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio23_we),
+ .wd (prio23_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio23.q ),
+ // to register interface (read)
+ .qs (prio23_qs)
+ );
+ // R[prio24]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio24.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio24 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio24_we),
+ .wd (prio24_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio24.q ),
+ // to register interface (read)
+ .qs (prio24_qs)
+ );
+ // R[prio25]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio25.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio25 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio25_we),
+ .wd (prio25_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio25.q ),
+ // to register interface (read)
+ .qs (prio25_qs)
+ );
+ // R[prio26]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio26.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio26 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio26_we),
+ .wd (prio26_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio26.q ),
+ // to register interface (read)
+ .qs (prio26_qs)
+ );
+ // R[prio27]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio27.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio27 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio27_we),
+ .wd (prio27_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio27.q ),
+ // to register interface (read)
+ .qs (prio27_qs)
+ );
+ // R[prio28]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio28.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio28 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio28_we),
+ .wd (prio28_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio28.q ),
+ // to register interface (read)
+ .qs (prio28_qs)
+ );
+ // R[prio29]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio29.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio29 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio29_we),
+ .wd (prio29_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio29.q ),
+ // to register interface (read)
+ .qs (prio29_qs)
+ );
+ // R[prio30]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio30.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio30 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio30_we),
+ .wd (prio30_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio30.q ),
+ // to register interface (read)
+ .qs (prio30_qs)
+ );
+ // R[prio31]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio31.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio31 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio31_we),
+ .wd (prio31_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio31.q ),
+ // to register interface (read)
+ .qs (prio31_qs)
+ );
+ // R[prio32]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio32.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio32 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio32_we),
+ .wd (prio32_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio32.q ),
+ // to register interface (read)
+ .qs (prio32_qs)
+ );
+ // R[prio33]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio33.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio33 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio33_we),
+ .wd (prio33_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio33.q ),
+ // to register interface (read)
+ .qs (prio33_qs)
+ );
+ // R[prio34]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio34.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio34 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio34_we),
+ .wd (prio34_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio34.q ),
+ // to register interface (read)
+ .qs (prio34_qs)
+ );
+ // R[prio35]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio35.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio35 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio35_we),
+ .wd (prio35_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio35.q ),
+ // to register interface (read)
+ .qs (prio35_qs)
+ );
+ // R[prio36]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio36.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio36 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio36_we),
+ .wd (prio36_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio36.q ),
+ // to register interface (read)
+ .qs (prio36_qs)
+ );
+ // R[prio37]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio37.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio37 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio37_we),
+ .wd (prio37_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio37.q ),
+ // to register interface (read)
+ .qs (prio37_qs)
+ );
+ // R[prio38]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio38.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio38 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio38_we),
+ .wd (prio38_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio38.q ),
+ // to register interface (read)
+ .qs (prio38_qs)
+ );
+ // R[prio39]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio39.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio39 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio39_we),
+ .wd (prio39_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio39.q ),
+ // to register interface (read)
+ .qs (prio39_qs)
+ );
+ // R[prio40]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio40.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio40 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio40_we),
+ .wd (prio40_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio40.q ),
+ // to register interface (read)
+ .qs (prio40_qs)
+ );
+ // R[prio41]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio41.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio41 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio41_we),
+ .wd (prio41_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio41.q ),
+ // to register interface (read)
+ .qs (prio41_qs)
+ );
+ // R[prio42]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio42.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio42 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio42_we),
+ .wd (prio42_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio42.q ),
+ // to register interface (read)
+ .qs (prio42_qs)
+ );
+ // R[prio43]: V(False) -- register with DW=2, RESVAL=2'h0, exported as reg2hw.prio43.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_prio43 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (prio43_we),
+ .wd (prio43_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.prio43.q ),
+ // to register interface (read)
+ .qs (prio43_qs)
+ );
+ // Subregister 0 of Multireg ie0 (first field instantiated under the ie0_0 prefix)
+ // R[ie0_0]: V(False)
+ // F[e_0]: 0:0 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[0].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_0_we),
+ .wd (ie0_0_e_0_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[0].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_0_qs)
+ );
+ // F[e_1]: 1:1 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[1].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_1 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_1_we),
+ .wd (ie0_0_e_1_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[1].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_1_qs)
+ );
+ // F[e_2]: 2:2 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[2].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_2 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_2_we),
+ .wd (ie0_0_e_2_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[2].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_2_qs)
+ );
+ // F[e_3]: 3:3 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[3].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_3 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_3_we),
+ .wd (ie0_0_e_3_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[3].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_3_qs)
+ );
+ // F[e_4]: 4:4 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[4].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_4 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_4_we),
+ .wd (ie0_0_e_4_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[4].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_4_qs)
+ );
+ // F[e_5]: 5:5 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[5].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_5 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_5_we),
+ .wd (ie0_0_e_5_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[5].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_5_qs)
+ );
+ // F[e_6]: 6:6 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[6].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_6 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_6_we),
+ .wd (ie0_0_e_6_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[6].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_6_qs)
+ );
+ // F[e_7]: 7:7 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[7].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_7 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_7_we),
+ .wd (ie0_0_e_7_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[7].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_7_qs)
+ );
+ // F[e_8]: 8:8 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[8].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_8 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_8_we),
+ .wd (ie0_0_e_8_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[8].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_8_qs)
+ );
+ // F[e_9]: 9:9 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[9].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_9 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_9_we),
+ .wd (ie0_0_e_9_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[9].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_9_qs)
+ );
+ // F[e_10]: 10:10 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[10].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_10 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_10_we),
+ .wd (ie0_0_e_10_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[10].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_10_qs)
+ );
+ // F[e_11]: 11:11 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[11].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_11 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_11_we),
+ .wd (ie0_0_e_11_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[11].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_11_qs)
+ );
+ // F[e_12]: 12:12 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[12].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_12 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_12_we),
+ .wd (ie0_0_e_12_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[12].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_12_qs)
+ );
+ // F[e_13]: 13:13 -- 1-bit field (DW=1, RESVAL=1'h0) exported as reg2hw.ie0[13].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_13 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ie0_0_e_13_we),
+ .wd (ie0_0_e_13_wd),
+ // from internal hardware: de and d are tied to constant zero
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware: qe is left unconnected
+ .qe (),
+ .q (reg2hw.ie0[13].q ),
+ // to register interface (read)
+ .qs (ie0_0_e_13_qs)
+ );
+ // F[e_14]: 14:14 -- 1-bit field, reset value 0, output on reg2hw.ie0[14].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_14 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_14_we), // register-interface write enable
+ .wd (ie0_0_e_14_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[14].q), // to internal hardware
+ .qs (ie0_0_e_14_qs) // register-interface read value
+ );
+ // F[e_15]: 15:15 -- 1-bit field, reset value 0, output on reg2hw.ie0[15].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_15 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_15_we), // register-interface write enable
+ .wd (ie0_0_e_15_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[15].q), // to internal hardware
+ .qs (ie0_0_e_15_qs) // register-interface read value
+ );
+ // F[e_16]: 16:16 -- 1-bit field, reset value 0, output on reg2hw.ie0[16].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_16 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_16_we), // register-interface write enable
+ .wd (ie0_0_e_16_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[16].q), // to internal hardware
+ .qs (ie0_0_e_16_qs) // register-interface read value
+ );
+ // F[e_17]: 17:17 -- 1-bit field, reset value 0, output on reg2hw.ie0[17].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_17 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_17_we), // register-interface write enable
+ .wd (ie0_0_e_17_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[17].q), // to internal hardware
+ .qs (ie0_0_e_17_qs) // register-interface read value
+ );
+ // F[e_18]: 18:18 -- 1-bit field, reset value 0, output on reg2hw.ie0[18].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_18 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_18_we), // register-interface write enable
+ .wd (ie0_0_e_18_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[18].q), // to internal hardware
+ .qs (ie0_0_e_18_qs) // register-interface read value
+ );
+ // F[e_19]: 19:19 -- 1-bit field, reset value 0, output on reg2hw.ie0[19].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_19 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_19_we), // register-interface write enable
+ .wd (ie0_0_e_19_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[19].q), // to internal hardware
+ .qs (ie0_0_e_19_qs) // register-interface read value
+ );
+ // F[e_20]: 20:20 -- 1-bit field, reset value 0, output on reg2hw.ie0[20].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_20 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_20_we), // register-interface write enable
+ .wd (ie0_0_e_20_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[20].q), // to internal hardware
+ .qs (ie0_0_e_20_qs) // register-interface read value
+ );
+ // F[e_21]: 21:21 -- 1-bit field, reset value 0, output on reg2hw.ie0[21].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_21 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_21_we), // register-interface write enable
+ .wd (ie0_0_e_21_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[21].q), // to internal hardware
+ .qs (ie0_0_e_21_qs) // register-interface read value
+ );
+ // F[e_22]: 22:22 -- 1-bit field, reset value 0, output on reg2hw.ie0[22].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_22 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_22_we), // register-interface write enable
+ .wd (ie0_0_e_22_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[22].q), // to internal hardware
+ .qs (ie0_0_e_22_qs) // register-interface read value
+ );
+ // F[e_23]: 23:23 -- 1-bit field, reset value 0, output on reg2hw.ie0[23].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_23 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_23_we), // register-interface write enable
+ .wd (ie0_0_e_23_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[23].q), // to internal hardware
+ .qs (ie0_0_e_23_qs) // register-interface read value
+ );
+ // F[e_24]: 24:24 -- 1-bit field, reset value 0, output on reg2hw.ie0[24].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_24 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_24_we), // register-interface write enable
+ .wd (ie0_0_e_24_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[24].q), // to internal hardware
+ .qs (ie0_0_e_24_qs) // register-interface read value
+ );
+ // F[e_25]: 25:25 -- 1-bit field, reset value 0, output on reg2hw.ie0[25].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_25 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_25_we), // register-interface write enable
+ .wd (ie0_0_e_25_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[25].q), // to internal hardware
+ .qs (ie0_0_e_25_qs) // register-interface read value
+ );
+ // F[e_26]: 26:26 -- 1-bit field, reset value 0, output on reg2hw.ie0[26].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_26 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_26_we), // register-interface write enable
+ .wd (ie0_0_e_26_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[26].q), // to internal hardware
+ .qs (ie0_0_e_26_qs) // register-interface read value
+ );
+ // F[e_27]: 27:27 -- 1-bit field, reset value 0, output on reg2hw.ie0[27].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_27 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_27_we), // register-interface write enable
+ .wd (ie0_0_e_27_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[27].q), // to internal hardware
+ .qs (ie0_0_e_27_qs) // register-interface read value
+ );
+ // F[e_28]: 28:28 -- 1-bit field, reset value 0, output on reg2hw.ie0[28].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_28 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_28_we), // register-interface write enable
+ .wd (ie0_0_e_28_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[28].q), // to internal hardware
+ .qs (ie0_0_e_28_qs) // register-interface read value
+ );
+ // F[e_29]: 29:29 -- 1-bit field, reset value 0, output on reg2hw.ie0[29].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_29 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_29_we), // register-interface write enable
+ .wd (ie0_0_e_29_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[29].q), // to internal hardware
+ .qs (ie0_0_e_29_qs) // register-interface read value
+ );
+ // F[e_30]: 30:30 -- 1-bit field, reset value 0, output on reg2hw.ie0[30].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_30 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_30_we), // register-interface write enable
+ .wd (ie0_0_e_30_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[30].q), // to internal hardware
+ .qs (ie0_0_e_30_qs) // register-interface read value
+ );
+ // F[e_31]: 31:31 -- 1-bit field, reset value 0, output on reg2hw.ie0[31].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_0_e_31 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_0_e_31_we), // register-interface write enable
+ .wd (ie0_0_e_31_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[31].q), // to internal hardware
+ .qs (ie0_0_e_31_qs) // register-interface read value
+ );
+ // Subregister 32 of Multireg ie0
+ // R[ie0_1]: V(False)
+ // F[e_32]: 0:0 -- 1-bit field, reset value 0, output on reg2hw.ie0[32].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_32 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_32_we), // register-interface write enable
+ .wd (ie0_1_e_32_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[32].q), // to internal hardware
+ .qs (ie0_1_e_32_qs) // register-interface read value
+ );
+ // F[e_33]: 1:1 -- 1-bit field, reset value 0, output on reg2hw.ie0[33].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_33 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_33_we), // register-interface write enable
+ .wd (ie0_1_e_33_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[33].q), // to internal hardware
+ .qs (ie0_1_e_33_qs) // register-interface read value
+ );
+ // F[e_34]: 2:2 -- 1-bit field, reset value 0, output on reg2hw.ie0[34].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_34 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_34_we), // register-interface write enable
+ .wd (ie0_1_e_34_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[34].q), // to internal hardware
+ .qs (ie0_1_e_34_qs) // register-interface read value
+ );
+ // F[e_35]: 3:3 -- 1-bit field, reset value 0, output on reg2hw.ie0[35].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_35 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_35_we), // register-interface write enable
+ .wd (ie0_1_e_35_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[35].q), // to internal hardware
+ .qs (ie0_1_e_35_qs) // register-interface read value
+ );
+ // F[e_36]: 4:4 -- 1-bit field, reset value 0, output on reg2hw.ie0[36].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_36 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_36_we), // register-interface write enable
+ .wd (ie0_1_e_36_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[36].q), // to internal hardware
+ .qs (ie0_1_e_36_qs) // register-interface read value
+ );
+ // F[e_37]: 5:5 -- 1-bit field, reset value 0, output on reg2hw.ie0[37].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_37 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_37_we), // register-interface write enable
+ .wd (ie0_1_e_37_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[37].q), // to internal hardware
+ .qs (ie0_1_e_37_qs) // register-interface read value
+ );
+ // F[e_38]: 6:6 -- 1-bit field, reset value 0, output on reg2hw.ie0[38].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_38 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_38_we), // register-interface write enable
+ .wd (ie0_1_e_38_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[38].q), // to internal hardware
+ .qs (ie0_1_e_38_qs) // register-interface read value
+ );
+ // F[e_39]: 7:7 -- 1-bit field, reset value 0, output on reg2hw.ie0[39].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_39 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_39_we), // register-interface write enable
+ .wd (ie0_1_e_39_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[39].q), // to internal hardware
+ .qs (ie0_1_e_39_qs) // register-interface read value
+ );
+ // F[e_40]: 8:8 -- 1-bit field, reset value 0, output on reg2hw.ie0[40].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_40 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_40_we), // register-interface write enable
+ .wd (ie0_1_e_40_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[40].q), // to internal hardware
+ .qs (ie0_1_e_40_qs) // register-interface read value
+ );
+ // F[e_41]: 9:9 -- 1-bit field, reset value 0, output on reg2hw.ie0[41].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_41 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_41_we), // register-interface write enable
+ .wd (ie0_1_e_41_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[41].q), // to internal hardware
+ .qs (ie0_1_e_41_qs) // register-interface read value
+ );
+ // F[e_42]: 10:10 -- 1-bit field, reset value 0, output on reg2hw.ie0[42].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_42 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_42_we), // register-interface write enable
+ .wd (ie0_1_e_42_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[42].q), // to internal hardware
+ .qs (ie0_1_e_42_qs) // register-interface read value
+ );
+ // F[e_43]: 11:11 -- 1-bit field, reset value 0, output on reg2hw.ie0[43].q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ie0_1_e_43 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (ie0_1_e_43_we), // register-interface write enable
+ .wd (ie0_1_e_43_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.ie0[43].q), // to internal hardware
+ .qs (ie0_1_e_43_qs) // register-interface read value
+ );
+ // R[threshold0]: V(False) -- 2-bit register, reset value 0, output on reg2hw.threshold0.q
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_threshold0 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (threshold0_we), // register-interface write enable
+ .wd (threshold0_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.threshold0.q), // to internal hardware
+ .qs (threshold0_qs) // register-interface read value
+ );
+ // R[cc0]: V(True)
+ // cc0 is built from prim_subreg_ext: no clock/reset is connected on this instance
+ // and the hardware-side data comes from hw2reg.cc0.d.
+ prim_subreg_ext #(
+ .DW (8)
+ ) u_cc0 (
+ .re (cc0_re), // register-interface read strobe
+ .we (cc0_we), // register-interface write enable
+ .wd (cc0_wd), // register-interface write data
+ .d (hw2reg.cc0.d), // data supplied by internal hardware
+ // NOTE(review): .qre is wired to reg2hw.cc0.re by analogy with the .qe/.q
+ // connections below; confirm the reg2hw.cc0 struct declares an `re` member.
+ .qre (reg2hw.cc0.re),
+ .qe (reg2hw.cc0.qe),
+ .q (reg2hw.cc0.q ),
+ .qs (cc0_qs) // register-interface read value
+ );
+ // R[msip0]: V(False) -- 1-bit register, reset value 0, output on reg2hw.msip0.q
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_msip0 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .we (msip0_we), // register-interface write enable
+ .wd (msip0_wd), // register-interface write data
+ .de (1'b0), // hardware enable tied low
+ .d ('0), // hardware data tied to zero
+ .qe (), // unconnected
+ .q (reg2hw.msip0.q), // to internal hardware
+ .qs (msip0_qs) // register-interface read value
+ );
+ // Address decode: bit k of addr_hit is set when reg_addr equals the offset
+ // constant of register k (53 registers, indices 0..52).
+ logic [52:0] addr_hit;
+ always_comb begin
+ addr_hit = '0;
+ // IP and LE registers
+ addr_hit[0] = (reg_addr == RV_PLIC_IP_0_OFFSET);
+ addr_hit[1] = (reg_addr == RV_PLIC_IP_1_OFFSET);
+ addr_hit[2] = (reg_addr == RV_PLIC_LE_0_OFFSET);
+ addr_hit[3] = (reg_addr == RV_PLIC_LE_1_OFFSET);
+ // PRIO0 .. PRIO43
+ addr_hit[4] = (reg_addr == RV_PLIC_PRIO0_OFFSET);
+ addr_hit[5] = (reg_addr == RV_PLIC_PRIO1_OFFSET);
+ addr_hit[6] = (reg_addr == RV_PLIC_PRIO2_OFFSET);
+ addr_hit[7] = (reg_addr == RV_PLIC_PRIO3_OFFSET);
+ addr_hit[8] = (reg_addr == RV_PLIC_PRIO4_OFFSET);
+ addr_hit[9] = (reg_addr == RV_PLIC_PRIO5_OFFSET);
+ addr_hit[10] = (reg_addr == RV_PLIC_PRIO6_OFFSET);
+ addr_hit[11] = (reg_addr == RV_PLIC_PRIO7_OFFSET);
+ addr_hit[12] = (reg_addr == RV_PLIC_PRIO8_OFFSET);
+ addr_hit[13] = (reg_addr == RV_PLIC_PRIO9_OFFSET);
+ addr_hit[14] = (reg_addr == RV_PLIC_PRIO10_OFFSET);
+ addr_hit[15] = (reg_addr == RV_PLIC_PRIO11_OFFSET);
+ addr_hit[16] = (reg_addr == RV_PLIC_PRIO12_OFFSET);
+ addr_hit[17] = (reg_addr == RV_PLIC_PRIO13_OFFSET);
+ addr_hit[18] = (reg_addr == RV_PLIC_PRIO14_OFFSET);
+ addr_hit[19] = (reg_addr == RV_PLIC_PRIO15_OFFSET);
+ addr_hit[20] = (reg_addr == RV_PLIC_PRIO16_OFFSET);
+ addr_hit[21] = (reg_addr == RV_PLIC_PRIO17_OFFSET);
+ addr_hit[22] = (reg_addr == RV_PLIC_PRIO18_OFFSET);
+ addr_hit[23] = (reg_addr == RV_PLIC_PRIO19_OFFSET);
+ addr_hit[24] = (reg_addr == RV_PLIC_PRIO20_OFFSET);
+ addr_hit[25] = (reg_addr == RV_PLIC_PRIO21_OFFSET);
+ addr_hit[26] = (reg_addr == RV_PLIC_PRIO22_OFFSET);
+ addr_hit[27] = (reg_addr == RV_PLIC_PRIO23_OFFSET);
+ addr_hit[28] = (reg_addr == RV_PLIC_PRIO24_OFFSET);
+ addr_hit[29] = (reg_addr == RV_PLIC_PRIO25_OFFSET);
+ addr_hit[30] = (reg_addr == RV_PLIC_PRIO26_OFFSET);
+ addr_hit[31] = (reg_addr == RV_PLIC_PRIO27_OFFSET);
+ addr_hit[32] = (reg_addr == RV_PLIC_PRIO28_OFFSET);
+ addr_hit[33] = (reg_addr == RV_PLIC_PRIO29_OFFSET);
+ addr_hit[34] = (reg_addr == RV_PLIC_PRIO30_OFFSET);
+ addr_hit[35] = (reg_addr == RV_PLIC_PRIO31_OFFSET);
+ addr_hit[36] = (reg_addr == RV_PLIC_PRIO32_OFFSET);
+ addr_hit[37] = (reg_addr == RV_PLIC_PRIO33_OFFSET);
+ addr_hit[38] = (reg_addr == RV_PLIC_PRIO34_OFFSET);
+ addr_hit[39] = (reg_addr == RV_PLIC_PRIO35_OFFSET);
+ addr_hit[40] = (reg_addr == RV_PLIC_PRIO36_OFFSET);
+ addr_hit[41] = (reg_addr == RV_PLIC_PRIO37_OFFSET);
+ addr_hit[42] = (reg_addr == RV_PLIC_PRIO38_OFFSET);
+ addr_hit[43] = (reg_addr == RV_PLIC_PRIO39_OFFSET);
+ addr_hit[44] = (reg_addr == RV_PLIC_PRIO40_OFFSET);
+ addr_hit[45] = (reg_addr == RV_PLIC_PRIO41_OFFSET);
+ addr_hit[46] = (reg_addr == RV_PLIC_PRIO42_OFFSET);
+ addr_hit[47] = (reg_addr == RV_PLIC_PRIO43_OFFSET);
+ // IE0, THRESHOLD0, CC0 and MSIP0
+ addr_hit[48] = (reg_addr == RV_PLIC_IE0_0_OFFSET);
+ addr_hit[49] = (reg_addr == RV_PLIC_IE0_1_OFFSET);
+ addr_hit[50] = (reg_addr == RV_PLIC_THRESHOLD0_OFFSET);
+ addr_hit[51] = (reg_addr == RV_PLIC_CC0_OFFSET);
+ addr_hit[52] = (reg_addr == RV_PLIC_MSIP0_OFFSET);
+ end
+ // addrmiss is set when a read or write is requested and no addr_hit bit is set.
+ assign addrmiss = (reg_re || reg_we) & ~(|addr_hit);
+ // Check sub-word write is permitted
+ always_comb begin
+ wr_err = 1'b0;
+ // The same test is applied to every decoded register i: a write that hits
+ // register i is an error when reg_be does not cover every bit set in
+ // RV_PLIC_PERMIT[i].
+ for (int i = 0; i < $bits(addr_hit); i++) begin
+ if (addr_hit[i] && reg_we && (RV_PLIC_PERMIT[i] != (RV_PLIC_PERMIT[i] & reg_be))) wr_err = 1'b1 ;
+ end
+ end
+ // LE_0 write decode: every le_0 field shares the addr_hit[2] write strobe
+ // (suppressed on wr_err) and takes its own bit of reg_wdata.
+ assign le_0_le_0_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_0_wd = reg_wdata[0];
+ assign le_0_le_1_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_1_wd = reg_wdata[1];
+ assign le_0_le_2_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_2_wd = reg_wdata[2];
+ assign le_0_le_3_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_3_wd = reg_wdata[3];
+ assign le_0_le_4_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_4_wd = reg_wdata[4];
+ assign le_0_le_5_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_5_wd = reg_wdata[5];
+ assign le_0_le_6_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_6_wd = reg_wdata[6];
+ assign le_0_le_7_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_7_wd = reg_wdata[7];
+ assign le_0_le_8_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_8_wd = reg_wdata[8];
+ assign le_0_le_9_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_9_wd = reg_wdata[9];
+ assign le_0_le_10_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_10_wd = reg_wdata[10];
+ assign le_0_le_11_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_11_wd = reg_wdata[11];
+ assign le_0_le_12_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_12_wd = reg_wdata[12];
+ assign le_0_le_13_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_13_wd = reg_wdata[13];
+ assign le_0_le_14_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_14_wd = reg_wdata[14];
+ assign le_0_le_15_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_15_wd = reg_wdata[15];
+ assign le_0_le_16_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_16_wd = reg_wdata[16];
+ assign le_0_le_17_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_17_wd = reg_wdata[17];
+ assign le_0_le_18_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_18_wd = reg_wdata[18];
+ assign le_0_le_19_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_19_wd = reg_wdata[19];
+ assign le_0_le_20_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_20_wd = reg_wdata[20];
+ assign le_0_le_21_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_21_wd = reg_wdata[21];
+ assign le_0_le_22_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_22_wd = reg_wdata[22];
+ assign le_0_le_23_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_23_wd = reg_wdata[23];
+ assign le_0_le_24_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_24_wd = reg_wdata[24];
+ assign le_0_le_25_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_25_wd = reg_wdata[25];
+ assign le_0_le_26_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_26_wd = reg_wdata[26];
+ assign le_0_le_27_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_27_wd = reg_wdata[27];
+ assign le_0_le_28_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_28_wd = reg_wdata[28];
+ assign le_0_le_29_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_29_wd = reg_wdata[29];
+ assign le_0_le_30_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_30_wd = reg_wdata[30];
+ assign le_0_le_31_we = reg_we & ~wr_err & addr_hit[2];
+ assign le_0_le_31_wd = reg_wdata[31];
+ // LE_1 write decode: fields le_32..le_43 share the addr_hit[3] write strobe
+ // (suppressed on wr_err) and take reg_wdata bits 0..11.
+ assign le_1_le_32_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_32_wd = reg_wdata[0];
+ assign le_1_le_33_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_33_wd = reg_wdata[1];
+ assign le_1_le_34_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_34_wd = reg_wdata[2];
+ assign le_1_le_35_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_35_wd = reg_wdata[3];
+ assign le_1_le_36_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_36_wd = reg_wdata[4];
+ assign le_1_le_37_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_37_wd = reg_wdata[5];
+ assign le_1_le_38_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_38_wd = reg_wdata[6];
+ assign le_1_le_39_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_39_wd = reg_wdata[7];
+ assign le_1_le_40_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_40_wd = reg_wdata[8];
+ assign le_1_le_41_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_41_wd = reg_wdata[9];
+ assign le_1_le_42_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_42_wd = reg_wdata[10];
+ assign le_1_le_43_we = reg_we & ~wr_err & addr_hit[3];
+ assign le_1_le_43_wd = reg_wdata[11];
+ // PRIO write decode: prioK is written when addr_hit[K+4] is set during an
+ // error-free write, and takes the low two bits of reg_wdata.
+ assign prio0_we = reg_we & ~wr_err & addr_hit[4];
+ assign prio0_wd = reg_wdata[1:0];
+ assign prio1_we = reg_we & ~wr_err & addr_hit[5];
+ assign prio1_wd = reg_wdata[1:0];
+ assign prio2_we = reg_we & ~wr_err & addr_hit[6];
+ assign prio2_wd = reg_wdata[1:0];
+ assign prio3_we = reg_we & ~wr_err & addr_hit[7];
+ assign prio3_wd = reg_wdata[1:0];
+ assign prio4_we = reg_we & ~wr_err & addr_hit[8];
+ assign prio4_wd = reg_wdata[1:0];
+ assign prio5_we = reg_we & ~wr_err & addr_hit[9];
+ assign prio5_wd = reg_wdata[1:0];
+ assign prio6_we = reg_we & ~wr_err & addr_hit[10];
+ assign prio6_wd = reg_wdata[1:0];
+ assign prio7_we = reg_we & ~wr_err & addr_hit[11];
+ assign prio7_wd = reg_wdata[1:0];
+ assign prio8_we = reg_we & ~wr_err & addr_hit[12];
+ assign prio8_wd = reg_wdata[1:0];
+ assign prio9_we = reg_we & ~wr_err & addr_hit[13];
+ assign prio9_wd = reg_wdata[1:0];
+ assign prio10_we = reg_we & ~wr_err & addr_hit[14];
+ assign prio10_wd = reg_wdata[1:0];
+ assign prio11_we = reg_we & ~wr_err & addr_hit[15];
+ assign prio11_wd = reg_wdata[1:0];
+ assign prio12_we = reg_we & ~wr_err & addr_hit[16];
+ assign prio12_wd = reg_wdata[1:0];
+ assign prio13_we = reg_we & ~wr_err & addr_hit[17];
+ assign prio13_wd = reg_wdata[1:0];
+ assign prio14_we = reg_we & ~wr_err & addr_hit[18];
+ assign prio14_wd = reg_wdata[1:0];
+ assign prio15_we = reg_we & ~wr_err & addr_hit[19];
+ assign prio15_wd = reg_wdata[1:0];
+ assign prio16_we = reg_we & ~wr_err & addr_hit[20];
+ assign prio16_wd = reg_wdata[1:0];
+ assign prio17_we = reg_we & ~wr_err & addr_hit[21];
+ assign prio17_wd = reg_wdata[1:0];
+ assign prio18_we = reg_we & ~wr_err & addr_hit[22];
+ assign prio18_wd = reg_wdata[1:0];
+ assign prio19_we = reg_we & ~wr_err & addr_hit[23];
+ assign prio19_wd = reg_wdata[1:0];
+ assign prio20_we = reg_we & ~wr_err & addr_hit[24];
+ assign prio20_wd = reg_wdata[1:0];
+ assign prio21_we = reg_we & ~wr_err & addr_hit[25];
+ assign prio21_wd = reg_wdata[1:0];
+ assign prio22_we = reg_we & ~wr_err & addr_hit[26];
+ assign prio22_wd = reg_wdata[1:0];
+ assign prio23_we = reg_we & ~wr_err & addr_hit[27];
+ assign prio23_wd = reg_wdata[1:0];
+ assign prio24_we = reg_we & ~wr_err & addr_hit[28];
+ assign prio24_wd = reg_wdata[1:0];
+ assign prio25_we = reg_we & ~wr_err & addr_hit[29];
+ assign prio25_wd = reg_wdata[1:0];
+ assign prio26_we = reg_we & ~wr_err & addr_hit[30];
+ assign prio26_wd = reg_wdata[1:0];
+ assign prio27_we = reg_we & ~wr_err & addr_hit[31];
+ assign prio27_wd = reg_wdata[1:0];
+ assign prio28_we = reg_we & ~wr_err & addr_hit[32];
+ assign prio28_wd = reg_wdata[1:0];
+ assign prio29_we = reg_we & ~wr_err & addr_hit[33];
+ assign prio29_wd = reg_wdata[1:0];
+ assign prio30_we = reg_we & ~wr_err & addr_hit[34];
+ assign prio30_wd = reg_wdata[1:0];
+ assign prio31_we = reg_we & ~wr_err & addr_hit[35];
+ assign prio31_wd = reg_wdata[1:0];
+ assign prio32_we = reg_we & ~wr_err & addr_hit[36];
+ assign prio32_wd = reg_wdata[1:0];
+ assign prio33_we = reg_we & ~wr_err & addr_hit[37];
+ assign prio33_wd = reg_wdata[1:0];
+ assign prio34_we = reg_we & ~wr_err & addr_hit[38];
+ assign prio34_wd = reg_wdata[1:0];
+ assign prio35_we = reg_we & ~wr_err & addr_hit[39];
+ assign prio35_wd = reg_wdata[1:0];
+ assign prio36_we = reg_we & ~wr_err & addr_hit[40];
+ assign prio36_wd = reg_wdata[1:0];
+ assign prio37_we = reg_we & ~wr_err & addr_hit[41];
+ assign prio37_wd = reg_wdata[1:0];
+ assign prio38_we = reg_we & ~wr_err & addr_hit[42];
+ assign prio38_wd = reg_wdata[1:0];
+ assign prio39_we = reg_we & ~wr_err & addr_hit[43];
+ assign prio39_wd = reg_wdata[1:0];
+ assign prio40_we = reg_we & ~wr_err & addr_hit[44];
+ assign prio40_wd = reg_wdata[1:0];
+ assign prio41_we = reg_we & ~wr_err & addr_hit[45];
+ assign prio41_wd = reg_wdata[1:0];
+ assign prio42_we = reg_we & ~wr_err & addr_hit[46];
+ assign prio42_wd = reg_wdata[1:0];
+ assign prio43_we = reg_we & ~wr_err & addr_hit[47];
+ assign prio43_wd = reg_wdata[1:0];
+ // IE0_0 write decode: fields e_0..e_31 share the addr_hit[48] write strobe
+ // (suppressed on wr_err) and take reg_wdata bits 0..31.
+ assign ie0_0_e_0_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_0_wd = reg_wdata[0];
+ assign ie0_0_e_1_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_1_wd = reg_wdata[1];
+ assign ie0_0_e_2_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_2_wd = reg_wdata[2];
+ assign ie0_0_e_3_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_3_wd = reg_wdata[3];
+ assign ie0_0_e_4_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_4_wd = reg_wdata[4];
+ assign ie0_0_e_5_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_5_wd = reg_wdata[5];
+ assign ie0_0_e_6_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_6_wd = reg_wdata[6];
+ assign ie0_0_e_7_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_7_wd = reg_wdata[7];
+ assign ie0_0_e_8_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_8_wd = reg_wdata[8];
+ assign ie0_0_e_9_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_9_wd = reg_wdata[9];
+ assign ie0_0_e_10_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_10_wd = reg_wdata[10];
+ assign ie0_0_e_11_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_11_wd = reg_wdata[11];
+ assign ie0_0_e_12_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_12_wd = reg_wdata[12];
+ assign ie0_0_e_13_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_13_wd = reg_wdata[13];
+ assign ie0_0_e_14_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_14_wd = reg_wdata[14];
+ assign ie0_0_e_15_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_15_wd = reg_wdata[15];
+ assign ie0_0_e_16_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_16_wd = reg_wdata[16];
+ assign ie0_0_e_17_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_17_wd = reg_wdata[17];
+ assign ie0_0_e_18_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_18_wd = reg_wdata[18];
+ assign ie0_0_e_19_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_19_wd = reg_wdata[19];
+ assign ie0_0_e_20_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_20_wd = reg_wdata[20];
+ assign ie0_0_e_21_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_21_wd = reg_wdata[21];
+ assign ie0_0_e_22_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_22_wd = reg_wdata[22];
+ assign ie0_0_e_23_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_23_wd = reg_wdata[23];
+ assign ie0_0_e_24_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_24_wd = reg_wdata[24];
+ assign ie0_0_e_25_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_25_wd = reg_wdata[25];
+ assign ie0_0_e_26_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_26_wd = reg_wdata[26];
+ assign ie0_0_e_27_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_27_wd = reg_wdata[27];
+ assign ie0_0_e_28_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_28_wd = reg_wdata[28];
+ assign ie0_0_e_29_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_29_wd = reg_wdata[29];
+ assign ie0_0_e_30_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_30_wd = reg_wdata[30];
+ assign ie0_0_e_31_we = reg_we & ~wr_err & addr_hit[48];
+ assign ie0_0_e_31_wd = reg_wdata[31];
+ // IE0_1 fields e_32..e_43: every field shares the addr_hit[49] decode and
+ // field e_N takes write-data bit (N - 32).
+ assign ie0_1_e_32_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_32_wd = reg_wdata[0];
+ assign ie0_1_e_33_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_33_wd = reg_wdata[1];
+ assign ie0_1_e_34_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_34_wd = reg_wdata[2];
+ assign ie0_1_e_35_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_35_wd = reg_wdata[3];
+ assign ie0_1_e_36_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_36_wd = reg_wdata[4];
+ // Fixed: this enable was decoded from addr_hit[6] (the prio2 register), so a
+ // prio2 write overwrote e_37 and a write to this register never updated it.
+ assign ie0_1_e_37_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_37_wd = reg_wdata[5];
+ assign ie0_1_e_38_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_38_wd = reg_wdata[6];
+ assign ie0_1_e_39_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_39_wd = reg_wdata[7];
+ assign ie0_1_e_40_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_40_wd = reg_wdata[8];
+ assign ie0_1_e_41_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_41_wd = reg_wdata[9];
+ assign ie0_1_e_42_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_42_wd = reg_wdata[10];
+ assign ie0_1_e_43_we = addr_hit[49] & reg_we & ~wr_err;
+ assign ie0_1_e_43_wd = reg_wdata[11];
+ assign threshold0_we = addr_hit[50] & reg_we & ~wr_err;
+ assign threshold0_wd = reg_wdata[1:0];
+ assign cc0_we = addr_hit[51] & reg_we & ~wr_err;
+ assign cc0_wd = reg_wdata[7:0];
+ assign cc0_re = addr_hit[51] && reg_re;
+ assign msip0_we = addr_hit[52] & reg_we & ~wr_err;
+ assign msip0_wd = reg_wdata[0];
+ // Read data return
+ always_comb begin
+ reg_rdata_next = '0;
+ unique case (1'b1)
+ addr_hit[0]: begin
+ reg_rdata_next[0] = ip_0_p_0_qs;
+ reg_rdata_next[1] = ip_0_p_1_qs;
+ reg_rdata_next[2] = ip_0_p_2_qs;
+ reg_rdata_next[3] = ip_0_p_3_qs;
+ reg_rdata_next[4] = ip_0_p_4_qs;
+ reg_rdata_next[5] = ip_0_p_5_qs;
+ reg_rdata_next[6] = ip_0_p_6_qs;
+ reg_rdata_next[7] = ip_0_p_7_qs;
+ reg_rdata_next[8] = ip_0_p_8_qs;
+ reg_rdata_next[9] = ip_0_p_9_qs;
+ reg_rdata_next[10] = ip_0_p_10_qs;
+ reg_rdata_next[11] = ip_0_p_11_qs;
+ reg_rdata_next[12] = ip_0_p_12_qs;
+ reg_rdata_next[13] = ip_0_p_13_qs;
+ reg_rdata_next[14] = ip_0_p_14_qs;
+ reg_rdata_next[15] = ip_0_p_15_qs;
+ reg_rdata_next[16] = ip_0_p_16_qs;
+ reg_rdata_next[17] = ip_0_p_17_qs;
+ reg_rdata_next[18] = ip_0_p_18_qs;
+ reg_rdata_next[19] = ip_0_p_19_qs;
+ reg_rdata_next[20] = ip_0_p_20_qs;
+ reg_rdata_next[21] = ip_0_p_21_qs;
+ reg_rdata_next[22] = ip_0_p_22_qs;
+ reg_rdata_next[23] = ip_0_p_23_qs;
+ reg_rdata_next[24] = ip_0_p_24_qs;
+ reg_rdata_next[25] = ip_0_p_25_qs;
+ reg_rdata_next[26] = ip_0_p_26_qs;
+ reg_rdata_next[27] = ip_0_p_27_qs;
+ reg_rdata_next[28] = ip_0_p_28_qs;
+ reg_rdata_next[29] = ip_0_p_29_qs;
+ reg_rdata_next[30] = ip_0_p_30_qs;
+ reg_rdata_next[31] = ip_0_p_31_qs;
+ end
+ addr_hit[1]: begin
+ reg_rdata_next[0] = ip_1_p_32_qs;
+ reg_rdata_next[1] = ip_1_p_33_qs;
+ reg_rdata_next[2] = ip_1_p_34_qs;
+ reg_rdata_next[3] = ip_1_p_35_qs;
+ reg_rdata_next[4] = ip_1_p_36_qs;
+ reg_rdata_next[5] = ip_1_p_37_qs;
+ reg_rdata_next[6] = ip_1_p_38_qs;
+ reg_rdata_next[7] = ip_1_p_39_qs;
+ reg_rdata_next[8] = ip_1_p_40_qs;
+ reg_rdata_next[9] = ip_1_p_41_qs;
+ reg_rdata_next[10] = ip_1_p_42_qs;
+ reg_rdata_next[11] = ip_1_p_43_qs;
+ end
+ addr_hit[2]: begin
+ reg_rdata_next[0] = le_0_le_0_qs;
+ reg_rdata_next[1] = le_0_le_1_qs;
+ reg_rdata_next[2] = le_0_le_2_qs;
+ reg_rdata_next[3] = le_0_le_3_qs;
+ reg_rdata_next[4] = le_0_le_4_qs;
+ reg_rdata_next[5] = le_0_le_5_qs;
+ reg_rdata_next[6] = le_0_le_6_qs;
+ reg_rdata_next[7] = le_0_le_7_qs;
+ reg_rdata_next[8] = le_0_le_8_qs;
+ reg_rdata_next[9] = le_0_le_9_qs;
+ reg_rdata_next[10] = le_0_le_10_qs;
+ reg_rdata_next[11] = le_0_le_11_qs;
+ reg_rdata_next[12] = le_0_le_12_qs;
+ reg_rdata_next[13] = le_0_le_13_qs;
+ reg_rdata_next[14] = le_0_le_14_qs;
+ reg_rdata_next[15] = le_0_le_15_qs;
+ reg_rdata_next[16] = le_0_le_16_qs;
+ reg_rdata_next[17] = le_0_le_17_qs;
+ reg_rdata_next[18] = le_0_le_18_qs;
+ reg_rdata_next[19] = le_0_le_19_qs;
+ reg_rdata_next[20] = le_0_le_20_qs;
+ reg_rdata_next[21] = le_0_le_21_qs;
+ reg_rdata_next[22] = le_0_le_22_qs;
+ reg_rdata_next[23] = le_0_le_23_qs;
+ reg_rdata_next[24] = le_0_le_24_qs;
+ reg_rdata_next[25] = le_0_le_25_qs;
+ reg_rdata_next[26] = le_0_le_26_qs;
+ reg_rdata_next[27] = le_0_le_27_qs;
+ reg_rdata_next[28] = le_0_le_28_qs;
+ reg_rdata_next[29] = le_0_le_29_qs;
+ reg_rdata_next[30] = le_0_le_30_qs;
+ reg_rdata_next[31] = le_0_le_31_qs;
+ end
+ addr_hit[3]: begin
+ reg_rdata_next[0] = le_1_le_32_qs;
+ reg_rdata_next[1] = le_1_le_33_qs;
+ reg_rdata_next[2] = le_1_le_34_qs;
+ reg_rdata_next[3] = le_1_le_35_qs;
+ reg_rdata_next[4] = le_1_le_36_qs;
+ reg_rdata_next[5] = le_1_le_37_qs;
+ reg_rdata_next[6] = le_1_le_38_qs;
+ reg_rdata_next[7] = le_1_le_39_qs;
+ reg_rdata_next[8] = le_1_le_40_qs;
+ reg_rdata_next[9] = le_1_le_41_qs;
+ reg_rdata_next[10] = le_1_le_42_qs;
+ reg_rdata_next[11] = le_1_le_43_qs;
+ end
+ addr_hit[4]: begin
+ reg_rdata_next[1:0] = prio0_qs;
+ end
+ addr_hit[5]: begin
+ reg_rdata_next[1:0] = prio1_qs;
+ end
+ addr_hit[6]: begin
+ reg_rdata_next[1:0] = prio2_qs;
+ end
+ addr_hit[7]: begin
+ reg_rdata_next[1:0] = prio3_qs;
+ end
+ addr_hit[8]: begin
+ reg_rdata_next[1:0] = prio4_qs;
+ end
+ addr_hit[9]: begin
+ reg_rdata_next[1:0] = prio5_qs;
+ end
+ addr_hit[10]: begin
+ reg_rdata_next[1:0] = prio6_qs;
+ end
+ addr_hit[11]: begin
+ reg_rdata_next[1:0] = prio7_qs;
+ end
+ addr_hit[12]: begin
+ reg_rdata_next[1:0] = prio8_qs;
+ end
+ addr_hit[13]: begin
+ reg_rdata_next[1:0] = prio9_qs;
+ end
+ addr_hit[14]: begin
+ reg_rdata_next[1:0] = prio10_qs;
+ end
+ addr_hit[15]: begin
+ reg_rdata_next[1:0] = prio11_qs;
+ end
+ addr_hit[16]: begin
+ reg_rdata_next[1:0] = prio12_qs;
+ end
+ addr_hit[17]: begin
+ reg_rdata_next[1:0] = prio13_qs;
+ end
+ addr_hit[18]: begin
+ reg_rdata_next[1:0] = prio14_qs;
+ end
+ addr_hit[19]: begin
+ reg_rdata_next[1:0] = prio15_qs;
+ end
+ addr_hit[20]: begin
+ reg_rdata_next[1:0] = prio16_qs;
+ end
+ addr_hit[21]: begin
+ reg_rdata_next[1:0] = prio17_qs;
+ end
+ addr_hit[22]: begin
+ reg_rdata_next[1:0] = prio18_qs;
+ end
+ addr_hit[23]: begin
+ reg_rdata_next[1:0] = prio19_qs;
+ end
+ addr_hit[24]: begin
+ reg_rdata_next[1:0] = prio20_qs;
+ end
+ addr_hit[25]: begin
+ reg_rdata_next[1:0] = prio21_qs;
+ end
+ addr_hit[26]: begin
+ reg_rdata_next[1:0] = prio22_qs;
+ end
+ addr_hit[27]: begin
+ reg_rdata_next[1:0] = prio23_qs;
+ end
+ addr_hit[28]: begin
+ reg_rdata_next[1:0] = prio24_qs;
+ end
+ addr_hit[29]: begin
+ reg_rdata_next[1:0] = prio25_qs;
+ end
+ addr_hit[30]: begin
+ reg_rdata_next[1:0] = prio26_qs;
+ end
+ addr_hit[31]: begin
+ reg_rdata_next[1:0] = prio27_qs;
+ end
+ addr_hit[32]: begin
+ reg_rdata_next[1:0] = prio28_qs;
+ end
+ addr_hit[33]: begin
+ reg_rdata_next[1:0] = prio29_qs;
+ end
+ addr_hit[34]: begin
+ reg_rdata_next[1:0] = prio30_qs;
+ end
+ addr_hit[35]: begin
+ reg_rdata_next[1:0] = prio31_qs;
+ end
+ addr_hit[36]: begin
+ reg_rdata_next[1:0] = prio32_qs;
+ end
+ addr_hit[37]: begin
+ reg_rdata_next[1:0] = prio33_qs;
+ end
+ addr_hit[38]: begin
+ reg_rdata_next[1:0] = prio34_qs;
+ end
+ addr_hit[39]: begin
+ reg_rdata_next[1:0] = prio35_qs;
+ end
+ addr_hit[40]: begin
+ reg_rdata_next[1:0] = prio36_qs;
+ end
+ addr_hit[41]: begin
+ reg_rdata_next[1:0] = prio37_qs;
+ end
+ addr_hit[42]: begin
+ reg_rdata_next[1:0] = prio38_qs;
+ end
+ addr_hit[43]: begin
+ reg_rdata_next[1:0] = prio39_qs;
+ end
+ addr_hit[44]: begin
+ reg_rdata_next[1:0] = prio40_qs;
+ end
+ addr_hit[45]: begin
+ reg_rdata_next[1:0] = prio41_qs;
+ end
+ addr_hit[46]: begin
+ reg_rdata_next[1:0] = prio42_qs;
+ end
+ addr_hit[47]: begin
+ reg_rdata_next[1:0] = prio43_qs;
+ end
+ addr_hit[48]: begin
+ reg_rdata_next[0] = ie0_0_e_0_qs;
+ reg_rdata_next[1] = ie0_0_e_1_qs;
+ reg_rdata_next[2] = ie0_0_e_2_qs;
+ reg_rdata_next[3] = ie0_0_e_3_qs;
+ reg_rdata_next[4] = ie0_0_e_4_qs;
+ reg_rdata_next[5] = ie0_0_e_5_qs;
+ reg_rdata_next[6] = ie0_0_e_6_qs;
+ reg_rdata_next[7] = ie0_0_e_7_qs;
+ reg_rdata_next[8] = ie0_0_e_8_qs;
+ reg_rdata_next[9] = ie0_0_e_9_qs;
+ reg_rdata_next[10] = ie0_0_e_10_qs;
+ reg_rdata_next[11] = ie0_0_e_11_qs;
+ reg_rdata_next[12] = ie0_0_e_12_qs;
+ reg_rdata_next[13] = ie0_0_e_13_qs;
+ reg_rdata_next[14] = ie0_0_e_14_qs;
+ reg_rdata_next[15] = ie0_0_e_15_qs;
+ reg_rdata_next[16] = ie0_0_e_16_qs;
+ reg_rdata_next[17] = ie0_0_e_17_qs;
+ reg_rdata_next[18] = ie0_0_e_18_qs;
+ reg_rdata_next[19] = ie0_0_e_19_qs;
+ reg_rdata_next[20] = ie0_0_e_20_qs;
+ reg_rdata_next[21] = ie0_0_e_21_qs;
+ reg_rdata_next[22] = ie0_0_e_22_qs;
+ reg_rdata_next[23] = ie0_0_e_23_qs;
+ reg_rdata_next[24] = ie0_0_e_24_qs;
+ reg_rdata_next[25] = ie0_0_e_25_qs;
+ reg_rdata_next[26] = ie0_0_e_26_qs;
+ reg_rdata_next[27] = ie0_0_e_27_qs;
+ reg_rdata_next[28] = ie0_0_e_28_qs;
+ reg_rdata_next[29] = ie0_0_e_29_qs;
+ reg_rdata_next[30] = ie0_0_e_30_qs;
+ reg_rdata_next[31] = ie0_0_e_31_qs;
+ end
+ addr_hit[49]: begin
+ reg_rdata_next[0] = ie0_1_e_32_qs;
+ reg_rdata_next[1] = ie0_1_e_33_qs;
+ reg_rdata_next[2] = ie0_1_e_34_qs;
+ reg_rdata_next[3] = ie0_1_e_35_qs;
+ reg_rdata_next[4] = ie0_1_e_36_qs;
+ reg_rdata_next[5] = ie0_1_e_37_qs;
+ reg_rdata_next[6] = ie0_1_e_38_qs;
+ reg_rdata_next[7] = ie0_1_e_39_qs;
+ reg_rdata_next[8] = ie0_1_e_40_qs;
+ reg_rdata_next[9] = ie0_1_e_41_qs;
+ reg_rdata_next[10] = ie0_1_e_42_qs;
+ reg_rdata_next[11] = ie0_1_e_43_qs;
+ end
+ addr_hit[50]: begin
+ reg_rdata_next[1:0] = threshold0_qs;
+ end
+ addr_hit[51]: begin
+ reg_rdata_next[7:0] = cc0_qs;
+ end
+ addr_hit[52]: begin
+ reg_rdata_next[0] = msip0_qs;
+ end
+ default: begin
+ reg_rdata_next = '1;
+ end
+ endcase
+ end
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..045c202
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,128 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// RISC-V Platform-Level Interrupt Generator for Target
+// This module basically computes IE & IP, qualified by priority and threshold_i.
+// Keep in mind that increasing MAX_PRIO affects logic size a lot.
+// The module implements a binary tree to find the maximal entry. the solution
+// has O(N) area and O(log(N)) delay complexity, and thus scales well with
+// many input sources.
+module rv_plic_target #(
+ parameter int N_SOURCE = 32,
+ parameter int MAX_PRIO = 7,
+ // Local params (do not override these through the parameter list)
+ localparam int SrcWidth = $clog2(N_SOURCE+1), // derived parameter
+ localparam int PrioWidth = $clog2(MAX_PRIO+1) // derived parameter
+) (
+ input clk_i,
+ input rst_ni,
+ input [N_SOURCE-1:0] ip_i, // per-source pending bits
+ input [N_SOURCE-1:0] ie_i, // per-source enable bits (ANDed with ip_i at the leaves)
+ input [PrioWidth-1:0] prio_i [N_SOURCE], // per-source priority
+ input [PrioWidth-1:0] threshold_i, // winner must be strictly greater than this
+ output logic irq_o, // registered interrupt request
+ output logic [SrcWidth-1:0] irq_id_o // registered ID of the winning source
+);
+ // this only works with 2 or more sources
+ // align to powers of 2 for simplicity
+ // a full binary tree with N levels has 2**N + 2**N-1 nodes
+ localparam int NumLevels = $clog2(N_SOURCE);
+ logic [2**(NumLevels+1)-2:0] is_tree;
+ logic [2**(NumLevels+1)-2:0][SrcWidth-1:0] id_tree;
+ logic [2**(NumLevels+1)-2:0][PrioWidth-1:0] max_tree;
+ for (genvar level = 0; level < NumLevels+1; level++) begin : gen_tree
+ //
+ // level+1 C0 C1 <- "Base1" points to the first node on "level+1",
+ // \ / these nodes are the children of the nodes one level below
+ // level Pa <- "Base0", points to the first node on "level",
+ // these nodes are the parents of the nodes one level above
+ //
+ // hence we have the following indices for the Pa, C0, C1 nodes:
+ // Pa = 2**level - 1 + offset = Base0 + offset
+ // C0 = 2**(level+1) - 1 + 2*offset = Base1 + 2*offset
+ // C1 = 2**(level+1) - 1 + 2*offset + 1 = Base1 + 2*offset + 1
+ //
+ localparam int Base0 = (2**level)-1;
+ localparam int Base1 = (2**(level+1))-1;
+ for (genvar offset = 0; offset < 2**level; offset++) begin : gen_level
+ localparam int Pa = Base0 + offset;
+ localparam int C0 = Base1 + 2*offset;
+ localparam int C1 = Base1 + 2*offset + 1;
+ // this assigns the gated interrupt source signals, their
+ // corresponding IDs and priorities to the tree leaves
+ if (level == NumLevels) begin : gen_leafs
+ if (offset < N_SOURCE) begin : gen_assign
+ assign is_tree[Pa] = ip_i[offset] & ie_i[offset];
+ assign id_tree[Pa] = offset;
+ assign max_tree[Pa] = prio_i[offset];
+ end else begin : gen_tie_off
+ // leaves beyond N_SOURCE only pad the tree to a power of two
+ assign is_tree[Pa] = '0;
+ assign id_tree[Pa] = '0;
+ assign max_tree[Pa] = '0;
+ end
+ // this creates the node assignments
+ end else begin : gen_nodes
+ // NOTE: the code below has been written in this way in order to work
+ // around a synthesis issue in Vivado 2018.3 and 2019.2 where the whole
+ // module would be optimized away if these assign statements contained
+ // ternary statements to implement the muxes.
+ //
+ // TODO: rewrite these lines with ternary statements once the problem
+ // has been fixed in the tool.
+ //
+ // See also originating issue:
+ //
+ // Xilinx issue:
+ //
+ logic sel; // local helper variable
+ // in case only one of the children has a pending irq, forward that one
+ // in case both irqs are pending, forward the one with higher priority
+ // (on equal priority sel stays 0, i.e. C0 wins)
+ assign sel = (~is_tree[C0] & is_tree[C1]) |
+ (is_tree[C0] & is_tree[C1] & logic'(max_tree[C1] > max_tree[C0]));
+ // forwarding muxes
+ assign is_tree[Pa] = (sel & is_tree[C1]) | ((~sel) & is_tree[C0]);
+ assign id_tree[Pa] = ({SrcWidth{sel}} & id_tree[C1]) | ({SrcWidth{~sel}} & id_tree[C0]);
+ assign max_tree[Pa] = ({PrioWidth{sel}} & max_tree[C1]) | ({PrioWidth{~sel}} & max_tree[C0]);
+ end
+ end : gen_level
+ end : gen_tree
+ logic irq_d, irq_q;
+ logic [SrcWidth-1:0] irq_id_d, irq_id_q;
+ // the results can be found at the tree root
+ assign irq_d = (max_tree[0] > threshold_i) ? is_tree[0] : 1'b0;
+ assign irq_id_d = (is_tree[0]) ? id_tree[0] : '0;
+ // outputs are registered; both clear to zero on the asynchronous active-low reset
+ always_ff @(posedge clk_i or negedge rst_ni) begin : gen_regs
+ if (!rst_ni) begin
+ irq_q <= 1'b0;
+ irq_id_q <= '0;
+ end else begin
+ irq_q <= irq_d;
+ irq_id_q <= irq_id_d;
+ end
+ end
+ assign irq_o = irq_q;
+ assign irq_id_o = irq_id_q;
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..d88c28e
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,129 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Timer peripheral top level: wires the register block (u_reg) to one
+// timer_core and one prim_intr_hw per hart (N_HARTS = 1, N_TIMERS = 1).
+module rv_timer (
+ input clk_i,
+ input rst_ni,
+ input tlul_pkg::tl_h2d_t tl_i,
+ output tlul_pkg::tl_d2h_t tl_o,
+ output logic intr_timer_expired_0_0_o
+);
+ localparam int N_HARTS = 1;
+ localparam int N_TIMERS = 1;
+ import rv_timer_reg_pkg::*;
+ rv_timer_reg2hw_t reg2hw;
+ rv_timer_hw2reg_t hw2reg;
+ logic [N_HARTS-1:0] active;
+ logic [11:0] prescaler [N_HARTS];
+ logic [7:0] step [N_HARTS];
+ logic [N_HARTS-1:0] tick;
+ logic [63:0] mtime_d [N_HARTS];
+ logic [63:0] mtime [N_HARTS];
+ logic [63:0] mtimecmp [N_HARTS][N_TIMERS]; // Only [harts][0] is connected to mtimecmp CSRs
+ logic mtimecmp_update [N_HARTS][N_TIMERS];
+ logic [N_HARTS*N_TIMERS-1:0] intr_timer_set;
+ logic [N_HARTS*N_TIMERS-1:0] intr_timer_en;
+ logic [N_HARTS*N_TIMERS-1:0] intr_timer_test_q;
+ logic [N_HARTS-1:0] intr_timer_test_qe;
+ logic [N_HARTS*N_TIMERS-1:0] intr_timer_state_q;
+ logic [N_HARTS-1:0] intr_timer_state_de;
+ logic [N_HARTS*N_TIMERS-1:0] intr_timer_state_d;
+ logic [N_HARTS*N_TIMERS-1:0] intr_out;
+ /////////////////////////////////////////////////
+ // Connecting register interface to the signal //
+ /////////////////////////////////////////////////
+ // Once reggen supports nested multireg, the following can be automated. For the moment, it must
+ // be connected manually.
+ assign active[0] = reg2hw.ctrl[0].q;
+ assign prescaler = '{reg2hw.cfg0.prescale.q};
+ assign step = '{reg2hw.cfg0.step.q};
+ // Fixed: these two assigns had lost their left-hand side. The hardware-side
+ // write enables of the mtime registers are the only hw2reg members not driven
+ // elsewhere; they are pulsed by the hart's tick so mtime_d is captured then.
+ assign hw2reg.timer_v_upper0.de = tick[0];
+ assign hw2reg.timer_v_lower0.de = tick[0];
+ assign hw2reg.timer_v_upper0.d = mtime_d[0][63:32];
+ assign hw2reg.timer_v_lower0.d = mtime_d[0][31: 0];
+ assign mtime[0] = {reg2hw.timer_v_upper0.q, reg2hw.timer_v_lower0.q};
+ assign mtimecmp = '{'{{reg2hw.compare_upper0_0.q,reg2hw.compare_lower0_0.q}}};
+ // a software write to either compare half counts as a compare update
+ assign mtimecmp_update[0][0] = reg2hw.compare_upper0_0.qe | reg2hw.compare_lower0_0.qe;
+ assign intr_timer_expired_0_0_o = intr_out[0];
+ assign intr_timer_en = reg2hw.intr_enable0[0].q;
+ assign intr_timer_state_q = reg2hw.intr_state0[0].q;
+ assign intr_timer_test_q = reg2hw.intr_test0[0].q;
+ assign intr_timer_test_qe = reg2hw.intr_test0[0].qe;
+ // a compare update forces a state write with d masked to 0, clearing intr_state
+ assign hw2reg.intr_state0[0].de = intr_timer_state_de | mtimecmp_update[0][0];
+ assign hw2reg.intr_state0[0].d = intr_timer_state_d & ~mtimecmp_update[0][0];
+ for (genvar h = 0 ; h < N_HARTS ; h++) begin : gen_harts
+ prim_intr_hw #(
+ .Width(N_TIMERS)
+ ) u_intr_hw (
+ .clk_i,
+ .rst_ni,
+ .event_intr_i (intr_timer_set),
+ .reg2hw_intr_enable_q_i (intr_timer_en[h*N_TIMERS+:N_TIMERS]),
+ .reg2hw_intr_test_q_i (intr_timer_test_q[h*N_TIMERS+:N_TIMERS]),
+ .reg2hw_intr_test_qe_i (intr_timer_test_qe[h]),
+ .reg2hw_intr_state_q_i (intr_timer_state_q[h*N_TIMERS+:N_TIMERS]),
+ .hw2reg_intr_state_de_o (intr_timer_state_de),
+ .hw2reg_intr_state_d_o (intr_timer_state_d[h*N_TIMERS+:N_TIMERS]),
+ .intr_o (intr_out[h*N_TIMERS+:N_TIMERS])
+ );
+ // NOTE(review): the parameter override list is empty, so timer_core runs with
+ // its defaults; confirm its timer-count default matches N_TIMERS.
+ timer_core #(
+ ) u_core (
+ .clk_i,
+ .rst_ni,
+ .active (active[h]),
+ .prescaler (prescaler[h]),
+ .step (step[h]),
+ .tick (tick[h]),
+ .mtime_d (mtime_d[h]),
+ .mtime (mtime[h]),
+ .mtimecmp (mtimecmp[h]),
+ .intr (intr_timer_set[h*N_TIMERS+:N_TIMERS])
+ );
+ end : gen_harts
+ // Register module
+ rv_timer_reg_top u_reg (
+ .clk_i,
+ .rst_ni,
+ .tl_i,
+ .tl_o,
+ .reg2hw,
+ .hw2reg,
+ .devmode_i (1'b1)
+ );
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..30cfb44
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,142 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Package auto-generated by `reggen` containing data structure
+package rv_timer_reg_pkg;
+ // Param list
+ parameter int N_HARTS = 1;
+ parameter int N_TIMERS = 1;
+ // Address width within the block
+ parameter int BlockAw = 9;
+ ////////////////////////////
+ // Typedefs for registers //
+ ////////////////////////////
+ // reg2hw types: q is the register value, qe (where present) accompanies a write
+ typedef struct packed {
+ logic q;
+ } rv_timer_reg2hw_ctrl_mreg_t;
+ typedef struct packed {
+ struct packed {
+ logic [11:0] q;
+ } prescale;
+ struct packed {
+ logic [7:0] q;
+ } step;
+ } rv_timer_reg2hw_cfg0_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } rv_timer_reg2hw_timer_v_lower0_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ } rv_timer_reg2hw_timer_v_upper0_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ logic qe;
+ } rv_timer_reg2hw_compare_lower0_0_reg_t;
+ typedef struct packed {
+ logic [31:0] q;
+ logic qe;
+ } rv_timer_reg2hw_compare_upper0_0_reg_t;
+ typedef struct packed {
+ logic q;
+ } rv_timer_reg2hw_intr_enable0_mreg_t;
+ typedef struct packed {
+ logic q;
+ } rv_timer_reg2hw_intr_state0_mreg_t;
+ typedef struct packed {
+ logic q;
+ logic qe;
+ } rv_timer_reg2hw_intr_test0_mreg_t;
+ // hw2reg types: d is the hardware-supplied value, de its write enable
+ typedef struct packed {
+ logic [31:0] d;
+ logic de;
+ } rv_timer_hw2reg_timer_v_lower0_reg_t;
+ typedef struct packed {
+ logic [31:0] d;
+ logic de;
+ } rv_timer_hw2reg_timer_v_upper0_reg_t;
+ typedef struct packed {
+ logic d;
+ logic de;
+ } rv_timer_hw2reg_intr_state0_mreg_t;
+ ///////////////////////////////////////
+ // Register to internal design logic //
+ ///////////////////////////////////////
+ typedef struct packed {
+ rv_timer_reg2hw_ctrl_mreg_t [0:0] ctrl; // [154:154]
+ rv_timer_reg2hw_cfg0_reg_t cfg0; // [153:134]
+ rv_timer_reg2hw_timer_v_lower0_reg_t timer_v_lower0; // [133:102]
+ rv_timer_reg2hw_timer_v_upper0_reg_t timer_v_upper0; // [101:70]
+ rv_timer_reg2hw_compare_lower0_0_reg_t compare_lower0_0; // [69:37]
+ rv_timer_reg2hw_compare_upper0_0_reg_t compare_upper0_0; // [36:4]
+ rv_timer_reg2hw_intr_enable0_mreg_t [0:0] intr_enable0; // [3:3]
+ rv_timer_reg2hw_intr_state0_mreg_t [0:0] intr_state0; // [2:2]
+ rv_timer_reg2hw_intr_test0_mreg_t [0:0] intr_test0; // [1:0]
+ } rv_timer_reg2hw_t;
+ ///////////////////////////////////////
+ // Internal design logic to register //
+ ///////////////////////////////////////
+ typedef struct packed {
+ rv_timer_hw2reg_timer_v_lower0_reg_t timer_v_lower0; // [67:35]
+ rv_timer_hw2reg_timer_v_upper0_reg_t timer_v_upper0; // [34:2]
+ rv_timer_hw2reg_intr_state0_mreg_t [0:0] intr_state0; // [1:0]
+ } rv_timer_hw2reg_t;
+ // Register Address
+ parameter logic [BlockAw-1:0] RV_TIMER_CTRL_OFFSET = 9'h 0;
+ parameter logic [BlockAw-1:0] RV_TIMER_CFG0_OFFSET = 9'h 100;
+ parameter logic [BlockAw-1:0] RV_TIMER_TIMER_V_LOWER0_OFFSET = 9'h 104;
+ parameter logic [BlockAw-1:0] RV_TIMER_TIMER_V_UPPER0_OFFSET = 9'h 108;
+ parameter logic [BlockAw-1:0] RV_TIMER_COMPARE_LOWER0_0_OFFSET = 9'h 10c;
+ parameter logic [BlockAw-1:0] RV_TIMER_COMPARE_UPPER0_0_OFFSET = 9'h 110;
+ parameter logic [BlockAw-1:0] RV_TIMER_INTR_ENABLE0_OFFSET = 9'h 114;
+ parameter logic [BlockAw-1:0] RV_TIMER_INTR_STATE0_OFFSET = 9'h 118;
+ parameter logic [BlockAw-1:0] RV_TIMER_INTR_TEST0_OFFSET = 9'h 11c;
+ // Register Index
+ // Fixed: the enum body was empty, which is not legal SystemVerilog. Members
+ // follow the index order documented on RV_TIMER_PERMIT below (0..8).
+ typedef enum int {
+ RV_TIMER_CTRL,
+ RV_TIMER_CFG0,
+ RV_TIMER_TIMER_V_LOWER0,
+ RV_TIMER_TIMER_V_UPPER0,
+ RV_TIMER_COMPARE_LOWER0_0,
+ RV_TIMER_COMPARE_UPPER0_0,
+ RV_TIMER_INTR_ENABLE0,
+ RV_TIMER_INTR_STATE0,
+ RV_TIMER_INTR_TEST0
+ } rv_timer_id_e;
+ // Register width information to check illegal writes
+ parameter logic [3:0] RV_TIMER_PERMIT [9] = '{
+ 4'b 0001, // index[0] RV_TIMER_CTRL
+ 4'b 0111, // index[1] RV_TIMER_CFG0
+ 4'b 1111, // index[2] RV_TIMER_TIMER_V_LOWER0
+ 4'b 1111, // index[3] RV_TIMER_TIMER_V_UPPER0
+ 4'b 1111, // index[4] RV_TIMER_COMPARE_LOWER0_0
+ 4'b 1111, // index[5] RV_TIMER_COMPARE_UPPER0_0
+ 4'b 0001, // index[6] RV_TIMER_INTR_ENABLE0
+ 4'b 0001, // index[7] RV_TIMER_INTR_STATE0
+ 4'b 0001 // index[8] RV_TIMER_INTR_TEST0
+ };
+endpackage
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..62f1c51
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,482 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Top module auto-generated by `reggen`
+module rv_timer_reg_top (
+ input clk_i,
+ input rst_ni,
+ // Below Register interface can be changed
+ input tlul_pkg::tl_h2d_t tl_i,
+ output tlul_pkg::tl_d2h_t tl_o,
+ // To HW
+ output rv_timer_reg_pkg::rv_timer_reg2hw_t reg2hw, // Write
+ input rv_timer_reg_pkg::rv_timer_hw2reg_t hw2reg, // Read
+ // Config
+ input devmode_i // If 1, explicit error return for unmapped register access
+);
+ // Fixed: the port list above was never closed before the module body began.
+ import rv_timer_reg_pkg::* ;
+ localparam int AW = 9;
+ localparam int DW = 32;
+ localparam int DBW = DW/8; // Byte Width
+ // register signals
+ logic reg_we;
+ logic reg_re;
+ logic [AW-1:0] reg_addr;
+ logic [DW-1:0] reg_wdata;
+ logic [DBW-1:0] reg_be;
+ logic [DW-1:0] reg_rdata;
+ logic reg_error;
+ logic addrmiss, wr_err;
+ logic [DW-1:0] reg_rdata_next;
+ tlul_pkg::tl_h2d_t tl_reg_h2d;
+ tlul_pkg::tl_d2h_t tl_reg_d2h;
+ assign tl_reg_h2d = tl_i;
+ assign tl_o = tl_reg_d2h;
+ tlul_adapter_reg #(
+ .RegAw(AW),
+ .RegDw(DW)
+ ) u_reg_if (
+ .clk_i,
+ .rst_ni,
+ .tl_i (tl_reg_h2d),
+ .tl_o (tl_reg_d2h),
+ .we_o (reg_we),
+ .re_o (reg_re),
+ .addr_o (reg_addr),
+ .wdata_o (reg_wdata),
+ .be_o (reg_be),
+ .rdata_i (reg_rdata),
+ .error_i (reg_error)
+ );
+ assign reg_rdata = reg_rdata_next ;
+ assign reg_error = (devmode_i & addrmiss) | wr_err ;
+ // Define SW related signals
+ // Format: <reg>_<field>_{wd|we|qs}
+ // or <reg>_{wd|we|qs} if field == 1 or 0
+ logic ctrl_qs;
+ logic ctrl_wd;
+ logic ctrl_we;
+ logic [11:0] cfg0_prescale_qs;
+ logic [11:0] cfg0_prescale_wd;
+ logic cfg0_prescale_we;
+ logic [7:0] cfg0_step_qs;
+ logic [7:0] cfg0_step_wd;
+ logic cfg0_step_we;
+ logic [31:0] timer_v_lower0_qs;
+ logic [31:0] timer_v_lower0_wd;
+ logic timer_v_lower0_we;
+ logic [31:0] timer_v_upper0_qs;
+ logic [31:0] timer_v_upper0_wd;
+ logic timer_v_upper0_we;
+ logic [31:0] compare_lower0_0_qs;
+ logic [31:0] compare_lower0_0_wd;
+ logic compare_lower0_0_we;
+ logic [31:0] compare_upper0_0_qs;
+ logic [31:0] compare_upper0_0_wd;
+ logic compare_upper0_0_we;
+ logic intr_enable0_qs;
+ logic intr_enable0_wd;
+ logic intr_enable0_we;
+ logic intr_state0_qs;
+ logic intr_state0_wd;
+ logic intr_state0_we;
+ logic intr_test0_wd;
+ logic intr_test0_we;
+ // Register instances
+ // Subregister 0 of Multireg ctrl
+ // R[ctrl]: V(False)
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ctrl (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ctrl_we),
+ .wd (ctrl_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.ctrl[0].q ),
+ // to register interface (read)
+ .qs (ctrl_qs)
+ );
+ // R[cfg0]: V(False)
+ // F[prescale]: 11:0
+ prim_subreg #(
+ .DW (12),
+ .RESVAL (12'h0)
+ ) u_cfg0_prescale (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (cfg0_prescale_we),
+ .wd (cfg0_prescale_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.cfg0.prescale.q ),
+ // to register interface (read)
+ .qs (cfg0_prescale_qs)
+ );
+ // F[step]: 23:16
+ prim_subreg #(
+ .DW (8),
+ .RESVAL (8'h1)
+ ) u_cfg0_step (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (cfg0_step_we),
+ .wd (cfg0_step_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.cfg0.step.q ),
+ // to register interface (read)
+ .qs (cfg0_step_qs)
+ );
+ // R[timer_v_lower0]: V(False)
+ // Lower 32 bits of the timer value: written by software (we/wd) and by
+ // hardware (de/d).
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_timer_v_lower0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (timer_v_lower0_we),
+ .wd (timer_v_lower0_wd),
+ // from internal hardware
+ // Fixed: the .de connection had lost its expression (".de (,"); it is the
+ // write enable that pairs with the .d value on the next line.
+ .de (hw2reg.timer_v_lower0.de),
+ .d (hw2reg.timer_v_lower0.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.timer_v_lower0.q ),
+ // to register interface (read)
+ .qs (timer_v_lower0_qs)
+ );
+ // R[timer_v_upper0]: V(False)
+ // Upper 32 bits of the timer value: written by software (we/wd) and by
+ // hardware (de/d).
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'h0)
+ ) u_timer_v_upper0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (timer_v_upper0_we),
+ .wd (timer_v_upper0_wd),
+ // from internal hardware
+ // Fixed: the .de connection had lost its expression (".de (,"); it is the
+ // write enable that pairs with the .d value on the next line.
+ .de (hw2reg.timer_v_upper0.de),
+ .d (hw2reg.timer_v_upper0.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.timer_v_upper0.q ),
+ // to register interface (read)
+ .qs (timer_v_upper0_qs)
+ );
+ // R[compare_lower0_0]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'hffffffff)
+ ) u_compare_lower0_0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (compare_lower0_0_we),
+ .wd (compare_lower0_0_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (reg2hw.compare_lower0_0.qe),
+ .q (reg2hw.compare_lower0_0.q ),
+ // to register interface (read)
+ .qs (compare_lower0_0_qs)
+ );
+ // R[compare_upper0_0]: V(False)
+ prim_subreg #(
+ .DW (32),
+ .RESVAL (32'hffffffff)
+ ) u_compare_upper0_0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (compare_upper0_0_we),
+ .wd (compare_upper0_0_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (reg2hw.compare_upper0_0.qe),
+ .q (reg2hw.compare_upper0_0.q ),
+ // to register interface (read)
+ .qs (compare_upper0_0_qs)
+ );
+ // Subregister 0 of Multireg intr_enable0
+ // R[intr_enable0]: V(False)
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_enable0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_enable0_we),
+ .wd (intr_enable0_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_enable0[0].q ),
+ // to register interface (read)
+ .qs (intr_enable0_qs)
+ );
+ // Subregister 0 of Multireg intr_state0
+ // R[intr_state0]: V(False)
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state0 (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state0_we),
+ .wd (intr_state0_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state0[0].de),
+ .d (hw2reg.intr_state0[0].d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state0[0].q ),
+ // to register interface (read)
+ .qs (intr_state0_qs)
+ );
+ // Subregister 0 of Multireg intr_test0
+ // R[intr_test0]: V(True)
+ prim_subreg_ext #(
+ .DW (1)
+ ) u_intr_test0 (
+ .re (1'b0),
+ .we (intr_test0_we),
+ .wd (intr_test0_wd),
+ .d ('0),
+ .qre (),
+ .qe (reg2hw.intr_test0[0].qe),
+ .q (reg2hw.intr_test0[0].q ),
+ .qs ()
+ );
+ // Address decode: bit i of addr_hit is set when reg_addr equals the offset
+ // of register index i. The concatenation is listed MSB first, so bit 8 comes first.
+ logic [8:0] addr_hit;
+ assign addr_hit = {
+ (reg_addr == RV_TIMER_INTR_TEST0_OFFSET), // [8]
+ (reg_addr == RV_TIMER_INTR_STATE0_OFFSET), // [7]
+ (reg_addr == RV_TIMER_INTR_ENABLE0_OFFSET), // [6]
+ (reg_addr == RV_TIMER_COMPARE_UPPER0_0_OFFSET), // [5]
+ (reg_addr == RV_TIMER_COMPARE_LOWER0_0_OFFSET), // [4]
+ (reg_addr == RV_TIMER_TIMER_V_UPPER0_OFFSET), // [3]
+ (reg_addr == RV_TIMER_TIMER_V_LOWER0_OFFSET), // [2]
+ (reg_addr == RV_TIMER_CFG0_OFFSET), // [1]
+ (reg_addr == RV_TIMER_CTRL_OFFSET) // [0]
+ };
+ // A read or write that matches no register is an address miss.
+ assign addrmiss = (reg_re || reg_we) ? ~(|addr_hit) : 1'b0 ;
+ // Sub-word write check: a write to register idx is rejected unless reg_be
+ // covers every byte lane listed in RV_TIMER_PERMIT[idx].
+ always_comb begin
+ wr_err = 1'b0;
+ for (int unsigned idx = 0; idx < 9; idx++) begin
+ if (reg_we && addr_hit[idx] &&
+ ((RV_TIMER_PERMIT[idx] & reg_be) != RV_TIMER_PERMIT[idx])) begin
+ wr_err = 1'b1;
+ end
+ end
+ end
+ // Shared qualifier: a write strobe that passed the byte-enable check.
+ logic reg_wr_ok;
+ assign reg_wr_ok = reg_we & ~wr_err;
+ // Per-field write enable (qualified write AND address hit) and write-data slice.
+ assign ctrl_wd = reg_wdata[0];
+ assign ctrl_we = reg_wr_ok & addr_hit[0];
+ assign cfg0_prescale_wd = reg_wdata[11:0];
+ assign cfg0_prescale_we = reg_wr_ok & addr_hit[1];
+ assign cfg0_step_wd = reg_wdata[23:16];
+ assign cfg0_step_we = reg_wr_ok & addr_hit[1];
+ assign timer_v_lower0_wd = reg_wdata[31:0];
+ assign timer_v_lower0_we = reg_wr_ok & addr_hit[2];
+ assign timer_v_upper0_wd = reg_wdata[31:0];
+ assign timer_v_upper0_we = reg_wr_ok & addr_hit[3];
+ assign compare_lower0_0_wd = reg_wdata[31:0];
+ assign compare_lower0_0_we = reg_wr_ok & addr_hit[4];
+ assign compare_upper0_0_wd = reg_wdata[31:0];
+ assign compare_upper0_0_we = reg_wr_ok & addr_hit[5];
+ assign intr_enable0_wd = reg_wdata[0];
+ assign intr_enable0_we = reg_wr_ok & addr_hit[6];
+ assign intr_state0_wd = reg_wdata[0];
+ assign intr_state0_we = reg_wr_ok & addr_hit[7];
+ assign intr_test0_wd = reg_wdata[0];
+ assign intr_test0_we = reg_wr_ok & addr_hit[8];
+ // Read data return
+ // Selects the read value from addr_hit. Bits not assigned by the selected
+ // register read as 0; an address that hits nothing reads as all ones.
+ always_comb begin
+ reg_rdata_next = '0;
+ unique case (1'b1)
+ addr_hit[0]: begin
+ reg_rdata_next[0] = ctrl_qs;
+ end
+ addr_hit[1]: begin
+ reg_rdata_next[11:0] = cfg0_prescale_qs;
+ reg_rdata_next[23:16] = cfg0_step_qs;
+ end
+ addr_hit[2]: begin
+ reg_rdata_next[31:0] = timer_v_lower0_qs;
+ end
+ addr_hit[3]: begin
+ reg_rdata_next[31:0] = timer_v_upper0_qs;
+ end
+ addr_hit[4]: begin
+ reg_rdata_next[31:0] = compare_lower0_0_qs;
+ end
+ addr_hit[5]: begin
+ reg_rdata_next[31:0] = compare_upper0_0_qs;
+ end
+ addr_hit[6]: begin
+ reg_rdata_next[0] = intr_enable0_qs;
+ end
+ addr_hit[7]: begin
+ reg_rdata_next[0] = intr_state0_qs;
+ end
+ addr_hit[8]: begin
+ // intr_test0 has no read-back value (its qs port is unconnected)
+ reg_rdata_next[0] = '0;
+ end
+ default: begin
+ reg_rdata_next = '1;
+ end
+ endcase
+ end
+ // Fixed: the module was never terminated before the next file began.
+endmodule
diff --git a/verilog/rtl/spi_clgen.v b/verilog/rtl/spi_clgen.v
new file mode 100644
index 0000000..2086eaa
--- /dev/null
+++ b/verilog/rtl/spi_clgen.v
@@ -0,0 +1,66 @@
+module spi_clgen (
+ input clk_i, // input clock (system clock)
+ input rst_ni, // reset
+ input enable, // clock enable
+ input go, // start transfer
+ input last_clk, // last clock
+ input [`SPI_DIVIDER_LEN-1:0] divider, // clock divider (output clock is divided by this value)
+ output reg clk_out, // output clock
+ output reg pos_edge, // pulse marking positive edge of clk_out
+ output reg neg_edge // pulse marking negative edge of clk_out
+ //reg clk_out;
+ //reg pos_edge;
+ //reg neg_edge;
+ reg [`SPI_DIVIDER_LEN-1:0] cnt; // clock counter
+ wire cnt_zero; // counter is equal to zero
+ wire cnt_one; // counter is equal to one
+ // Full-width compares against the constants 0 and 1
+ assign cnt_zero = cnt == {`SPI_DIVIDER_LEN{1'b0}};
+ assign cnt_one = cnt == {{`SPI_DIVIDER_LEN-1{1'b0}}, 1'b1};
+ // Half-period down-counter.
+ // Resets to all ones; reloads from `divider` whenever the generator is
+ // disabled or the count has reached zero; otherwise decrements by one.
+ always @(posedge clk_i or negedge rst_ni)
+ begin
+   if (!rst_ni)
+     cnt <= {`SPI_DIVIDER_LEN{1'b1}};
+   else if (cnt_zero || !enable)
+     cnt <= divider;
+   else
+     cnt <= cnt - 1'b1;
+ end
+ // Serial clock register.
+ // clk_out toggles when the half-period counter hits zero while enabled.
+ // Once last_clk is set, only a toggle from high back to low is still allowed.
+ // Written as an XOR with the toggle condition: same result as selecting
+ // between ~clk_out and clk_out.
+ always @(posedge clk_i or negedge rst_ni)
+ begin
+   if (!rst_ni)
+     clk_out <= 1'b0;
+   else
+     clk_out <= clk_out ^ (enable && cnt_zero && (clk_out || !last_clk));
+ end
+ // Edge strobes for clk_out.
+ // With a non-zero divider each strobe is raised while the counter equals one
+ // and clk_out is at the opposite level. The zero-divider terms cover the case
+ // where the counter never reaches one.
+ always @(posedge clk_i or negedge rst_ni)
+ begin
+   if (!rst_ni)
+   begin
+     neg_edge <= 1'b0;
+     pos_edge <= 1'b0;
+   end
+   else
+   begin
+     // zero divider: strobe while clk_out is high, or on go before enable
+     pos_edge <= (enable && !clk_out && cnt_one) ||
+                 ((~|divider) && (clk_out || (go && !enable)));
+     // zero divider: strobe while clk_out is low and the generator is enabled
+     neg_edge <= enable && ((clk_out && cnt_one) || ((~|divider) && !clk_out));
+   end
+ end
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..b1c2453
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,199 @@
+module spi_core
+ // tlul signals
+ input clk_i,
+ input rst_ni,
+ input [7:0] addr_i,
+ input [31:0] wdata_i,
+ output reg [31:0] rdata_o,
+ input [3:0] be_i,
+ input we_i,
+ input re_i,
+ output reg error_o,
+ output reg intr_rx_o,
+ output reg intr_tx_o,
+ // SPI signals
+ output [`SPI_SS_NB-1:0] ss_o, // slave select
+ output sclk_o, // serial clock
+ output sd_o,
+ output reg sd_oe, // master out slave in
+ input sd_i // master in slave out
+ // Internal signals
+ reg [`SPI_DIVIDER_LEN-1:0] divider; // Divider register
+ reg [`SPI_CTRL_BIT_NB-1:0] ctrl; // Control and status register
+ reg [`SPI_SS_NB-1:0] ss; // Slave select register
+ reg [32-1:0] wb_dat; // wb data out
+ wire [`SPI_MAX_CHAR-1:0] rx; // Rx register
+ wire rx_negedge; // miso is sampled on negative edge
+ wire tx_negedge; // mosi is driven on negative edge
+ wire [`SPI_CHAR_LEN_BITS-1:0] char_len; // char len
+ wire go; // go
+ wire lsb; // lsb first on line
+ wire ie; // interrupt enable
+ wire ass; // automatic slave select
+ wire spi_divider_sel; // divider register select
+ wire spi_ctrl_sel; // ctrl register select
+ wire spi_tx_sel; // tx_l register select
+ wire spi_ss_sel; // ss register select
+ wire tip; // transfer in progress
+ wire pos_edge; // recognize posedge of sclk
+ wire neg_edge; // recognize negedge of sclk
+ wire last_bit; // marks last character bit
+ wire tx_en; // enables spi transmission
+ wire rx_en; // enables spi reception
+ // Address decoder
+ // Write selects: each is active for a write-only access (we_i set, re_i clear)
+ // whose word offset addr_i[`SPI_OFS_BITS] equals the register's offset macro.
+ // The TX select is additionally gated by tx_en (ctrl[`SPI_TX_SEL]).
+ assign spi_divider_sel = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_DEVIDE);
+ assign spi_ctrl_sel = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_CTRL);
+ assign spi_tx_sel = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_TX_0) & tx_en;
+ assign spi_ss_sel = we_i & ~re_i & (addr_i[`SPI_OFS_BITS] == `SPI_SS);
+ // Register read mux (combinational).
+ // wb_dat defaults to zero and is overridden when the word offset selects one
+ // of the readable registers. Narrower registers are zero-extended to 32 bits.
+ always @(*)
+ begin
+   wb_dat = 32'b0;
+   case (addr_i[`SPI_OFS_BITS])
+     `SPI_SS:     wb_dat = ss;
+     `SPI_DEVIDE: wb_dat = divider;
+     `SPI_CTRL:   wb_dat = ctrl;
+     `SPI_RX_0:   wb_dat = rx[`SPI_MAX_CHAR-1:0];
+   endcase
+ end
+ // Registered read data: rdata_o follows the read mux one clock later and is
+ // cleared by the synchronous, active-low reset.
+ always @(posedge clk_i)
+ begin
+   if (!rst_ni) begin
+     rdata_o <= {32{1'b0}};
+   end
+   else begin
+     rdata_o <= wb_dat;
+   end
+ end
+ // Bus error output: tied low, this core never signals an error.
+ // NOTE(review): error_o is declared `output reg` but driven by a continuous
+ // assignment; that is accepted by SystemVerilog tools only - confirm the
+ // compile mode or drop the `reg` from the port declaration.
+ assign error_o = 1'b0;
+ // TX / RX interrupt pulses.
+ // Both outputs are low by default (and in reset). On the clock where the last
+ // bit's positive edge is seen during a transfer with interrupts enabled,
+ // intr_tx_o pulses if tx_en is set and intr_rx_o pulses if rx_en is set.
+ always @(posedge clk_i)
+ begin
+   intr_tx_o <= 1'b0;
+   intr_rx_o <= 1'b0;
+   if (~rst_ni)
+   begin
+     // synchronous reset: keep the defaults above
+   end
+   else if (ie && tip && last_bit && pos_edge)
+   begin
+     if (tx_en)
+       intr_tx_o <= 1'b1;
+     if (rx_en)
+       intr_rx_o <= 1'b1;
+   end
+ end
+ // Clock divider register.
+ // Cleared by synchronous reset. Byte-writable through be_i[1:0], but only
+ // while no transfer is in progress.
+ always @(posedge clk_i)
+ begin
+   if (!rst_ni)
+   begin
+     divider <= {`SPI_DIVIDER_LEN{1'b0}};
+   end
+   else if (!tip && we_i && spi_divider_sel)
+   begin
+     if (be_i[1])
+       divider[`SPI_DIVIDER_LEN-1:8] <= wdata_i[`SPI_DIVIDER_LEN-1:8];
+     if (be_i[0])
+       divider[7:0] <= wdata_i[7:0];
+   end
+ end
+ // Control/status register.
+ // Cleared by synchronous reset. Byte-writable while idle; a low-byte write
+ // ORs the current ctrl[0] into the written value. When a transfer finishes
+ // (last bit, positive edge) the GO bit clears itself.
+ always @(posedge clk_i)
+ begin
+   if (!rst_ni)
+   begin
+     ctrl <= {`SPI_CTRL_BIT_NB{1'b0}};
+   end
+   else if (!tip && we_i && spi_ctrl_sel)
+   begin
+     if (be_i[1])
+       ctrl[`SPI_CTRL_BIT_NB-1:8] <= wdata_i[`SPI_CTRL_BIT_NB-1:8];
+     if (be_i[0])
+       ctrl[7:0] <= wdata_i[7:0] | {7'b0, ctrl[0]};
+   end
+   else if (pos_edge && last_bit && tip)
+   begin
+     ctrl[`SPI_CTRL_GO] <= 1'b0;
+   end
+ end
+ // Named views of the ctrl register fields, using the bit positions from the
+ // SPI_CTRL_* / SPI_*_SEL macros.
+ assign rx_negedge = ctrl[`SPI_CTRL_RX_NEGEDGE];
+ assign tx_negedge = ctrl[`SPI_CTRL_TX_NEGEDGE];
+ assign go = ctrl[`SPI_CTRL_GO];
+ // NOTE(review): char_len is SPI_CHAR_LEN_BITS wide, so upper bits of the
+ // selected ctrl slice are truncated if the slice is wider - confirm intended.
+ assign char_len = ctrl[`SPI_CTRL_CHAR_LEN];
+ assign lsb = ctrl[`SPI_CTRL_LSB];
+ assign ie = ctrl[`SPI_CTRL_IE];
+ assign ass = ctrl[`SPI_CTRL_ASS];
+ assign rx_en = ctrl[`SPI_RX_SEL];
+ assign tx_en = ctrl[`SPI_TX_SEL];
+ // Serial data output enable: registered, high only while transmit is selected
+ // and receive is not. Asynchronous active-low reset drives it low.
+ always @(posedge clk_i or negedge rst_ni)
+ begin
+   if (!rst_ni)
+     sd_oe <= 1'b0;
+   else if (!rx_en && tx_en)
+     sd_oe <= 1'b1;
+   else
+     sd_oe <= 1'b0;
+ end
+ // Slave-select register.
+ // Cleared by synchronous reset; loaded from the low bits of wdata_i on a
+ // select-register write with byte lane 0 enabled while no transfer is running.
+ always @(posedge clk_i)
+ begin
+   if (!rst_ni)
+     ss <= {`SPI_SS_NB{1'b0}};
+   else if (!tip && we_i && spi_ss_sel && be_i[0])
+     ss <= wdata_i[`SPI_SS_NB-1:0];
+ end
+ // Active-low slave selects: a selected line is driven low during a transfer
+ // when automatic slave select (ass) is on, or whenever ass is off.
+ assign ss_o = ~(ss & {`SPI_SS_NB{(tip & ass) | !ass}});
+ // Serial clock generator: enabled while a transfer is in progress (tip),
+ // produces sclk_o and the pos_edge / neg_edge strobes used by the shifter.
+ spi_clgen clgen (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .go (go),
+ .enable (tip),
+ .last_clk (last_bit),
+ .divider (divider),
+ .clk_out (sclk_o),
+ .pos_edge (pos_edge),
+ .neg_edge (neg_edge)
+ );
+ // Shift register: holds the TX word (loaded from wdata_i on a TX register
+ // write), drives sd_o, samples sd_i into rx, and generates tip / last_bit.
+ spi_shift shift (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .len (char_len[`SPI_CHAR_LEN_BITS-1:0]),
+ .latch (spi_tx_sel & we_i),
+ .byte_sel (be_i),
+ .lsb (lsb),
+ .go (go),
+ .pos_edge (pos_edge),
+ .neg_edge (neg_edge),
+ .rx_negedge (rx_negedge),
+ .tx_negedge (tx_negedge),
+ .tip (tip),
+ .last (last_bit),
+ .p_in (wdata_i),
+ .p_out (rx),
+ .s_clk (sclk_o),
+ .s_in (sd_i),
+ .s_out (sd_o),
+ .rx_en (rx_en)
+ );
diff --git a/verilog/rtl/spi_defines.v b/verilog/rtl/spi_defines.v
new file mode 100644
index 0000000..3e43ac9
--- /dev/null
+++ b/verilog/rtl/spi_defines.v
@@ -0,0 +1,117 @@
+// Number of bits used for the divider register. If used in a system with
+// a low system clock frequency this can be reduced.
+// Use SPI_DIVIDER_LEN for fine-tuning the exact number.
+//`define SPI_DIVIDER_LEN_8
+`define SPI_DIVIDER_LEN_16
+//`define SPI_DIVIDER_LEN_24
+//`define SPI_DIVIDER_LEN_32
+ `define SPI_DIVIDER_LEN 8 // Can be set from 1 to 8
+`ifdef SPI_DIVIDER_LEN_16
+ `define SPI_DIVIDER_LEN 16 // Can be set from 9 to 16
+`ifdef SPI_DIVIDER_LEN_24
+ `define SPI_DIVIDER_LEN 24 // Can be set from 17 to 24
+`ifdef SPI_DIVIDER_LEN_32
+ `define SPI_DIVIDER_LEN 32 // Can be set from 25 to 32
+// Maximum number of bits that can be sent/received at once.
+// Use SPI_MAX_CHAR for fine-tuning the exact number.
+`define SPI_MAX_CHAR_32
+//`define SPI_MAX_CHAR_64
+//`define SPI_MAX_CHAR_32
+//`define SPI_MAX_CHAR_24
+//`define SPI_MAX_CHAR_16
+//`define SPI_MAX_CHAR_8
+`ifdef SPI_MAX_CHAR_128
+ `define SPI_MAX_CHAR 128 // Can only be set to 128
+ `define SPI_CHAR_LEN_BITS 7
+`ifdef SPI_MAX_CHAR_64
+ `define SPI_MAX_CHAR 64 // Can only be set to 64
+ `define SPI_CHAR_LEN_BITS 6
+`ifdef SPI_MAX_CHAR_32
+ `define SPI_MAX_CHAR 32 // Can be set from 25 to 32
+ `define SPI_CHAR_LEN_BITS 5
+`ifdef SPI_MAX_CHAR_24
+ `define SPI_MAX_CHAR 24 // Can be set from 17 to 24
+ `define SPI_CHAR_LEN_BITS 5
+`ifdef SPI_MAX_CHAR_16
+ `define SPI_MAX_CHAR 16 // Can be set from 9 to 16
+ `define SPI_CHAR_LEN_BITS 4
+`ifdef SPI_MAX_CHAR_8
+ `define SPI_MAX_CHAR 8 // Can be set from 1 to 8
+ `define SPI_CHAR_LEN_BITS 3
+// Number of device select signals. Use SPI_SS_NB for fine tuning the
+// exact number.
+`define SPI_SS_NB_4
+//`define SPI_SS_NB_16
+//`define SPI_SS_NB_24
+//`define SPI_SS_NB_32
+`ifdef SPI_SS_NB_4
+ `define SPI_SS_NB 4 // Can be set from 1 to 4
+`ifdef SPI_SS_NB_8
+ `define SPI_SS_NB 8 // Can be set from 1 to 8
+`ifdef SPI_SS_NB_16
+ `define SPI_SS_NB 16 // Can be set from 9 to 16
+`ifdef SPI_SS_NB_24
+ `define SPI_SS_NB 24 // Can be set from 17 to 24
+`ifdef SPI_SS_NB_32
+ `define SPI_SS_NB 32 // Can be set from 25 to 32
+// Bits of the register address used for partial decoding of SPI registers.
+// spi_core compares addr_i[6:2] against the offsets below, so each offset is
+// a word index (byte address = offset * 4).
+`define SPI_OFS_BITS 6:2
+// Register offset
+`define SPI_RX_0 8
+`define SPI_TX_0 0
+`define SPI_CTRL 4
+`define SPI_DEVIDE 5
+`define SPI_SS 6
+// Number of bits in ctrl register
+`define SPI_CTRL_BIT_NB 16
+// Control register bit position
+`define SPI_RX_SEL 15
+`define SPI_TX_SEL 14
+`define SPI_CTRL_ASS 13
+`define SPI_CTRL_IE 12
+`define SPI_CTRL_LSB 11
+`define SPI_CTRL_GO 8
+`define SPI_CTRL_RES_1 7
+// NOTE(review): this slice is 7 bits wide while SPI_CHAR_LEN_BITS is 5 for the
+// 32-bit configuration; the consumer truncates it - confirm intended.
+`define SPI_CTRL_CHAR_LEN 6:0
diff --git a/verilog/rtl/spi_shift.v b/verilog/rtl/spi_shift.v
new file mode 100644
index 0000000..9bc6067
--- /dev/null
+++ b/verilog/rtl/spi_shift.v
@@ -0,0 +1,103 @@
+module spi_shift (
+ input clk_i, // system clock
+ input rst_ni, // reset
+ input latch, // latch signal for storing the data in shift register
+ input [3:0] byte_sel, // byte select signals for storing the data in shift register
+ input [`SPI_CHAR_LEN_BITS-1:0] len, // data len in bits (minus one)
+ input lsb, // lbs first_ni on the line
+ input go, // start stansfer
+ input pos_edge, // recognize posedge of sclk_i
+ input neg_edge, // recognize negedge of sclk_i
+ input rx_negedge, // s_in is sampled on negative edge
+ input tx_negedge, // s_out is driven on negative edge
+ output reg tip, // transfer in progress
+ output last, // last bit
+ input [31:0] p_in, // parallel in
+ output [`SPI_MAX_CHAR-1:0] p_out, // parallel out
+ input s_clk, // serial clock
+ input s_in, // serial in
+ output reg s_out, // serial out
+ input rx_en // serial rx enable
+ // reg s_out;
+ // reg tip;
+ reg [`SPI_CHAR_LEN_BITS:0] cnt; // data bit count
+ reg [`SPI_MAX_CHAR-1:0] data;
+ reg [`SPI_MAX_CHAR-1:0] data_rx; // shift register
+ wire [`SPI_CHAR_LEN_BITS:0] tx_bit_pos; // next bit position
+ wire [`SPI_CHAR_LEN_BITS:0] rx_bit_pos; // next bit position
+ wire rx_clk_i; // rx clock enable
+ wire tx_clk_i; // tx clock enable
+ // Parallel output is the receive register
+ assign p_out = data_rx;
+ // Bit index to transmit / receive next. {!(|len), len} is the character
+ // length with len == 0 mapped to 2^SPI_CHAR_LEN_BITS; in LSB-first mode the
+ // index counts up from 0, otherwise it counts down from cnt - 1.
+ assign tx_bit_pos = lsb ? {!(|len), len} - cnt : cnt - {{`SPI_CHAR_LEN_BITS{1'b0}},1'b1};
+ // When sampling on the negative edge, cnt has already been decremented on the
+ // preceding positive edge, hence the +1 / no -1 adjustment.
+ assign rx_bit_pos = lsb ? {!(|len), len} - (rx_negedge ? cnt + {{`SPI_CHAR_LEN_BITS{1'b0}},1'b1} : cnt) :
+ (rx_negedge ? cnt : cnt - {{`SPI_CHAR_LEN_BITS{1'b0}},1'b1});
+ // last is set once the bit counter has reached zero
+ assign last = !(|cnt);
+ // Sample / drive enables, taken from the selected sclk edge strobe
+ assign rx_clk_i = (rx_negedge ? neg_edge : pos_edge) && (!last || s_clk);
+ assign tx_clk_i = (tx_negedge ? neg_edge : pos_edge) && !last;
+ // Remaining-bit counter.
+ // While idle it tracks the programmed length (len == 0 loads the maximum,
+ // 2^SPI_CHAR_LEN_BITS); during a transfer it decrements on each pos_edge.
+ always @(posedge clk_i or negedge rst_ni)
+ begin
+   if (!rst_ni)
+     cnt <= {`SPI_CHAR_LEN_BITS+1{1'b0}};
+   else if (tip)
+     cnt <= pos_edge ? (cnt - 1'b1) : cnt;
+   else
+     cnt <= (~|len) ? {1'b1, {`SPI_CHAR_LEN_BITS{1'b0}}} : {1'b0, len};
+ end
+ // Transfer-in-progress flag: set by go while idle, cleared on the positive
+ // edge strobe that coincides with the last bit.
+ always @(posedge clk_i or negedge rst_ni)
+ begin
+   if (!rst_ni) begin
+     tip <= 1'b0;
+   end
+   else if (~tip && go) begin
+     tip <= 1'b1;
+   end
+   else if (pos_edge && last && tip) begin
+     tip <= 1'b0;
+   end
+ end
+ // Sending bits to the line
+ // s_out is updated with data[tx_bit_pos] on every TX clock enable, and also
+ // continuously while idle (!tip) so the first bit is already on the line when
+ // a transfer starts; otherwise it holds its value.
+ always @(posedge clk_i or negedge rst_ni)
+ begin
+ if (~rst_ni)
+ s_out <= 1'b0;
+ else
+ s_out <= (tx_clk_i || !tip) ? data[tx_bit_pos[`SPI_CHAR_LEN_BITS-1:0]] : s_out;
+ end
+ // TX data latch and RX bit capture.
+ //  - reset (synchronous, active low): both data and data_rx are cleared, so
+ //    p_out reads as zero instead of X until bits have been received.
+ //  - latch while idle: byte lanes of p_in selected by byte_sel are stored in
+ //    the transmit register `data`.
+ //  - rx_en during a transfer: on each RX clock enable the sampled s_in is
+ //    written to data_rx at rx_bit_pos; other bits keep their value.
+ always @(posedge clk_i )
+ begin
+   if (~rst_ni)
+   begin
+     data <= {`SPI_MAX_CHAR{1'b0}};
+     data_rx <= {`SPI_MAX_CHAR{1'b0}};
+   end
+   else if (latch && !tip)
+   begin
+     if (byte_sel[0])
+       data[7:0] <= p_in[7:0];
+     if (byte_sel[1])
+       data[15:8] <= p_in[15:8];
+     if (byte_sel[2])
+       data[23:16] <= p_in[23:16];
+     if (byte_sel[3])
+       data[`SPI_MAX_CHAR-1:24] <= p_in[`SPI_MAX_CHAR-1:24];
+   end
+   else if (rx_en && tip) begin
+     data_rx[rx_bit_pos[`SPI_CHAR_LEN_BITS-1:0]] <= rx_clk_i ? s_in : data_rx[rx_bit_pos[`SPI_CHAR_LEN_BITS-1:0]];
+   end
+ end
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..6c65601
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,75 @@
+// SPI host top level: wraps spi_core behind a TL-UL register adapter.
+// tlul_adapter_reg converts the TL-UL request on tl_i into the simple
+// we/re/addr/wdata/be register interface of spi_core and returns rdata/err
+// on tl_o.
+module spi_top(
+ input clk_i,
+ input rst_ni,
+ input tlul_pkg::tl_h2d_t tl_i,
+ output tlul_pkg::tl_d2h_t tl_o,
+ // SPI signals
+ output intr_rx_o,
+ output intr_tx_o,
+ output [`SPI_SS_NB-1:0] ss_o,
+ output sclk_o,
+ output sd_o,
+ output sd_oe,
+ input sd_i
+// Register interface address / data widths passed to the adapter
+localparam int AW = 8;
+localparam int DW = 32;
+// Register interface between the adapter and the core
+logic re;
+logic we;
+logic [7:0] addr;
+logic [31:0] wdata;
+logic [3:0] be;
+logic [31:0] rdata;
+logic err;
+spi_core spi_host(
+ // tlul signals
+ .clk_i,
+ .rst_ni,
+ .addr_i (addr),
+ .wdata_i (wdata),
+ .rdata_o (rdata),
+ .be_i (be),
+ .we_i (we),
+ .re_i (re),
+ .error_o (err),
+ .intr_rx_o (intr_rx_o),
+ .intr_tx_o (intr_tx_o),
+ // SPI signals
+ .ss_o (ss_o), // slave select
+ .sclk_o (sclk_o), // serial clock
+ .sd_o (sd_o), // master out slave in
+ .sd_oe (sd_oe),
+ .sd_i (sd_i) // master in slave out
+// TL-UL to register-interface bridge
+tlul_adapter_reg #(
+ .RegAw(AW),
+ .RegDw(DW)
+) u_reg_if (
+ .clk_i,
+ .rst_ni,
+ .tl_i (tl_i),
+ .tl_o (tl_o),
+ .we_o (we),
+ .re_o (re),
+ .addr_o (addr),
+ .wdata_o (wdata),
+ .be_o (be),
+ .rdata_i (rdata),
+ .error_i (err)
diff --git a/verilog/rtl/sram.v b/verilog/rtl/sram.v
new file mode 100644
index 0000000..8fac671
--- /dev/null
+++ b/verilog/rtl/sram.v
@@ -0,0 +1,126 @@
+// OpenRAM SRAM model
+// Words: 1024
+// Word size: 32
+// Write size: 8
+module sram #(
+ parameter NUM_WMASKS = 4 ,
+ parameter DATA_WIDTH = 32 ,
+ parameter ADDR_WIDTH = 10 ,
+ parameter RAM_DEPTH = 1 << ADDR_WIDTH,
+ // FIXME: This delay is arbitrary.
+ parameter VERBOSE = 1 , //Set to 0 to only display warnings
+ parameter T_HOLD = 1 ,//Delay to hold dout value after posedge. Value is arbitrary
+ parameter IZERO = 0 , // binary / Initial RAM with zeros (has priority over INITFILE)
+ parameter IFILE = ""
+// Port 0: RW
+ clk0,csb0,web0,wmask0,addr0,din0,dout0,
+// Port 1: R
+ clk1,csb1,addr1,dout1
+ );
+ /*parameter NUM_WMASKS = 4 ;
+ parameter DATA_WIDTH = 32 ;
+ parameter ADDR_WIDTH = 10 ;
+ parameter RAM_DEPTH = 1 << ADDR_WIDTH;
+ // FIXME: This delay is arbitrary.
+ parameter DELAY = 3 ;
+ parameter VERBOSE = 1 ; //Set to 0 to only display warnings
+ parameter T_HOLD = 1 ; //Delay to hold dout value after posedge. Value is arbitrary
+ parameter IZERO = 0 ; // binary / Initial RAM with zeros (has priority over INITFILE)
+ parameter IFILE = "";
+ input clk0; // clock
+ input csb0; // active low chip select
+ input web0; // active low write control
+ input [NUM_WMASKS-1:0] wmask0; // write mask
+ input [ADDR_WIDTH-1:0] addr0;
+ input [DATA_WIDTH-1:0] din0;
+ output [DATA_WIDTH-1:0] dout0;
+ input clk1; // clock
+ input csb1; // active low chip select
+ input [ADDR_WIDTH-1:0] addr1;
+ output [DATA_WIDTH-1:0] dout1;
+ reg csb0_reg;
+ reg web0_reg;
+ reg [NUM_WMASKS-1:0] wmask0_reg;
+ reg [ADDR_WIDTH-1:0] addr0_reg;
+ reg [DATA_WIDTH-1:0] din0_reg;
+ reg [DATA_WIDTH-1:0] dout0;
+ // All inputs are registers
+ // Port 0 (read/write): every control, address and data input is captured on
+ // the rising edge of clk0; the memory access itself happens in the blocks
+ // triggered on the following falling edge.
+ always @(posedge clk0)
+ begin
+ csb0_reg = csb0;
+ web0_reg = web0;
+ wmask0_reg = wmask0;
+ addr0_reg = addr0;
+ din0_reg = din0;
+ //dout0 = 32'bx;
+ //if ( !csb0_reg && web0_reg && VERBOSE )
+ //$display($time," Reading %m addr0=%b dout0=%b",addr0_reg,mem[addr0_reg]);
+ //if ( !csb0_reg && !web0_reg && VERBOSE )
+ //$display($time," Writing %m addr0=%b din0=%b wmask0=%b",addr0_reg,din0_reg,wmask0_reg);
+ end
+ reg csb1_reg;
+ reg [ADDR_WIDTH-1:0] addr1_reg;
+ reg [DATA_WIDTH-1:0] dout1;
+ // All inputs are registers
+ // Port 1 (read only): chip select and address are captured on the rising
+ // edge of clk1.
+ always @(posedge clk1)
+ begin
+ csb1_reg = csb1;
+ addr1_reg = addr1;
+ //if (!csb0 && !web0 && !csb1 && (addr0 == addr1))
+ //$display($time," WARNING: Writing and reading addr0=%b and addr1=%b simultaneously!",addr0,addr1);
+ //#(T_HOLD) dout1 = 32'bx;
+ //if ( !csb1_reg && VERBOSE )
+ //$display($time," Reading %m addr1=%b dout1=%b",addr1_reg,mem[addr1_reg]);
+ end
+integer i;
+reg [DATA_WIDTH-1:0] mem [0:RAM_DEPTH-1];
+ //if (IZERO)
+ //for (i=0; i<RAM_DEPTH; i=i+1) mem[i] = {DATA_WIDTH{1'b0}};
+ //else
+ //if (IFILE != "") $readmemh({IFILE,".hex"}, mem);
+ // Memory Write Block Port 0
+ // Write Operation : When web0 = 0, csb0 = 0
+ // Performed on the falling edge of clk0 using the values registered on the
+ // preceding rising edge. Each wmask0 bit enables one byte lane of the word.
+ // NOTE(review): the byte lanes are hard-coded for a 32-bit word with 4 masks,
+ // independent of DATA_WIDTH / NUM_WMASKS - confirm before re-parameterizing.
+ always @ (negedge clk0)
+ begin : MEM_WRITE0
+ if ( !csb0_reg && !web0_reg ) begin
+ if (wmask0_reg[0])
+ mem[addr0_reg][7:0] = din0_reg[7:0];
+ if (wmask0_reg[1])
+ mem[addr0_reg][15:8] = din0_reg[15:8];
+ if (wmask0_reg[2])
+ mem[addr0_reg][23:16] = din0_reg[23:16];
+ if (wmask0_reg[3])
+ mem[addr0_reg][31:24] = din0_reg[31:24];
+ end
+ end
+ // Memory Read Block Port 0
+ // Read Operation : When web0 = 1, csb0 = 0
+ // dout0 is updated on the falling edge of clk0 and holds its previous value
+ // when the port is deselected or writing.
+ always @ (negedge clk0)
+ begin : MEM_READ0
+ if (!csb0_reg && web0_reg)
+ dout0 <= mem[addr0_reg];
+ end
+ // Memory Read Block Port 1
+ // Read Operation : When csb1 = 0 (port 1 is read-only and has no write enable)
+ // dout1 is updated on the falling edge of clk1 and holds its previous value
+ // when the port is deselected.
+ always @ (negedge clk1)
+ begin : MEM_READ1
+ if (!csb1_reg)
+ dout1 <= mem[addr1_reg];
+ end
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..7cd1c62
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,49 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+module timer_core #(
+ parameter int N = 1
+) (
+ input clk_i,
+ input rst_ni,
+ input active,
+ input [11:0] prescaler,
+ input [ 7:0] step,
+ output logic tick,
+ output logic [63:0] mtime_d,
+ input [63:0] mtime,
+ input [63:0] mtimecmp [N],
+ output logic [N-1:0] intr
+ logic [11:0] tick_count;
+ // Prescaler counter: cleared on reset, while the timer is inactive, and when
+ // it reaches the programmed prescaler value; otherwise it counts up by one.
+ always_ff @(posedge clk_i or negedge rst_ni) begin : generate_tick
+   if (!rst_ni) begin
+     tick_count <= 12'h0;
+   end else if (!active || (tick_count == prescaler)) begin
+     tick_count <= 12'h0;
+   end else begin
+     tick_count <= tick_count + 1'b1;
+   end
+ end
+ // tick is asserted while active and the prescaler counter has reached (or
+ // exceeds) the prescaler value
+ assign tick = active & (tick_count >= prescaler);
+ // next timer value: current mtime plus step, zero-extended to 64 bits
+ assign mtime_d = mtime + 64'(step);
+ // interrupt is generated if mtime is greater than or equal to mtimecmp
+ // TODO: Check if it must consider overflow case
+ // one interrupt line per compare register, gated by active
+ for (genvar t = 0 ; t < N ; t++) begin : gen_intr
+ assign intr[t] = active & (mtime >= mtimecmp[t]);
+ end
+endmodule : timer_core
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..4421478
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,50 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// tl_main package generated by `` tool
+// Address map and index enums for the main TL-UL crossbar.
+package tl_main_pkg;
+ // Base address of each device window
+ localparam logic [31:0] ADDR_SPACE_ICCM = 32'h 20000000;
+ localparam logic [31:0] ADDR_SPACE_DEBUG_ROM = 32'h 10040000;
+ localparam logic [31:0] ADDR_SPACE_DCCM = 32'h 10000000;
+ localparam logic [31:0] ADDR_SPACE_TIMER0 = 32'h 40000000;
+ localparam logic [31:0] ADDR_SPACE_UART0 = 32'h 40060000;
+ localparam logic [31:0] ADDR_SPACE_SPI0 = 32'h 40080000;
+ localparam logic [31:0] ADDR_SPACE_PWM = 32'h 400b0000;
+ localparam logic [31:0] ADDR_SPACE_GPIO = 32'h 400c0000;
+ localparam logic [31:0] ADDR_SPACE_PLIC = 32'h 40050000;
+ // Offset mask of each window: the crossbar matches
+ // (address & ~ADDR_MASK_x) == ADDR_SPACE_x, so every window is 64 KiB
+ localparam logic [31:0] ADDR_MASK_ICCM = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_DEBUG_ROM = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_DCCM = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_TIMER0 = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_UART0 = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_SPI0 = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_PWM = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_GPIO = 32'h 0000ffff;
+ localparam logic [31:0] ADDR_MASK_PLIC = 32'h 0000ffff;
+ // Number of hosts and devices on the crossbar
+ localparam int N_HOST = 3;
+ localparam int N_DEVICE = 9;
+ // Device indices
+ typedef enum int {
+ TlIccm = 0,
+ TlDebugRom = 1,
+ TlDccm = 2,
+ TlTimer0 = 3,
+ TlUart0 = 4,
+ TlSpi0 = 5,
+ TlPwm = 6,
+ TlGpio = 7,
+ TlPlic = 8
+ } tl_device_e;
+ // Host indices
+ typedef enum int {
+ TlBrqif = 0,
+ TlBrqlsu = 1,
+ TlDmSba = 2
+ } tl_host_e;
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..65f3a0b
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,424 @@
+// main XBAR
+module tl_xbar_main (
+ input clk_i,
+ input rst_ni,
+ // Host interfaces
+ input tlul_pkg::tl_h2d_t tl_brqif_i,
+ output tlul_pkg::tl_d2h_t tl_brqif_o,
+ input tlul_pkg::tl_h2d_t tl_brqlsu_i,
+ output tlul_pkg::tl_d2h_t tl_brqlsu_o,
+ input tlul_pkg::tl_h2d_t tl_dm_sba_i,
+ output tlul_pkg::tl_d2h_t tl_dm_sba_o,
+ // Device interfaces
+ output tlul_pkg::tl_h2d_t tl_iccm_o,
+ input tlul_pkg::tl_d2h_t tl_iccm_i,
+ output tlul_pkg::tl_h2d_t tl_debug_rom_o,
+ input tlul_pkg::tl_d2h_t tl_debug_rom_i,
+ output tlul_pkg::tl_h2d_t tl_dccm_o,
+ input tlul_pkg::tl_d2h_t tl_dccm_i,
+ output tlul_pkg::tl_h2d_t tl_timer0_o,
+ input tlul_pkg::tl_d2h_t tl_timer0_i,
+ output tlul_pkg::tl_h2d_t tl_uart_o,
+ input tlul_pkg::tl_d2h_t tl_uart_i,
+ output tlul_pkg::tl_h2d_t tl_spi_o,
+ input tlul_pkg::tl_d2h_t tl_spi_i,
+ output tlul_pkg::tl_h2d_t tl_pwm_o,
+ input tlul_pkg::tl_d2h_t tl_pwm_i,
+ output tlul_pkg::tl_h2d_t tl_gpio_o,
+ input tlul_pkg::tl_d2h_t tl_gpio_i,
+ output tlul_pkg::tl_h2d_t tl_plic_o,
+ input tlul_pkg::tl_d2h_t tl_plic_i
+ import tlul_pkg::*;
+ import tl_main_pkg::*;
+ // scanmode_i is currently not used, but provisioned for future use
+ // this assignment prevents lint warnings
+// host 1 IFU
+ tlul_pkg::tl_h2d_t brqifu_to_s1n;
+ tlul_pkg::tl_d2h_t s1n_to_brqifu;
+ logic [1:0] device_sel_1;
+// host 2 LSU
+ tlul_pkg::tl_h2d_t brqlsu_to_s1n;
+ tlul_pkg::tl_d2h_t s1n_to_brqlsu;
+ logic [3:0] device_sel_2;
+// host 3 debug system bus access
+ tlul_pkg::tl_h2d_t dbg_to_s1n;
+ tlul_pkg::tl_d2h_t s1n_to_dbg;
+ logic [3:0] device_sel_3;
+// Device connections: per-host fan-out arrays (hN_dv_i: host-to-device
+// requests leaving the 1:N socket, hN_dv_o: device-to-host responses)
+ tlul_pkg::tl_h2d_t h1_dv_i[2];
+ tlul_pkg::tl_d2h_t h1_dv_o[2];
+ tlul_pkg::tl_h2d_t h2_dv_i[9];
+ tlul_pkg::tl_d2h_t h2_dv_o[9];
+ tlul_pkg::tl_h2d_t h3_dv_i[8];
+ tlul_pkg::tl_d2h_t h3_dv_o[8];
+// ICCM
+ tlul_pkg::tl_h2d_t s1n_sm1_1[3];
+ tlul_pkg::tl_d2h_t sm1_s1n_1[3];
+// DCCM
+ tlul_pkg::tl_h2d_t s1n_sm1_2[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_2[2];
+// DEBUG ROM
+ tlul_pkg::tl_h2d_t s1n_sm1_4[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_4[2];
+// TIMER0
+ tlul_pkg::tl_h2d_t s1n_sm1_5[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_5[2];
+// UART
+ tlul_pkg::tl_h2d_t s1n_sm1_6[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_6[2];
+// SPI
+ tlul_pkg::tl_h2d_t s1n_sm1_7[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_7[2];
+// PWM
+ tlul_pkg::tl_h2d_t s1n_sm1_8[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_8[2];
+// GPIO
+ tlul_pkg::tl_h2d_t s1n_sm1_9[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_9[2];
+// PLIC
+ tlul_pkg::tl_h2d_t s1n_sm1_10[2];
+ tlul_pkg::tl_d2h_t sm1_s1n_10[2];
+// Wiring between the per-host 1:N sockets and the per-device M:1 sockets.
+// For each device, slot k of its M:1 socket is tied to one entry of a host's
+// fan-out array; the entry index is the value that host's device_sel uses.
+// Device 1 host connections (ICCM)
+ assign h1_dv_o[0] = sm1_s1n_1[0];
+ assign h3_dv_o[1] = sm1_s1n_1[1];
+ assign h2_dv_o[8] = sm1_s1n_1[2];
+ assign s1n_sm1_1[0] = h1_dv_i[0];
+ assign s1n_sm1_1[1] = h3_dv_i[1];
+ assign s1n_sm1_1[2] = h2_dv_i[8];
+// Device 2 host connections (DCCM)
+ assign h2_dv_o[0] = sm1_s1n_2[0];
+ assign h3_dv_o[0] = sm1_s1n_2[1];
+ assign s1n_sm1_2[0] = h2_dv_i[0];
+ assign s1n_sm1_2[1] = h3_dv_i[0];
+// Device 3 host connections (DEBUG ROM)
+ assign h1_dv_o[1] = sm1_s1n_4[0];
+ assign h2_dv_o[1] = sm1_s1n_4[1];
+ assign s1n_sm1_4[0] = h1_dv_i[1];
+ assign s1n_sm1_4[1] = h2_dv_i[1];
+// Device 4 host connections (TIMER0)
+ assign h2_dv_o[2] = sm1_s1n_5[0];
+ assign h3_dv_o[2] = sm1_s1n_5[1];
+ assign s1n_sm1_5[0] = h2_dv_i[2];
+ assign s1n_sm1_5[1] = h3_dv_i[2];
+// Device 5 host connections (UART)
+ assign h2_dv_o[3] = sm1_s1n_6[0];
+ assign h3_dv_o[3] = sm1_s1n_6[1];
+ assign s1n_sm1_6[0] = h2_dv_i[3];
+ assign s1n_sm1_6[1] = h3_dv_i[3];
+// Device 6 host connections (SPI)
+ assign h2_dv_o[4] = sm1_s1n_7[0];
+ assign h3_dv_o[4] = sm1_s1n_7[1];
+ assign s1n_sm1_7[0] = h2_dv_i[4];
+ assign s1n_sm1_7[1] = h3_dv_i[4];
+// Device 7 host connections (PWM)
+ assign h2_dv_o[5] = sm1_s1n_8[0];
+ assign h3_dv_o[5] = sm1_s1n_8[1];
+ assign s1n_sm1_8[0] = h2_dv_i[5];
+ assign s1n_sm1_8[1] = h3_dv_i[5];
+// Device 8 host connections (GPIO)
+ assign h2_dv_o[6] = sm1_s1n_9[0];
+ assign h3_dv_o[6] = sm1_s1n_9[1];
+ assign s1n_sm1_9[0] = h2_dv_i[6];
+ assign s1n_sm1_9[1] = h3_dv_i[6];
+// Device 9 host connections (PLIC)
+ assign h2_dv_o[7] = sm1_s1n_10[0];
+ assign h3_dv_o[7] = sm1_s1n_10[1];
+ assign s1n_sm1_10[0] = h2_dv_i[7];
+ assign s1n_sm1_10[1] = h3_dv_i[7];
+// host 1 connections
+ assign brqifu_to_s1n = tl_brqif_i;
+ assign tl_brqif_o = s1n_to_brqifu;
+// host 2 connections
+ assign brqlsu_to_s1n = tl_brqlsu_i;
+ assign tl_brqlsu_o = s1n_to_brqlsu;
+// host 3 connections
+ assign dbg_to_s1n = tl_dm_sba_i;
+ assign tl_dm_sba_o = s1n_to_dbg;
+// host 1 (IFU) device selection: 0 = ICCM, 1 = debug ROM, 2 = no match
+ always_comb begin
+   if ((brqifu_to_s1n.a_address & ~(ADDR_MASK_ICCM)) == ADDR_SPACE_ICCM) begin
+     device_sel_1 = 2'd0;
+   end else if ((brqifu_to_s1n.a_address & ~(ADDR_MASK_DEBUG_ROM)) == ADDR_SPACE_DEBUG_ROM) begin
+     device_sel_1 = 2'd1;
+   end else begin
+     device_sel_1 = 2'd2;
+   end
+ end
+// host 1 socket
+// 1:2 socket for the instruction-fetch host; all depth parameters are zero
+// (presumably no FIFO buffering - confirm against tlul_socket_1n).
+// NOTE(review): device_sel_1 == 2 is outside 0..N-1; presumably handled by the
+// socket's error responder - confirm.
+ tlul_socket_1n #(
+ .HReqDepth (4'h0),
+ .HRspDepth (4'h0),
+ .DReqDepth (12'h0),
+ .DRspDepth (12'h0),
+ .N (2)
+ ) host_1 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (brqifu_to_s1n),
+ .tl_h_o (s1n_to_brqifu),
+ .tl_d_o (h1_dv_i),
+ .tl_d_i (h1_dv_o),
+ .dev_select_i (device_sel_1)
+ );
+// host 2 (LSU) device selection
+// Priority decode of the LSU request address against each device window.
+// 9 is the no-match default; 0..8 index the h2_dv_* arrays.
+ always_comb begin
+ device_sel_2 = 4'd9;
+ if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_DCCM)) == ADDR_SPACE_DCCM) begin
+ device_sel_2 = 4'd0;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_DEBUG_ROM)) == ADDR_SPACE_DEBUG_ROM) begin
+ device_sel_2 = 4'd1;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_TIMER0)) == ADDR_SPACE_TIMER0) begin
+ device_sel_2 = 4'd2;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_UART0)) == ADDR_SPACE_UART0) begin
+ device_sel_2 = 4'd3;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_SPI0)) == ADDR_SPACE_SPI0) begin
+ device_sel_2 = 4'd4;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_PWM)) == ADDR_SPACE_PWM) begin
+ device_sel_2 = 4'd5;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_GPIO)) == ADDR_SPACE_GPIO) begin
+ device_sel_2 = 4'd6;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_PLIC)) == ADDR_SPACE_PLIC) begin
+ device_sel_2 = 4'd7;
+ end else if ((brqlsu_to_s1n.a_address & ~(ADDR_MASK_ICCM)) == ADDR_SPACE_ICCM) begin
+ device_sel_2 = 4'd8;
+ end
+ end
+// host 2 socket
+// 1:9 socket for the load/store host; all depth parameters are zero.
+ tlul_socket_1n #(
+ .HReqDepth (4'h0),
+ .HRspDepth (4'h0),
+ .DReqDepth (36'h0),
+ .DRspDepth (36'h0),
+ .N (9)
+ ) host_2 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (brqlsu_to_s1n),
+ .tl_h_o (s1n_to_brqlsu),
+ .tl_d_o (h2_dv_i),
+ .tl_d_i (h2_dv_o),
+ .dev_select_i (device_sel_2)
+ );
+// host 3 (debug system bus access) device selection
+// Priority decode of the debug host's own request address (dbg_to_s1n) against
+// each device window. 8 is the no-match default; 0..7 index the h3_dv_* arrays
+// (0 = DCCM, 1 = ICCM, 2 = TIMER0, 3 = UART, 4 = SPI, 5 = PWM, 6 = GPIO,
+// 7 = PLIC). The decode previously looked at the LSU host's address, which
+// routed debug accesses according to whatever the LSU was requesting.
+ always_comb begin
+   device_sel_3 = 4'd8;
+   if ((dbg_to_s1n.a_address & ~(ADDR_MASK_DCCM)) == ADDR_SPACE_DCCM) begin
+     device_sel_3 = 4'd0;
+   end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_ICCM)) == ADDR_SPACE_ICCM) begin
+     device_sel_3 = 4'd1;
+   end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_TIMER0)) == ADDR_SPACE_TIMER0) begin
+     device_sel_3 = 4'd2;
+   end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_UART0)) == ADDR_SPACE_UART0) begin
+     device_sel_3 = 4'd3;
+   end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_SPI0)) == ADDR_SPACE_SPI0) begin
+     device_sel_3 = 4'd4;
+   end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_PWM)) == ADDR_SPACE_PWM) begin
+     device_sel_3 = 4'd5;
+   end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_GPIO)) == ADDR_SPACE_GPIO) begin
+     device_sel_3 = 4'd6;
+   end else if ((dbg_to_s1n.a_address & ~(ADDR_MASK_PLIC)) == ADDR_SPACE_PLIC) begin
+     device_sel_3 = 4'd7;
+   end
+ end
+ // host 3 socket: 1:8 socket for the debug system-bus-access host; all depth
+ // parameters are zero.
+ // NOTE(review): the D*Depth literals are 36 bits wide although N is 8 here;
+ // harmless for an all-zero value, but confirm against the parameter width.
+ tlul_socket_1n #(
+ .HReqDepth (4'h0),
+ .HRspDepth (4'h0),
+ .DReqDepth (36'h0),
+ .DRspDepth (36'h0),
+ .N (8)
+ ) host_3 (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (dbg_to_s1n),
+ .tl_h_o (s1n_to_dbg),
+ .tl_d_o (h3_dv_i),
+ .tl_d_i (h3_dv_o),
+ .dev_select_i (device_sel_3)
+ );
+// Devices
+// One M:1 socket per device merges the requests of every host that can reach
+// it onto the device's single TL-UL port.
+ // ICCM: reachable from all three hosts (IFU, debug SBA, LSU)
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (3)
+ ) ICCM (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_1),
+ .tl_h_o (sm1_s1n_1),
+ .tl_d_o (tl_iccm_o),
+ .tl_d_i (tl_iccm_i)
+ );
+ // DCCM: reachable from the LSU and debug SBA hosts
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (2)
+ ) DCCM (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_2),
+ .tl_h_o (sm1_s1n_2),
+ .tl_d_o (tl_dccm_o),
+ .tl_d_i (tl_dccm_i)
+ );
+ // Debug ROM: reachable from the IFU and LSU hosts.
+ // The line closing the parameter list and naming the instance was missing,
+ // which left the instantiation syntactically invalid.
+ // NOTE(review): the instance name DEBUG_ROM is chosen to match the sibling
+ // sockets - confirm no hierarchy path depends on a different name.
+ tlul_socket_m1 #(
+   .HReqDepth (8'h0),
+   .HRspDepth (8'h0),
+   .DReqDepth (4'h0),
+   .DRspDepth (4'h0),
+   .M (2)
+ ) DEBUG_ROM (
+   .clk_i (clk_i),
+   .rst_ni (rst_ni),
+   .tl_h_i (s1n_sm1_4),
+   .tl_h_o (sm1_s1n_4),
+   .tl_d_o (tl_debug_rom_o),
+   .tl_d_i (tl_debug_rom_i)
+ );
+ // Peripheral sockets: each merges the LSU host (slot 0) and the debug SBA
+ // host (slot 1) onto one peripheral port.
+ // TIMER0
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (2)
+ ) TIMER (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_5),
+ .tl_h_o (sm1_s1n_5),
+ .tl_d_o (tl_timer0_o),
+ .tl_d_i (tl_timer0_i)
+ );
+ // UART
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (2)
+ ) UART (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_6),
+ .tl_h_o (sm1_s1n_6),
+ .tl_d_o (tl_uart_o),
+ .tl_d_i (tl_uart_i)
+ );
+ // SPI
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (2)
+ ) SPI (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_7),
+ .tl_h_o (sm1_s1n_7),
+ .tl_d_o (tl_spi_o),
+ .tl_d_i (tl_spi_i)
+ );
+ // PWM
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (2)
+ ) PWM (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_8),
+ .tl_h_o (sm1_s1n_8),
+ .tl_d_o (tl_pwm_o),
+ .tl_d_i (tl_pwm_i)
+ );
+ // GPIO
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (2)
+ ) GPIO (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_9),
+ .tl_h_o (sm1_s1n_9),
+ .tl_d_o (tl_gpio_o),
+ .tl_d_i (tl_gpio_i)
+ );
+ // PLIC
+ tlul_socket_m1 #(
+ .HReqDepth (8'h0),
+ .HRspDepth (8'h0),
+ .DReqDepth (4'h0),
+ .DRspDepth (4'h0),
+ .M (2)
+ ) PLIC (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tl_h_i (s1n_sm1_10),
+ .tl_h_o (sm1_s1n_10),
+ .tl_d_o (tl_plic_o),
+ .tl_d_i (tl_plic_i)
+ );
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..c801bc5
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,125 @@
+// TL-UL device-side adapter for a simple register interface.
+//
+// At most one request is in flight: a_ready is low while a response is
+// outstanding. An accepted request is forwarded as a single-cycle re_o / we_o
+// strobe with a word-aligned addr_o. rdata_i and error_i are sampled in that
+// same cycle, and the response is then held on the D channel until the host
+// accepts it.
+//
+// Parameters:
+//   RegAw - width of addr_o in bits.
+//   RegDw - register data width in bits; shall match TL_DW.
+//   RegBw - derived: number of byte enables (RegDw/8).
+//
+// Ports:
+//   tl_i / tl_o            - TL-UL request (host->device) / response.
+//   re_o / we_o            - read / write strobe, suppressed on internal error.
+//   addr_o                 - request address with bits [1:0] forced to zero.
+//   wdata_o / be_o         - a_data / a_mask of the current request.
+//   rdata_i / error_i      - read data and error from the register file,
+//                            captured in the cycle the request is accepted.
+module tlul_adapter_reg import tlul_pkg::*; #(
+  parameter  int RegAw = 8,
+  parameter  int RegDw = 32, // Shall be matched with TL_DW
+  localparam int RegBw = RegDw/8
+) (
+  input clk_i,
+  input rst_ni,
+
+  // TL-UL interface
+  input  tl_h2d_t tl_i,
+  output tl_d2h_t tl_o,
+
+  // Register interface
+  output logic             re_o,
+  output logic             we_o,
+  output logic [RegAw-1:0] addr_o,
+  output logic [RegDw-1:0] wdata_o,
+  output logic [RegBw-1:0] be_o,
+  input        [RegDw-1:0] rdata_i,
+  input                    error_i
+);
+
+  localparam int IW  = $bits(tl_i.a_source);
+  localparam int SZW = $bits(tl_i.a_size);
+
+  logic outstanding;    // Indicates current request is pending
+  logic a_ack, d_ack;
+
+  logic [RegDw-1:0] rdata;
+  logic             error, err_internal;
+
+  logic addr_align_err; // Size and alignment
+  logic tl_err;         // Common TL-UL error checker
+
+  logic [IW-1:0]  reqid;
+  logic [SZW-1:0] reqsz;
+  tlul_pkg::tl_d_m_op rspop;
+
+  logic rd_req, wr_req;
+
+  // Channel handshakes
+  assign a_ack = tl_i.a_valid & tl_o.a_ready;
+  assign d_ack = tl_o.d_valid & tl_i.d_ready;
+
+  // Request signal
+  assign wr_req = a_ack & ((tl_i.a_opcode == PutFullData) | (tl_i.a_opcode == PutPartialData));
+  assign rd_req = a_ack & (tl_i.a_opcode == Get);
+
+  // An erroneous request is acknowledged on the bus but never reaches the
+  // register file.
+  assign we_o    = wr_req & ~err_internal;
+  assign re_o    = rd_req & ~err_internal;
+  assign addr_o  = {tl_i.a_address[RegAw-1:2], 2'b00}; // generate always word-align
+  assign wdata_o = tl_i.a_data;
+  assign be_o    = tl_i.a_mask;
+
+  // Set when a request is accepted, cleared when its response is taken.
+  // a_ack and d_ack cannot coincide because a_ready = ~outstanding and
+  // d_valid = outstanding.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni)    outstanding <= 1'b0;
+    else if (a_ack) outstanding <= 1'b1;
+    else if (d_ack) outstanding <= 1'b0;
+  end
+
+  // Latch the request attributes that have to be echoed in the response.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      reqid <= '0;
+      reqsz <= '0;
+      rspop <= AccessAck;
+    end else if (a_ack) begin
+      reqid <= tl_i.a_source;
+      reqsz <= tl_i.a_size;
+      // Return AccessAckData regardless of error
+      rspop <= (rd_req) ? AccessAckData : AccessAck;
+    end
+  end
+
+  // Capture response payload; all ones are returned on an internal error.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rdata <= '0;
+      error <= 1'b0;
+    end else if (a_ack) begin
+      rdata <= (err_internal) ? '1 : rdata_i;
+      error <= error_i | err_internal;
+    end
+  end
+
+  assign tl_o = '{
+    a_ready:  ~outstanding,
+    d_valid:  outstanding,
+    d_opcode: rspop,
+    d_param:  '0,
+    d_size:   reqsz,
+    d_source: reqid,
+    d_sink:   '0,
+    d_data:   rdata,
+    d_error:  error
+  };
+
+  ////////////////////
+  // Error Handling //
+  ////////////////////
+  assign err_internal = addr_align_err | tl_err;
+
+  // addr_align_err
+  //    Raised if a write address is not word aligned. Reads are not checked
+  //    here; addr_o is always forced to a word boundary.
+  //    This check exists because of the limitation of the register interface.
+  always_comb begin
+    if (wr_req) begin
+      // Only word-align is accepted based on comportability spec
+      addr_align_err = |tl_i.a_address[1:0];
+    end else begin
+      // No request
+      addr_align_err = 1'b0;
+    end
+  end
+
+  // tl_err : separate checker
+  tlul_err u_err (
+    .tl_i  (tl_i),
+    .err_o (tl_err)
+  );
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..32d7b67
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,86 @@
+// Combinational TL-UL A-channel protocol checker.
+//
+// err_o is high unless the request on tl_i is valid, uses one of the three
+// TL-UL opcodes (PutFullData, PutPartialData, Get) and its address, size and
+// mask are mutually consistent. Note that err_o is therefore also high while
+// a_valid is low; consumers are expected to qualify it with the request.
+//
+// Ports:
+//   tl_i  - TL-UL host-to-device bundle to check.
+//   err_o - 1 when the request is not permitted.
+module tlul_err import tlul_pkg::*; (
+  input  tl_h2d_t tl_i,
+  output logic    err_o
+);
+
+  localparam int DW    = $bits(tl_i.a_data);
+  localparam int MW    = $bits(tl_i.a_mask);
+  localparam int SubAW = $clog2(DW/8);
+
+  logic opcode_allowed, a_config_allowed;
+
+  logic op_full, op_partial, op_get;
+  assign op_full    = (tl_i.a_opcode == PutFullData);
+  assign op_partial = (tl_i.a_opcode == PutPartialData);
+  assign op_get     = (tl_i.a_opcode == Get);
+
+  // Anything that doesn't fall into the permitted category, it raises an error
+  assign err_o = ~(opcode_allowed & a_config_allowed);
+
+  // opcode check
+  assign opcode_allowed = op_full | op_partial | op_get;
+
+  // a channel configuration check
+  logic addr_sz_chk;    // address and size alignment check
+  logic mask_chk;       // inactive lane a_mask check
+  logic fulldata_chk;   // PutFullData should have size match to mask
+
+  // One-hot byte lane selected by the sub-word address bits
+  logic [MW-1:0] mask;
+  assign mask = (1 << tl_i.a_address[SubAW-1:0]);
+
+  always_comb begin
+    // Defaults also cover the "no valid request" case.
+    addr_sz_chk  = 1'b0;
+    mask_chk     = 1'b0;
+    fulldata_chk = 1'b0; // Only valid when opcode is PutFullData
+
+    if (tl_i.a_valid) begin
+      unique case (tl_i.a_size)
+        'h0: begin // 1 Byte
+          addr_sz_chk  = 1'b1;
+          mask_chk     = ~|(tl_i.a_mask & ~mask);
+          fulldata_chk = |(tl_i.a_mask & mask);
+        end
+
+        'h1: begin // 2 Byte
+          addr_sz_chk  = ~tl_i.a_address[0];
+          // Inactive lanes must be clear: for the lower 2B check a_mask[3:2],
+          // for the upper 2B check a_mask[1:0].
+          mask_chk     = (tl_i.a_address[1]) ? ~|(tl_i.a_mask & 4'b0011)
+                                             : ~|(tl_i.a_mask & 4'b1100);
+          fulldata_chk = (tl_i.a_address[1]) ? &tl_i.a_mask[3:2] : &tl_i.a_mask[1:0];
+        end
+
+        'h2: begin // 4 Byte
+          addr_sz_chk  = ~|tl_i.a_address[SubAW-1:0];
+          mask_chk     = 1'b1;
+          fulldata_chk = &tl_i.a_mask[3:0];
+        end
+
+        default: begin // unsupported size
+          addr_sz_chk  = 1'b0;
+          mask_chk     = 1'b0;
+          fulldata_chk = 1'b0;
+        end
+      endcase
+    end
+  end
+
+  // The full-data mask check only applies to PutFullData.
+  assign a_config_allowed = addr_sz_chk
+                          & mask_chk
+                          & (op_get | op_partial | fulldata_chk);
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..bd5a73c
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,56 @@
+// TL-UL error responder module, used by tlul_socket_1n to answer requests
+// that do not map to a valid address space. Every accepted request gets a
+// response with d_error = 1 and d_data = all ones. The response is presented
+// one cycle after the request and is held while the host keeps d_ready low.
+//
+// Ports:
+//   tl_h_i - TL-UL request from the host side.
+//   tl_h_o - TL-UL error response towards the host side.
+module tlul_err_resp (
+  input                     clk_i,
+  input                     rst_ni,
+  input  tlul_pkg::tl_h2d_t tl_h_i,
+  output tlul_pkg::tl_d2h_t tl_h_o
+);
+  import tlul_pkg::*;
+
+  // Attributes of the request being answered
+  tlul_pkg::tl_a_m_op                err_opcode;
+  logic [$bits(tl_h_i.a_source)-1:0] err_source;
+  logic [$bits(tl_h_i.a_size)-1:0]   err_size;
+  // err_req_pending: a request was accepted in the previous cycle.
+  // err_rsp_pending: a response was offered but not yet taken by the host.
+  logic                              err_req_pending, err_rsp_pending;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      err_req_pending <= 1'b0;
+      err_source      <= {tlul_pkg::TL_AIW{1'b0}};
+      err_opcode      <= tlul_pkg::Get;
+      err_size        <= '0;
+    end else if (tl_h_i.a_valid && tl_h_o.a_ready) begin
+      err_req_pending <= 1'b1;
+      err_source      <= tl_h_i.a_source;
+      err_opcode      <= tl_h_i.a_opcode;
+      err_size        <= tl_h_i.a_size;
+    end else if (!err_rsp_pending) begin
+      err_req_pending <= 1'b0;
+    end
+  end
+
+  // Accept a new request only when no response is stuck and the current
+  // response (if any) is being taken this cycle.
+  assign tl_h_o.a_ready  = ~err_rsp_pending & ~(err_req_pending & ~tl_h_i.d_ready);
+  assign tl_h_o.d_valid  = err_req_pending | err_rsp_pending;
+  assign tl_h_o.d_data   = '1; // Return all F
+  assign tl_h_o.d_source = err_source;
+  assign tl_h_o.d_sink   = '0;
+  assign tl_h_o.d_param  = '0;
+  assign tl_h_o.d_size   = err_size;
+  assign tl_h_o.d_opcode = (err_opcode == tlul_pkg::Get) ? AccessAckData : AccessAck;
+  assign tl_h_o.d_error  = 1'b1;
+
+  // Remember that a response is waiting for the host's d_ready.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      err_rsp_pending <= 1'b0;
+    end else if ((err_req_pending || err_rsp_pending) && !tl_h_i.d_ready) begin
+      err_rsp_pending <= 1'b1;
+    end else begin
+      err_rsp_pending <= 1'b0;
+    end
+  end
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..917e059
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,86 @@
+// TL-UL fifo, used to add elasticity to a TL-UL bus. Both sides run on the
+// single clock clk_i, so this module provides no clock-domain crossing.
+// It instantiates two FIFOs, one for the request side and one for the
+// response side.
+//
+// Parameters:
+//   ReqPass / RspPass     - forwarded to fifo_sync .Pass for the request /
+//                           response FIFO.
+//   ReqDepth / RspDepth   - depth of the request / response FIFO.
+//   SpareReqW / SpareRspW - width of the side-band bits carried along with
+//                           each request / response.
+//
+// Ports:
+//   tl_h_i / tl_h_o - host-facing TL-UL port.
+//   tl_d_o / tl_d_i - device-facing TL-UL port.
+//   spare_req_*     - side-band bits travelling with the request.
+//   spare_rsp_*     - side-band bits travelling with the response.
+module tlul_fifo_sync #(
+  parameter bit          ReqPass   = 1'b1,
+  parameter bit          RspPass   = 1'b1,
+  parameter int unsigned ReqDepth  = 0,
+  parameter int unsigned RspDepth  = 0,
+  parameter int unsigned SpareReqW = 1,
+  parameter int unsigned SpareRspW = 1
+) (
+  input                      clk_i,
+  input                      rst_ni,
+  input  tlul_pkg::tl_h2d_t  tl_h_i,
+  output tlul_pkg::tl_d2h_t  tl_h_o,
+  output tlul_pkg::tl_h2d_t  tl_d_o,
+  input  tlul_pkg::tl_d2h_t  tl_d_i,
+  input  [SpareReqW-1:0]     spare_req_i,
+  output [SpareReqW-1:0]     spare_req_o,
+  input  [SpareRspW-1:0]     spare_rsp_i,
+  output [SpareRspW-1:0]     spare_rsp_o
+);
+
+  // Put everything on the request side into one FIFO.
+  // "-2": a_valid and d_ready are handshake signals and are not stored.
+  localparam int unsigned REQFIFO_WIDTH = $bits(tlul_pkg::tl_h2d_t) -2 + SpareReqW;
+
+  fifo_sync #(.Width(REQFIFO_WIDTH), .Pass(ReqPass), .Depth(ReqDepth)) reqfifo (
+    .clk_i    (clk_i),
+    .rst_ni   (rst_ni),
+    .clr_i    (1'b0),
+    .wvalid_i (tl_h_i.a_valid),
+    .wready_o (tl_h_o.a_ready),
+    .wdata_i  ({tl_h_i.a_opcode ,
+                tl_h_i.a_param  ,
+                tl_h_i.a_size   ,
+                tl_h_i.a_source ,
+                tl_h_i.a_address,
+                tl_h_i.a_mask   ,
+                tl_h_i.a_data   ,
+                spare_req_i}),
+    .depth_o  (),
+    .rvalid_o (tl_d_o.a_valid),
+    .rready_i (tl_d_i.a_ready),
+    .rdata_o  ({tl_d_o.a_opcode ,
+                tl_d_o.a_param  ,
+                tl_d_o.a_size   ,
+                tl_d_o.a_source ,
+                tl_d_o.a_address,
+                tl_d_o.a_mask   ,
+                tl_d_o.a_data   ,
+                spare_req_o}));
+
+  // Put everything on the response side into the other FIFO.
+  // "-2": d_valid and a_ready are handshake signals and are not stored.
+  localparam int unsigned RSPFIFO_WIDTH = $bits(tlul_pkg::tl_d2h_t) -2 + SpareRspW;
+
+  fifo_sync #(.Width(RSPFIFO_WIDTH), .Pass(RspPass), .Depth(RspDepth)) rspfifo (
+    .clk_i    (clk_i),
+    .rst_ni   (rst_ni),
+    .clr_i    (1'b0),
+    .wvalid_i (tl_d_i.d_valid),
+    .wready_o (tl_d_o.d_ready),
+    // d_data is forced to zero for responses that carry no data (anything
+    // other than AccessAckData).
+    .wdata_i  ({tl_d_i.d_opcode,
+                tl_d_i.d_param ,
+                tl_d_i.d_size  ,
+                tl_d_i.d_source,
+                tl_d_i.d_sink  ,
+                (tl_d_i.d_opcode == tlul_pkg::AccessAckData) ? tl_d_i.d_data :
+                                                               {tlul_pkg::TL_DW{1'b0}} ,
+                tl_d_i.d_error ,
+                spare_rsp_i}),
+    .depth_o  (),
+    .rvalid_o (tl_h_o.d_valid),
+    .rready_i (tl_h_i.d_ready),
+    .rdata_o  ({tl_h_o.d_opcode,
+                tl_h_o.d_param ,
+                tl_h_o.d_size  ,
+                tl_h_o.d_source,
+                tl_h_o.d_sink  ,
+                tl_h_o.d_data  ,
+                tl_h_o.d_error ,
+                spare_rsp_o}));
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..8d18df5
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,95 @@
+// tlul_host_adapter converts a basic req/grant/rvalid interface into a TL-UL
+// host interface. If MAX_REQS == 1 it is purely combinational logic. If
+// MAX_REQS > 1 flops are required.
+// The host driving the adapter is responsible for ensuring it doesn't have more requests in flight
+// than the specified MAX_REQS.
+// The outgoing address is always word aligned. The access size is always the word size (as
+// specified by TL_DW). For write accesses that occupy all lanes the operation is PutFullData,
+// otherwise it is PutPartialData, mask is generated from be_i. For reads all lanes are enabled as
+// required by TL-UL (every bit in mask set).
+// When MAX_REQS > 1 tlul_host_adapter does not do anything to order responses from the TL-UL
+// interface which could return them out of order. It is the host's responsibility to either only
+// have outstanding requests to an address space it knows will return responses in order or to not
+// care about out of order responses (note that if read data is returned out of order there is no
+// way to determine this).
+//
+// Ports:
+//   req_i / gnt_o             - request and grant (gnt_o mirrors a_ready).
+//   addr_i, we_i, wdata_i,
+//   be_i                      - request attributes.
+//   valid_o, rdata_o, err_o   - response (mirrors d_valid, d_data, d_error).
+//   tl_h_c_a / tl_h_c_d       - TL-UL host port; d_ready is tied high.
+module tlul_host_adapter #(
+  parameter int unsigned MAX_REQS = 1
+) (
+  input clk_i,
+  input rst_ni,
+
+  // interface with host agent
+  input                               req_i,
+  output logic                        gnt_o,
+  input  logic [tlul_pkg::TL_AW-1:0]  addr_i,
+  input  logic                        we_i,
+  input  logic [tlul_pkg::TL_DW-1:0]  wdata_i,
+  input  logic [tlul_pkg::TL_DBW-1:0] be_i,
+  output logic                        valid_o,
+  output logic [tlul_pkg::TL_DW-1:0]  rdata_o,
+  output logic                        err_o,
+
+  // interface with other tilelink agents or tlul interface
+  output tlul_pkg::tl_h2d_t tl_h_c_a, // tilelink host channel A
+  input  tlul_pkg::tl_d2h_t tl_h_c_d  // tilelink host channel D
+);
+
+  // Number of address bits that select a byte within a word
+  localparam int WordSize = $clog2(tlul_pkg::TL_DBW);
+
+  logic [tlul_pkg::TL_AIW-1:0] tl_source;
+  logic [tlul_pkg::TL_DBW-1:0] tl_be;
+
+  if (MAX_REQS == 1) begin
+    assign tl_source = '0;
+  end else begin
+    // Source ID is a counter that advances on every granted request and
+    // wraps at MAX_REQS-1.
+    localparam int ReqNumW = $clog2(MAX_REQS);
+
+    logic [ReqNumW-1:0] source_d, source_q;
+
+    // Asynchronous active-low reset, consistent with every other flop that
+    // uses rst_ni in the TL-UL blocks.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        source_q <= '0;
+      end else begin
+        source_q <= source_d;
+      end
+    end
+
+    always_comb begin
+      source_d = source_q;
+      if (req_i && gnt_o) begin
+        if (source_q == MAX_REQS - 1) source_d = '0;
+        else                          source_d = source_q + 1;
+      end
+    end
+
+    assign tl_source = tlul_pkg::TL_AIW'(source_q);
+  end
+
+  // For TL-UL Get opcode all active bytes must have their mask bit set, so all reads get all tl_be
+  // bits set. For writes the supplied be_i is used as the mask.
+  assign tl_be = ~we_i ? {tlul_pkg::TL_DBW{1'b1}} : be_i;
+
+  assign tl_h_c_a = '{
+    a_valid:   req_i,
+    a_opcode:  (~we_i) ? tlul_pkg::Get         :
+               (&be_i) ? tlul_pkg::PutFullData :
+                         tlul_pkg::PutPartialData,
+    a_param:   3'h0,
+    a_size:    tlul_pkg::TL_SZW'(WordSize),
+    a_mask:    tl_be,
+    a_source:  tl_source,
+    // Word-aligned address; the upper bound follows TL_AW instead of a literal.
+    a_address: {addr_i[tlul_pkg::TL_AW-1:WordSize], {WordSize{1'b0}}},
+    a_data:    wdata_i,
+    d_ready:   1'b1
+  };
+
+  assign gnt_o   = tl_h_c_d.a_ready;
+  assign err_o   = tl_h_c_d.d_error;
+  assign valid_o = tl_h_c_d.d_valid;
+  assign rdata_o = tl_h_c_d.d_data;
+
+endmodule
\ No newline at end of file
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..9da373f
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,120 @@
+// Shared TL-UL definitions: bus widths, opcodes, the host-to-device and
+// device-to-host bundles, and small math helpers.
+package tlul_pkg;
+
+  // Arbiter implementation selected in tlul_socket_m1: "PPC" or "BINTREE".
+  parameter ArbiterImpl = "PPC";
+
+  /**
+   * Math function: ceil(log2(value)) without using a system function.
+   *
+   * Returns 0 for value <= 1. Only intended as a fallback for tools that do
+   * not accept $clog2() inside a function; prefer $clog2() otherwise.
+   */
+  function automatic integer _clog2(integer value);
+    integer result;
+    value = value - 1;
+    for (result = 0; value > 0; result = result + 1) begin
+      value = value >> 1;
+    end
+    return result;
+  endfunction
+
+  /**
+   * Math function: Number of bits needed to address |value| items.
+   *
+   *          0                 for value == 0
+   * vbits =  1                 for value == 1
+   *          ceil(log2(value)) for value > 1
+   *
+   *
+   * The primary use case for this function is the definition of registers/arrays
+   * which are wide enough to contain |value| items.
+   *
+   * This function identical to $clog2() for all input values except the value 1;
+   * it could be considered an "enhanced" $clog2() function.
+   *
+   *
+   * Example 1:
+   *   parameter Items = 1;
+   *   localparam ItemsWidth = vbits(Items); // 1
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [0:0]
+   *
+   * Example 2:
+   *   parameter Items = 64;
+   *   localparam ItemsWidth = vbits(Items); // 6
+   *   logic [ItemsWidth-1:0] item_register; // items_register is now [5:0]
+   *
+   * Note: If you want to store the number "value" inside a register, you need
+   * a register with size vbits(value + 1), since you also need to store
+   * the number 0.
+   *
+   * Example 3:
+   *   logic [vbits(64)-1:0]     store_64_logic_values; // width is [5:0]
+   *   logic [vbits(64 + 1)-1:0] store_number_64;       // width is [6:0]
+   */
+  function automatic integer vbits(integer value);
+`ifdef XCELIUM
+    // The use of system functions was not allowed here in Verilog-2001, but is
+    // valid since (System)Verilog-2005, which is also when $clog2() first
+    // appeared.
+    // Xcelium < 19.10 does not yet support the use of $clog2() here, fall back
+    // to an implementation without a system function. Remove this workaround
+    // if we require a newer Xcelium version.
+    // The fallback is the _clog2() defined in this package.
+    return (value == 1) ? 1 : _clog2(value);
+`else
+    return (value == 1) ? 1 : $clog2(value);
+`endif
+  endfunction
+
+  // Bus geometry
+  localparam int TL_AW  = 32;                             // address width
+  localparam int TL_DW  = 32;                             // data width
+  localparam int TL_AIW = 8;                              // a_source / d_source width
+  localparam int TL_DIW = 1;                              // d_sink width
+  localparam int TL_DBW = (TL_DW>>3);                     // number of byte lanes
+  localparam int TL_SZW = $clog2($clog2(TL_DBW)+1);       // a_size / d_size width
+
+  // opcodes for channel A messages/operations defined in official TileLink spec
+  typedef enum logic [2:0] {
+    PutFullData    = 3'h0,
+    PutPartialData = 3'h1,
+    Get            = 3'h4
+  } tl_a_m_op;
+
+  // opcodes for channel D messages/operations defined in official TileLink spec
+  typedef enum logic [2:0] {
+    AccessAck     = 3'h0,
+    AccessAckData = 3'h1
+  } tl_d_m_op;
+
+  // Host-to-device bundle: channel A request plus the channel D ready.
+  typedef struct packed {
+    logic                a_valid;
+    tl_a_m_op            a_opcode;
+    logic          [2:0] a_param;
+    logic [TL_SZW-1:0]   a_size;
+    logic [TL_AIW-1:0]   a_source;
+    logic [TL_AW-1:0]    a_address;
+    logic [TL_DBW-1:0]   a_mask;
+    logic [TL_DW-1:0]    a_data;
+    logic                d_ready;
+  } tl_h2d_t;
+
+  // Idle host: no request, always ready to accept a response.
+  localparam tl_h2d_t TL_H2D_DEFAULT = '{
+    d_ready:  1'b1,
+    a_opcode: tl_a_m_op'('0),
+    default:  '0
+  };
+
+  // Device-to-host bundle: channel D response plus the channel A ready.
+  typedef struct packed {
+    logic                d_valid;
+    tl_d_m_op            d_opcode;
+    logic          [2:0] d_param;
+    logic [TL_SZW-1:0]   d_size;
+    logic [TL_AIW-1:0]   d_source;
+    logic [TL_DIW-1:0]   d_sink;
+    logic [TL_DW-1:0]    d_data;
+    logic                d_error;
+    logic                a_ready;
+  } tl_d2h_t;
+
+  // Idle device: no response, always ready to accept a request.
+  localparam tl_d2h_t TL_D2H_DEFAULT = '{
+    a_ready:  1'b1,
+    d_opcode: tl_d_m_op'('0),
+    default:  '0
+  };
+
+endpackage
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..8ab6bdb
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,209 @@
+// TL-UL socket 1:N module
+//
+// configuration settings
+//   device_count: 4
+//
+// Verilog parameters
+//   HReqPass:      if 1 then host requests can pass through on empty fifo,
+//                  default 1
+//   HRspPass:      if 1 then host responses can pass through on empty fifo,
+//                  default 1
+//   DReqPass:      (one per device_count) if 1 then device i requests can
+//                  pass through on empty fifo, default 1
+//   DRspPass:      (one per device_count) if 1 then device i responses can
+//                  pass through on empty fifo, default 1
+//   HReqDepth:     Depth of host request FIFO, default 2
+//   HRspDepth:     Depth of host response FIFO, default 2
+//   DReqDepth:     (one per device_count) Depth of device i request FIFO,
+//                  default 2
+//   DRspDepth:     (one per device_count) Depth of device i response FIFO,
+//                  default 2
+//
+// Requests must stall to one device until all responses from other devices
+// have returned. Need to keep a counter of all outstanding requests and
+// wait until that counter is zero before switching devices.
+//
+// This module will return a request error if the input value of 'dev_select_i'
+// is not within the range 0..N-1. Thus the instantiator of the socket
+// can indicate error by any illegal value of dev_select_i. 4'b1111 is
+// recommended for visibility
+//
+// The maximum value of N is 15
+module tlul_socket_1n #(
+  parameter int unsigned  N         = 4,
+  parameter bit           HReqPass  = 1'b1,
+  parameter bit           HRspPass  = 1'b1,
+  parameter bit [N-1:0]   DReqPass  = {N{1'b1}},
+  parameter bit [N-1:0]   DRspPass  = {N{1'b1}},
+  parameter bit [3:0]     HReqDepth = 4'h2,
+  parameter bit [3:0]     HRspDepth = 4'h2,
+  parameter bit [N*4-1:0] DReqDepth = {N{4'h2}},
+  parameter bit [N*4-1:0] DRspDepth = {N{4'h2}},
+  localparam int unsigned NWD       = $clog2(N+1) // derived parameter
+) (
+  input                     clk_i,
+  input                     rst_ni,
+  input  tlul_pkg::tl_h2d_t tl_h_i,
+  output tlul_pkg::tl_d2h_t tl_h_o,
+  output tlul_pkg::tl_h2d_t tl_d_o    [N],
+  input  tlul_pkg::tl_d2h_t tl_d_i    [N],
+  input  [NWD-1:0]          dev_select_i
+);
+
+  // Since our steering is done after potential FIFOing, we need to
+  // shove our device select bits into spare bits of reqfifo
+
+  // instantiate the host fifo, create intermediate bus 't'
+
+  // FIFO'd version of device select
+  logic [NWD-1:0] dev_select_t;
+
+  tlul_pkg::tl_h2d_t tl_t_o;
+  tlul_pkg::tl_d2h_t tl_t_i;
+
+  tlul_fifo_sync #(
+    .ReqPass   (HReqPass),
+    .RspPass   (HRspPass),
+    .ReqDepth  (HReqDepth),
+    .RspDepth  (HRspDepth),
+    .SpareReqW (NWD)
+  ) fifo_h (
+    .clk_i,
+    .rst_ni,
+    .tl_h_i,
+    .tl_h_o,
+    .tl_d_o      (tl_t_o),
+    .tl_d_i      (tl_t_i),
+    .spare_req_i (dev_select_i),
+    .spare_req_o (dev_select_t),
+    .spare_rsp_i (1'b0),
+    .spare_rsp_o ());
+
+  // We need to keep track of how many requests are outstanding,
+  // and to which device. New requests are compared to this and
+  // stall until that number is zero.
+  // One request per source ID: 2**TL_AIW = 256 for TL_AIW = 8. (The previous
+  // 4**TL_AIW contradicted the "up to 256" intent and only widened the counter.)
+  localparam int MaxOutstanding = 2**tlul_pkg::TL_AIW; // Up to 256 outstanding
+  localparam int OutstandingW   = $clog2(MaxOutstanding+1);
+  logic [OutstandingW-1:0] num_req_outstanding;
+  logic [NWD-1:0]          dev_select_outstanding;
+  logic                    hold_all_requests;
+  logic                    accept_t_req, accept_t_rsp;
+
+  assign accept_t_req = tl_t_o.a_valid & tl_t_i.a_ready;
+  assign accept_t_rsp = tl_t_i.d_valid & tl_t_o.d_ready;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      num_req_outstanding    <= '0;
+      dev_select_outstanding <= '0;
+    end else if (accept_t_req) begin
+      // A request and a response in the same cycle leave the count unchanged.
+      if (!accept_t_rsp) begin
+        num_req_outstanding <= num_req_outstanding + 1'b1;
+      end
+      dev_select_outstanding <= dev_select_t;
+    end else if (accept_t_rsp) begin
+      num_req_outstanding <= num_req_outstanding - 1'b1;
+    end
+  end
+
+  // Stall while responses are still owed by a different device.
+  assign hold_all_requests =
+      (num_req_outstanding != '0) &
+      (dev_select_t != dev_select_outstanding);
+
+  // Make N copies of 't' request side with modified reqvalid, call
+  // them 'u[0]' .. 'u[n-1]'. Index N is the error responder.
+  tlul_pkg::tl_h2d_t tl_u_o [N+1];
+  tlul_pkg::tl_d2h_t tl_u_i [N+1];
+
+  for (genvar i = 0 ; i < N ; i++) begin : gen_u_o
+    assign tl_u_o[i].a_valid   = tl_t_o.a_valid &
+                                 (dev_select_t == NWD'(i)) &
+                                 ~hold_all_requests;
+    assign tl_u_o[i].a_opcode  = tl_t_o.a_opcode;
+    assign tl_u_o[i].a_param   = tl_t_o.a_param;
+    assign tl_u_o[i].a_size    = tl_t_o.a_size;
+    assign tl_u_o[i].a_source  = tl_t_o.a_source;
+    assign tl_u_o[i].a_address = tl_t_o.a_address;
+    assign tl_u_o[i].a_mask    = tl_t_o.a_mask;
+    assign tl_u_o[i].a_data    = tl_t_o.a_data;
+  end
+
+  tlul_pkg::tl_d2h_t tl_t_p;
+
+  // for the returning reqready, only look at the device we're addressing
+  logic hfifo_reqready;
+  always_comb begin
+    hfifo_reqready = tl_u_i[N].a_ready; // default to error
+    for (int idx = 0 ; idx < N ; idx++) begin
+      if (dev_select_t == NWD'(idx)) hfifo_reqready = tl_u_i[idx].a_ready;
+    end
+    if (hold_all_requests) hfifo_reqready = 1'b0;
+  end
+  // Adding a_valid as a qualifier. This prevents the a_ready from having unknown value
+  // when the address is unknown and the Host TL-UL FIFO is bypass mode.
+  assign tl_t_i.a_ready = tl_t_o.a_valid & hfifo_reqready;
+
+  // Responses are taken from the device that owns the outstanding requests;
+  // the error responder is the default.
+  always_comb begin
+    tl_t_p = tl_u_i[N];
+    for (int idx = 0 ; idx < N ; idx++) begin
+      if (dev_select_outstanding == NWD'(idx)) tl_t_p = tl_u_i[idx];
+    end
+  end
+  assign tl_t_i.d_valid  = tl_t_p.d_valid ;
+  assign tl_t_i.d_opcode = tl_t_p.d_opcode;
+  assign tl_t_i.d_param  = tl_t_p.d_param ;
+  assign tl_t_i.d_size   = tl_t_p.d_size  ;
+  assign tl_t_i.d_source = tl_t_p.d_source;
+  assign tl_t_i.d_sink   = tl_t_p.d_sink  ;
+  assign tl_t_i.d_data   = tl_t_p.d_data  ;
+  assign tl_t_i.d_error  = tl_t_p.d_error ;
+
+  // accept responses from devices when selected if upstream is accepting
+  for (genvar i = 0 ; i < N+1 ; i++) begin : gen_u_o_d_ready
+    assign tl_u_o[i].d_ready = tl_t_o.d_ready;
+  end
+
+  // finally instantiate all device FIFOs and the error responder
+  for (genvar i = 0 ; i < N ; i++) begin : gen_dfifo
+    tlul_fifo_sync #(
+      .ReqPass  (DReqPass[i]),
+      .RspPass  (DRspPass[i]),
+      .ReqDepth (DReqDepth[i*4+:4]),
+      .RspDepth (DRspDepth[i*4+:4])
+    ) fifo_d (
+      .clk_i,
+      .rst_ni,
+      .tl_h_i      (tl_u_o[i]),
+      .tl_h_o      (tl_u_i[i]),
+      .tl_d_o      (tl_d_o[i]),
+      .tl_d_i      (tl_d_i[i]),
+      .spare_req_i (1'b0),
+      .spare_req_o (),
+      .spare_rsp_i (1'b0),
+      .spare_rsp_o ());
+  end
+
+  // Requests whose device select equals N go to the error responder.
+  assign tl_u_o[N].a_valid   = tl_t_o.a_valid &
+                               (dev_select_t == NWD'(N)) &
+                               ~hold_all_requests;
+  assign tl_u_o[N].a_opcode  = tl_t_o.a_opcode;
+  assign tl_u_o[N].a_param   = tl_t_o.a_param;
+  assign tl_u_o[N].a_size    = tl_t_o.a_size;
+  assign tl_u_o[N].a_source  = tl_t_o.a_source;
+  assign tl_u_o[N].a_address = tl_t_o.a_address;
+  assign tl_u_o[N].a_mask    = tl_t_o.a_mask;
+  assign tl_u_o[N].a_data    = tl_t_o.a_data;
+
+  tlul_err_resp err_resp (
+    .clk_i  (clk_i),
+    .rst_ni (rst_ni),
+    .tl_h_i (tl_u_o[N]),
+    .tl_h_o (tl_u_i[N]));
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..2a9d335
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,245 @@
+// TL-UL socket M:1 module
+//
+// Verilog parameters
+//   M:             Number of host ports.
+//   HReqPass:      M bit array to allow requests to pass through the host i
+//                  FIFO with no clock delay if the request FIFO is empty. If
+//                  1'b0, at least one clock cycle of latency is created.
+//                  Default is 1'b1.
+//   HRspPass:      Same as HReqPass but for host response FIFO.
+//   HReqDepth:     Mx4 bit array. bit[i*4+:4] is depth of host i request FIFO.
+//                  Depth of zero is allowed if ReqPass is true. A maximum value
+//                  of 16 is allowed, default is 2.
+//   HRspDepth:     Same as HReqDepth but for host response FIFO.
+//   DReqPass:      Same as HReqPass but for device request FIFO.
+//   DRspPass:      Same as HReqPass but for device response FIFO.
+//   DReqDepth:     Same as HReqDepth but for device request FIFO.
+//   DRspDepth:     Same as HReqDepth but for device response FIFO.
+//
+// Source-ID handling: the low STIDW = $clog2(M) bits of a_source are replaced
+// by the host port index on the way to the device, so the upper STIDW bits of
+// a host's a_source are dropped. On the way back the index bits steer the
+// response and are shifted out again.
+module tlul_socket_m1 #(
+  parameter int unsigned  M         = 4,
+  parameter bit [M-1:0]   HReqPass  = {M{1'b1}},
+  parameter bit [M-1:0]   HRspPass  = {M{1'b1}},
+  parameter bit [M*4-1:0] HReqDepth = {M{4'h2}},
+  parameter bit [M*4-1:0] HRspDepth = {M{4'h2}},
+  parameter bit           DReqPass  = 1'b1,
+  parameter bit           DRspPass  = 1'b1,
+  parameter bit [3:0]     DReqDepth = 4'h2,
+  parameter bit [3:0]     DRspDepth = 4'h2
+) (
+  input                     clk_i,
+  input                     rst_ni,
+
+  input  tlul_pkg::tl_h2d_t tl_h_i [M],
+  output tlul_pkg::tl_d2h_t tl_h_o [M],
+
+  output tlul_pkg::tl_h2d_t tl_d_o,
+  input  tlul_pkg::tl_d2h_t tl_d_i
+);
+
+  // Signals
+  //
+  //  tl_h_i/o[0] |  tl_h_i/o[1] | ... |  tl_h_i/o[M-1]
+  //      |              |                    |
+  // u_hostfifo[0]  u_hostfifo[1]        u_hostfifo[M-1]
+  //      |              |                    |
+  //       hreq_fifo_o(i) / hrsp_fifo_i(i)
+  //     ---------------------------------------
+  //     |       request/grant/req_data        |
+  //     |                                     |
+  //     |                                     |
+  //     |  arb_valid / arb_ready / arb_data   |
+  //     ---------------------------------------
+  //                     |
+  //                dreq_fifo_i / drsp_fifo_o
+  //                     |
+  //                u_devicefifo
+  //                     |
+  //                  tl_d_o/i
+  //
+  // Required ID width to distinguish between host ports
+  //  Used in response steering
+  localparam int unsigned IDW   = tlul_pkg::TL_AIW;
+  localparam int unsigned STIDW = $clog2(M);
+
+  tlul_pkg::tl_h2d_t hreq_fifo_o [M];
+  tlul_pkg::tl_d2h_t hrsp_fifo_i [M];
+
+  logic [M-1:0] hrequest;
+  logic [M-1:0] hgrant;
+
+  tlul_pkg::tl_h2d_t dreq_fifo_i;
+  tlul_pkg::tl_d2h_t drsp_fifo_o;
+
+  logic arb_valid;
+  logic arb_ready;
+  tlul_pkg::tl_h2d_t arb_data;
+
+  // Host Req/Rsp FIFO
+  for (genvar i = 0 ; i < M ; i++) begin : gen_host_fifo
+    tlul_pkg::tl_h2d_t hreq_fifo_i;
+
+    // ID Shifting: append the host index below the host's own source bits
+    logic [STIDW-1:0] reqid_sub;
+    logic [IDW-1:0]   shifted_id;
+    assign reqid_sub  = i;   // can cause conversion error?
+    assign shifted_id = {
+      tl_h_i[i].a_source[0+:(IDW-STIDW)],
+      reqid_sub
+    };
+
+    // assign not connected bits to nc_* signal to make lint happy
+    logic [IDW-1 : IDW-STIDW] unused_tl_h_source;
+    assign unused_tl_h_source = tl_h_i[i].a_source[IDW-1 -: STIDW];
+
+    // Put shifted ID
+    assign hreq_fifo_i = '{
+      a_valid:   tl_h_i[i].a_valid,
+      a_opcode:  tl_h_i[i].a_opcode,
+      a_param:   tl_h_i[i].a_param,
+      a_size:    tl_h_i[i].a_size,
+      a_source:  shifted_id,
+      a_address: tl_h_i[i].a_address,
+      a_mask:    tl_h_i[i].a_mask,
+      a_data:    tl_h_i[i].a_data,
+      d_ready:   tl_h_i[i].d_ready
+    };
+
+    tlul_fifo_sync #(
+      .ReqPass   (HReqPass[i]),
+      .RspPass   (HRspPass[i]),
+      .ReqDepth  (HReqDepth[i*4+:4]),
+      .RspDepth  (HRspDepth[i*4+:4]),
+      .SpareReqW (1)
+    ) u_hostfifo (
+      .clk_i,
+      .rst_ni,
+      .tl_h_i      (hreq_fifo_i),
+      .tl_h_o      (tl_h_o[i]),
+      .tl_d_o      (hreq_fifo_o[i]),
+      .tl_d_i      (hrsp_fifo_i[i]),
+      .spare_req_i (1'b0),
+      .spare_req_o (),
+      .spare_rsp_i (1'b0),
+      .spare_rsp_o ()
+    );
+  end
+
+  // Device Req/Rsp FIFO
+  tlul_fifo_sync #(
+    .ReqPass   (DReqPass),
+    .RspPass   (DRspPass),
+    .ReqDepth  (DReqDepth),
+    .RspDepth  (DRspDepth),
+    .SpareReqW (1)
+  ) u_devicefifo (
+    .clk_i,
+    .rst_ni,
+    .tl_h_i      (dreq_fifo_i),
+    .tl_h_o      (drsp_fifo_o),
+    .tl_d_o      (tl_d_o),
+    .tl_d_i      (tl_d_i),
+    .spare_req_i (1'b0),
+    .spare_req_o (),
+    .spare_rsp_i (1'b0),
+    .spare_rsp_o ()
+  );
+
+  // Request Arbiter
+  for (genvar i = 0 ; i < M ; i++) begin : gen_arbreqgnt
+    assign hrequest[i] = hreq_fifo_o[i].a_valid;
+  end
+
+  assign arb_ready = drsp_fifo_o.a_ready;
+
+  if (tlul_pkg::ArbiterImpl == "PPC") begin : gen_arb_ppc
+    prim_arbiter_ppc #(
+      .N          (M),
+      .DW         ($bits(tlul_pkg::tl_h2d_t)),
+      .EnReqStabA (0)
+    ) u_reqarb (
+      .clk_i,
+      .rst_ni,
+      .req_i   ( hrequest    ),
+      .data_i  ( hreq_fifo_o ),
+      .gnt_o   ( hgrant      ),
+      .idx_o   (             ),
+      .valid_o ( arb_valid   ),
+      .data_o  ( arb_data    ),
+      .ready_i ( arb_ready   )
+    );
+  end else if (tlul_pkg::ArbiterImpl == "BINTREE") begin : gen_tree_arb
+    prim_arbiter_tree #(
+      .N          (M),
+      .DW         ($bits(tlul_pkg::tl_h2d_t)),
+      .EnReqStabA (0)
+    ) u_reqarb (
+      .clk_i,
+      .rst_ni,
+      .req_i   ( hrequest    ),
+      .data_i  ( hreq_fifo_o ),
+      .gnt_o   ( hgrant      ),
+      .idx_o   (             ),
+      .valid_o ( arb_valid   ),
+      .data_o  ( arb_data    ),
+      .ready_i ( arb_ready   )
+    );
+  end else begin : gen_unknown
+    // Unknown ArbiterImpl: no arbiter is instantiated, so hgrant, arb_valid
+    // and arb_data stay undriven.
+  end
+
+  logic [  M-1:0] hfifo_rspvalid;
+  logic [  M-1:0] dfifo_rspready;
+  logic [IDW-1:0] hfifo_rspid;
+  logic           dfifo_rspready_merged;
+
+  // arb_data --> dreq_fifo_i
+  //   dreq_fifo_i.hd_rspready <= dfifo_rspready
+
+  assign dfifo_rspready_merged = |dfifo_rspready;
+  assign dreq_fifo_i = '{
+    a_valid:   arb_valid,
+    a_opcode:  arb_data.a_opcode,
+    a_param:   arb_data.a_param,
+    a_size:    arb_data.a_size,
+    a_source:  arb_data.a_source,
+    a_address: arb_data.a_address,
+    a_mask:    arb_data.a_mask,
+    a_data:    arb_data.a_data,
+
+    d_ready:   dfifo_rspready_merged
+  };
+
+  // Response ID steering
+  // drsp_fifo_o --> hrsp_fifo_i[i]
+
+  // Response ID shifting before put into host fifo: drop the host index bits
+  assign hfifo_rspid = {
+    {STIDW{1'b0}},
+    drsp_fifo_o.d_source[IDW-1:STIDW]
+  };
+  for (genvar i = 0 ; i < M ; i++) begin : gen_idrouting
+    // The low STIDW bits of d_source name the host that issued the request.
+    assign hfifo_rspvalid[i] = drsp_fifo_o.d_valid &
+                               (drsp_fifo_o.d_source[0+:STIDW] == i);
+    assign dfifo_rspready[i] = hreq_fifo_o[i].d_ready                &
+                               (drsp_fifo_o.d_source[0+:STIDW] == i) &
+                               drsp_fifo_o.d_valid;
+
+    assign hrsp_fifo_i[i] = '{
+      d_valid:  hfifo_rspvalid[i],
+      d_opcode: drsp_fifo_o.d_opcode,
+      d_param:  drsp_fifo_o.d_param,
+      d_size:   drsp_fifo_o.d_size,
+      d_source: hfifo_rspid,
+      d_sink:   drsp_fifo_o.d_sink,
+      d_data:   drsp_fifo_o.d_data,
+      d_error:  drsp_fifo_o.d_error,
+      a_ready:  hgrant[i]
+    };
+  end
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..dc77555
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,338 @@
+/**
+ * TileLink-UL adapter for SRAM-like devices
+ *
+ * - BaseAddr is intentionally omitted so that the adapter can be used when multiple memory
+ *   maps exist in a SoC. As a consequence, aliasing can happen if the target device size in
+ *   the TL-UL crossbar is bigger than the SRAM size.
+ */
+module tlul_sram_adapter #(
+  parameter int SramAw      = 12, // Width of addr_o (SRAM word address)
+  parameter int SramDw      = 32, // Must be multiple of the TL width
+  parameter int Outstanding = 1,  // Only one request is accepted (depth of the internal FIFOs)
+  parameter bit ByteAccess  = 1,  // 1: true, 0: false
+  parameter bit ErrOnWrite  = 0,  // 1: Writes not allowed, automatically error
+  parameter bit ErrOnRead   = 0   // 1: Reads not allowed, automatically error
+) (
+  input   clk_i,
+  input   rst_ni,
+
+  // TL-UL interface
+  input   tlul_pkg::tl_h2d_t  tl_i,
+  output  tlul_pkg::tl_d2h_t  tl_o,
+
+  // SRAM interface
+  output logic              req_o,
+  input                     gnt_i,
+  output logic              we_o,
+  output logic [SramAw-1:0] addr_o,
+  output logic [SramDw-1:0] wdata_o,
+  output logic [SramDw-1:0] wmask_o,
+  input        [SramDw-1:0] rdata_i,
+  input                     rvalid_i,
+  input        [1:0]        rerror_i // 2 bit error [1]: Uncorrectable, [0]: Correctable
+);
+
+  // The port list above must be closed before any module item; the package import
+  // makes opcode names such as Get / PutFullData / AccessAck visible in the body.
+  import tlul_pkg::*;
+ // Derived geometry of the SRAM word relative to the TL-UL word.
+ // SramByte: number of bytes in one SRAM word.
+ localparam int SramByte = SramDw/8;
+ // DataBitWidth: index of the first a_address bit used for addr_o
+ // (presumably log2 of SramByte; vbits is defined in tlul_pkg - confirm there).
+ localparam int DataBitWidth = tlul_pkg::vbits(SramByte);
+ // WidthMult: number of TL-UL words that fit into one SRAM word.
+ localparam int WidthMult = SramDw / tlul_pkg::TL_DW;
+ // WoffsetWidth: width of the TL-UL word offset inside the SRAM word. Forced to 1 when
+ // both words have the same byte count so that no zero-width vector is declared.
+ localparam int WoffsetWidth = (SramByte == tlul_pkg::TL_DBW) ? 1 :
+ DataBitWidth - tlul_pkg::vbits(tlul_pkg::TL_DBW);
+ // Entry of the SRAM request FIFO: what is needed to mask/select returning read data.
+ typedef struct packed {
+ logic [tlul_pkg::TL_DBW-1:0] mask ; // Byte mask within the TL-UL word
+ logic [WoffsetWidth-1:0] woffset ; // Offset of the TL-UL word within the SRAM word
+ } sram_req_t ;
+ // Request kind stored in the request FIFO. Only OpWrite and OpRead are assigned
+ // by the request path in this module; OpUnknown is declared but not driven here.
+ typedef enum logic [1:0] {
+ OpWrite,
+ OpRead,
+ OpUnknown
+ } req_op_e ;
+ // Entry of the request FIFO: everything needed to build the D-channel response.
+ typedef struct packed {
+ req_op_e op ;
+ logic error ;
+ logic [tlul_pkg::TL_SZW-1:0] size ;
+ logic [tlul_pkg::TL_AIW-1:0] source ;
+ } req_t ;
+ // Entry of the response FIFO: returned read data plus its error flag.
+ // NOTE(review): data is SramDw wide, but it is written from a TL_DW-wide signal
+ // (rdata_tlword) and read into d_data; the widths only match when SramDw == TL_DW.
+ typedef struct packed {
+ logic [SramDw-1:0] data ;
+ logic error ;
+ } rsp_t ;
+ // FIFO payload widths, taken directly from the packed struct sizes.
+ localparam int SramReqFifoWidth = $bits(sram_req_t) ;
+ localparam int ReqFifoWidth = $bits(req_t) ;
+ localparam int RspFifoWidth = $bits(rsp_t) ;
+ // FIFO signals, needed when Outstanding is greater than 1.
+ // When a request is accepted, {op, error, size, source} is pushed to the request FIFO.
+ // The request FIFO is popped when the D channel is acknowledged (valid & ready).
+ // D channel valid is asserted for a write request, or for a read once the response FIFO
+ // is not empty.
+ logic reqfifo_wvalid, reqfifo_wready;
+ logic reqfifo_rvalid, reqfifo_rready;
+ req_t reqfifo_wdata, reqfifo_rdata;
+ // SRAM request FIFO: its read-valid output is left unconnected at the instance, so only
+ // the write handshake and the pop strobe are declared here.
+ logic sramreqfifo_wvalid, sramreqfifo_wready;
+ logic sramreqfifo_rready;
+ sram_req_t sramreqfifo_wdata, sramreqfifo_rdata;
+ // Response FIFO. rspfifo_wready is driven by the FIFO but not consumed by any logic
+ // in this module (the SRAM interface has no read back-pressure).
+ logic rspfifo_wvalid, rspfifo_wready;
+ logic rspfifo_rvalid, rspfifo_rready;
+ rsp_t rspfifo_wdata, rspfifo_rdata;
+ logic error_internal; // Internal protocol error checker
+ logic wr_attr_error;
+ logic wr_vld_error;
+ logic rd_vld_error;
+ logic tlul_error; // Error from `tlul_err` module
+ // Handshake completion strobes: A channel, D channel and SRAM request/grant.
+ logic a_ack, d_ack, sram_ack;
+ assign a_ack = tl_i.a_valid & tl_o.a_ready ;
+ assign d_ack = tl_o.d_valid & tl_i.d_ready ;
+ assign sram_ack = req_o & gnt_i ;
+  // D-channel valid generation.
+  // The entry at the head of the request FIFO decides when a response may be sent:
+  //   - request flagged as error (assumed never sent to the SRAM) or a write: immediately
+  //   - error-free read: once read data is available in the response FIFO
+  logic d_valid, d_error;
+  always_comb begin
+    d_valid = 1'b0;
+    if (reqfifo_rvalid) begin
+      if (reqfifo_rdata.error || (reqfifo_rdata.op != OpRead)) begin
+        d_valid = 1'b1;
+      end else begin
+        d_valid = rspfifo_rvalid;
+      end
+    end
+  end
+  // D-channel error generation.
+  // The error recorded with the request always counts; for reads the error flag stored
+  // with the returned data is ORed in as well. Without a pending request it stays low.
+  always_comb begin
+    d_error = 1'b0;
+    if (reqfifo_rvalid) begin
+      d_error = reqfifo_rdata.error;
+      if (reqfifo_rdata.op == OpRead) begin
+        d_error = reqfifo_rdata.error | rspfifo_rdata.error;
+      end
+    end
+  end
+  // Assemble the device-to-host response, including read response, write response, and
+  // error for unsupported stuff.
+  // a_ready depends on the FIFO full condition and grant from SRAM (or SRAM arbiter); a
+  // request with an internal error is accepted without waiting for the grant.
+  assign tl_o = '{
+    d_valid  : d_valid,
+    d_opcode : (d_valid && reqfifo_rdata.op != OpRead) ? AccessAck : AccessAckData,
+    d_param  : '0,
+    d_size   : (d_valid) ? reqfifo_rdata.size : '0,
+    d_source : (d_valid) ? reqfifo_rdata.source : '0,
+    d_sink   : 1'b0,
+    // Read data comes from the response FIFO and is only forwarded for a valid read
+    // response; it is zero otherwise.
+    d_data   : (d_valid && rspfifo_rvalid && reqfifo_rdata.op == OpRead)
+               ? rspfifo_rdata.data : '0,
+    d_error  : d_valid && d_error,
+    a_ready  : (gnt_i | error_internal) & reqfifo_wready & sramreqfifo_wready
+  };
+  // Request path towards the SRAM.
+  // A request is only issued when no internal error is detected. An erroneous request is
+  // dropped here and answered with an error response to the host, which is why the error
+  // flag is pushed to the request FIFO. In that case the request is assumed to be granted
+  // (may cause ordering issue later?).
+  assign req_o  = ~error_internal & reqfifo_wready & tl_i.a_valid;
+  assign we_o   = logic'(tl_i.a_opcode inside {PutFullData, PutPartialData}) & tl_i.a_valid;
+  // The address is forced to zero while no request is presented
+  assign addr_o = {SramAw{tl_i.a_valid}} & tl_i.a_address[DataBitWidth+:SramAw];
+  // SRAMs wider than the TL-UL word are supported: the address bits that are finer than
+  // the SRAM word select which TL-UL sized slot of the SRAM word is accessed.
+  logic [WoffsetWidth-1:0] woffset;
+  if (tlul_pkg::TL_DW == SramDw) begin : gen_no_wordwidthadapt
+    // Same width on both sides: there is only one slot
+    assign woffset = '0;
+  end else begin : gen_wordwidthadapt
+    assign woffset = tl_i.a_address[DataBitWidth-1:tlul_pkg::vbits(tlul_pkg::TL_DBW)];
+  end
+
+  // Expand the TL-UL byte mask into an SRAM bit mask and place mask and data into the slot
+  // selected by woffset. Data bytes are only forwarded for enabled bytes of a write; all
+  // other bits stay zero, as does everything while a_valid is low.
+  logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] wmask_int;
+  logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] wdata_int;
+  always_comb begin
+    wmask_int = '0;
+    wdata_int = '0;
+    for (int b = 0 ; b < tlul_pkg::TL_DW/8 ; b++) begin
+      if (tl_i.a_valid) begin
+        wmask_int[woffset][8*b +: 8] = {8{tl_i.a_mask[b]}};
+        if (tl_i.a_mask[b] && we_o) begin
+          wdata_int[woffset][8*b +: 8] = tl_i.a_data[8*b +: 8];
+        end
+      end
+    end
+  end
+  assign wmask_o = wmask_int;
+  assign wdata_o = wdata_int;
+  // Begin: Request Error Detection
+
+  // wr_attr_error: checks whether the size and mask of a write are permitted.
+  // The basic size / mask / address alignment check is done in the tlul_err module; this
+  // term only flags partial writes when ByteAccess is not allowed.
+  assign wr_attr_error = (ByteAccess == 0) &
+                         (tl_i.a_opcode inside {PutFullData, PutPartialData}) &
+                         ((tl_i.a_mask != '1) | (tl_i.a_size != 2'h2));
+
+  // wr_vld_error: with ErrOnWrite set, every opcode other than Get is an error
+  if (ErrOnWrite == 1) begin : gen_no_writes
+    assign wr_vld_error = tl_i.a_opcode != Get;
+  end else begin : gen_writes_allowed
+    assign wr_vld_error = 1'b0;
+  end
+
+  // rd_vld_error: with ErrOnRead set, Get is an error
+  if (ErrOnRead == 1) begin: gen_no_reads
+    assign rd_vld_error = tl_i.a_opcode == Get;
+  end else begin : gen_reads_allowed
+    assign rd_vld_error = 1'b0;
+  end
+
+  // Generic TL-UL request checker
+  tlul_err u_err (
+    .tl_i  (tl_i),
+    .err_o (tlul_error)
+  );
+
+  // Any of the individual checks marks the request as erroneous
+  assign error_internal = tlul_error | rd_vld_error | wr_vld_error | wr_attr_error;
+  // End: Request Error Detection
+ // Request FIFO write side: one entry per accepted A-channel request.
+ assign reqfifo_wvalid = a_ack ; // Push to FIFO only when granted
+ assign reqfifo_wdata = '{
+ op: (tl_i.a_opcode != Get) ? OpWrite : OpRead, // To return AccessAck for opcode error
+ error: error_internal,
+ size: tl_i.a_size,
+ source: tl_i.a_source
+ }; // Store the request only. Doesn't have to store data
+ // The request FIFO entry is released when the D-channel response is accepted.
+ assign reqfifo_rready = d_ack ;
+ // push together with ReqFIFO, pop upon returning read
+ assign sramreqfifo_wdata = '{
+ mask : tl_i.a_mask,
+ woffset : woffset
+ };
+ // Only granted reads are recorded (writes are excluded through ~we_o).
+ assign sramreqfifo_wvalid = sram_ack & ~we_o;
+ // The SRAM request entry is consumed in the cycle the read data is captured.
+ assign sramreqfifo_rready = rspfifo_wvalid;
+ // Returned read data is captured only while a request is pending in the request FIFO.
+ assign rspfifo_wvalid = rvalid_i & reqfifo_rvalid;
+ // Make sure only requested bytes are forwarded
+ logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] rdata;
+ logic [WidthMult-1:0][tlul_pkg::TL_DW-1:0] rmask;
+ //logic [SramDw-1:0] rmask;
+ logic [tlul_pkg::TL_DW-1:0] rdata_tlword;
+ // Build a bit mask that is non-zero only in the TL-UL slot recorded with the read
+ // request, with each requested byte expanded to eight mask bits.
+ always_comb begin
+ rmask = '0;
+ for (int i = 0 ; i < tlul_pkg::TL_DW/8 ; i++) begin
+ rmask[sramreqfifo_rdata.woffset][8*i +: 8] = {8{sramreqfifo_rdata.mask[i]}};
+ end
+ end
+ // Mask the SRAM word, then select the TL-UL word at the recorded offset.
+ assign rdata = rdata_i & rmask;
+ assign rdata_tlword = rdata[sramreqfifo_rdata.woffset];
+ assign rspfifo_wdata = '{
+ data : rdata_tlword,
+ error: rerror_i[1] // Only care for Uncorrectable error
+ };
+ // The response FIFO is popped together with the request FIFO, but only for an
+ // error-free read (== binds tighter than &, so this is (op == OpRead) & ~error).
+ assign rspfifo_rready = (reqfifo_rdata.op == OpRead & ~reqfifo_rdata.error)
+ ? reqfifo_rready : 1'b0 ;
+ // This module only cares about uncorrectable errors.
+ logic unused_rerror;
+ assign unused_rerror = rerror_i[0];
+ // FIFO instance: REQ, RSP
+ // ReqFIFO stores the access type so that it can be matched to the response data.
+ // For instance, the SRAM accepts a write request but doesn't return an
+ // acknowledge. In this case, it may be hard to determine when the D
+ // response for the write should be sent out if reads/writes are
+ // interleaved. So, to keep responses in order (even though TL-UL allows
+ // out-of-order responses), storing the request is necessary. If the entry
+ // being read is a write op, it is safe to return the response right away. If it
+ // is a read request, the D response waits until the read data arrives.
+ // Notes:
+ // The outstanding+1 allows the reqfifo to absorb back-to-back transactions
+ // without any wait states. Alternatively, the depth can be kept as
+ // outstanding as long as the outgoing ready is qualified with the acceptance
+ // of the response in the same cycle. Doing so, however, creates a path from
+ // ready_i to ready_o, which may not be desirable.
+ // Request FIFO: one entry (req_t) per accepted request, Outstanding entries deep.
+ // Pass is disabled (presumably no same-cycle write-to-read bypass; confirm in fifo_sync).
+ // The fill level output is not used.
+ fifo_sync #(
+ .Width (ReqFifoWidth),
+ .Pass (1'b0),
+ .Depth (Outstanding)
+ ) u_reqfifo (
+ .clk_i,
+ .rst_ni,
+ .clr_i (1'b0),
+ .wvalid_i(reqfifo_wvalid),
+ .wready_o(reqfifo_wready),
+ .wdata_i (reqfifo_wdata),
+ .depth_o (),
+ .rvalid_o(reqfifo_rvalid),
+ .rready_i(reqfifo_rready),
+ .rdata_o (reqfifo_rdata)
+ );
+ // sramreqfifo:
+ // While the ReqFIFO holds the request until it is sent back via TL-UL, the
+ // sramreqfifo only needs to hold the mask and word offset until the read
+ // data returns from memory.
+ // Its rvalid_o is left unconnected: the pop strobe (rspfifo_wvalid) is not qualified
+ // with this FIFO being non-empty.
+ fifo_sync #(
+ .Width (SramReqFifoWidth),
+ .Pass (1'b0),
+ .Depth (Outstanding)
+ ) u_sramreqfifo (
+ .clk_i,
+ .rst_ni,
+ .clr_i (1'b0),
+ .wvalid_i(sramreqfifo_wvalid),
+ .wready_o(sramreqfifo_wready),
+ .wdata_i (sramreqfifo_wdata),
+ .depth_o (),
+ .rvalid_o(),
+ .rready_i(sramreqfifo_rready),
+ .rdata_o (sramreqfifo_rdata)
+ );
+  // Rationale for having #Outstanding depth in the response FIFO:
+  // In the normal case, if the host or the crossbar accepts the response data,
+  // the response FIFO isn't needed. But if there is any chance of back pressure,
+  // the response FIFO has to store the returned data so that data from the SRAM
+  // interface is not lost. Remember, the SRAM interface doesn't have a
+  // back-pressure signal such as read_ready.
+  // Pass is enabled on this FIFO only (the two request FIFOs use Pass = 0).
+  fifo_sync #(
+    .Width (RspFifoWidth),
+    .Pass  (1'b1),
+    .Depth (Outstanding)
+  ) u_rspfifo (
+    .clk_i,
+    .rst_ni,
+    .clr_i   (1'b0),
+    .wvalid_i(rspfifo_wvalid),
+    .wready_o(rspfifo_wready),
+    .wdata_i (rspfifo_wdata),
+    .depth_o (),
+    .rvalid_o(rspfifo_rvalid),
+    .rready_i(rspfifo_rready),
+    .rdata_o (rspfifo_rdata)
+  );
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..090b02b
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,85 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Description: UART top level wrapper file
+// `include ""
+// UART top level: connects the TL-UL register block (uart_reg_top) to the UART
+// datapath (uart_core) and exports the serial pins and the eight interrupt lines.
+module uart (
+  input           clk_i,
+  input           rst_ni,
+
+  // Bus Interface
+  input  tlul_pkg::tl_h2d_t tl_i,
+  output tlul_pkg::tl_d2h_t tl_o,
+
+  // Generic IO
+  input           cio_rx_i,
+  output logic    cio_tx_o,
+  output logic    cio_tx_en_o,
+
+  // Interrupts
+  output logic    intr_tx_watermark_o ,
+  output logic    intr_rx_watermark_o ,
+  output logic    intr_tx_empty_o  ,
+  output logic    intr_rx_overflow_o  ,
+  output logic    intr_rx_frame_err_o ,
+  output logic    intr_rx_break_err_o ,
+  output logic    intr_rx_timeout_o   ,
+  output logic    intr_rx_parity_err_o
+);
+
+  import uart_reg_pkg::*;
+
+  // Register block <-> core bundles
+  uart_reg2hw_t reg2hw;
+  uart_hw2reg_t hw2reg;
+
+  // Register file; devmode_i is tied high here (per the uart_reg_top port comment,
+  // this gives an explicit error return for unmapped register accesses)
+  uart_reg_top u_reg (
+    .clk_i,
+    .rst_ni,
+    .tl_i,
+    .tl_o,
+    .reg2hw,
+    .hw2reg,
+    .devmode_i  (1'b1)
+  );
+
+  // UART datapath and interrupt generation
+  uart_core uart_core (
+    .clk_i,
+    .rst_ni,
+    .reg2hw,
+    .hw2reg,
+
+    .rx    (cio_rx_i   ),
+    .tx    (cio_tx_o   ),
+
+    .intr_tx_watermark_o,
+    .intr_rx_watermark_o,
+    .intr_tx_empty_o,
+    .intr_rx_overflow_o,
+    .intr_rx_frame_err_o,
+    .intr_rx_break_err_o,
+    .intr_rx_timeout_o,
+    .intr_rx_parity_err_o
+  );
+
+  // always enable the driving out of TX
+  assign cio_tx_en_o = 1'b1;
+
+  // // Assert Known for outputs
+  // `ASSERT_KNOWN(txenKnown, cio_tx_en_o)
+  // `ASSERT_KNOWN(txKnown, cio_tx_o, clk_i, !rst_ni || !cio_tx_en_o)
+  // // Assert Known for interrupts
+  // `ASSERT_KNOWN(txWatermarkKnown, intr_tx_watermark_o)
+  // `ASSERT_KNOWN(rxWatermarkKnown, intr_rx_watermark_o)
+  // `ASSERT_KNOWN(txEmptyKnown, intr_tx_empty_o)
+  // `ASSERT_KNOWN(rxOverflowKnown, intr_rx_overflow_o)
+  // `ASSERT_KNOWN(rxFrameErrKnown, intr_rx_frame_err_o)
+  // `ASSERT_KNOWN(rxBreakErrKnown, intr_rx_break_err_o)
+  // `ASSERT_KNOWN(rxTimeoutKnown, intr_rx_timeout_o)
+  // `ASSERT_KNOWN(rxParityErrKnown, intr_rx_parity_err_o)
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..c205d90
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,490 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Description: UART core module
+// UART datapath: baud generator, TX/RX FIFOs, serializer/deserializer instances,
+// break detection, RX timeout and interrupt generation.
+module uart_core (
+  input                  clk_i,
+  input                  rst_ni,
+
+  // Register interface bundles
+  input  uart_reg_pkg::uart_reg2hw_t reg2hw,
+  output uart_reg_pkg::uart_hw2reg_t hw2reg,
+
+  // Serial lines
+  input                  rx,
+  output logic           tx,
+
+  // Interrupt outputs
+  output logic           intr_tx_watermark_o,
+  output logic           intr_rx_watermark_o,
+  output logic           intr_tx_empty_o,
+  output logic           intr_rx_overflow_o,
+  output logic           intr_rx_frame_err_o,
+  output logic           intr_rx_break_err_o,
+  output logic           intr_rx_timeout_o,
+  output logic           intr_rx_parity_err_o
+);
+
+  // The port list above must be closed before any module item
+  import uart_reg_pkg::*;
+ // Width of the NCO (baud rate) control field, taken from the register struct.
+ localparam int NcoWidth = $bits(reg2hw.ctrl.nco.q);
+ // Shift register of rx_in samples (shifted on tick_baud_x16), exported through hw2reg.val
+ logic [15:0] rx_val_q;
+ // Data at the read side of the RX FIFO
+ logic [7:0] uart_rdata;
+ logic tick_baud_x16, rx_tick_baud;
+ // FIFO fill levels
+ logic [5:0] tx_fifo_depth, rx_fifo_depth;
+ logic [5:0] rx_fifo_depth_prev_q;
+ // RX timeout counter and its configuration
+ logic [23:0] rx_timeout_count_d, rx_timeout_count_q, uart_rxto_val;
+ logic rx_fifo_depth_changed, uart_rxto_en;
+ // Decoded control register fields
+ logic tx_enable, rx_enable;
+ logic sys_loopback, line_loopback, rxnf_enable;
+ logic uart_fifo_rxrst, uart_fifo_txrst;
+ logic [2:0] uart_fifo_rxilvl;
+ logic [1:0] uart_fifo_txilvl;
+ logic ovrd_tx_en, ovrd_tx_val;
+ // TX path
+ logic [7:0] tx_fifo_data;
+ logic tx_fifo_rready, tx_fifo_rvalid;
+ logic tx_fifo_wready, tx_uart_idle;
+ logic tx_out;
+ logic tx_out_q;
+ // RX path
+ logic [7:0] rx_fifo_data;
+ logic rx_valid, rx_fifo_wvalid, rx_fifo_rvalid;
+ logic rx_fifo_wready, rx_uart_idle;
+ logic rx_sync;
+ logic rx_in;
+ // Break detection
+ logic break_err;
+ logic [4:0] allzero_cnt_d, allzero_cnt_q;
+ logic allzero_err, not_allzero_char;
+ // Interrupt events (inputs of the prim_intr_hw instances)
+ logic event_tx_watermark, event_rx_watermark, event_tx_empty, event_rx_overflow;
+ logic event_rx_frame_err, event_rx_break_err, event_rx_timeout, event_rx_parity_err;
+ // Current and previous-cycle watermark conditions, used for edge detection
+ logic tx_watermark_d, tx_watermark_prev_q;
+ logic rx_watermark_d, rx_watermark_prev_q;
+ logic tx_uart_idle_q;
+  // Decode of the control register fields coming from the register block
+  assign tx_enable        = reg2hw.ctrl.tx.q;
+  assign rx_enable        = reg2hw.ctrl.rx.q;
+  // RX noise filter enable: selects the majority-filtered RX sample (CTRL.nf field)
+  assign rxnf_enable      = reg2hw.ctrl.nf.q;
+  assign sys_loopback     = reg2hw.ctrl.slpbk.q;
+  assign line_loopback    = reg2hw.ctrl.llpbk.q;
+
+  // FIFO resets act only when the reset bit is set together with its qe strobe
+  assign uart_fifo_rxrst  = reg2hw.fifo_ctrl.rxrst.q & reg2hw.fifo_ctrl.rxrst.qe;
+  assign uart_fifo_txrst  = reg2hw.fifo_ctrl.txrst.q & reg2hw.fifo_ctrl.txrst.qe;
+  assign uart_fifo_rxilvl = reg2hw.fifo_ctrl.rxilvl.q;
+  assign uart_fifo_txilvl = reg2hw.fifo_ctrl.txilvl.q;
+
+  // TX pin override
+  assign ovrd_tx_en       = reg2hw.ovrd.txen.q;
+  assign ovrd_tx_val      = reg2hw.ovrd.txval.q;
+  // Break detection state:
+  //   BRK_CHK  - counting all-zero characters, a break event may be raised (reset state)
+  //   BRK_WAIT - break reported, waiting for the RX line to return high
+  typedef enum logic {
+    BRK_CHK,
+    BRK_WAIT
+  } break_st_e ;
+
+  break_st_e break_st_q;
+  // Count consecutive received characters that are all zero and carry a frame error.
+  // A received character that is not such a "break character" restarts the count.
+  assign allzero_err      = (rx_fifo_data == 8'h0) & event_rx_frame_err;
+  assign not_allzero_char = rx_valid & ~(event_rx_frame_err & (rx_fifo_data == 8'h0));
+
+  always_comb begin
+    // hold by default
+    allzero_cnt_d = allzero_cnt_q;
+    if ((break_st_q == BRK_WAIT) || not_allzero_char) begin
+      // restart while waiting for the line to recover, or on a normal character
+      allzero_cnt_d = 5'h0;
+    end else if (allzero_err) begin
+      // allzero_cnt_q[4] never be 1b without break_st_q as BRK_WAIT,
+      // so no saturation is applied here
+      allzero_cnt_d = allzero_cnt_q + 5'd1;
+    end
+  end
+
+  // The counter only advances while the receiver is enabled
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      allzero_cnt_q <= '0;
+    end else if (rx_enable) begin
+      allzero_cnt_q <= allzero_cnt_d;
+    end
+  end
+  // break_err edges in same cycle as event_rx_frame_err edges ; that way the
+  // reset-on-read works the same way for break and frame error interrupts.
+  // The threshold doubles with every step of CTRL.rxblvl: 2, 4, 8 or 16 characters.
+  assign break_err = allzero_cnt_d >= (5'd2 << reg2hw.ctrl.rxblvl.q);
+ // Break state register.
+ // BRK_CHK -> BRK_WAIT when a break event is raised; BRK_WAIT -> BRK_CHK once the
+ // (possibly looped-back) RX input rx_in is sampled high again.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) break_st_q <= BRK_CHK;
+ else begin
+ unique case (break_st_q)
+ BRK_CHK: begin
+ if (event_rx_break_err) break_st_q <= BRK_WAIT;
+ end
+ BRK_WAIT: begin
+ if (rx_in) break_st_q <= BRK_CHK;
+ end
+ // No default branch: both values of the single-bit enum are listed above.
+ // default: begin
+ // break_st_q <= BRK_CHK;
+ //end
+ endcase
+ end
+ end
+  // Status values reported to the register block
+  assign hw2reg.val.d  = rx_val_q;
+
+  assign hw2reg.rdata.d = uart_rdata;
+
+  assign hw2reg.status.rxempty.d     = ~rx_fifo_rvalid;
+  assign hw2reg.status.rxidle.d      = rx_uart_idle;
+  // TX counts as idle only when the transmitter is idle and nothing is queued
+  assign hw2reg.status.txidle.d      = tx_uart_idle & ~tx_fifo_rvalid;
+  assign hw2reg.status.txempty.d     = ~tx_fifo_rvalid;
+  assign hw2reg.status.rxfull.d      = ~rx_fifo_wready;
+  assign hw2reg.status.txfull.d      = ~tx_fifo_wready;
+
+  assign hw2reg.fifo_status.txlvl.d  = tx_fifo_depth;
+  assign hw2reg.fifo_status.rxlvl.d  = rx_fifo_depth;
+
+  // resets are self-clearing, so need to update FIFO_CTRL
+  // The hardware never updates the interrupt level fields: de is tied low and d is a
+  // don't-care constant.
+  assign hw2reg.fifo_ctrl.rxilvl.de = 1'b0;
+  assign hw2reg.fifo_ctrl.rxilvl.d  = 3'h0;
+  assign hw2reg.fifo_ctrl.txilvl.de = 1'b0;
+  assign hw2reg.fifo_ctrl.txilvl.d  = 2'h0;
+  // NCO 16x Baud Generator
+  // output clock rate is:
+  //      Fin * (NCO/2**NcoWidth)
+  // The accumulator is one bit wider than the NCO value; that extra bit is the carry of
+  // the addition and is used as the 16x oversampling tick. All widths follow NcoWidth so
+  // that the logic stays correct if the width of the CTRL.nco field changes.
+  logic [NcoWidth:0]     nco_sum_q; // extra bit to get the carry
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      nco_sum_q <= '0;
+    end else if (tx_enable || rx_enable) begin
+      // the previous carry is dropped before accumulating again
+      nco_sum_q <= {1'b0,nco_sum_q[NcoWidth-1:0]} + {1'b0,reg2hw.ctrl.nco.q[NcoWidth-1:0]};
+    end
+  end
+
+  assign tick_baud_x16 = nco_sum_q[NcoWidth];
+ //////////////
+ // TX Logic //
+ //////////////
+ // Pop the next byte from the TX FIFO when the transmitter is idle, data is available
+ // and transmission is enabled. The same strobe starts the transmitter (wr below).
+ assign tx_fifo_rready = tx_uart_idle & tx_fifo_rvalid & tx_enable;
+ // TX FIFO: 32 entries of 8 bits, written by the WDATA register write strobe and
+ // cleared by the FIFO_CTRL TX reset.
+ fifo_sync #(
+ .Width (8),
+ .Pass (1'b0),
+ .Depth (32)
+ ) u_uart_txfifo (
+ .clk_i,
+ .rst_ni,
+ .clr_i (uart_fifo_txrst),
+ .wvalid_i(reg2hw.wdata.qe),
+ .wready_o(tx_fifo_wready),
+ .wdata_i (reg2hw.wdata.q),
+ .depth_o (tx_fifo_depth),
+ .rvalid_o(tx_fifo_rvalid),
+ .rready_i(tx_fifo_rready),
+ .rdata_o (tx_fifo_data)
+ );
+ // Transmitter. The parity bit is computed here: XOR reduction of the data byte,
+ // XORed with the parity_odd control bit.
+ uart_tx uart_tx (
+ .clk_i,
+ .rst_ni,
+ .tx_enable,
+ .tick_baud_x16,
+ .parity_enable (reg2hw.ctrl.parity_en.q),
+ .wr (tx_fifo_rready),
+ .wr_parity ((^tx_fifo_data) ^ reg2hw.ctrl.parity_odd.q),
+ .wr_data (tx_fifo_data),
+ .idle (tx_uart_idle),
+ .tx (tx_out)
+ );
+  // TX pin: in line loopback the RX pin is forwarded directly, otherwise the
+  // registered transmitter output is driven.
+  assign tx = line_loopback ? rx : tx_out_q ;
+
+  // Registered TX output with priority: software override first, then system
+  // loopback (pin held high), then the transmitter output. Resets to high.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      tx_out_q <= 1'b1;
+    end else begin
+      tx_out_q <= ovrd_tx_en   ? ovrd_tx_val :
+                  sys_loopback ? 1'b1        :
+                                 tx_out;
+    end
+  end
+ //////////////
+ // RX Logic //
+ //////////////
+ // sync the incoming data
+ prim_generic_flop_2sync #(
+ .Width(1),
+ .ResetValue(1'b1)
+ ) sync_rx (
+ .clk_i,
+ .rst_ni,
+ .d_i(rx),
+ .q_o(rx_sync)
+ );
+ // Majority filter over three consecutive samples of the synchronized RX input,
+ // used in the UART to ignore brief noise spikes.
+ logic rx_sync_q1, rx_sync_q2, rx_in_mx, rx_in_maj;
+ // Two-stage history of rx_sync, reset to 1 like the synchronizer output
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rx_sync_q1 <= 1'b1;
+ rx_sync_q2 <= 1'b1;
+ end else begin
+ rx_sync_q1 <= rx_sync;
+ rx_sync_q2 <= rx_sync_q1;
+ end
+ end
+ // High when at least two of the three samples are high
+ assign rx_in_maj = (rx_sync & rx_sync_q1) |
+ (rx_sync & rx_sync_q2) |
+ (rx_sync_q1 & rx_sync_q2);
+ // The filter is bypassed unless the noise filter is enabled
+ assign rx_in_mx = rxnf_enable ? rx_in_maj : rx_sync;
+ // Receiver input selection: system loopback feeds the transmitter output back,
+ // line loopback holds the receiver input high, otherwise the (filtered) pin is used.
+ assign rx_in = sys_loopback ? tx_out :
+ line_loopback ? 1'b1 :
+ rx_in_mx;
+ // Receiver instance. Its frame_err and rx_parity_err outputs are used directly
+ // as the frame error / parity error interrupt events.
+ uart_rx uart_rx (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .rx_enable (rx_enable),
+ .tick_baud_x16 (tick_baud_x16),
+ .parity_enable (reg2hw.ctrl.parity_en.q),
+ .parity_odd (reg2hw.ctrl.parity_odd.q),
+ .tick_baud (rx_tick_baud),
+ .rx_valid (rx_valid),
+ .rx_data (rx_fifo_data),
+ .idle (rx_uart_idle),
+ .frame_err (event_rx_frame_err),
+ .rx (rx_in),
+ .rx_parity_err (event_rx_parity_err)
+ );
+  // Only characters received without frame or parity error are stored
+  assign rx_fifo_wvalid = rx_valid & ~event_rx_frame_err & ~event_rx_parity_err;
+
+  // RX FIFO: 32 entries of 8 bits, cleared by the FIFO_CTRL RX reset.
+  // An entry is popped by the read strobe of the RDATA register (reg2hw.rdata.re),
+  // i.e. when software reads the received byte.
+  fifo_sync #(
+    .Width (8),
+    .Pass  (1'b0),
+    .Depth (32)
+  ) u_uart_rxfifo (
+    .clk_i,
+    .rst_ni,
+    .clr_i   (uart_fifo_rxrst),
+    .wvalid_i(rx_fifo_wvalid),
+    .wready_o(rx_fifo_wready),
+    .wdata_i (rx_fifo_data),
+    .depth_o (rx_fifo_depth),
+    .rvalid_o(rx_fifo_rvalid),
+    .rready_i(reg2hw.rdata.re),
+    .rdata_o (uart_rdata)
+  );
+ // History of the last 16 rx_in samples taken at the 16x baud tick (newest in bit 0);
+ // exported to software through hw2reg.val.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) rx_val_q <= 16'h0;
+ else if (tick_baud_x16) rx_val_q <= {rx_val_q[14:0], rx_in};
+ end
+ ////////////////////////
+ // Interrupt & Status //
+ ////////////////////////
+  // TX watermark condition: the TX FIFO level is below the configured threshold.
+  // The threshold doubles with every step of FIFO_CTRL.txilvl: 2, 4, 8 or 16 entries.
+  assign tx_watermark_d = (tx_fifo_depth < (6'd2 << uart_fifo_txilvl));
+
+  // The interrupt event is the rising edge of the condition
+  assign event_tx_watermark = ~tx_watermark_prev_q & tx_watermark_d;
+ // The empty condition handling is a bit different.
+ // If empty rising conditions were detected directly, then every first write of a burst
+ // would trigger an empty. This is due to the fact that the uart_tx fsm immediately
+ // withdraws the content and asserts "empty".
+ // To guard against this false trigger, empty is qualified with idle to extend the window
+ // in which software has an opportunity to deposit new data.
+ // However, if software deposit speed is TOO slow, this would still be an issue.
+ //
+ // The alternative software fix is to disable tx_enable until it has a chance to
+ // burst in the desired amount of data.
+ // Event: the transmitter becomes idle (rising edge of tx_uart_idle) while the TX FIFO
+ // holds no data.
+ assign event_tx_empty = ~tx_fifo_rvalid & ~tx_uart_idle_q & tx_uart_idle;
+ // Previous-cycle copies used for the edge detection of the watermark and idle conditions
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ tx_watermark_prev_q <= 1'b1; // by default watermark condition is true
+ rx_watermark_prev_q <= 1'b0; // by default watermark condition is false
+ tx_uart_idle_q <= 1'b1;
+ end else begin
+ tx_watermark_prev_q <= tx_watermark_d;
+ rx_watermark_prev_q <= rx_watermark_d;
+ tx_uart_idle_q <= tx_uart_idle;
+ end
+ end
+ // RX watermark condition: the RX FIFO level has reached the threshold selected by
+ // FIFO_CTRL.rxilvl (1, 4, 8, 16 or 30 entries). Settings 5..7 never raise the condition.
+ always_comb begin
+ unique case(uart_fifo_rxilvl)
+ 3'h0: rx_watermark_d = (rx_fifo_depth >= 6'd1);
+ 3'h1: rx_watermark_d = (rx_fifo_depth >= 6'd4);
+ 3'h2: rx_watermark_d = (rx_fifo_depth >= 6'd8);
+ 3'h3: rx_watermark_d = (rx_fifo_depth >= 6'd16);
+ 3'h4: rx_watermark_d = (rx_fifo_depth >= 6'd30);
+ default: rx_watermark_d = 1'b0;
+ endcase
+ end
+ // The interrupt event is the rising edge of the condition
+ assign event_rx_watermark = rx_watermark_d & ~rx_watermark_prev_q;
+  // rx timeout interrupt
+  // Counts RX baud ticks while bytes are pending in the RX FIFO and the FIFO level does
+  // not change; the event is raised when the count equals the programmed value.
+  assign uart_rxto_en  = reg2hw.timeout_ctrl.en.q;
+  assign uart_rxto_val = reg2hw.timeout_ctrl.val.q;
+
+  assign rx_fifo_depth_changed = (rx_fifo_depth != rx_fifo_depth_prev_q);
+
+  assign rx_timeout_count_d =
+              // don't count if timeout feature not enabled ;
+              // will never reach timeout val + lower power
+              (uart_rxto_en == 1'b0)              ? 24'd0 :
+              // reset count if timeout interrupt is set
+              event_rx_timeout                    ? 24'd0 :
+              // reset count upon change in fifo level: covers both read and receiving a new byte
+              rx_fifo_depth_changed               ? 24'd0 :
+              // reset count if no bytes are pending
+              // (literal sized to the 6-bit FIFO depth to avoid a width mismatch)
+              (rx_fifo_depth == 6'd0)             ? 24'd0 :
+              // stop the count at timeout value (this will set the interrupt)
+              //   Removed below line as when the timeout reaches the value,
+              //   event occurred, and timeout value reset to 0h.
+              //(rx_timeout_count_q == uart_rxto_val) ? rx_timeout_count_q :
+              // increment if at rx baud tick
+              rx_tick_baud                        ? (rx_timeout_count_q + 24'd1) :
+              rx_timeout_count_q;
+
+  assign event_rx_timeout = (rx_timeout_count_q == uart_rxto_val) & uart_rxto_en;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      rx_timeout_count_q   <= 24'd0;
+      rx_fifo_depth_prev_q <= 6'd0;
+    end else begin
+      rx_timeout_count_q    <= rx_timeout_count_d;
+      rx_fifo_depth_prev_q  <= rx_fifo_depth;
+    end
+  end
+
+  // Overflow: a valid character arrives while the RX FIFO cannot accept it
+  assign event_rx_overflow  = rx_fifo_wvalid & ~rx_fifo_wready;
+  // A break is reported once per break condition (only in the BRK_CHK state)
+  assign event_rx_break_err = break_err & (break_st_q == BRK_CHK);
+  // instantiate interrupt hardware primitives
+
+  // TX watermark interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_tx_watermark (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_tx_watermark),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.tx_watermark.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.tx_watermark.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.tx_watermark.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.tx_watermark.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.tx_watermark.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.tx_watermark.d),
+    .intr_o                 (intr_tx_watermark_o)
+  );
+  // RX watermark interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_rx_watermark (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_rx_watermark),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_watermark.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_watermark.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_watermark.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_watermark.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_watermark.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_watermark.d),
+    .intr_o                 (intr_rx_watermark_o)
+  );
+  // TX empty interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_tx_empty (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_tx_empty),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.tx_empty.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.tx_empty.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.tx_empty.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.tx_empty.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.tx_empty.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.tx_empty.d),
+    .intr_o                 (intr_tx_empty_o)
+  );
+  // RX overflow interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_rx_overflow (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_rx_overflow),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_overflow.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_overflow.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_overflow.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_overflow.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_overflow.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_overflow.d),
+    .intr_o                 (intr_rx_overflow_o)
+  );
+  // RX frame error interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_rx_frame_err (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_rx_frame_err),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_frame_err.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_frame_err.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_frame_err.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_frame_err.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_frame_err.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_frame_err.d),
+    .intr_o                 (intr_rx_frame_err_o)
+  );
+  // RX break error interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_rx_break_err (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_rx_break_err),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_break_err.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_break_err.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_break_err.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_break_err.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_break_err.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_break_err.d),
+    .intr_o                 (intr_rx_break_err_o)
+  );
+  // RX timeout interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_rx_timeout (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_rx_timeout),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_timeout.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_timeout.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_timeout.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_timeout.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_timeout.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_timeout.d),
+    .intr_o                 (intr_rx_timeout_o)
+  );
+  // RX parity error interrupt; the state update enable goes back to the register block
+  prim_intr_hw #(.Width(1)) intr_hw_rx_parity_err (
+    .clk_i,
+    .rst_ni,
+    .event_intr_i           (event_rx_parity_err),
+    .reg2hw_intr_enable_q_i (reg2hw.intr_enable.rx_parity_err.q),
+    .reg2hw_intr_test_q_i   (reg2hw.intr_test.rx_parity_err.q),
+    .reg2hw_intr_test_qe_i  (reg2hw.intr_test.rx_parity_err.qe),
+    .reg2hw_intr_state_q_i  (reg2hw.intr_state.rx_parity_err.q),
+    .hw2reg_intr_state_de_o (hw2reg.intr_state.rx_parity_err.de),
+    .hw2reg_intr_state_d_o  (hw2reg.intr_state.rx_parity_err.d),
+    .intr_o                 (intr_rx_parity_err_o)
+  );
+
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..22ae7ac
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,369 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Package auto-generated by `reggen` containing data structure
+package uart_reg_pkg;
+ // Address width within the block
+ parameter int BlockAw = 6;
+ ////////////////////////////
+ // Typedefs for registers //
+ ////////////////////////////
+  // INTR_STATE, register-to-hardware direction: one single-bit value per interrupt.
+  // Member order defines the packed bit layout and must not be changed.
+  typedef struct packed {
+    struct packed { logic q; } tx_watermark;
+    struct packed { logic q; } rx_watermark;
+    struct packed { logic q; } tx_empty;
+    struct packed { logic q; } rx_overflow;
+    struct packed { logic q; } rx_frame_err;
+    struct packed { logic q; } rx_break_err;
+    struct packed { logic q; } rx_timeout;
+    struct packed { logic q; } rx_parity_err;
+  } uart_reg2hw_intr_state_reg_t;
+  // INTR_ENABLE, register-to-hardware direction: one single-bit value per interrupt.
+  // Member order defines the packed bit layout and must not be changed.
+  typedef struct packed {
+    struct packed { logic q; } tx_watermark;
+    struct packed { logic q; } rx_watermark;
+    struct packed { logic q; } tx_empty;
+    struct packed { logic q; } rx_overflow;
+    struct packed { logic q; } rx_frame_err;
+    struct packed { logic q; } rx_break_err;
+    struct packed { logic q; } rx_timeout;
+    struct packed { logic q; } rx_parity_err;
+  } uart_reg2hw_intr_enable_reg_t;
+  // INTR_TEST, register-to-hardware direction: a value bit (q) and a qualifier bit (qe)
+  // per interrupt. Member order defines the packed bit layout and must not be changed.
+  typedef struct packed {
+    struct packed { logic q; logic qe; } tx_watermark;
+    struct packed { logic q; logic qe; } rx_watermark;
+    struct packed { logic q; logic qe; } tx_empty;
+    struct packed { logic q; logic qe; } rx_overflow;
+    struct packed { logic q; logic qe; } rx_frame_err;
+    struct packed { logic q; logic qe; } rx_break_err;
+    struct packed { logic q; logic qe; } rx_timeout;
+    struct packed { logic q; logic qe; } rx_parity_err;
+  } uart_reg2hw_intr_test_reg_t;
+  // CTRL, register-to-hardware direction: seven single-bit controls, the 2-bit break
+  // level and the 16-bit NCO value. Member order defines the packed bit layout.
+  typedef struct packed {
+    struct packed { logic        q; } tx;
+    struct packed { logic        q; } rx;
+    struct packed { logic        q; } nf;
+    struct packed { logic        q; } slpbk;
+    struct packed { logic        q; } llpbk;
+    struct packed { logic        q; } parity_en;
+    struct packed { logic        q; } parity_odd;
+    struct packed { logic [1:0]  q; } rxblvl;
+    struct packed { logic [15:0] q; } nco;
+  } uart_reg2hw_ctrl_reg_t;
+  // STATUS, register-to-hardware direction: a value bit (q) and a read qualifier bit (re)
+  // per status flag. Member order defines the packed bit layout and must not be changed.
+  typedef struct packed {
+    struct packed { logic q; logic re; } txfull;
+    struct packed { logic q; logic re; } rxfull;
+    struct packed { logic q; logic re; } txempty;
+    struct packed { logic q; logic re; } txidle;
+    struct packed { logic q; logic re; } rxidle;
+    struct packed { logic q; logic re; } rxempty;
+  } uart_reg2hw_status_reg_t;
+ // RDATA, register-to-hardware direction: 8-bit value plus the read qualifier bit (re).
+ typedef struct packed {
+ logic [7:0] q;
+ logic re;
+ } uart_reg2hw_rdata_reg_t;
+ // WDATA, register-to-hardware direction: 8-bit value plus the qualifier bit (qe).
+ typedef struct packed {
+ logic [7:0] q;
+ logic qe;
+ } uart_reg2hw_wdata_reg_t;
+  // FIFO_CTRL, register-to-hardware direction: FIFO reset bits and interrupt level
+  // selections, each with a qualifier bit (qe). Member order defines the packed layout.
+  typedef struct packed {
+    struct packed { logic       q; logic qe; } rxrst;
+    struct packed { logic       q; logic qe; } txrst;
+    struct packed { logic [2:0] q; logic qe; } rxilvl;
+    struct packed { logic [1:0] q; logic qe; } txilvl;
+  } uart_reg2hw_fifo_ctrl_reg_t;
+  // OVRD, register-to-hardware direction: TX override enable and override value.
+  typedef struct packed {
+    struct packed { logic q; } txen;
+    struct packed { logic q; } txval;
+  } uart_reg2hw_ovrd_reg_t;
+
+  // TIMEOUT_CTRL, register-to-hardware direction: 24-bit timeout value and enable bit.
+  typedef struct packed {
+    struct packed { logic [23:0] q; } val;
+    struct packed { logic        q; } en;
+  } uart_reg2hw_timeout_ctrl_reg_t;
+  // INTR_STATE, hardware-to-register direction: a data bit (d) and a data-enable bit (de)
+  // per interrupt. Member order defines the packed bit layout and must not be changed.
+  typedef struct packed {
+    struct packed { logic d; logic de; } tx_watermark;
+    struct packed { logic d; logic de; } rx_watermark;
+    struct packed { logic d; logic de; } tx_empty;
+    struct packed { logic d; logic de; } rx_overflow;
+    struct packed { logic d; logic de; } rx_frame_err;
+    struct packed { logic d; logic de; } rx_break_err;
+    struct packed { logic d; logic de; } rx_timeout;
+    struct packed { logic d; logic de; } rx_parity_err;
+  } uart_hw2reg_intr_state_reg_t;
+  // STATUS, hardware-to-register direction: one data bit per status flag.
+  // Member order defines the packed bit layout and must not be changed.
+  typedef struct packed {
+    struct packed { logic d; } txfull;
+    struct packed { logic d; } rxfull;
+    struct packed { logic d; } txempty;
+    struct packed { logic d; } txidle;
+    struct packed { logic d; } rxidle;
+    struct packed { logic d; } rxempty;
+  } uart_hw2reg_status_reg_t;
+  // RDATA, hardware-to-register direction: 8-bit data.
+  typedef struct packed {
+    logic [7:0] d;
+  } uart_hw2reg_rdata_reg_t;
+
+  // FIFO_CTRL, hardware-to-register direction: interrupt level fields with data enable.
+  typedef struct packed {
+    struct packed { logic [2:0] d; logic de; } rxilvl;
+    struct packed { logic [1:0] d; logic de; } txilvl;
+  } uart_hw2reg_fifo_ctrl_reg_t;
+
+  // FIFO_STATUS, hardware-to-register direction: 6-bit TX and RX fill levels.
+  typedef struct packed {
+    struct packed { logic [5:0] d; } txlvl;
+    struct packed { logic [5:0] d; } rxlvl;
+  } uart_hw2reg_fifo_status_reg_t;
+
+  // VAL, hardware-to-register direction: 16-bit data.
+  typedef struct packed {
+    logic [15:0] d;
+  } uart_hw2reg_val_reg_t;
+ ///////////////////////////////////////
+ // Register to internal design logic //
+ ///////////////////////////////////////
+ // Bundle of all register-to-hardware structs, 125 bits in total. The bit ranges in the
+ // trailing comments follow from the member widths, first member in the most
+ // significant position.
+ typedef struct packed {
+ uart_reg2hw_intr_state_reg_t intr_state; // [124:117]
+ uart_reg2hw_intr_enable_reg_t intr_enable; // [116:109]
+ uart_reg2hw_intr_test_reg_t intr_test; // [108:93]
+ uart_reg2hw_ctrl_reg_t ctrl; // [92:68]
+ uart_reg2hw_status_reg_t status; // [67:56]
+ uart_reg2hw_rdata_reg_t rdata; // [55:47]
+ uart_reg2hw_wdata_reg_t wdata; // [46:38]
+ uart_reg2hw_fifo_ctrl_reg_t fifo_ctrl; // [37:27]
+ uart_reg2hw_ovrd_reg_t ovrd; // [26:25]
+ uart_reg2hw_timeout_ctrl_reg_t timeout_ctrl; // [24:0]
+ } uart_reg2hw_t;
+ ///////////////////////////////////////
+ // Internal design logic to register //
+ ///////////////////////////////////////
+ // Bundle of all hardware-to-register structs, 65 bits in total.
+ typedef struct packed {
+ uart_hw2reg_intr_state_reg_t intr_state; // [64:49]
+ uart_hw2reg_status_reg_t status; // [48:43]
+ uart_hw2reg_rdata_reg_t rdata; // [42:35]
+ uart_hw2reg_fifo_ctrl_reg_t fifo_ctrl; // [34:28]
+ uart_hw2reg_fifo_status_reg_t fifo_status; // [27:16]
+ uart_hw2reg_val_reg_t val; // [15:0]
+ } uart_hw2reg_t;
+ // Register Address
+ // Byte offsets of the twelve registers inside the block; consecutive registers are
+ // 4 bytes apart (0x00 .. 0x2c) and each offset is BlockAw bits wide.
+ parameter logic [BlockAw-1:0] UART_INTR_STATE_OFFSET = 6'h 0;
+ parameter logic [BlockAw-1:0] UART_INTR_ENABLE_OFFSET = 6'h 4;
+ parameter logic [BlockAw-1:0] UART_INTR_TEST_OFFSET = 6'h 8;
+ parameter logic [BlockAw-1:0] UART_CTRL_OFFSET = 6'h c;
+ parameter logic [BlockAw-1:0] UART_STATUS_OFFSET = 6'h 10;
+ parameter logic [BlockAw-1:0] UART_RDATA_OFFSET = 6'h 14;
+ parameter logic [BlockAw-1:0] UART_WDATA_OFFSET = 6'h 18;
+ parameter logic [BlockAw-1:0] UART_FIFO_CTRL_OFFSET = 6'h 1c;
+ parameter logic [BlockAw-1:0] UART_FIFO_STATUS_OFFSET = 6'h 20;
+ parameter logic [BlockAw-1:0] UART_OVRD_OFFSET = 6'h 24;
+ parameter logic [BlockAw-1:0] UART_VAL_OFFSET = 6'h 28;
+ parameter logic [BlockAw-1:0] UART_TIMEOUT_CTRL_OFFSET = 6'h 2c;
+  // Register Index
+  // One enumerator per register, in address order; the values (0..11) are the indices
+  // used for the UART_PERMIT table.
+  typedef enum int {
+    UART_INTR_STATE,
+    UART_INTR_ENABLE,
+    UART_INTR_TEST,
+    UART_CTRL,
+    UART_STATUS,
+    UART_RDATA,
+    UART_WDATA,
+    UART_FIFO_CTRL,
+    UART_FIFO_STATUS,
+    UART_OVRD,
+    UART_VAL,
+    UART_TIMEOUT_CTRL
+  } uart_id_e;
+  // Register width information to check illegal writes
+  // One 4-bit entry per register, indexed in register order.
+  parameter logic [3:0] UART_PERMIT [12] = '{
+    4'b 0001, // index[ 0] UART_INTR_STATE
+    4'b 0001, // index[ 1] UART_INTR_ENABLE
+    4'b 0001, // index[ 2] UART_INTR_TEST
+    4'b 1111, // index[ 3] UART_CTRL
+    4'b 0001, // index[ 4] UART_STATUS
+    4'b 0001, // index[ 5] UART_RDATA
+    4'b 0001, // index[ 6] UART_WDATA
+    4'b 0001, // index[ 7] UART_FIFO_CTRL
+    4'b 0111, // index[ 8] UART_FIFO_STATUS
+    4'b 0001, // index[ 9] UART_OVRD
+    4'b 0011, // index[10] UART_VAL
+    4'b 1111  // index[11] UART_TIMEOUT_CTRL
+  };
+
+endpackage
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..4342e2a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,1677 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Register Top module auto-generated by `reggen`
+// `include ""
+// uart_reg_top: register block of the UART. It connects the TL-UL port
+// (tl_i / tl_o) to the individual register fields and exchanges field values
+// with the UART core through reg2hw / hw2reg.
+module uart_reg_top (
+ input clk_i,
+ input rst_ni,
+ // Below Register interface can be changed
+ input tlul_pkg::tl_h2d_t tl_i,
+ output tlul_pkg::tl_d2h_t tl_o,
+ // To HW
+ output uart_reg_pkg::uart_reg2hw_t reg2hw, // Write
+ input uart_reg_pkg::uart_hw2reg_t hw2reg, // Read
+ // Config
+ input devmode_i // If 1, explicit error return for unmapped register access
+);
+ import uart_reg_pkg::* ;
+ localparam int AW = 6; // register address width in bits
+ localparam int DW = 32; // register data width in bits
+ localparam int DBW = DW/8; // Byte Width
+ // Decoded register-bus signals produced by the TL-UL adapter below.
+ logic reg_we, reg_re;
+ logic [AW-1:0] reg_addr;
+ logic [DW-1:0] reg_wdata;
+ logic [DBW-1:0] reg_be;
+ logic [DW-1:0] reg_rdata, reg_rdata_next;
+ logic reg_error;
+ logic addrmiss, wr_err;
+ // The TL-UL request/response pass straight through to the adapter.
+ tlul_pkg::tl_h2d_t tl_reg_h2d;
+ tlul_pkg::tl_d2h_t tl_reg_d2h;
+ assign tl_reg_h2d = tl_i;
+ assign tl_o = tl_reg_d2h;
+ tlul_adapter_reg #(.RegAw(AW), .RegDw(DW)) u_reg_if (
+   .clk_i   (clk_i),
+   .rst_ni  (rst_ni),
+   .tl_i    (tl_reg_h2d),
+   .tl_o    (tl_reg_d2h),
+   .we_o    (reg_we),
+   .re_o    (reg_re),
+   .addr_o  (reg_addr),
+   .wdata_o (reg_wdata),
+   .be_o    (reg_be),
+   .rdata_i (reg_rdata),
+   .error_i (reg_error)
+ );
+ // Read data comes from the read mux; an error is flagged for an illegal
+ // sub-word write, or for an unmapped address when devmode_i is set.
+ assign reg_rdata = reg_rdata_next;
+ assign reg_error = wr_err | (devmode_i & addrmiss);
+ // Software-side field signals, one group per register field.
+ // Naming: <reg>_<field>_<suffix>, or <reg>_<suffix> for single-field registers.
+ // Suffixes: qs = read value, wd = write data, we = write enable, re = read enable.
+ // INTR_STATE
+ logic intr_state_tx_watermark_qs, intr_state_tx_watermark_wd, intr_state_tx_watermark_we;
+ logic intr_state_rx_watermark_qs, intr_state_rx_watermark_wd, intr_state_rx_watermark_we;
+ logic intr_state_tx_empty_qs, intr_state_tx_empty_wd, intr_state_tx_empty_we;
+ logic intr_state_rx_overflow_qs, intr_state_rx_overflow_wd, intr_state_rx_overflow_we;
+ logic intr_state_rx_frame_err_qs, intr_state_rx_frame_err_wd, intr_state_rx_frame_err_we;
+ logic intr_state_rx_break_err_qs, intr_state_rx_break_err_wd, intr_state_rx_break_err_we;
+ logic intr_state_rx_timeout_qs, intr_state_rx_timeout_wd, intr_state_rx_timeout_we;
+ logic intr_state_rx_parity_err_qs, intr_state_rx_parity_err_wd, intr_state_rx_parity_err_we;
+ // INTR_ENABLE
+ logic intr_enable_tx_watermark_qs, intr_enable_tx_watermark_wd, intr_enable_tx_watermark_we;
+ logic intr_enable_rx_watermark_qs, intr_enable_rx_watermark_wd, intr_enable_rx_watermark_we;
+ logic intr_enable_tx_empty_qs, intr_enable_tx_empty_wd, intr_enable_tx_empty_we;
+ logic intr_enable_rx_overflow_qs, intr_enable_rx_overflow_wd, intr_enable_rx_overflow_we;
+ logic intr_enable_rx_frame_err_qs, intr_enable_rx_frame_err_wd, intr_enable_rx_frame_err_we;
+ logic intr_enable_rx_break_err_qs, intr_enable_rx_break_err_wd, intr_enable_rx_break_err_we;
+ logic intr_enable_rx_timeout_qs, intr_enable_rx_timeout_wd, intr_enable_rx_timeout_we;
+ logic intr_enable_rx_parity_err_qs, intr_enable_rx_parity_err_wd, intr_enable_rx_parity_err_we;
+ // INTR_TEST (write only)
+ logic intr_test_tx_watermark_wd, intr_test_tx_watermark_we;
+ logic intr_test_rx_watermark_wd, intr_test_rx_watermark_we;
+ logic intr_test_tx_empty_wd, intr_test_tx_empty_we;
+ logic intr_test_rx_overflow_wd, intr_test_rx_overflow_we;
+ logic intr_test_rx_frame_err_wd, intr_test_rx_frame_err_we;
+ logic intr_test_rx_break_err_wd, intr_test_rx_break_err_we;
+ logic intr_test_rx_timeout_wd, intr_test_rx_timeout_we;
+ logic intr_test_rx_parity_err_wd, intr_test_rx_parity_err_we;
+ // CTRL
+ logic ctrl_tx_qs, ctrl_tx_wd, ctrl_tx_we;
+ logic ctrl_rx_qs, ctrl_rx_wd, ctrl_rx_we;
+ logic ctrl_nf_qs, ctrl_nf_wd, ctrl_nf_we;
+ logic ctrl_slpbk_qs, ctrl_slpbk_wd, ctrl_slpbk_we;
+ logic ctrl_llpbk_qs, ctrl_llpbk_wd, ctrl_llpbk_we;
+ logic ctrl_parity_en_qs, ctrl_parity_en_wd, ctrl_parity_en_we;
+ logic ctrl_parity_odd_qs, ctrl_parity_odd_wd, ctrl_parity_odd_we;
+ logic [1:0] ctrl_rxblvl_qs, ctrl_rxblvl_wd;
+ logic ctrl_rxblvl_we;
+ logic [15:0] ctrl_nco_qs, ctrl_nco_wd;
+ logic ctrl_nco_we;
+ // STATUS (read only)
+ logic status_txfull_qs, status_txfull_re;
+ logic status_rxfull_qs, status_rxfull_re;
+ logic status_txempty_qs, status_txempty_re;
+ logic status_txidle_qs, status_txidle_re;
+ logic status_rxidle_qs, status_rxidle_re;
+ logic status_rxempty_qs, status_rxempty_re;
+ // RDATA / WDATA
+ logic [7:0] rdata_qs;
+ logic rdata_re;
+ logic [7:0] wdata_wd;
+ logic wdata_we;
+ // FIFO_CTRL
+ logic fifo_ctrl_rxrst_wd, fifo_ctrl_rxrst_we;
+ logic fifo_ctrl_txrst_wd, fifo_ctrl_txrst_we;
+ logic [2:0] fifo_ctrl_rxilvl_qs, fifo_ctrl_rxilvl_wd;
+ logic fifo_ctrl_rxilvl_we;
+ logic [1:0] fifo_ctrl_txilvl_qs, fifo_ctrl_txilvl_wd;
+ logic fifo_ctrl_txilvl_we;
+ // FIFO_STATUS (read only)
+ logic [5:0] fifo_status_txlvl_qs;
+ logic fifo_status_txlvl_re;
+ logic [5:0] fifo_status_rxlvl_qs;
+ logic fifo_status_rxlvl_re;
+ // OVRD
+ logic ovrd_txen_qs, ovrd_txen_wd, ovrd_txen_we;
+ logic ovrd_txval_qs, ovrd_txval_wd, ovrd_txval_we;
+ // VAL (read only)
+ logic [15:0] val_qs;
+ logic val_re;
+ // TIMEOUT_CTRL
+ logic [23:0] timeout_ctrl_val_qs, timeout_ctrl_val_wd;
+ logic timeout_ctrl_val_we;
+ logic timeout_ctrl_en_qs, timeout_ctrl_en_wd, timeout_ctrl_en_we;
+ // Register instances
+ // R[intr_state]: V(False)
+ // Eight single-bit fields. Each one takes a write from the register
+ // interface (we/wd) and an update from the UART core (de/d); the core-side
+ // enable is hw2reg.intr_state.<field>.de.
+ // NOTE(review): no software-access parameter is overridden on these
+ // instances; confirm the prim_subreg default gives the clear behaviour the
+ // driver expects for interrupt state bits.
+ // F[tx_watermark]: 0:0
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_tx_watermark (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_tx_watermark_we),
+ .wd (intr_state_tx_watermark_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.tx_watermark.de),
+ .d (hw2reg.intr_state.tx_watermark.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.tx_watermark.q ),
+ // to register interface (read)
+ .qs (intr_state_tx_watermark_qs)
+ );
+ // F[rx_watermark]: 1:1
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_rx_watermark (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_rx_watermark_we),
+ .wd (intr_state_rx_watermark_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.rx_watermark.de),
+ .d (hw2reg.intr_state.rx_watermark.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.rx_watermark.q ),
+ // to register interface (read)
+ .qs (intr_state_rx_watermark_qs)
+ );
+ // F[tx_empty]: 2:2
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_tx_empty (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_tx_empty_we),
+ .wd (intr_state_tx_empty_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.tx_empty.de),
+ .d (hw2reg.intr_state.tx_empty.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.tx_empty.q ),
+ // to register interface (read)
+ .qs (intr_state_tx_empty_qs)
+ );
+ // F[rx_overflow]: 3:3
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_rx_overflow (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_rx_overflow_we),
+ .wd (intr_state_rx_overflow_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.rx_overflow.de),
+ .d (hw2reg.intr_state.rx_overflow.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.rx_overflow.q ),
+ // to register interface (read)
+ .qs (intr_state_rx_overflow_qs)
+ );
+ // F[rx_frame_err]: 4:4
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_rx_frame_err (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_rx_frame_err_we),
+ .wd (intr_state_rx_frame_err_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.rx_frame_err.de),
+ .d (hw2reg.intr_state.rx_frame_err.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.rx_frame_err.q ),
+ // to register interface (read)
+ .qs (intr_state_rx_frame_err_qs)
+ );
+ // F[rx_break_err]: 5:5
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_rx_break_err (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_rx_break_err_we),
+ .wd (intr_state_rx_break_err_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.rx_break_err.de),
+ .d (hw2reg.intr_state.rx_break_err.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.rx_break_err.q ),
+ // to register interface (read)
+ .qs (intr_state_rx_break_err_qs)
+ );
+ // F[rx_timeout]: 6:6
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_rx_timeout (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_rx_timeout_we),
+ .wd (intr_state_rx_timeout_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.rx_timeout.de),
+ .d (hw2reg.intr_state.rx_timeout.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.rx_timeout.q ),
+ // to register interface (read)
+ .qs (intr_state_rx_timeout_qs)
+ );
+ // F[rx_parity_err]: 7:7
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_intr_state_rx_parity_err (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (intr_state_rx_parity_err_we),
+ .wd (intr_state_rx_parity_err_wd),
+ // from internal hardware
+ .de (hw2reg.intr_state.rx_parity_err.de),
+ .d (hw2reg.intr_state.rx_parity_err.d ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.intr_state.rx_parity_err.q ),
+ // to register interface (read)
+ .qs (intr_state_rx_parity_err_qs)
+ );
+ // R[intr_enable]: V(False)
+ // Eight single-bit fields, reset to 0. Only the register interface writes
+ // them: the hardware-side update inputs (de/d) are tied to zero. Each value
+ // is exported on reg2hw.intr_enable.<field>.q and read back through qs.
+ // F[tx_watermark]: 0:0
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_tx_watermark (
+   .clk_i, .rst_ni,
+   .we (intr_enable_tx_watermark_we), .wd (intr_enable_tx_watermark_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.tx_watermark.q),
+   .qs (intr_enable_tx_watermark_qs)
+ );
+ // F[rx_watermark]: 1:1
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_rx_watermark (
+   .clk_i, .rst_ni,
+   .we (intr_enable_rx_watermark_we), .wd (intr_enable_rx_watermark_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.rx_watermark.q),
+   .qs (intr_enable_rx_watermark_qs)
+ );
+ // F[tx_empty]: 2:2
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_tx_empty (
+   .clk_i, .rst_ni,
+   .we (intr_enable_tx_empty_we), .wd (intr_enable_tx_empty_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.tx_empty.q),
+   .qs (intr_enable_tx_empty_qs)
+ );
+ // F[rx_overflow]: 3:3
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_rx_overflow (
+   .clk_i, .rst_ni,
+   .we (intr_enable_rx_overflow_we), .wd (intr_enable_rx_overflow_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.rx_overflow.q),
+   .qs (intr_enable_rx_overflow_qs)
+ );
+ // F[rx_frame_err]: 4:4
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_rx_frame_err (
+   .clk_i, .rst_ni,
+   .we (intr_enable_rx_frame_err_we), .wd (intr_enable_rx_frame_err_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.rx_frame_err.q),
+   .qs (intr_enable_rx_frame_err_qs)
+ );
+ // F[rx_break_err]: 5:5
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_rx_break_err (
+   .clk_i, .rst_ni,
+   .we (intr_enable_rx_break_err_we), .wd (intr_enable_rx_break_err_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.rx_break_err.q),
+   .qs (intr_enable_rx_break_err_qs)
+ );
+ // F[rx_timeout]: 6:6
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_rx_timeout (
+   .clk_i, .rst_ni,
+   .we (intr_enable_rx_timeout_we), .wd (intr_enable_rx_timeout_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.rx_timeout.q),
+   .qs (intr_enable_rx_timeout_qs)
+ );
+ // F[rx_parity_err]: 7:7
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_intr_enable_rx_parity_err (
+   .clk_i, .rst_ni,
+   .we (intr_enable_rx_parity_err_we), .wd (intr_enable_rx_parity_err_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.intr_enable.rx_parity_err.q),
+   .qs (intr_enable_rx_parity_err_qs)
+ );
+ // R[intr_test]: V(True)
+ // Eight single-bit fields built from prim_subreg_ext. They are write-only
+ // from the bus side: re is tied low, d is zero and qs is left open. The
+ // written value and its qualifier go to reg2hw.intr_test.<field>.{q,qe}.
+ // F[tx_watermark]: 0:0
+ prim_subreg_ext #(.DW(1)) u_intr_test_tx_watermark (
+   .we  (intr_test_tx_watermark_we),
+   .wd  (intr_test_tx_watermark_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.tx_watermark.q),
+   .qe  (reg2hw.intr_test.tx_watermark.qe),
+   .qre (),
+   .qs  ()
+ );
+ // F[rx_watermark]: 1:1
+ prim_subreg_ext #(.DW(1)) u_intr_test_rx_watermark (
+   .we  (intr_test_rx_watermark_we),
+   .wd  (intr_test_rx_watermark_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.rx_watermark.q),
+   .qe  (reg2hw.intr_test.rx_watermark.qe),
+   .qre (),
+   .qs  ()
+ );
+ // F[tx_empty]: 2:2
+ prim_subreg_ext #(.DW(1)) u_intr_test_tx_empty (
+   .we  (intr_test_tx_empty_we),
+   .wd  (intr_test_tx_empty_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.tx_empty.q),
+   .qe  (reg2hw.intr_test.tx_empty.qe),
+   .qre (),
+   .qs  ()
+ );
+ // F[rx_overflow]: 3:3
+ prim_subreg_ext #(.DW(1)) u_intr_test_rx_overflow (
+   .we  (intr_test_rx_overflow_we),
+   .wd  (intr_test_rx_overflow_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.rx_overflow.q),
+   .qe  (reg2hw.intr_test.rx_overflow.qe),
+   .qre (),
+   .qs  ()
+ );
+ // F[rx_frame_err]: 4:4
+ prim_subreg_ext #(.DW(1)) u_intr_test_rx_frame_err (
+   .we  (intr_test_rx_frame_err_we),
+   .wd  (intr_test_rx_frame_err_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.rx_frame_err.q),
+   .qe  (reg2hw.intr_test.rx_frame_err.qe),
+   .qre (),
+   .qs  ()
+ );
+ // F[rx_break_err]: 5:5
+ prim_subreg_ext #(.DW(1)) u_intr_test_rx_break_err (
+   .we  (intr_test_rx_break_err_we),
+   .wd  (intr_test_rx_break_err_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.rx_break_err.q),
+   .qe  (reg2hw.intr_test.rx_break_err.qe),
+   .qre (),
+   .qs  ()
+ );
+ // F[rx_timeout]: 6:6
+ prim_subreg_ext #(.DW(1)) u_intr_test_rx_timeout (
+   .we  (intr_test_rx_timeout_we),
+   .wd  (intr_test_rx_timeout_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.rx_timeout.q),
+   .qe  (reg2hw.intr_test.rx_timeout.qe),
+   .qre (),
+   .qs  ()
+ );
+ // F[rx_parity_err]: 7:7
+ prim_subreg_ext #(.DW(1)) u_intr_test_rx_parity_err (
+   .we  (intr_test_rx_parity_err_we),
+   .wd  (intr_test_rx_parity_err_wd),
+   .re  (1'b0),
+   .d   ('0),
+   .q   (reg2hw.intr_test.rx_parity_err.q),
+   .qe  (reg2hw.intr_test.rx_parity_err.qe),
+   .qre (),
+   .qs  ()
+ );
+ // R[ctrl]: V(False)
+ // CTRL fields are written only from the register interface (de/d tied to
+ // zero) and exported to the core on reg2hw.ctrl.<field>.q.
+ // F[tx]: 0:0
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ctrl_tx (
+   .clk_i, .rst_ni,
+   .we (ctrl_tx_we), .wd (ctrl_tx_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.tx.q),
+   .qs (ctrl_tx_qs)
+ );
+ // F[rx]: 1:1
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ctrl_rx (
+   .clk_i, .rst_ni,
+   .we (ctrl_rx_we), .wd (ctrl_rx_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.rx.q),
+   .qs (ctrl_rx_qs)
+ );
+ // F[nf]: 2:2
+ // CTRL bit 2. The stored value is exported on reg2hw.ctrl.nf.q, in the same
+ // way every other CTRL field drives its reg2hw.ctrl.<field>.q output; with
+ // the port left open the setting would never reach the UART core.
+ prim_subreg #(
+ .DW (1),
+ .RESVAL (1'h0)
+ ) u_ctrl_nf (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (ctrl_nf_we),
+ .wd (ctrl_nf_wd),
+ // from internal hardware
+ .de (1'b0),
+ .d ('0 ),
+ // to internal hardware
+ .qe (),
+ .q (reg2hw.ctrl.nf.q ),
+ // to register interface (read)
+ .qs (ctrl_nf_qs)
+ );
+ // Remaining CTRL fields. As with the lower bits, the hardware update inputs
+ // are tied to zero and the value is exported on reg2hw.ctrl.<field>.q.
+ // F[slpbk]: 4:4
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ctrl_slpbk (
+   .clk_i, .rst_ni,
+   .we (ctrl_slpbk_we), .wd (ctrl_slpbk_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.slpbk.q),
+   .qs (ctrl_slpbk_qs)
+ );
+ // F[llpbk]: 5:5
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ctrl_llpbk (
+   .clk_i, .rst_ni,
+   .we (ctrl_llpbk_we), .wd (ctrl_llpbk_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.llpbk.q),
+   .qs (ctrl_llpbk_qs)
+ );
+ // F[parity_en]: 6:6
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ctrl_parity_en (
+   .clk_i, .rst_ni,
+   .we (ctrl_parity_en_we), .wd (ctrl_parity_en_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.parity_en.q),
+   .qs (ctrl_parity_en_qs)
+ );
+ // F[parity_odd]: 7:7
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ctrl_parity_odd (
+   .clk_i, .rst_ni,
+   .we (ctrl_parity_odd_we), .wd (ctrl_parity_odd_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.parity_odd.q),
+   .qs (ctrl_parity_odd_qs)
+ );
+ // F[rxblvl]: 9:8
+ prim_subreg #(.DW(2), .RESVAL(2'h0)) u_ctrl_rxblvl (
+   .clk_i, .rst_ni,
+   .we (ctrl_rxblvl_we), .wd (ctrl_rxblvl_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.rxblvl.q),
+   .qs (ctrl_rxblvl_qs)
+ );
+ // F[nco]: 31:16
+ prim_subreg #(.DW(16), .RESVAL(16'h0)) u_ctrl_nco (
+   .clk_i, .rst_ni,
+   .we (ctrl_nco_we), .wd (ctrl_nco_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ctrl.nco.q),
+   .qs (ctrl_nco_qs)
+ );
+ // R[status]: V(True)
+ // Six single-bit fields built from prim_subreg_ext. They are never written
+ // from the bus (we tied low); the read value comes from
+ // hw2reg.status.<field>.d. The read strobe is forwarded to the core on
+ // reg2hw.status.<field>.re.
+ // NOTE(review): the .re member of reg2hw.status.<field> is declared outside
+ // this file; confirm it exists in uart_reg_pkg.
+ // F[txfull]: 0:0
+ prim_subreg_ext #(
+ .DW (1)
+ ) u_status_txfull (
+ .re (status_txfull_re),
+ .we (1'b0),
+ .wd ('0),
+ .d (hw2reg.status.txfull.d),
+ .qre (reg2hw.status.txfull.re),
+ .qe (),
+ .q (reg2hw.status.txfull.q ),
+ .qs (status_txfull_qs)
+ );
+ // F[rxfull]: 1:1
+ prim_subreg_ext #(
+ .DW (1)
+ ) u_status_rxfull (
+ .re (status_rxfull_re),
+ .we (1'b0),
+ .wd ('0),
+ .d (hw2reg.status.rxfull.d),
+ .qre (reg2hw.status.rxfull.re),
+ .qe (),
+ .q (reg2hw.status.rxfull.q ),
+ .qs (status_rxfull_qs)
+ );
+ // F[txempty]: 2:2
+ prim_subreg_ext #(
+ .DW (1)
+ ) u_status_txempty (
+ .re (status_txempty_re),
+ .we (1'b0),
+ .wd ('0),
+ .d (hw2reg.status.txempty.d),
+ .qre (reg2hw.status.txempty.re),
+ .qe (),
+ .q (reg2hw.status.txempty.q ),
+ .qs (status_txempty_qs)
+ );
+ // F[txidle]: 3:3
+ prim_subreg_ext #(
+ .DW (1)
+ ) u_status_txidle (
+ .re (status_txidle_re),
+ .we (1'b0),
+ .wd ('0),
+ .d (hw2reg.status.txidle.d),
+ .qre (reg2hw.status.txidle.re),
+ .qe (),
+ .q (reg2hw.status.txidle.q ),
+ .qs (status_txidle_qs)
+ );
+ // F[rxidle]: 4:4
+ prim_subreg_ext #(
+ .DW (1)
+ ) u_status_rxidle (
+ .re (status_rxidle_re),
+ .we (1'b0),
+ .wd ('0),
+ .d (hw2reg.status.rxidle.d),
+ .qre (reg2hw.status.rxidle.re),
+ .qe (),
+ .q (reg2hw.status.rxidle.q ),
+ .qs (status_rxidle_qs)
+ );
+ // F[rxempty]: 5:5
+ prim_subreg_ext #(
+ .DW (1)
+ ) u_status_rxempty (
+ .re (status_rxempty_re),
+ .we (1'b0),
+ .wd ('0),
+ .d (hw2reg.status.rxempty.d),
+ .qre (reg2hw.status.rxempty.re),
+ .qe (),
+ .q (reg2hw.status.rxempty.q ),
+ .qs (status_rxempty_qs)
+ );
+ // R[rdata]: V(True)
+ // 8-bit read-only register built from prim_subreg_ext: the read value comes
+ // from hw2reg.rdata.d and the bus read strobe is forwarded to the core on
+ // reg2hw.rdata.re.
+ // NOTE(review): the .re member of reg2hw.rdata is declared outside this
+ // file; confirm it exists in uart_reg_pkg.
+ prim_subreg_ext #(
+ .DW (8)
+ ) u_rdata (
+ .re (rdata_re),
+ .we (1'b0),
+ .wd ('0),
+ .d (hw2reg.rdata.d),
+ .qre (reg2hw.rdata.re),
+ .qe (),
+ .q (reg2hw.rdata.q ),
+ .qs (rdata_qs)
+ );
+ // R[wdata]: V(False)
+ // 8-bit register written from the bus only (de/d tied to zero). Both the
+ // value and its qe qualifier are exported to the core; the read-back port
+ // qs is left open.
+ prim_subreg #(.DW(8), .RESVAL(8'h0)) u_wdata (
+   .clk_i, .rst_ni,
+   .we (wdata_we), .wd (wdata_wd),
+   .de (1'b0), .d ('0),
+   .qe (reg2hw.wdata.qe), .q (reg2hw.wdata.q),
+   .qs ()
+ );
+ // R[fifo_ctrl]: V(False)
+ // The two reset fields are written from the bus only (de/d tied to zero).
+ // Value and qe qualifier are exported to the core; qs is left open.
+ // F[rxrst]: 0:0
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_fifo_ctrl_rxrst (
+   .clk_i, .rst_ni,
+   .we (fifo_ctrl_rxrst_we), .wd (fifo_ctrl_rxrst_wd),
+   .de (1'b0), .d ('0),
+   .qe (reg2hw.fifo_ctrl.rxrst.qe), .q (reg2hw.fifo_ctrl.rxrst.q),
+   .qs ()
+ );
+ // F[txrst]: 1:1
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_fifo_ctrl_txrst (
+   .clk_i, .rst_ni,
+   .we (fifo_ctrl_txrst_we), .wd (fifo_ctrl_txrst_wd),
+   .de (1'b0), .d ('0),
+   .qe (reg2hw.fifo_ctrl.txrst.qe), .q (reg2hw.fifo_ctrl.txrst.q),
+   .qs ()
+ );
+ // F[rxilvl]: 4:2
+ // 3-bit field written from the bus (we/wd) and updated by the core (de/d);
+ // the core-side enable is hw2reg.fifo_ctrl.rxilvl.de.
+ prim_subreg #(
+ .DW (3),
+ .RESVAL (3'h0)
+ ) u_fifo_ctrl_rxilvl (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (fifo_ctrl_rxilvl_we),
+ .wd (fifo_ctrl_rxilvl_wd),
+ // from internal hardware
+ .de (hw2reg.fifo_ctrl.rxilvl.de),
+ .d (hw2reg.fifo_ctrl.rxilvl.d ),
+ // to internal hardware
+ .qe (reg2hw.fifo_ctrl.rxilvl.qe),
+ .q (reg2hw.fifo_ctrl.rxilvl.q ),
+ // to register interface (read)
+ .qs (fifo_ctrl_rxilvl_qs)
+ );
+ // F[txilvl]: 6:5
+ // 2-bit field; same wiring, with the core-side enable on
+ // hw2reg.fifo_ctrl.txilvl.de.
+ prim_subreg #(
+ .DW (2),
+ .RESVAL (2'h0)
+ ) u_fifo_ctrl_txilvl (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ // from register interface
+ .we (fifo_ctrl_txilvl_we),
+ .wd (fifo_ctrl_txilvl_wd),
+ // from internal hardware
+ .de (hw2reg.fifo_ctrl.txilvl.de),
+ .d (hw2reg.fifo_ctrl.txilvl.d ),
+ // to internal hardware
+ .qe (reg2hw.fifo_ctrl.txilvl.qe),
+ .q (reg2hw.fifo_ctrl.txilvl.q ),
+ // to register interface (read)
+ .qs (fifo_ctrl_txilvl_qs)
+ );
+ // R[fifo_status]: V(True)
+ // Two 6-bit read-only fields: the read value comes from
+ // hw2reg.fifo_status.<field>.d and nothing is exported to the core
+ // (q, qe and qre are left open).
+ // F[txlvl]: 5:0
+ prim_subreg_ext #(.DW(6)) u_fifo_status_txlvl (
+   .we  (1'b0),
+   .wd  ('0),
+   .re  (fifo_status_txlvl_re),
+   .d   (hw2reg.fifo_status.txlvl.d),
+   .q   (),
+   .qe  (),
+   .qre (),
+   .qs  (fifo_status_txlvl_qs)
+ );
+ // F[rxlvl]: 21:16
+ prim_subreg_ext #(.DW(6)) u_fifo_status_rxlvl (
+   .we  (1'b0),
+   .wd  ('0),
+   .re  (fifo_status_rxlvl_re),
+   .d   (hw2reg.fifo_status.rxlvl.d),
+   .q   (),
+   .qe  (),
+   .qre (),
+   .qs  (fifo_status_rxlvl_qs)
+ );
+ // R[ovrd]: V(False)
+ // Two single-bit fields written from the bus only (de/d tied to zero) and
+ // exported on reg2hw.ovrd.<field>.q.
+ // F[txen]: 0:0
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ovrd_txen (
+   .clk_i, .rst_ni,
+   .we (ovrd_txen_we), .wd (ovrd_txen_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ovrd.txen.q),
+   .qs (ovrd_txen_qs)
+ );
+ // F[txval]: 1:1
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_ovrd_txval (
+   .clk_i, .rst_ni,
+   .we (ovrd_txval_we), .wd (ovrd_txval_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.ovrd.txval.q),
+   .qs (ovrd_txval_qs)
+ );
+ // R[val]: V(True)
+ // 16-bit read-only register: the read value comes from hw2reg.val.d and
+ // nothing is exported to the core (q, qe and qre are left open).
+ prim_subreg_ext #(.DW(16)) u_val (
+   .we  (1'b0),
+   .wd  ('0),
+   .re  (val_re),
+   .d   (hw2reg.val.d),
+   .q   (),
+   .qe  (),
+   .qre (),
+   .qs  (val_qs)
+ );
+ // R[timeout_ctrl]: V(False)
+ // A 24-bit value and a 1-bit enable, both written from the bus only
+ // (de/d tied to zero) and exported on reg2hw.timeout_ctrl.<field>.q.
+ // F[val]: 23:0
+ prim_subreg #(.DW(24), .RESVAL(24'h0)) u_timeout_ctrl_val (
+   .clk_i, .rst_ni,
+   .we (timeout_ctrl_val_we), .wd (timeout_ctrl_val_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.timeout_ctrl.val.q),
+   .qs (timeout_ctrl_val_qs)
+ );
+ // F[en]: 31:31
+ prim_subreg #(.DW(1), .RESVAL(1'h0)) u_timeout_ctrl_en (
+   .clk_i, .rst_ni,
+   .we (timeout_ctrl_en_we), .wd (timeout_ctrl_en_wd),
+   .de (1'b0), .d ('0),
+   .qe (), .q (reg2hw.timeout_ctrl.en.q),
+   .qs (timeout_ctrl_en_qs)
+ );
+ // Address decode: addr_hit[n] is set when reg_addr equals the offset of
+ // register n. The concatenation is written MSB-first, so the last entry is
+ // addr_hit[0].
+ logic [11:0] addr_hit;
+ always_comb begin
+   addr_hit = {
+     (reg_addr == UART_TIMEOUT_CTRL_OFFSET), // [11]
+     (reg_addr == UART_VAL_OFFSET),          // [10]
+     (reg_addr == UART_OVRD_OFFSET),         // [ 9]
+     (reg_addr == UART_FIFO_STATUS_OFFSET),  // [ 8]
+     (reg_addr == UART_FIFO_CTRL_OFFSET),    // [ 7]
+     (reg_addr == UART_WDATA_OFFSET),        // [ 6]
+     (reg_addr == UART_RDATA_OFFSET),        // [ 5]
+     (reg_addr == UART_STATUS_OFFSET),       // [ 4]
+     (reg_addr == UART_CTRL_OFFSET),         // [ 3]
+     (reg_addr == UART_INTR_TEST_OFFSET),    // [ 2]
+     (reg_addr == UART_INTR_ENABLE_OFFSET),  // [ 1]
+     (reg_addr == UART_INTR_STATE_OFFSET)    // [ 0]
+   };
+ end
+ // An access (read or write) that matches no register is an address miss.
+ assign addrmiss = (reg_re || reg_we) ? ~|addr_hit : 1'b0 ;
+ // Sub-word write check: a write to register i is an error unless every
+ // byte lane set in UART_PERMIT[i] is also set in reg_be.
+ always_comb begin
+   wr_err = 1'b0;
+   for (int i = 0; i < 12; i++) begin
+     if (addr_hit[i] && reg_we && (UART_PERMIT[i] != (UART_PERMIT[i] & reg_be))) begin
+       wr_err = 1'b1 ;
+     end
+   end
+ end
+ // Per-field write enables, write data and read enables, grouped by register.
+ // All fields of a register share the strobe addr_hit[n] & reg_we & ~wr_err
+ // (writes) or addr_hit[n] && reg_re (reads). Concatenations are written
+ // MSB-first, so the right-most name maps to the lowest bit of the slice.
+ // INTR_STATE (addr_hit[0]): wdata[7:0]
+ assign {intr_state_rx_parity_err_we, intr_state_rx_timeout_we,
+         intr_state_rx_break_err_we, intr_state_rx_frame_err_we,
+         intr_state_rx_overflow_we, intr_state_tx_empty_we,
+         intr_state_rx_watermark_we, intr_state_tx_watermark_we} =
+        {8{addr_hit[0] & reg_we & ~wr_err}};
+ assign {intr_state_rx_parity_err_wd, intr_state_rx_timeout_wd,
+         intr_state_rx_break_err_wd, intr_state_rx_frame_err_wd,
+         intr_state_rx_overflow_wd, intr_state_tx_empty_wd,
+         intr_state_rx_watermark_wd, intr_state_tx_watermark_wd} = reg_wdata[7:0];
+ // INTR_ENABLE (addr_hit[1]): wdata[7:0]
+ assign {intr_enable_rx_parity_err_we, intr_enable_rx_timeout_we,
+         intr_enable_rx_break_err_we, intr_enable_rx_frame_err_we,
+         intr_enable_rx_overflow_we, intr_enable_tx_empty_we,
+         intr_enable_rx_watermark_we, intr_enable_tx_watermark_we} =
+        {8{addr_hit[1] & reg_we & ~wr_err}};
+ assign {intr_enable_rx_parity_err_wd, intr_enable_rx_timeout_wd,
+         intr_enable_rx_break_err_wd, intr_enable_rx_frame_err_wd,
+         intr_enable_rx_overflow_wd, intr_enable_tx_empty_wd,
+         intr_enable_rx_watermark_wd, intr_enable_tx_watermark_wd} = reg_wdata[7:0];
+ // INTR_TEST (addr_hit[2]): wdata[7:0]
+ assign {intr_test_rx_parity_err_we, intr_test_rx_timeout_we,
+         intr_test_rx_break_err_we, intr_test_rx_frame_err_we,
+         intr_test_rx_overflow_we, intr_test_tx_empty_we,
+         intr_test_rx_watermark_we, intr_test_tx_watermark_we} =
+        {8{addr_hit[2] & reg_we & ~wr_err}};
+ assign {intr_test_rx_parity_err_wd, intr_test_rx_timeout_wd,
+         intr_test_rx_break_err_wd, intr_test_rx_frame_err_wd,
+         intr_test_rx_overflow_wd, intr_test_tx_empty_wd,
+         intr_test_rx_watermark_wd, intr_test_tx_watermark_wd} = reg_wdata[7:0];
+ // CTRL (addr_hit[3]): wdata[2:0], wdata[9:4], wdata[31:16] (bit 3 unused)
+ assign {ctrl_nco_we, ctrl_rxblvl_we, ctrl_parity_odd_we, ctrl_parity_en_we,
+         ctrl_llpbk_we, ctrl_slpbk_we, ctrl_nf_we, ctrl_rx_we, ctrl_tx_we} =
+        {9{addr_hit[3] & reg_we & ~wr_err}};
+ assign {ctrl_nf_wd, ctrl_rx_wd, ctrl_tx_wd} = reg_wdata[2:0];
+ assign {ctrl_rxblvl_wd, ctrl_parity_odd_wd, ctrl_parity_en_wd,
+         ctrl_llpbk_wd, ctrl_slpbk_wd} = reg_wdata[9:4];
+ assign ctrl_nco_wd = reg_wdata[31:16];
+ // STATUS (addr_hit[4]): read strobes only
+ assign {status_rxempty_re, status_rxidle_re, status_txidle_re,
+         status_txempty_re, status_rxfull_re, status_txfull_re} =
+        {6{addr_hit[4] && reg_re}};
+ // RDATA (addr_hit[5]): read strobe only
+ assign rdata_re = addr_hit[5] && reg_re;
+ // WDATA (addr_hit[6]): wdata[7:0]
+ assign wdata_we = addr_hit[6] & reg_we & ~wr_err;
+ assign wdata_wd = reg_wdata[7:0];
+ // FIFO_CTRL (addr_hit[7]): wdata[6:0]
+ assign {fifo_ctrl_txilvl_we, fifo_ctrl_rxilvl_we,
+         fifo_ctrl_txrst_we, fifo_ctrl_rxrst_we} =
+        {4{addr_hit[7] & reg_we & ~wr_err}};
+ assign {fifo_ctrl_txilvl_wd, fifo_ctrl_rxilvl_wd,
+         fifo_ctrl_txrst_wd, fifo_ctrl_rxrst_wd} = reg_wdata[6:0];
+ // FIFO_STATUS (addr_hit[8]): read strobes only
+ assign {fifo_status_rxlvl_re, fifo_status_txlvl_re} = {2{addr_hit[8] && reg_re}};
+ // OVRD (addr_hit[9]): wdata[1:0]
+ assign {ovrd_txval_we, ovrd_txen_we} = {2{addr_hit[9] & reg_we & ~wr_err}};
+ assign {ovrd_txval_wd, ovrd_txen_wd} = reg_wdata[1:0];
+ // VAL (addr_hit[10]): read strobe only
+ assign val_re = addr_hit[10] && reg_re;
+ // TIMEOUT_CTRL (addr_hit[11]): wdata[23:0] and wdata[31]
+ assign {timeout_ctrl_en_we, timeout_ctrl_val_we} = {2{addr_hit[11] & reg_we & ~wr_err}};
+ assign timeout_ctrl_val_wd = reg_wdata[23:0];
+ assign timeout_ctrl_en_wd = reg_wdata[31];
+ // Read data mux: assembles the 32-bit read word of whichever register is
+ // selected by addr_hit. Bits not assigned in a branch stay 0; when no
+ // register is selected the word is all ones. Concatenations are written
+ // MSB-first.
+ always_comb begin
+   reg_rdata_next = '0;
+   unique case (1'b1)
+     addr_hit[0]: begin // INTR_STATE
+       reg_rdata_next[7:0] = {intr_state_rx_parity_err_qs, intr_state_rx_timeout_qs,
+                              intr_state_rx_break_err_qs, intr_state_rx_frame_err_qs,
+                              intr_state_rx_overflow_qs, intr_state_tx_empty_qs,
+                              intr_state_rx_watermark_qs, intr_state_tx_watermark_qs};
+     end
+     addr_hit[1]: begin // INTR_ENABLE
+       reg_rdata_next[7:0] = {intr_enable_rx_parity_err_qs, intr_enable_rx_timeout_qs,
+                              intr_enable_rx_break_err_qs, intr_enable_rx_frame_err_qs,
+                              intr_enable_rx_overflow_qs, intr_enable_tx_empty_qs,
+                              intr_enable_rx_watermark_qs, intr_enable_tx_watermark_qs};
+     end
+     addr_hit[2]: begin // INTR_TEST reads as zero
+       reg_rdata_next[7:0] = '0;
+     end
+     addr_hit[3]: begin // CTRL (bit 3 and bits 15:10 are not assigned)
+       reg_rdata_next[2:0] = {ctrl_nf_qs, ctrl_rx_qs, ctrl_tx_qs};
+       reg_rdata_next[9:4] = {ctrl_rxblvl_qs, ctrl_parity_odd_qs, ctrl_parity_en_qs,
+                              ctrl_llpbk_qs, ctrl_slpbk_qs};
+       reg_rdata_next[31:16] = ctrl_nco_qs;
+     end
+     addr_hit[4]: begin // STATUS
+       reg_rdata_next[5:0] = {status_rxempty_qs, status_rxidle_qs, status_txidle_qs,
+                              status_txempty_qs, status_rxfull_qs, status_txfull_qs};
+     end
+     addr_hit[5]: begin // RDATA
+       reg_rdata_next[7:0] = rdata_qs;
+     end
+     addr_hit[6]: begin // WDATA reads as zero
+       reg_rdata_next[7:0] = '0;
+     end
+     addr_hit[7]: begin // FIFO_CTRL: the two reset bits read as zero
+       reg_rdata_next[1:0] = '0;
+       reg_rdata_next[6:2] = {fifo_ctrl_txilvl_qs, fifo_ctrl_rxilvl_qs};
+     end
+     addr_hit[8]: begin // FIFO_STATUS
+       reg_rdata_next[5:0] = fifo_status_txlvl_qs;
+       reg_rdata_next[21:16] = fifo_status_rxlvl_qs;
+     end
+     addr_hit[9]: begin // OVRD
+       reg_rdata_next[1:0] = {ovrd_txval_qs, ovrd_txen_qs};
+     end
+     addr_hit[10]: begin // VAL
+       reg_rdata_next[15:0] = val_qs;
+     end
+     addr_hit[11]: begin // TIMEOUT_CTRL
+       reg_rdata_next[23:0] = timeout_ctrl_val_qs;
+       reg_rdata_next[31] = timeout_ctrl_en_qs;
+     end
+     default: begin
+       reg_rdata_next = '1;
+     end
+   endcase
+ end
+ // // Assertions for Register Interface
+ // `ASSERT_PULSE(wePulse, reg_we)
+ // `ASSERT_PULSE(rePulse, reg_re)
+ // `ASSERT(reAfterRv, $rose(reg_re || reg_we) |=> tl_o.d_valid)
+ // `ASSERT(en2addrHit, (reg_we || reg_re) |-> $onehot0(addr_hit))
+ // // this is formulated as an assumption such that the FPV testbenches do disprove this
+ // // property by mistake
+ // `ASSUME(reqParity, tl_reg_h2d.a_valid |-> tl_reg_h2d.a_user.parity_en == 1'b0)
+// Close the uart_reg_top module; without this the declaration never ends.
+endmodule
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..829895a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,105 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Description: UART Receive Module
+// uart_rx: serial receiver paced by a 16x oversampling tick.
+//
+// Ports:
+//   clk_i, rst_ni  - clock and active-low asynchronous reset
+//   rx_enable      - when low, the next-state logic forces every state register
+//                    back to its reset value
+//   tick_baud_x16  - advances the 4-bit divider; the divider carry-out is tick_baud
+//   parity_enable  - selects an 11-bit frame (with parity bit) instead of 10 bits
+//   parity_odd     - folded into the XOR reduction that produces rx_parity_err
+//   tick_baud      - registered carry-out of the divider
+//   rx_valid       - registered flag raised the cycle after the last bit of a
+//                    frame has been shifted in
+//   rx_data        - data byte taken from the shift register
+//   idle           - high while no frame is being received
+//   frame_err      - rx_valid while the last bit shifted in (stop bit) is low
+//   rx_parity_err  - rx_valid with parity enabled and a non-zero XOR reduction
+//   rx             - serial input; it is used directly by this module
+//                    NOTE(review): presumably synchronized by the instantiating
+//                    logic - confirm
+module uart_rx (
+  input               clk_i,
+  input               rst_ni,
+  input               rx_enable,
+  input               tick_baud_x16,
+  input               parity_enable,
+  input               parity_odd,
+  output logic        tick_baud,
+  output logic        rx_valid,
+  output [7:0]        rx_data,
+  output logic        idle,
+  output              frame_err,
+  output              rx_parity_err,
+  input               rx
+);
+
+  logic        rx_valid_q;
+  logic [10:0] sreg_q, sreg_d;          // receive shift register, filled from the MSB
+  logic [3:0]  bit_cnt_q, bit_cnt_d;    // bits still to be sampled in the current frame
+  logic [3:0]  baud_div_q, baud_div_d;  // divide-by-16 counter on tick_baud_x16
+  logic        tick_baud_d, tick_baud_q;
+  logic        idle_d, idle_q;
+
+  assign tick_baud = tick_baud_q;
+  assign idle      = idle_q;
+
+  // State registers.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      sreg_q      <= 11'h0;
+      bit_cnt_q   <= 4'h0;
+      baud_div_q  <= 4'h0;
+      tick_baud_q <= 1'b0;
+      idle_q      <= 1'b1;
+    end else begin
+      sreg_q      <= sreg_d;
+      bit_cnt_q   <= bit_cnt_d;
+      baud_div_q  <= baud_div_d;
+      tick_baud_q <= tick_baud_d;
+      idle_q      <= idle_d;
+    end
+  end
+
+  // Next-state logic.
+  always_comb begin
+    if (!rx_enable) begin
+      sreg_d      = 11'h0;
+      bit_cnt_d   = 4'h0;
+      baud_div_d  = 4'h0;
+      tick_baud_d = 1'b0;
+      idle_d      = 1'b1;
+    end else begin
+      // Defaults: hold state, no baud tick.
+      tick_baud_d = 1'b0;
+      sreg_d      = sreg_q;
+      bit_cnt_d   = bit_cnt_q;
+      baud_div_d  = baud_div_q;
+      idle_d      = idle_q;
+
+      // 5-bit add: the carry-out of the 4-bit divider becomes the baud tick.
+      if (tick_baud_x16) begin
+        {tick_baud_d, baud_div_d} = {1'b0,baud_div_q} + 5'h1;
+      end
+
+      if (idle_q && !rx) begin
+        // start of char, sample in the middle of the bit time
+        // (this overrides the divider update made just above)
+        baud_div_d  = 4'd8;
+        tick_baud_d = 1'b0;
+        bit_cnt_d   = (parity_enable ? 4'd11 : 4'd10);
+        sreg_d      = 11'h0;
+        idle_d      = 1'b0;
+      end else if (!idle_q && tick_baud_q) begin
+        if ((bit_cnt_q == (parity_enable ? 4'd11 : 4'd10)) && rx) begin
+          // must have been a glitch on the input, start bit is not set
+          // in the middle of the bit time, abort
+          idle_d    = 1'b1;
+          bit_cnt_d = 4'h0;
+        end else begin
+          // Shift the sampled bit in at the MSB; return to idle after the last bit.
+          sreg_d    = {rx, sreg_q[10:1]};
+          bit_cnt_d = bit_cnt_q - 4'h1;
+          idle_d    = (bit_cnt_q == 4'h1);
+        end
+      end
+    end
+  end
+
+  // rx_valid is registered, so it is seen one cycle after the final shift.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) rx_valid_q <= 1'b0;
+    else         rx_valid_q <= tick_baud_q & (bit_cnt_q == 4'h1);
+  end
+
+  assign rx_valid = rx_valid_q;
+  // With parity the frame fills sreg_q[10:0]; without it the frame is one bit
+  // shorter, so the data byte sits one position higher.
+  assign rx_data  = parity_enable ? sreg_q[8:1] : sreg_q[9:2];
+  // (rx_parity = sreg_q[9])
+  assign frame_err     = rx_valid_q & ~sreg_q[10];
+  assign rx_parity_err = parity_enable & rx_valid_q &
+                         (^{sreg_q[9:1],parity_odd});
+
+endmodule
diff --git a/verilog/rtl/uart_rx_prog.v b/verilog/rtl/uart_rx_prog.v
new file mode 100644
index 0000000..fb43991
--- /dev/null
+++ b/verilog/rtl/uart_rx_prog.v
@@ -0,0 +1,156 @@
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Drive the CLKS_PER_BIT input as follows:
+// CLKS_PER_BIT = (Frequency of i_Clock)/(Frequency of UART)
+// Example: 10 MHz Clock, 115200 baud UART
+// (10000000)/(115200) = 87
+// uart_rx_prog: 8-bit UART receiver whose bit time is set at run time.
+//
+// Ports:
+//   clk_i         - clock
+//   rst_ni        - active-low reset (asynchronous for the state machine,
+//                   sampled synchronously by the input synchronizer)
+//   i_Rx_Serial   - serial input, double-registered before use
+//   CLKS_PER_BIT  - number of clk_i cycles per serial bit
+//   o_Rx_DV       - high for one clock once a byte and its stop-bit time have elapsed
+//   o_Rx_Byte     - received byte; bit 0 is the first data bit sampled
+//
+// The stop-bit level is not checked, and r_Rx_Byte is not cleared by reset.
+module uart_rx_prog (
+    input         clk_i,
+    input         rst_ni,
+    input         i_Rx_Serial,
+    input  [15:0] CLKS_PER_BIT,
+    output        o_Rx_DV,
+    output [7:0]  o_Rx_Byte
+    );
+
+  parameter s_IDLE         = 3'b000;
+  parameter s_RX_START_BIT = 3'b001;
+  parameter s_RX_DATA_BITS = 3'b010;
+  parameter s_RX_STOP_BIT  = 3'b011;
+  parameter s_CLEANUP      = 3'b100;
+
+  reg         r_Rx_Data_R ;
+  reg         r_Rx_Data ;
+  reg [15:0]  r_Clock_Count ;
+  reg [2:0]   r_Bit_Index ; //8 bits total
+  reg [7:0]   r_Rx_Byte ;
+  reg         r_Rx_DV ;
+  reg [2:0]   r_SM_Main ;
+
+  // Purpose: Double-register the incoming data.
+  // This allows it to be used in the UART RX Clock Domain.
+  // (It removes problems caused by metastability)
+  always @(posedge clk_i)
+    begin
+      if (~rst_ni) begin
+        r_Rx_Data_R <= 1'b1;
+        r_Rx_Data   <= 1'b1;
+      end else begin
+        r_Rx_Data_R <= i_Rx_Serial;
+        r_Rx_Data   <= r_Rx_Data_R;
+      end
+    end
+
+  // Purpose: Control RX state machine
+  always @(posedge clk_i or negedge rst_ni)
+    begin
+      if (~rst_ni) begin
+        r_SM_Main     <= s_IDLE;
+        r_Rx_DV       <= 1'b0;
+        r_Clock_Count <= 0;
+        r_Bit_Index   <= 0;
+      end else begin
+        case (r_SM_Main)
+          s_IDLE :
+            begin
+              r_Rx_DV       <= 1'b0;
+              r_Clock_Count <= 0;
+              r_Bit_Index   <= 0;
+
+              if (r_Rx_Data == 1'b0)          // Start bit detected
+                r_SM_Main <= s_RX_START_BIT;
+              else
+                r_SM_Main <= s_IDLE;
+            end
+
+          // Check middle of start bit to make sure it's still low
+          s_RX_START_BIT :
+            begin
+              if (r_Clock_Count == ((CLKS_PER_BIT-1)>>1))
+                begin
+                  if (r_Rx_Data == 1'b0)
+                    begin
+                      r_Clock_Count <= 0;  // reset counter, found the middle
+                      r_SM_Main     <= s_RX_DATA_BITS;
+                    end
+                  else
+                    r_SM_Main <= s_IDLE;
+                end
+              else
+                begin
+                  r_Clock_Count <= r_Clock_Count + 1;
+                  r_SM_Main     <= s_RX_START_BIT;
+                end
+            end // case: s_RX_START_BIT
+
+          // Wait CLKS_PER_BIT-1 clock cycles to sample serial data
+          s_RX_DATA_BITS :
+            begin
+              if (r_Clock_Count < CLKS_PER_BIT-1)
+                begin
+                  r_Clock_Count <= r_Clock_Count + 1;
+                  r_SM_Main     <= s_RX_DATA_BITS;
+                end
+              else
+                begin
+                  r_Clock_Count          <= 0;
+                  r_Rx_Byte[r_Bit_Index] <= r_Rx_Data;
+
+                  // Check if we have received all bits
+                  if (r_Bit_Index < 7)
+                    begin
+                      r_Bit_Index <= r_Bit_Index + 1;
+                      r_SM_Main   <= s_RX_DATA_BITS;
+                    end
+                  else
+                    begin
+                      r_Bit_Index <= 0;
+                      r_SM_Main   <= s_RX_STOP_BIT;
+                    end
+                end
+            end // case: s_RX_DATA_BITS
+
+          // Receive Stop bit.  Stop bit = 1
+          s_RX_STOP_BIT :
+            begin
+              // Wait CLKS_PER_BIT-1 clock cycles for Stop bit to finish
+              if (r_Clock_Count < CLKS_PER_BIT-1)
+                begin
+                  r_Clock_Count <= r_Clock_Count + 1;
+                  r_SM_Main     <= s_RX_STOP_BIT;
+                end
+              else
+                begin
+                  r_Rx_DV       <= 1'b1;
+                  r_Clock_Count <= 0;
+                  r_SM_Main     <= s_CLEANUP;
+                end
+            end // case: s_RX_STOP_BIT
+
+          // Stay here 1 clock
+          s_CLEANUP :
+            begin
+              r_SM_Main <= s_IDLE;
+              r_Rx_DV   <= 1'b0;
+            end
+
+          default :
+            r_SM_Main <= s_IDLE;
+
+        endcase
+      end
+    end
+
+  assign o_Rx_DV   = r_Rx_DV;
+  assign o_Rx_Byte = r_Rx_Byte;
+
+endmodule // uart_rx_prog
diff --git a/verilog/rtl/ b/verilog/rtl/
new file mode 100644
index 0000000..d10d16a
--- /dev/null
+++ b/verilog/rtl/
@@ -0,0 +1,79 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+// Description: UART Transmit Module
+module uart_tx (
+ input clk_i,
+ input rst_ni,
+ input tx_enable,
+ input tick_baud_x16,
+ input logic parity_enable,
+ input wr,
+ input logic wr_parity,
+ input [7:0] wr_data,
+ output idle,
+ output logic tx
+ logic [3:0] baud_div_q;
+ logic tick_baud_q;
+ logic [3:0] bit_cnt_q, bit_cnt_d;
+ logic [10:0] sreg_q, sreg_d;
+ logic tx_q, tx_d;
+ assign tx = tx_q;
+ // Divide-by-16 on tick_baud_x16. tick_baud_q takes the carry-out of the
+ // 4-bit increment and is cleared on every cycle without a tick_baud_x16.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     tick_baud_q <= 1'b0;
+     baud_div_q  <= 4'h0;
+   end else begin
+     // Default: no tick. The assignment below wins when tick_baud_x16 is high.
+     tick_baud_q <= 1'b0;
+     if (tick_baud_x16) begin
+       {tick_baud_q, baud_div_q} <= {1'b0, baud_div_q} + 5'h1;
+     end
+   end
+ end
+ // Transmit state registers: line level, shift register and remaining-bit count.
+ // Reset leaves the line high, the shift register all ones and the count at zero.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+   if (!rst_ni) begin
+     tx_q      <= 1'b1;
+     sreg_q    <= 11'h7ff;
+     bit_cnt_q <= 4'h0;
+   end else begin
+     // Load all three registers from their next-state values in one statement.
+     {tx_q, sreg_q, bit_cnt_q} <= {tx_d, sreg_d, bit_cnt_d};
+   end
+ end
+ // Next-state logic for the transmitter, in priority order:
+ //   1. tx_enable low: force the reset values
+ //   2. wr: load a new frame {1'b1, parity-or-1, wr_data, 1'b0} and the bit count
+ //   3. tick_baud_q with bits remaining: drive sreg_q[0] onto tx and shift right,
+ //      filling with ones from the top
+ //   otherwise hold the current state.
+ always_comb begin
+   // Defaults: hold.
+   bit_cnt_d = bit_cnt_q;
+   sreg_d    = sreg_q;
+   tx_d      = tx_q;
+
+   if (!tx_enable) begin
+     bit_cnt_d = 4'h0;
+     sreg_d    = 11'h7ff;
+     tx_d      = 1'b1;
+   end else if (wr) begin
+     // A write takes precedence over shifting; tx itself is unchanged this cycle.
+     sreg_d    = {1'b1, (parity_enable ? wr_parity : 1'b1), wr_data, 1'b0};
+     bit_cnt_d = (parity_enable ? 4'd11 : 4'd10);
+   end else if (tick_baud_q && (bit_cnt_q != 4'h0)) begin
+     tx_d      = sreg_q[0];
+     sreg_d    = {1'b1, sreg_q[10:1]};
+     bit_cnt_d = bit_cnt_q - 4'h1;
+   end
+ end
+ assign idle = (tx_enable) ? (bit_cnt_q == 4'h0) : 1'b1;