Integration Completed
diff --git a/verilog/rtl/BrqRV_EB1/design/ahb_to_axi4.sv b/verilog/rtl/BrqRV_EB1/design/ahb_to_axi4.sv
new file mode 100644
index 0000000..190ed8b
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/ahb_to_axi4.sv
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+// $Id$
+//
+// Owner:
+// Function: AHB to AXI4 Bridge
+// Comments:
+//
+//********************************************************************************
+module ahb_to_axi4                        // AHB-Lite slave side in, AXI4 master side out
+import eb1_pkg::*;
+#(
+   TAG = 1,                               // width of the AXI ID ports
+   `include "eb1_param.vh"
+)
+//   ,TAG  = 1)
+(
+   input                   clk,           // raw clock: rawclk of the *_fpga flops, clk of the rvdffe cells
+   input                   rst_l,         // not referenced by name; presumably picked up by the cells via .*
+   input                   scan_mode,     // not referenced by name; presumably picked up by the cells via .*
+   input                   bus_clk_en,    // bus clock enable, qualifies every bus-side flop
+   input                   clk_override,  // not referenced by name; presumably picked up by the cells via .*
+
+   // AXI signals
+   // AXI Write Channels
+   output logic            axi_awvalid,
+   input  logic            axi_awready,
+   output logic [TAG-1:0]  axi_awid,
+   output logic [31:0]     axi_awaddr,
+   output logic [2:0]      axi_awsize,
+   output logic [2:0]      axi_awprot,
+   output logic [7:0]      axi_awlen,
+   output logic [1:0]      axi_awburst,
+
+   output logic            axi_wvalid,
+   input  logic            axi_wready,    // NOTE(review): not referenced in this module; the buffer is released on the AW/AR handshake only
+   output logic [63:0]     axi_wdata,
+   output logic [7:0]      axi_wstrb,
+   output logic            axi_wlast,
+
+   input  logic            axi_bvalid,    // not referenced in this module
+   output logic            axi_bready,
+   input  logic [1:0]      axi_bresp,     // not referenced in this module
+   input  logic [TAG-1:0]  axi_bid,       // not referenced in this module
+
+   // AXI Read Channels
+   output logic            axi_arvalid,
+   input  logic            axi_arready,
+   output logic [TAG-1:0]  axi_arid,
+   output logic [31:0]     axi_araddr,
+   output logic [2:0]      axi_arsize,
+   output logic [2:0]      axi_arprot,
+   output logic [7:0]      axi_arlen,
+   output logic [1:0]      axi_arburst,
+
+   input  logic            axi_rvalid,
+   output logic            axi_rready,
+   input  logic [TAG-1:0]  axi_rid,       // not referenced in this module
+   input  logic [63:0]     axi_rdata,
+   input  logic [1:0]      axi_rresp,
+
+   // AHB-Lite signals
+   input logic [31:0]      ahb_haddr,     // ahb bus address
+   input logic [2:0]       ahb_hburst,    // tied to 0; not referenced in this module
+   input logic             ahb_hmastlock, // tied to 0; not referenced in this module
+   input logic [3:0]       ahb_hprot,     // tied to 4'b0011; not referenced in this module
+   input logic [2:0]       ahb_hsize,     // size of bus transaction (possible values 0,1,2,3)
+   input logic [1:0]       ahb_htrans,    // Transaction type (possible values 0,2 only right now)
+   input logic             ahb_hwrite,    // ahb bus write
+   input logic [63:0]      ahb_hwdata,    // ahb bus write data
+   input logic             ahb_hsel,      // this slave was selected
+   input logic             ahb_hreadyin,  // previous hready was accepted or not
+
+   output logic [63:0]      ahb_hrdata,      // ahb bus read data
+   output logic             ahb_hreadyout,   // slave ready to accept transaction
+   output logic             ahb_hresp        // slave response (high indicates error)
+
+);
+
+   logic [7:0]       master_wstrb;          // write strobes decoded from the captured AHB size and address
+
+ typedef enum logic [1:0] {   IDLE   = 2'b00,    // Nothing in the buffer. No commands yet received
+                              WR     = 2'b01,    // Write Command received
+                              RD     = 2'b10,    // Read Command received
+                              PEND   = 2'b11     // Waiting on Read Data from core
+                            } state_t;
+   state_t      buf_state, buf_nxtstate;
+   logic        buf_state_en;               // load enable of the state register
+
+   // Buffer signals (one entry buffer)
+   logic                    buf_read_error_in, buf_read_error;
+   logic [63:0]             buf_rdata;
+
+   logic                    ahb_hready;
+   logic                    ahb_hready_q;
+   logic [1:0]              ahb_htrans_in, ahb_htrans_q;
+   logic [2:0]              ahb_hsize_q;
+   logic                    ahb_hwrite_q;
+   logic [31:0]             ahb_haddr_q;
+   logic [63:0]             ahb_hwdata_q;    // declared but never driven or read in this module
+   logic                    ahb_hresp_q;
+
+    //Miscellaneous signals
+   logic                    ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic;   // ahb_addr_in_pic is driven by its range check but not read by any logic here
+   logic                    ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc;   // in_region outputs of the range checks; not read here
+   // signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus
+   logic                    buf_rdata_en;
+
+   logic                    ahb_addr_clk_en, buf_rdata_clk_en;
+   logic                    bus_clk, ahb_addr_clk, buf_rdata_clk;
+   // Command buffer is the holding station where we convert to AXI and send to core
+   logic                    cmdbuf_wr_en, cmdbuf_rst;
+   logic                    cmdbuf_full;
+   logic                    cmdbuf_vld, cmdbuf_write;
+   logic [1:0]              cmdbuf_size;
+   logic [7:0]              cmdbuf_wstrb;
+   logic [31:0]             cmdbuf_addr;
+   logic [63:0]             cmdbuf_wdata;
+
+// FSM to control the bus states and when to block the hready and load the command buffer
+   always_comb begin
+      buf_nxtstate      = IDLE;              // defaults; every state below overrides what it needs
+      buf_state_en      = 1'b0;              // state register only loads when this is set
+      buf_rdata_en      = 1'b0;              // signal to load the buffer when the core sends read data back
+      buf_read_error_in = 1'b0;              // signal indicating that an error came back with the read from the core
+      cmdbuf_wr_en      = 1'b0;              // all clear from the gasket to load the buffer with the command for reads, command/data for writes
+      case (buf_state)
+         IDLE: begin  // No commands received
+                  buf_nxtstate      = ahb_hwrite ? WR : RD;
+                  buf_state_en      = ahb_hready & ahb_htrans[1] & ahb_hsel;                 // only transition on a valid htrans
+          end
+         WR: begin // Write command received last cycle
+                  buf_nxtstate      = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : ahb_hwrite  ? WR : RD;   // error, htrans == 0 or deselect -> IDLE, else chain into the next command
+                  buf_state_en      = (~cmdbuf_full | ahb_hresp) ;   // advance once the command buffer can take the write, or on an error
+                  cmdbuf_wr_en      = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel));   // Don't send command to the buffer in case of an error or when the master is not ready with the data now (htrans == 2'b01).
+         end
+         RD: begin // Read command received last cycle.
+                 buf_nxtstate      = ahb_hresp ? IDLE :PEND;                                       // If error go to idle, else wait for read data
+                 buf_state_en      = (~cmdbuf_full | ahb_hresp);                                   // only when command can go, or if it is an error
+                 cmdbuf_wr_en      = ~ahb_hresp & ~cmdbuf_full;                                    // send command only when no error
+         end
+         PEND: begin // Read Command has been sent. Waiting on Data.
+                 buf_nxtstate      = IDLE;                                                          // go back for next command and present data next cycle
+                 buf_state_en      = axi_rvalid & ~cmdbuf_write;                                    // read data is back
+                 buf_rdata_en      = buf_state_en;                                                  // buffer the read data coming back from core
+                 buf_read_error_in = buf_state_en & |axi_rresp[1:0];                                // buffer error flag if return has Error ( ECC ); any non-zero rresp counts
+         end
+     endcase
+   end // always_comb begin
+
+    rvdffs_fpga #($bits(state_t)) state_reg (.*, .din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk));   // FSM state register, loads only when buf_state_en is set
+
+   assign master_wstrb[7:0]   = ({8{ahb_hsize_q[2:0] == 3'b0}}  & (8'b1    << ahb_haddr_q[2:0])) |   // size 0: one strobe bit at the address offset
+                                ({8{ahb_hsize_q[2:0] == 3'b1}}  & (8'b11   << ahb_haddr_q[2:0])) |   // size 1: two strobe bits
+                                ({8{ahb_hsize_q[2:0] == 3'b10}} & (8'b1111 << ahb_haddr_q[2:0])) |   // size 2: four strobe bits
+                                ({8{ahb_hsize_q[2:0] == 3'b11}} & 8'b1111_1111);                      // size 3: all eight strobe bits
+
+   // AHB signals
+   assign ahb_hreadyout       = ahb_hresp ? (ahb_hresp_q & ~ahb_hready_q) :   // while hresp is high: low until hresp was already high with hready low in the previous cycle
+                                         ((~cmdbuf_full | (buf_state == IDLE)) & ~(buf_state == RD | buf_state == PEND)  & ~buf_read_error);   // otherwise stall while a read is in flight, the buffer is full outside IDLE, or a read error is flagged
+
+   assign ahb_hready          = ahb_hreadyout & ahb_hreadyin;
+   assign ahb_htrans_in[1:0]  = {2{ahb_hsel}} & ahb_htrans[1:0];   // htrans is forced to 0 when this slave is not selected
+   assign ahb_hrdata[63:0]    = buf_rdata[63:0];
+   assign ahb_hresp        = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE)  &
+                             // any of the following checks on the captured address-phase values raises the error
+                             ((~(ahb_addr_in_dccm | ahb_addr_in_iccm)) |                                                                                   // request not for ICCM or DCCM
+                             ((ahb_addr_in_iccm | (ahb_addr_in_dccm &  ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) |    // ICCM Rd/Wr OR DCCM Wr not the right size
+                             ((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0])   |                                                                             // HW size but unaligned
+                             ((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) |                                                                          // W size but unaligned
+                             ((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) |                                                                        // DW size but unaligned
+                             buf_read_error |                                                                                                              // Read ECC error
+                             (ahb_hresp_q & ~ahb_hready_q);   // keep hresp high for the cycle after an error cycle in which hready was low
+
+   // Buffer signals - needed for the read data and ECC error response
+   rvdff_fpga  #(.WIDTH(64)) buf_rdata_ff     (.din(axi_rdata[63:0]),   .dout(buf_rdata[63:0]), .clk(buf_rdata_clk), .clken(buf_rdata_clk_en), .rawclk(clk), .*);   // loads only when buf_rdata_en is set (see buf_rdata_clk_en)
+   rvdff_fpga  #(.WIDTH(1))  buf_read_error_ff(.din(buf_read_error_in), .dout(buf_read_error),  .clk(bus_clk),       .clken(bus_clk_en),       .rawclk(clk), .*);          // buf_read_error will be high only one cycle
+
+   // All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer.
+   rvdff_fpga #(.WIDTH(1))  hresp_ff  (.din(ahb_hresp),          .dout(ahb_hresp_q),       .clk(bus_clk),      .clken(bus_clk_en),      .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(1))  hready_ff (.din(ahb_hready),         .dout(ahb_hready_q),      .clk(bus_clk),      .clken(bus_clk_en),      .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(2))  htrans_ff (.din(ahb_htrans_in[1:0]), .dout(ahb_htrans_q[1:0]), .clk(bus_clk),      .clken(bus_clk_en),      .rawclk(clk), .*);   // captures the hsel-qualified htrans
+   rvdff_fpga #(.WIDTH(3))  hsize_ff  (.din(ahb_hsize[2:0]),     .dout(ahb_hsize_q[2:0]),  .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);   // size/write/address load only under ahb_addr_clk_en
+   rvdff_fpga #(.WIDTH(1))  hwrite_ff (.din(ahb_hwrite),         .dout(ahb_hwrite_q),      .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(32)) haddr_ff  (.din(ahb_haddr[31:0]),    .dout(ahb_haddr_q[31:0]), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
+
+   // Address check dccm - compares the captured AHB address against the DCCM window from the core parameters
+   rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
+                  .CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck (
+      .addr(ahb_haddr_q[31:0]),
+      .in_range(ahb_addr_in_dccm),
+      .in_region(ahb_addr_in_dccm_region_nc)
+   );
+
+   // Address check iccm - only instantiated when the ICCM is enabled; otherwise the match is tied to 0
+   if (pt.ICCM_ENABLE == 1) begin: GenICCM
+      rvrangecheck #(.CCM_SADR(pt.ICCM_SADR),
+                     .CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck (
+         .addr(ahb_haddr_q[31:0]),
+         .in_range(ahb_addr_in_iccm),
+         .in_region(ahb_addr_in_iccm_region_nc)
+      );
+   end else begin: GenNoICCM
+      assign ahb_addr_in_iccm = '0;             // no ICCM: an address can never match it
+      assign ahb_addr_in_iccm_region_nc = '0;
+   end
+
+   // PIC memory address check - NOTE(review): ahb_addr_in_pic is not read by any logic in this module
+   rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
+                  .CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck (
+      .addr(ahb_haddr_q[31:0]),
+      .in_range(ahb_addr_in_pic),
+      .in_region(ahb_addr_in_pic_region_nc)
+   );
+
+   // Command Buffer - single-entry holding register for the command that is presented on the AXI address/data channels.
+   assign cmdbuf_rst         = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write);   // clear on an AXI address handshake with no new command loading, or on hresp while the held command is a read
+   assign cmdbuf_full        = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)));   // occupied and not being accepted on AW/AR this cycle
+
+   rvdffsc_fpga #(.WIDTH(1))  cmdbuf_vldff      (.din(1'b1),              .dout(cmdbuf_vld),         .en(cmdbuf_wr_en), .clear(cmdbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffs_fpga  #(.WIDTH(1))  cmdbuf_writeff    (.din(ahb_hwrite_q),      .dout(cmdbuf_write),       .en(cmdbuf_wr_en),                     .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffs_fpga  #(.WIDTH(2))  cmdbuf_sizeff     (.din(ahb_hsize_q[1:0]),  .dout(cmdbuf_size[1:0]),   .en(cmdbuf_wr_en),                     .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffs_fpga  #(.WIDTH(8))  cmdbuf_wstrbff    (.din(master_wstrb[7:0]), .dout(cmdbuf_wstrb[7:0]),  .en(cmdbuf_wr_en),                     .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffe       #(.WIDTH(32)) cmdbuf_addrff     (.din(ahb_haddr_q[31:0]), .dout(cmdbuf_addr[31:0]),  .en(cmdbuf_wr_en & bus_clk_en),        .clk(clk), .*);   // clocked by clk, so bus_clk_en is folded into the enable
+   rvdffe       #(.WIDTH(64)) cmdbuf_wdataff    (.din(ahb_hwdata[63:0]),  .dout(cmdbuf_wdata[63:0]), .en(cmdbuf_wr_en & bus_clk_en),        .clk(clk), .*);   // write data is taken straight from the ahb_hwdata port, not from a captured copy
+
+   // AXI Write Command Channel
+   assign axi_awvalid           = cmdbuf_vld & cmdbuf_write;
+   assign axi_awid[TAG-1:0]     = '0;                           // ID is always 0
+   assign axi_awaddr[31:0]      = cmdbuf_addr[31:0];
+   assign axi_awsize[2:0]       = {1'b0, cmdbuf_size[1:0]};     // only the two low size bits are buffered
+   assign axi_awprot[2:0]       = 3'b0;
+   assign axi_awlen[7:0]        = '0;                           // AXI length field 0: one data transfer per command
+   assign axi_awburst[1:0]      = 2'b01;                        // AXI burst type encoding 2'b01 (INCR)
+   // AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data.
+   assign axi_wvalid            = cmdbuf_vld & cmdbuf_write;
+   assign axi_wdata[63:0]       = cmdbuf_wdata[63:0];
+   assign axi_wstrb[7:0]        = cmdbuf_wstrb[7:0];
+   assign axi_wlast             = 1'b1;
+  // AXI Write Response - Always ready. AHB does not require a write response.
+   assign axi_bready            = 1'b1;
+   // AXI Read Channels
+   assign axi_arvalid           = cmdbuf_vld & ~cmdbuf_write;
+   assign axi_arid[TAG-1:0]     = '0;                           // ID is always 0
+   assign axi_araddr[31:0]      = cmdbuf_addr[31:0];
+   assign axi_arsize[2:0]       = {1'b0, cmdbuf_size[1:0]};
+   assign axi_arprot            = 3'b0;
+   assign axi_arlen[7:0]        = '0;                           // AXI length field 0: one data transfer per command
+   assign axi_arburst[1:0]      = 2'b01;                        // AXI burst type encoding 2'b01 (INCR)
+   // AXI Read Response Channel - Always ready, as AHB reads are blocking and the buffer is always available for the returning read data.
+   assign axi_rready            = 1'b1;
+
+   // Clock header logic - enables for the gated clocks / flop clock enables
+   assign ahb_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]);   // capture address-phase values when hready is high and htrans[1] is set (ahb_hsel is not part of this term)
+   assign buf_rdata_clk_en    = bus_clk_en & buf_rdata_en;               // capture read data only when the FSM accepts it
+
+`ifdef RV_FPGA_OPTIMIZE
+   assign bus_clk = 1'b0;          // gated clocks are tied off in this build; presumably the *_fpga flops then rely on rawclk + clken
+   assign ahb_addr_clk = 1'b0;
+   assign buf_rdata_clk = 1'b0;
+`else
+   rvclkhdr bus_cgc       (.en(bus_clk_en),       .l1clk(bus_clk),       .*);
+   rvclkhdr ahb_addr_cgc  (.en(ahb_addr_clk_en),  .l1clk(ahb_addr_clk),  .*);
+   rvclkhdr buf_rdata_cgc (.en(buf_rdata_clk_en), .l1clk(buf_rdata_clk), .*);
+`endif
+
+`ifdef RV_ASSERT_ON
+   property ahb_error_protocol;
+      @(posedge bus_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp));   // hresp with hready high must follow a cycle of hresp with hready low
+   endproperty
+   assert_ahb_error_protocol: assert property (ahb_error_protocol) else
+      $display("Bus Error with hReady isn't preceded with Bus Error without hready");
+
+`endif
+
+endmodule // ahb_to_axi4
\ No newline at end of file
diff --git a/verilog/rtl/BrqRV_EB1/design/axi4_to_ahb.sv b/verilog/rtl/BrqRV_EB1/design/axi4_to_ahb.sv
new file mode 100644
index 0000000..18e5313
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/axi4_to_ahb.sv
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+// Owner:
+// Function: AXI4 -> AHB Bridge
+// Comments:
+//
+//********************************************************************************
+module axi4_to_ahb
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+,parameter TAG  = 1) (
+
+   input                   clk,
+   input                   free_clk,
+   input                   rst_l,
+   input                   scan_mode,
+   input                   bus_clk_en,
+   input                   clk_override,
+   input                   dec_tlu_force_halt,
+
+   // AXI signals
+   // AXI Write Channels
+   input  logic            axi_awvalid,
+   output logic            axi_awready,
+   input  logic [TAG-1:0]  axi_awid,
+   input  logic [31:0]     axi_awaddr,
+   input  logic [2:0]      axi_awsize,
+   input  logic [2:0]      axi_awprot,
+
+   input  logic            axi_wvalid,
+   output logic            axi_wready,
+   input  logic [63:0]     axi_wdata,
+   input  logic [7:0]      axi_wstrb,
+   input  logic            axi_wlast,
+
+   output logic            axi_bvalid,
+   input  logic            axi_bready,
+   output logic [1:0]      axi_bresp,
+   output logic [TAG-1:0]  axi_bid,
+
+   // AXI Read Channels
+   input  logic            axi_arvalid,
+   output logic            axi_arready,
+   input  logic [TAG-1:0]  axi_arid,
+   input  logic [31:0]     axi_araddr,
+   input  logic [2:0]      axi_arsize,
+   input  logic [2:0]      axi_arprot,
+
+   output logic            axi_rvalid,
+   input  logic            axi_rready,
+   output logic [TAG-1:0]  axi_rid,
+   output logic [63:0]     axi_rdata,
+   output logic [1:0]      axi_rresp,
+   output logic            axi_rlast,
+
+   // AHB-Lite signals
+   output logic [31:0]     ahb_haddr,       // ahb bus address
+   output logic [2:0]      ahb_hburst,      // tied to 0
+   output logic            ahb_hmastlock,   // tied to 0
+   output logic [3:0]      ahb_hprot,       // tied to 4'b0011
+   output logic [2:0]      ahb_hsize,       // size of bus transaction (possible values 0,1,2,3)
+   output logic [1:0]      ahb_htrans,      // Transaction type (possible values 0,2 only right now)
+   output logic            ahb_hwrite,      // ahb bus write
+   output logic [63:0]     ahb_hwdata,      // ahb bus write data
+
+   input logic [63:0]      ahb_hrdata,      // ahb bus read data
+   input logic             ahb_hready,      // slave ready to accept transaction
+   input logic             ahb_hresp        // slave response (high indicates erro)
+
+);
+
+   localparam ID   = 1;
+   localparam PRTY = 1;
+   typedef enum logic [2:0] {IDLE=3'b000, CMD_RD=3'b001, CMD_WR=3'b010, DATA_RD=3'b011, DATA_WR=3'b100, DONE=3'b101, STREAM_RD=3'b110, STREAM_ERR_RD=3'b111} state_t;
+   state_t buf_state, buf_nxtstate;
+
+   logic             slave_valid;
+   logic             slave_ready;
+   logic [TAG-1:0]   slave_tag;
+   logic [63:0]      slave_rdata;
+   logic [3:0]       slave_opc;
+
+   logic             wrbuf_en, wrbuf_data_en;
+   logic             wrbuf_cmd_sent, wrbuf_rst;
+   logic             wrbuf_vld;
+   logic             wrbuf_data_vld;
+   logic [TAG-1:0]   wrbuf_tag;
+   logic [2:0]       wrbuf_size;
+   logic [31:0]      wrbuf_addr;
+   logic [63:0]      wrbuf_data;
+   logic [7:0]       wrbuf_byteen;
+
+   logic             master_valid;
+   logic             master_ready;
+   logic [TAG-1:0]   master_tag;
+   logic [31:0]      master_addr;
+   logic [63:0]      master_wdata;
+   logic [2:0]       master_size;
+   logic [2:0]       master_opc;
+   logic [7:0]       master_byteen;
+
+   // Buffer signals (one entry buffer)
+   logic [31:0]                buf_addr;
+   logic [1:0]                 buf_size;
+   logic                       buf_write;
+   logic [7:0]                 buf_byteen;
+   logic                       buf_aligned;
+   logic [63:0]                buf_data;
+   logic [TAG-1:0]             buf_tag;
+
+   //Miscellaneous signals
+   logic                       buf_rst;
+   logic [TAG-1:0]             buf_tag_in;
+   logic [31:0]                buf_addr_in;
+   logic [7:0]                 buf_byteen_in;
+   logic [63:0]                buf_data_in;
+   logic                       buf_write_in;
+   logic                       buf_aligned_in;
+   logic [2:0]                 buf_size_in;
+
+   logic                       buf_state_en;
+   logic                       buf_wr_en;
+   logic                       buf_data_wr_en;
+   logic                       slvbuf_error_en;
+   logic                       wr_cmd_vld;
+
+   logic                       cmd_done_rst, cmd_done, cmd_doneQ;
+   logic                       trxn_done;
+   logic [2:0]                 buf_cmd_byte_ptr, buf_cmd_byte_ptrQ, buf_cmd_nxtbyte_ptr;
+   logic                       buf_cmd_byte_ptr_en;
+   logic                       found;
+
+   logic                       slave_valid_pre;
+   logic                       ahb_hready_q;
+   logic                       ahb_hresp_q;
+   logic [1:0]                 ahb_htrans_q;
+   logic                       ahb_hwrite_q;
+   logic [63:0]                ahb_hrdata_q;
+
+
+   logic                       slvbuf_write;
+   logic                       slvbuf_error;
+   logic [TAG-1:0]             slvbuf_tag;
+
+   logic                       slvbuf_error_in;
+   logic                       slvbuf_wr_en;
+   logic                       bypass_en;
+   logic                       rd_bypass_idle;
+
+   logic                       last_addr_en;
+   logic [31:0]                last_bus_addr;
+
+   // Clocks
+   logic                       buf_clken;
+   logic                       ahbm_data_clken;
+
+   logic                       buf_clk;
+   logic                       bus_clk;
+   logic                       ahbm_data_clk;
+
+   logic                       dec_tlu_force_halt_bus, dec_tlu_force_halt_bus_ns, dec_tlu_force_halt_bus_q;
+
+   // Decode a write byte enable into a 2-bit size: 3 for all 8 bytes, 2 for an aligned 4-byte group, 1 for an aligned 2-byte group, 0 otherwise
+   function automatic logic [1:0] get_write_size;
+      input logic [7:0] byteen;
+
+      logic             is_dword, is_word, is_half;
+
+      is_dword = (byteen[7:0] == 8'hff);
+      is_word  = (byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h0f);
+      is_half  = (byteen[7:0] == 8'hc0) | (byteen[7:0] == 8'h30) | (byteen[7:0] == 8'h0c) | (byteen[7:0] == 8'h03);
+
+      return ({2{is_dword}} & 2'b11) | ({2{is_word}} & 2'b10) | ({2{is_half}} & 2'b01);   // the three flags are mutually exclusive
+   endfunction // get_write_size
+
+   // Function to get the starting byte offset (addr[2:0]) of an aligned write from its byte enable; 0 for any unlisted pattern
+   function automatic logic [2:0] get_write_addr;
+      input logic [7:0] byteen;
+
+      logic [2:0]       addr;
+
+      addr[2:0] = (3'h0 & {3{((byteen[7:0] == 8'hff) | (byteen[7:0] == 8'h0f) | (byteen[7:0] == 8'h03))}}) |   // patterns whose lowest enabled byte is byte 0
+                  (3'h2 & {3{(byteen[7:0] == 8'h0c)}})                                                     |   // bytes 3:2
+                  (3'h4 & {3{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h30))}})                          |   // bytes 7:4 or bytes 5:4 (8'h30; previously 8'h03, which sent a byte-0 halfword to offset 4)
+                  (3'h6 & {3{(byteen[7:0] == 8'hc0)}});                                                        // bytes 7:6
+
+      return addr[2:0];
+   endfunction // get_write_addr
+
+   // Function to get the next byte pointer: index of the lowest set byteen bit at or above the start position (7 when there is none)
+   function automatic logic [2:0] get_nxtbyte_ptr (logic [2:0] current_byte_ptr, logic [7:0] byteen, logic get_next);
+      logic [2:0] start_ptr;   // first index that is allowed to match
+      logic       found;       // local flag; hides the module-level signal of the same name
+      found = '0;
+      //get_nxtbyte_ptr[2:0] = current_byte_ptr[2:0];
+      start_ptr[2:0] = get_next ? (current_byte_ptr[2:0] + 3'b1) : current_byte_ptr[2:0];   // get_next=1 starts one past the current byte (3-bit add, so 7 wraps to 0)
+      for (int j=0; j<8; j++) begin
+         if (~found) begin
+            get_nxtbyte_ptr[2:0] = 3'(j);   // result follows j until a match is seen, so a search with no match ends at 7
+            found |= (byteen[j] & (3'(j) >= start_ptr[2:0])) ;   // match: byte j is enabled and not below the start position
+         end
+      end
+   endfunction // get_nxtbyte_ptr
+
+   // Create bus synchronized version of force halt: the request is held in a free_clk flop until a cycle with bus_clk_en high
+   assign dec_tlu_force_halt_bus = dec_tlu_force_halt | dec_tlu_force_halt_bus_q;
+   assign dec_tlu_force_halt_bus_ns = ~bus_clk_en & dec_tlu_force_halt_bus;   // stops being held once bus_clk_en is high
+   rvdff  #(.WIDTH(1))   force_halt_busff(.din(dec_tlu_force_halt_bus_ns), .dout(dec_tlu_force_halt_bus_q), .clk(free_clk), .*);
+
+   // Write buffer - address and data halves are accepted independently and cleared together
+   assign wrbuf_en       = axi_awvalid & axi_awready & master_ready;                    // write address accepted
+   assign wrbuf_data_en  = axi_wvalid & axi_wready & master_ready;                      // write data accepted
+   assign wrbuf_cmd_sent = master_valid & master_ready & (master_opc[2:1] == 2'b01);    // buffered write handed to the state machine
+   assign wrbuf_rst      = (wrbuf_cmd_sent & ~wrbuf_en) | dec_tlu_force_halt_bus;       // clear when sent with no new address arriving, or on force halt
+
+   assign axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent) & master_ready;        // address slot free, or being freed this cycle
+   assign axi_wready  = ~(wrbuf_data_vld & ~wrbuf_cmd_sent) & master_ready;   // data slot free, or being freed this cycle
+   assign axi_arready = ~(wrbuf_vld & wrbuf_data_vld) & master_ready;         // reads are held off while a complete write is buffered
+   assign axi_rlast   = 1'b1;
+
+   assign wr_cmd_vld          = (wrbuf_vld & wrbuf_data_vld);                 // both halves of the write are present
+   assign master_valid        = wr_cmd_vld | axi_arvalid;
+   assign master_tag[TAG-1:0] = wr_cmd_vld ? wrbuf_tag[TAG-1:0] : axi_arid[TAG-1:0];   // a complete buffered write is selected ahead of an incoming read
+   assign master_opc[2:0]     = wr_cmd_vld ? 3'b011 : 3'b0;                   // 3'b011 for a write (opc[2:1] == 2'b01 is the write test used here), 3'b000 for a read
+   assign master_addr[31:0]   = wr_cmd_vld ? wrbuf_addr[31:0] : axi_araddr[31:0];
+   assign master_size[2:0]    = wr_cmd_vld ? wrbuf_size[2:0] : axi_arsize[2:0];
+   assign master_byteen[7:0]  = wrbuf_byteen[7:0];
+   assign master_wdata[63:0]  = wrbuf_data[63:0];
+
+   // AXI response channel signals
+   assign axi_bvalid       = slave_valid & slave_ready & slave_opc[3];                     // slave_opc[3] set: response goes out on the B channel
+   assign axi_bresp[1:0]   = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0);          // slave_opc[0] -> 2'b10, else slave_opc[1] -> 2'b11, else 2'b00
+   assign axi_bid[TAG-1:0] = slave_tag[TAG-1:0];
+
+   assign axi_rvalid       = slave_valid & slave_ready & (slave_opc[3:2] == 2'b0);         // slave_opc[3:2] == 0: response goes out on the R channel
+   assign axi_rresp[1:0]   = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0);          // same encoding as axi_bresp
+   assign axi_rid[TAG-1:0] = slave_tag[TAG-1:0];
+   assign axi_rdata[63:0]  = slave_rdata[63:0];
+   assign slave_ready        = axi_bready & axi_rready;                                    // both response channels must be ready
+
+ // FIFO state machine
+   always_comb begin
+      buf_nxtstate   = IDLE;
+      buf_state_en   = 1'b0;
+      buf_wr_en      = 1'b0;
+      buf_data_wr_en = 1'b0;
+      slvbuf_error_in   = 1'b0;
+      slvbuf_error_en   = 1'b0;
+      buf_write_in   = 1'b0;
+      cmd_done       = 1'b0;
+      trxn_done      = 1'b0;
+      buf_cmd_byte_ptr_en = 1'b0;
+      buf_cmd_byte_ptr[2:0] = '0;
+      slave_valid_pre   = 1'b0;
+      master_ready   = 1'b0;
+      ahb_htrans[1:0]  = 2'b0;
+      slvbuf_wr_en     = 1'b0;
+      bypass_en        = 1'b0;
+      rd_bypass_idle   = 1'b0;
+
+      case (buf_state)
+         IDLE: begin
+                  master_ready   = 1'b1;
+                  buf_write_in = (master_opc[2:1] == 2'b01);
+                  buf_nxtstate = buf_write_in ? CMD_WR : CMD_RD;
+                  buf_state_en = master_valid & master_ready;
+                  buf_wr_en    = buf_state_en;
+                  buf_data_wr_en = buf_state_en & (buf_nxtstate == CMD_WR);
+                  buf_cmd_byte_ptr_en   = buf_state_en;
+                  buf_cmd_byte_ptr[2:0] = buf_write_in ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : master_addr[2:0];
+                  bypass_en       = buf_state_en;
+                  rd_bypass_idle  = bypass_en & (buf_nxtstate == CMD_RD);
+                  ahb_htrans[1:0] = {2{bypass_en}} & 2'b10;
+          end
+         CMD_RD: begin
+                  buf_nxtstate    = (master_valid & (master_opc[2:0] == 3'b000))? STREAM_RD : DATA_RD;
+                  buf_state_en    = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q;
+                  cmd_done        = buf_state_en & ~master_valid;
+                  slvbuf_wr_en    = buf_state_en;
+                  master_ready  = buf_state_en & (buf_nxtstate == STREAM_RD);
+                  buf_wr_en       = master_ready;
+                  bypass_en       = master_ready & master_valid;
+                  buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0];
+                  ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en | bypass_en}};
+         end
+         STREAM_RD: begin
+                  master_ready  =  (ahb_hready_q & ~ahb_hresp_q) & ~(master_valid & master_opc[2:1] == 2'b01);
+                  buf_wr_en       = (master_valid & master_ready & (master_opc[2:0] == 3'b000)); // update the fifo if we are streaming the read commands
+                  buf_nxtstate    = ahb_hresp_q ? STREAM_ERR_RD : (buf_wr_en ? STREAM_RD : DATA_RD);            // assuming that the master accpets the slave response right away.
+                  buf_state_en    = (ahb_hready_q | ahb_hresp_q);
+                  buf_data_wr_en  = buf_state_en;
+                  slvbuf_error_in = ahb_hresp_q;
+                  slvbuf_error_en = buf_state_en;
+                  slave_valid_pre  = buf_state_en & ~ahb_hresp_q;             // send a response right away if we are not going through an error response.
+                  cmd_done        = buf_state_en & ~master_valid;                     // last one of the stream should not send a htrans
+                  bypass_en       = master_ready & master_valid & (buf_nxtstate == STREAM_RD) & buf_state_en;
+                  buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0];
+                  ahb_htrans[1:0] = 2'b10 & {2{~((buf_nxtstate != STREAM_RD) & buf_state_en)}};
+                  slvbuf_wr_en    = buf_wr_en;                                         // shifting the contents from the buf to slv_buf for streaming cases
+         end // case: STREAM_RD
+         STREAM_ERR_RD: begin
+                  buf_nxtstate = DATA_RD;
+                  buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q;
+                  slave_valid_pre = buf_state_en;
+                  slvbuf_wr_en   = buf_state_en;     // Overwrite slvbuf with buffer
+                  buf_cmd_byte_ptr[2:0] = buf_addr[2:0];
+                  ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en}};
+         end
+         DATA_RD: begin
+                  buf_nxtstate   = DONE;
+                  buf_state_en   = (ahb_hready_q | ahb_hresp_q);
+                  buf_data_wr_en = buf_state_en;
+                  slvbuf_error_in= ahb_hresp_q;
+                  slvbuf_error_en= buf_state_en;
+                  slvbuf_wr_en   = buf_state_en;
+
+         end
+         CMD_WR: begin
+                  buf_nxtstate = DATA_WR;
+                  trxn_done    = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0);
+                  buf_state_en = trxn_done;
+                  buf_cmd_byte_ptr_en = buf_state_en;
+                  slvbuf_wr_en    = buf_state_en;
+                  buf_cmd_byte_ptr    = trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ;
+                  cmd_done            = trxn_done & (buf_aligned | (buf_cmd_byte_ptrQ == 3'b111) |
+                                                     (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0));
+                  ahb_htrans[1:0] = {2{~(cmd_done | cmd_doneQ)}} & 2'b10;
+         end
+         DATA_WR: begin
+                  buf_state_en = (cmd_doneQ & ahb_hready_q) | ahb_hresp_q;
+                  master_ready = buf_state_en & ~ahb_hresp_q & slave_ready;   // Ready to accept new command if current command done and no error
+                  buf_nxtstate = (ahb_hresp_q | ~slave_ready) ? DONE :
+                                  ((master_valid & master_ready) ? ((master_opc[2:1] == 2'b01) ? CMD_WR : CMD_RD) : IDLE);
+                  slvbuf_error_in = ahb_hresp_q;
+                  slvbuf_error_en = buf_state_en;
+
+                  buf_write_in = (master_opc[2:1] == 2'b01);
+                  buf_wr_en = buf_state_en & ((buf_nxtstate == CMD_WR) | (buf_nxtstate == CMD_RD));
+                  buf_data_wr_en = buf_wr_en;
+
+                  cmd_done     = (ahb_hresp_q | (ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) &
+                                 ((buf_cmd_byte_ptrQ == 3'b111) | (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0))));
+                  bypass_en       = buf_state_en & buf_write_in & (buf_nxtstate == CMD_WR);   // Only bypass for writes for the time being
+                  ahb_htrans[1:0] = {2{(~(cmd_done | cmd_doneQ) | bypass_en)}} & 2'b10;
+                  slave_valid_pre  = buf_state_en & (buf_nxtstate != DONE);
+
+                  trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0);
+                  buf_cmd_byte_ptr_en = trxn_done | bypass_en;
+                  buf_cmd_byte_ptr = bypass_en ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) :
+                                                 trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ;
+            end
+         DONE: begin
+                  buf_nxtstate = IDLE;
+                  buf_state_en = slave_ready;
+                  slvbuf_error_en = 1'b1;
+                  slave_valid_pre = 1'b1;
+         end
+      endcase
+   end
+
+   assign buf_rst              = dec_tlu_force_halt_bus;
+   assign cmd_done_rst         = slave_valid_pre;
+   assign buf_addr_in[31:3]    = master_addr[31:3];
+   assign buf_addr_in[2:0]     = (buf_aligned_in & (master_opc[2:1] == 2'b01)) ? get_write_addr(master_byteen[7:0]) : master_addr[2:0];
+   assign buf_tag_in[TAG-1:0]  = master_tag[TAG-1:0];
+   assign buf_byteen_in[7:0]   = wrbuf_byteen[7:0];
+   assign buf_data_in[63:0]    = (buf_state == DATA_RD) ? ahb_hrdata_q[63:0] : master_wdata[63:0];
+   assign buf_size_in[1:0]     = (buf_aligned_in & (master_size[1:0] == 2'b11) & (master_opc[2:1] == 2'b01)) ? get_write_size(master_byteen[7:0]) : master_size[1:0];
+   assign buf_aligned_in       = (master_opc[2:0] == 3'b0)    |   // reads are always aligned since they are either DW or sideeffects
+                                 (master_size[1:0] == 2'b0) |  (master_size[1:0] == 2'b01) | (master_size[1:0] == 2'b10) | // Always aligned for Byte/HW/Word since they can be only for non-idempotent. IFU/SB are always aligned
+                                 ((master_size[1:0] == 2'b11) &
+                                  ((master_byteen[7:0] == 8'h3)  | (master_byteen[7:0] == 8'hc)   | (master_byteen[7:0] == 8'h30) | (master_byteen[7:0] == 8'hc0) |
+                                   (master_byteen[7:0] == 8'hf)  | (master_byteen[7:0] == 8'hf0)  | (master_byteen[7:0] == 8'hff)));
+
+   // Generate the ahb signals
+   assign ahb_haddr[31:3] = bypass_en ? master_addr[31:3]  : buf_addr[31:3];
+   assign ahb_haddr[2:0]  = {3{(ahb_htrans == 2'b10)}} & buf_cmd_byte_ptr[2:0];    // Trxn should be aligned during IDLE
+   assign ahb_hsize[2:0]  = bypass_en ? {1'b0, ({2{buf_aligned_in}} & buf_size_in[1:0])} :
+                                        {1'b0, ({2{buf_aligned}} & buf_size[1:0])};   // Send the full size for aligned trxn
+   assign ahb_hburst[2:0] = 3'b0;
+   assign ahb_hmastlock   = 1'b0;
+   assign ahb_hprot[3:0]  = {3'b001,~axi_arprot[2]};
+   assign ahb_hwrite      = bypass_en ? (master_opc[2:1] == 2'b01) : buf_write;
+   assign ahb_hwdata[63:0] = buf_data[63:0];
+
+   assign slave_valid          = slave_valid_pre;// & (~slvbuf_posted_write | slvbuf_error);
+   assign slave_opc[3:2]       = slvbuf_write ? 2'b11 : 2'b00;
+   assign slave_opc[1:0]       = {2{slvbuf_error}} & 2'b10;
+   assign slave_rdata[63:0]    = slvbuf_error ? {2{last_bus_addr[31:0]}} : ((buf_state == DONE) ? buf_data[63:0] : ahb_hrdata_q[63:0]);
+   assign slave_tag[TAG-1:0]   = slvbuf_tag[TAG-1:0];
+
+   assign last_addr_en = (ahb_htrans[1:0] != 2'b0) & ahb_hready & ahb_hwrite ;
+
+
+   rvdffsc_fpga #(.WIDTH(1))   wrbuf_vldff     (.din(1'b1),              .dout(wrbuf_vld),          .en(wrbuf_en),      .clear(wrbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffsc_fpga #(.WIDTH(1))   wrbuf_data_vldff(.din(1'b1),              .dout(wrbuf_data_vld),     .en(wrbuf_data_en), .clear(wrbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffs_fpga  #(.WIDTH(TAG)) wrbuf_tagff     (.din(axi_awid[TAG-1:0]), .dout(wrbuf_tag[TAG-1:0]), .en(wrbuf_en),                         .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffs_fpga  #(.WIDTH(3))   wrbuf_sizeff    (.din(axi_awsize[2:0]),   .dout(wrbuf_size[2:0]),    .en(wrbuf_en),                         .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffe       #(.WIDTH(32))  wrbuf_addrff    (.din(axi_awaddr[31:0]),  .dout(wrbuf_addr[31:0]),   .en(wrbuf_en & bus_clk_en),            .clk(clk), .*);
+   rvdffe       #(.WIDTH(64))  wrbuf_dataff    (.din(axi_wdata[63:0]),   .dout(wrbuf_data[63:0]),   .en(wrbuf_data_en & bus_clk_en),       .clk(clk), .*);
+   rvdffs_fpga  #(.WIDTH(8))   wrbuf_byteenff  (.din(axi_wstrb[7:0]),    .dout(wrbuf_byteen[7:0]),  .en(wrbuf_data_en),                    .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+
+   rvdffs_fpga #(.WIDTH(32))   last_bus_addrff (.din(ahb_haddr[31:0]),   .dout(last_bus_addr[31:0]), .en(last_addr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+
+   rvdffsc_fpga #(.WIDTH($bits(state_t))) buf_state_ff  (.din(buf_nxtstate),        .dout({buf_state}),      .en(buf_state_en), .clear(buf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(1))               buf_writeff   (.din(buf_write_in),        .dout(buf_write),        .en(buf_wr_en),                     .clk(buf_clk), .clken(buf_clken),  .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(TAG))             buf_tagff     (.din(buf_tag_in[TAG-1:0]), .dout(buf_tag[TAG-1:0]), .en(buf_wr_en),                     .clk(buf_clk), .clken(buf_clken),  .rawclk(clk), .*);
+   rvdffe      #(.WIDTH(32))              buf_addrff    (.din(buf_addr_in[31:0]),   .dout(buf_addr[31:0]),   .en(buf_wr_en & bus_clk_en),        .clk(clk), .*);
+   rvdffs_fpga #(.WIDTH(2))               buf_sizeff    (.din(buf_size_in[1:0]),    .dout(buf_size[1:0]),    .en(buf_wr_en),                     .clk(buf_clk), .clken(buf_clken),  .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(1))               buf_alignedff (.din(buf_aligned_in),      .dout(buf_aligned),      .en(buf_wr_en),                     .clk(buf_clk), .clken(buf_clken),  .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(8))               buf_byteenff  (.din(buf_byteen_in[7:0]),  .dout(buf_byteen[7:0]),  .en(buf_wr_en),                     .clk(buf_clk), .clken(buf_clken),  .rawclk(clk), .*);
+   rvdffe      #(.WIDTH(64))              buf_dataff    (.din(buf_data_in[63:0]),   .dout(buf_data[63:0]),   .en(buf_data_wr_en & bus_clk_en),   .clk(clk), .*);
+
+
+   rvdffs_fpga #(.WIDTH(1))   slvbuf_writeff  (.din(buf_write),        .dout(slvbuf_write),        .en(slvbuf_wr_en),    .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(TAG)) slvbuf_tagff    (.din(buf_tag[TAG-1:0]), .dout(slvbuf_tag[TAG-1:0]), .en(slvbuf_wr_en),    .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(1))   slvbuf_errorff  (.din(slvbuf_error_in),  .dout(slvbuf_error),        .en(slvbuf_error_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+
+   rvdffsc_fpga #(.WIDTH(1)) buf_cmd_doneff     (.din(1'b1),                  .dout(cmd_doneQ),              .en(cmd_done),            .clear(cmd_done_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(3))  buf_cmd_byte_ptrff (.din(buf_cmd_byte_ptr[2:0]), .dout(buf_cmd_byte_ptrQ[2:0]), .en(buf_cmd_byte_ptr_en),                       .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
+
+   rvdff_fpga #(.WIDTH(1))  hready_ff (.din(ahb_hready),       .dout(ahb_hready_q),       .clk(bus_clk),       .clken(bus_clk_en),      .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(2))  htrans_ff (.din(ahb_htrans[1:0]),  .dout(ahb_htrans_q[1:0]),  .clk(bus_clk),       .clken(bus_clk_en),      .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(1))  hwrite_ff (.din(ahb_hwrite),       .dout(ahb_hwrite_q),       .clk(bus_clk),       .clken(bus_clk_en),      .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(1))  hresp_ff  (.din(ahb_hresp),        .dout(ahb_hresp_q),        .clk(bus_clk),       .clken(bus_clk_en),      .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(64)) hrdata_ff (.din(ahb_hrdata[63:0]), .dout(ahb_hrdata_q[63:0]), .clk(ahbm_data_clk), .clken(ahbm_data_clken), .rawclk(clk), .*);
+
+   // Clock headers
+   // clock enables for ahbm addr/data
+   assign buf_clken       = bus_clk_en & (buf_wr_en | slvbuf_wr_en | clk_override);
+   assign ahbm_data_clken = bus_clk_en & ((buf_state != IDLE) | clk_override);
+
+`ifdef RV_FPGA_OPTIMIZE
+   assign bus_clk = 1'b0;
+   assign buf_clk = 1'b0;
+   assign ahbm_data_clk = 1'b0;
+`else
+   rvclkhdr bus_cgc       (.en(bus_clk_en),      .l1clk(bus_clk),       .*);
+   rvclkhdr buf_cgc       (.en(buf_clken),       .l1clk(buf_clk), .*);
+   rvclkhdr ahbm_data_cgc (.en(ahbm_data_clken), .l1clk(ahbm_data_clk), .*);
+`endif
+
+`ifdef RV_ASSERT_ON
+   property ahb_trxn_aligned;
+     @(posedge bus_clk) ahb_htrans[1]  |-> ((ahb_hsize[2:0] == 3'h0)                              |
+                                        ((ahb_hsize[2:0] == 3'h1) & (ahb_haddr[0] == 1'b0))   |
+                                        ((ahb_hsize[2:0] == 3'h2) & (ahb_haddr[1:0] == 2'b0)) |
+                                        ((ahb_hsize[2:0] == 3'h3) & (ahb_haddr[2:0] == 3'b0)));
+   endproperty
+   assert_ahb_trxn_aligned: assert property (ahb_trxn_aligned) else
+     $display("Assertion ahb_trxn_aligned failed: ahb_htrans=2'h%h, ahb_hsize=3'h%h, ahb_haddr=32'h%h",ahb_htrans[1:0], ahb_hsize[2:0], ahb_haddr[31:0]);
+
+   property ahb_error_protocol;
+      @(posedge bus_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp));
+   endproperty
+   assert_ahb_error_protocol: assert property (ahb_error_protocol) else
+      $display("Bus Error with hReady isn't preceded with Bus Error without hready");
+`endif
+
+endmodule // axi4_to_ahb
diff --git a/verilog/rtl/BrqRV_EB1/design/beh_lib.sv b/verilog/rtl/BrqRV_EB1/design/beh_lib.sv
new file mode 100644
index 0000000..af6cd8c
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/beh_lib.sv
@@ -0,0 +1,818 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// all flops call the rvdff flop
+
+`include "common_defines.vh"
+
+module rvdff #( parameter WIDTH=1, SHORT=0 )   // plain WIDTH-bit D flop; SHORT=1 turns it into a wire
+   (
+     input logic [WIDTH-1:0] din,
+     input logic           clk,
+     input logic                   rst_l,   // asynchronous reset, active low
+
+     output logic [WIDTH-1:0] dout
+     );
+   // SHORT==1: no storage is generated, dout simply follows din
+if (SHORT == 1) begin
+   assign dout = din;
+end
+else begin
+`ifdef RV_CLOCKGATE
+   always @(posedge tb_top.clk) begin   // debug trace; needs a scope named tb_top in the simulation
+      #0 $strobe("CG: %0t %m din %x dout %x clk %b width %d",$time,din,dout,clk,WIDTH);
+   end
+`endif
+   // the flop itself: cleared while rst_l is low, otherwise samples din on each rising clk edge
+   always_ff @(posedge clk or negedge rst_l) begin
+      if (rst_l == 0)
+        dout[WIDTH-1:0] <= 0;
+      else
+        dout[WIDTH-1:0] <= din[WIDTH-1:0];
+   end
+
+end
+endmodule
+
+// rvdffs: rvdff with a 2:1 input mux - loads din when en==1, otherwise recirculates dout
+module rvdffs #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic             en,
+     input logic           clk,
+     input logic                   rst_l,
+     output logic [WIDTH-1:0] dout
+     );
+   // SHORT==1 bypasses the flop (en is then ignored); otherwise the hold mux feeds an rvdff
+if (SHORT == 1) begin : genblock
+   assign dout = din;
+end
+else begin : genblock
+   rvdff #(WIDTH) dffs (.din((en) ? din[WIDTH-1:0] : dout[WIDTH-1:0]), .*);   // .* binds clk, rst_l and dout by name
+end
+
+endmodule
+
+// rvdffsc: rvdff with enable and clear; clear is applied on the flop input and overrides en
+module rvdffsc #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic             en,
+     input logic             clear,
+     input logic           clk,
+     input logic                   rst_l,
+     output logic [WIDTH-1:0] dout
+     );
+   // next-state value: zero when clear is set, else din when en is set, else the held dout
+   logic [WIDTH-1:0]          din_new;
+if (SHORT == 1) begin
+   assign dout = din;
+end
+else begin
+   assign din_new = {WIDTH{~clear}} & (en ? din[WIDTH-1:0] : dout[WIDTH-1:0]);
+   rvdff #(WIDTH) dffsc (.din(din_new[WIDTH-1:0]), .*);
+end
+endmodule
+
+// _fpga versions: same flops, but with three clocking ports (clk, clken, rawclk)
+module rvdff_fpga #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic           clk,
+     input logic           clken,
+     input logic           rawclk,
+     input logic           rst_l,
+
+     output logic [WIDTH-1:0] dout
+     );
+   // RV_FPGA_OPTIMIZE: clock from rawclk and use clken as a flop enable; otherwise a plain rvdff on clk
+if (SHORT == 1) begin
+   assign dout = din;
+end
+else begin
+   `ifdef RV_FPGA_OPTIMIZE
+    rvdffs #(WIDTH) dffs (.clk(rawclk), .en(clken), .*);
+`else
+    rvdff #(WIDTH)  dff (.*);
+`endif
+end
+endmodule
+
+// rvdffs_fpga: enable flop (loads din when en==1, else holds) with the _fpga clocking ports
+module rvdffs_fpga #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic             en,
+     input logic           clk,
+     input logic           clken,
+     input logic           rawclk,
+     input logic           rst_l,
+
+     output logic [WIDTH-1:0] dout
+     );
+   // RV_FPGA_OPTIMIZE: run on rawclk and fold clken into the enable; otherwise a plain rvdffs on clk
+if (SHORT == 1) begin : genblock
+   assign dout = din;
+end
+else begin : genblock
+`ifdef RV_FPGA_OPTIMIZE
+   rvdffs #(WIDTH)   dffs (.clk(rawclk), .en(clken & en), .*);
+`else
+   rvdffs #(WIDTH)   dffs (.*);
+`endif
+end
+
+endmodule
+
+// rvdffsc_fpga: flop with enable and clear (clear wins) with the _fpga clocking ports
+module rvdffsc_fpga #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic             en,
+     input logic             clear,
+     input logic             clk,
+     input logic             clken,
+     input logic             rawclk,
+     input logic             rst_l,
+
+     output logic [WIDTH-1:0] dout
+     );
+   // NOTE(review): din_new is declared here but nothing in this module drives or reads it
+   logic [WIDTH-1:0]          din_new;
+if (SHORT == 1) begin
+   assign dout = din;
+end
+else begin
+`ifdef RV_FPGA_OPTIMIZE
+   rvdffs  #(WIDTH)   dffs  (.clk(rawclk), .din(din[WIDTH-1:0] & {WIDTH{~clear}}),.en((en | clear) & clken), .*);   // clear forces a load of zeros
+`else
+   rvdffsc #(WIDTH)   dffsc (.*);
+`endif
+end
+endmodule
+
+
+module rvdffe #( parameter WIDTH=1, SHORT=0, OVERRIDE=0 )   // enable flop; SHORT=1 turns it into a wire
+   (
+     input  logic [WIDTH-1:0] din,
+     input  logic           en,
+     input  logic           clk,
+     input  logic           rst_l,
+     input  logic             scan_mode,
+     output logic [WIDTH-1:0] dout
+     );
+   // l1clk: driven by the rvclkhdr instance below (bound through .*); unused when RV_FPGA_OPTIMIZE is set
+   logic                      l1clk;
+
+if (SHORT == 1) begin : genblock
+   if (1) begin : genblock
+      assign dout = din;
+   end
+end
+else begin : genblock
+   // outside RV_PHYSICAL builds, WIDTH < 8 is rejected at elaboration unless OVERRIDE==1
+`ifndef RV_PHYSICAL
+   if (WIDTH >= 8 || OVERRIDE==1) begin: genblock
+`endif
+      // NOTE(review): rvclkhdr is defined outside this view; presumably it gates clk with en to produce l1clk
+`ifdef RV_FPGA_OPTIMIZE
+      rvdffs #(WIDTH) dff ( .* );
+`else
+      rvclkhdr clkhdr ( .* );
+      rvdff #(WIDTH) dff (.*, .clk(l1clk));
+`endif
+
+`ifndef RV_PHYSICAL
+   end
+   else
+      $error("%m: rvdffe must be WIDTH >= 8");
+`endif
+end // else: !if(SHORT == 1)
+
+endmodule // rvdffe
+
+
+module rvdffpcie #( parameter WIDTH=31 )
+   (
+     input  logic [WIDTH-1:0] din,
+     input  logic             clk,
+     input  logic             rst_l,
+     input  logic             en,
+     input  logic             scan_mode,
+     output logic [WIDTH-1:0] dout
+     );
+   // 31-bit enable flop.  The non-FPGA build delegates to rvdfflie with LEFT=19,
+   // i.e. the upper 19 bits and the lower 12 bits are flopped by separate instances.
+   // Outside RV_PHYSICAL builds any WIDTH other than 31 is an elaboration error.
+`ifndef RV_PHYSICAL
+   if (WIDTH == 31) begin: genblock
+`endif
+
+`ifdef RV_FPGA_OPTIMIZE
+      rvdffs #(WIDTH) dff ( .* );
+`else
+
+      rvdfflie #(.WIDTH(WIDTH), .LEFT(19)) dff (.*);
+
+`endif
+
+`ifndef RV_PHYSICAL
+   end
+   else
+      $error("%m: rvdffpcie width must be 31");
+`endif
+endmodule
+
+// format: { LEFT, EXTRA }
+// LEFT # of bits will be done with rvdffiee, all else EXTRA with rvdffe
+module rvdfflie #( parameter WIDTH=16, LEFT=8 )
+   (
+     input  logic [WIDTH-1:0] din,
+     input  logic             clk,
+     input  logic             rst_l,
+     input  logic             en,
+     input  logic             scan_mode,
+     output logic [WIDTH-1:0] dout
+     );
+
+   localparam EXTRA = WIDTH-LEFT;
+
+   // Bit ranges of the two fields inside din/dout:
+   //   LEFT  field : [LMSB:LLSB] = [WIDTH-1 : WIDTH-LEFT]
+   //   EXTRA field : [XMSB:XLSB] = [WIDTH-LEFT-1 : 0]
+
+
+
+
+   localparam LMSB = WIDTH-1;
+   localparam LLSB = LMSB-LEFT+1;
+   localparam XMSB = LLSB-1;
+   localparam XLSB = LLSB-EXTRA;
+
+   // outside RV_PHYSICAL builds, both fields must be at least 8 bits wide
+`ifndef RV_PHYSICAL
+   if (WIDTH >= 16 && LEFT >= 8 && EXTRA >= 8) begin: genblock
+`endif
+
+`ifdef RV_FPGA_OPTIMIZE
+      rvdffs #(WIDTH) dff ( .* );
+`else
+      // upper field: enabled by en, and only when its data actually changes (see rvdffiee)
+      rvdffiee #(LEFT)  dff_left  (.*, .din(din[LMSB:LLSB]), .dout(dout[LMSB:LLSB]));
+
+      // lower field: enabled by en alone
+      rvdffe  #(EXTRA)  dff_extra (.*, .din(din[XMSB:XLSB]), .dout(dout[XMSB:XLSB]));
+
+
+
+
+`endif
+
+`ifndef RV_PHYSICAL
+   end
+   else
+      $error("%m: rvdfflie must be WIDTH >= 16 && LEFT >= 8 && EXTRA >= 8");
+`endif
+endmodule
+
+
+
+
+// special power flop for predict packet
+// format: { LEFT, RIGHT==31 }
+// LEFT # of bits will be done with rvdffe; RIGHT is enabled by LEFT[LSB] & en
+module rvdffppe #( parameter WIDTH=32 )
+   (
+     input  logic [WIDTH-1:0] din,
+     input  logic             clk,
+     input  logic             rst_l,
+     input  logic             en,
+     input  logic             scan_mode,
+     output logic [WIDTH-1:0] dout
+     );
+   // RIGHT field is always the low 31 bits; LEFT takes whatever is above it
+   localparam RIGHT = 31;
+   localparam LEFT = WIDTH - RIGHT;
+   // NOTE(review): the default WIDTH=32 gives LEFT=1, which fails the LEFT>=8 check below
+   localparam LMSB = WIDTH-1;
+   localparam LLSB = LMSB-LEFT+1;
+   localparam RMSB = LLSB-1;
+   localparam RLSB = LLSB-RIGHT;
+
+   // outside RV_PHYSICAL builds, an undersized configuration is an elaboration error
+`ifndef RV_PHYSICAL
+   if (WIDTH>=32 && LEFT>=8 && RIGHT>=8) begin: genblock
+`endif
+
+`ifdef RV_FPGA_OPTIMIZE
+      rvdffs #(WIDTH) dff ( .* );
+`else
+      rvdffe #(LEFT)     dff_left (.*, .din(din[LMSB:LLSB]), .dout(dout[LMSB:LLSB]));
+      // RIGHT only updates when en is set and the incoming LSB of the LEFT field (din[LLSB]) is 1
+      rvdffe #(RIGHT)   dff_right (.*, .din(din[RMSB:RLSB]), .dout(dout[RMSB:RLSB]), .en(en & din[LLSB]));  // qualify with pret
+
+
+`endif
+
+`ifndef RV_PHYSICAL
+   end
+   else
+      $error("%m: must be WIDTH>=32 && LEFT>=8 && RIGHT>=8");
+`endif
+endmodule
+
+
+
+
+module rvdffie #( parameter WIDTH=1, OVERRIDE=0 )
+   (
+     input  logic [WIDTH-1:0] din,
+
+     input  logic           clk,
+     input  logic           rst_l,
+     input  logic             scan_mode,
+     output logic [WIDTH-1:0] dout
+     );
+   // Flop with an internally generated enable: there is no en port.
+   logic                      l1clk;
+   logic                      en;
+   // en is asserted whenever din differs from the current dout (see the assign below).
+   // Both locals are picked up by name through the .* connections further down.
+   // Outside RV_PHYSICAL builds, WIDTH < 8 is rejected at elaboration unless OVERRIDE==1.
+
+
+
+
+
+`ifndef RV_PHYSICAL
+   if (WIDTH >= 8 || OVERRIDE==1) begin: genblock
+`endif
+
+      assign en = |(din ^ dout);
+      // NOTE(review): rvclkhdr is defined outside this view; presumably it gates clk with en to produce l1clk
+`ifdef RV_FPGA_OPTIMIZE
+      rvdffs #(WIDTH) dff ( .* );
+`else
+      rvclkhdr clkhdr ( .* );
+      rvdff #(WIDTH) dff (.*, .clk(l1clk));
+`endif
+
+`ifndef RV_PHYSICAL
+   end
+   else
+     $error("%m: rvdffie must be WIDTH >= 8");
+`endif
+
+
+endmodule
+
+// ie flop but it has an .en input: the flop updates only when en is set AND din differs from dout
+module rvdffiee #( parameter WIDTH=1, OVERRIDE=0 )
+   (
+     input  logic [WIDTH-1:0] din,
+
+     input  logic           clk,
+     input  logic           rst_l,
+     input  logic           scan_mode,
+     input  logic           en,
+     output logic [WIDTH-1:0] dout
+     );
+   // l1clk is declared but not used in this module; final_en is the qualified enable
+   logic                      l1clk;
+   logic                      final_en;
+   // outside RV_PHYSICAL builds, WIDTH < 8 is rejected at elaboration unless OVERRIDE==1
+`ifndef RV_PHYSICAL
+   if (WIDTH >= 8 || OVERRIDE==1) begin: genblock
+`endif
+
+      assign final_en = (|(din ^ dout)) & en;
+      // NOTE(review): OVERRIDE is not forwarded to rvdffe, which applies its own WIDTH >= 8 check
+`ifdef RV_FPGA_OPTIMIZE
+      rvdffs #(WIDTH) dff ( .*, .en(final_en) );
+`else
+      rvdffe #(WIDTH) dff (.*,  .en(final_en));
+`endif
+
+`ifndef RV_PHYSICAL
+   end
+   else
+      $error("%m: rvdffiee width must be >= 8");
+`endif
+
+endmodule
+
+
+
+module rvsyncss #(parameter WIDTH = 251)   // two rvdff stages in series on every bit of din
+   (
+     input  logic                 clk,
+     input  logic                 rst_l,
+     input  logic [WIDTH-1:0]     din,
+     output logic [WIDTH-1:0]     dout
+     );
+   // din_ff1: output of the first stage, input of the second
+   logic [WIDTH-1:0]              din_ff1;
+   // .* supplies clk and rst_l to both stages; dout therefore lags din by two clk edges
+   rvdff #(WIDTH) sync_ff1  (.*, .din (din[WIDTH-1:0]),     .dout(din_ff1[WIDTH-1:0]));
+   rvdff #(WIDTH) sync_ff2  (.*, .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0]));
+
+endmodule // rvsyncss
+
+module rvsyncss_fpga #(parameter WIDTH = 251)   // rvsyncss built from rvdff_fpga stages
+   (
+     input  logic                 gw_clk,
+     input  logic                 rawclk,
+     input  logic                 clken,
+     input  logic                 rst_l,
+     input  logic [WIDTH-1:0]     din,
+     output logic [WIDTH-1:0]     dout
+     );
+   // din_ff1: output of the first stage, input of the second
+   logic [WIDTH-1:0]              din_ff1;
+   // gw_clk drives the clk port of both stages; rawclk/clken are passed straight through
+   rvdff_fpga #(WIDTH) sync_ff1  (.*, .clk(gw_clk), .rawclk(rawclk), .clken(clken), .din (din[WIDTH-1:0]),     .dout(din_ff1[WIDTH-1:0]));
+   rvdff_fpga #(WIDTH) sync_ff2  (.*, .clk(gw_clk), .rawclk(rawclk), .clken(clken), .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0]));
+
+endmodule // rvsyncss_fpga
+
+module rvlsadder
+  (
+    input logic [31:0] rs1,
+    input logic [11:0] offset,
+
+    output logic [31:0] dout
+    );
+
+   // dout = rs1 + sign-extended offset.  Only the low 12 bits go through an
+   // adder; the upper 20 bits are picked from {rs1, rs1+1, rs1-1}.
+   logic                lo_carry;    // carry out of the low 12-bit add
+   logic                off_neg;     // sign bit of the offset
+   logic [31:12]        hi_plus1;
+   logic [31:12]        hi_minus1;
+
+   assign off_neg          = offset[11];
+   assign hi_plus1[31:12]  = rs1[31:12] + 1;
+   assign hi_minus1[31:12] = rs1[31:12] - 1;
+
+   assign {lo_carry,dout[11:0]} = {1'b0,rs1[11:0]} + {1'b0,offset[11:0]};
+
+   // carry with a positive offset -> +1, no carry with a negative offset -> -1, else unchanged
+   assign dout[31:12] = ({20{ lo_carry & ~off_neg}} & hi_plus1[31:12])  |
+                        ({20{~lo_carry &  off_neg}} & hi_minus1[31:12]) |
+                        ({20{~(lo_carry ^ off_neg)}} & rs1[31:12]);
+
+endmodule // rvlsadder
+
+// Branch target adder; the pipe only carries pc[31:1], so bit 0 is never present
+
+module rvbradder
+  (
+    input [31:1] pc,
+    input [12:1] offset,
+
+    output [31:1] dout
+    );
+
+   // dout = pc + sign-extended offset.  Bits [12:1] are added directly;
+   // bits [31:13] are selected from {pc, pc+1, pc-1} using the low carry
+   // and the offset sign.
+   logic          lo_carry;
+   logic          off_neg;
+   logic [31:13]  hi_plus1;
+   logic [31:13]  hi_minus1;
+
+   assign off_neg          = offset[12];
+   assign hi_plus1[31:13]  = pc[31:13] + 1;
+   assign hi_minus1[31:13] = pc[31:13] - 1;
+
+   assign {lo_carry,dout[12:1]} = {1'b0,pc[12:1]} + {1'b0,offset[12:1]};
+
+   assign dout[31:13] = ({19{ lo_carry & ~off_neg}} & hi_plus1[31:13])  |
+                        ({19{~lo_carry &  off_neg}} & hi_minus1[31:13]) |
+                        ({19{~(lo_carry ^ off_neg)}} & pc[31:13]);
+
+
+
+endmodule // rvbradder
+
+
+// Two's complement (negate) without an adder
+module rvtwoscomp #( parameter WIDTH=32 )
+   (
+     input logic [WIDTH-1:0] din,
+
+     output logic [WIDTH-1:0] dout
+     );
+
+   // Bit 0 passes straight through.  Every higher bit is inverted exactly
+   // when some lower bit of din is set, i.e. above the lowest set bit.
+   assign dout[0] = din[0];
+
+   genvar                     b;
+
+   for ( b = 1; b < WIDTH; b++ )  begin : flip_after_first_one
+      assign dout[b] = din[b] ^ (|din[b-1:0]);
+   end : flip_after_first_one
+
+endmodule  // rvtwoscomp
+
+// find first: count of zero bits met while scanning from the MSB down to bit 1 before a set bit
+module rvfindfirst1 #( parameter WIDTH=32, SHIFT=$clog2(WIDTH) )
+   (
+     input logic [WIDTH-1:0] din,
+
+     output logic [SHIFT-1:0] dout
+     );
+   logic                      seen_one;   // sticky: a set bit has been met at or above the current position
+
+   always_comb begin
+      seen_one = 1'b0;
+      dout     = '0;
+      // bit 0 is never examined, so the count reaches WIDTH-1 when din[WIDTH-1:1] is all zero
+      for ( int pos = WIDTH-1; pos >= 1; pos-- )  begin : find_first_one
+         seen_one = seen_one | din[pos];
+         dout     = dout + !seen_one;
+      end : find_first_one
+   end
+endmodule // rvfindfirst1
+
+module rvfindfirst1hot #( parameter WIDTH=32 )
+   (
+     input logic [WIDTH-1:0] din,
+
+     output logic [WIDTH-1:0] dout
+     );
+
+   // One-hot isolate of the lowest set bit of din (all zero when din is zero):
+   // a bit survives only if it is set and no lower-numbered bit is set.
+   assign dout[0] = din[0];
+
+   genvar                     b;
+   for ( b = 1; b < WIDTH; b++ )  begin : find_first_one
+      assign dout[b] = din[b] & ~(|din[b-1:0]);
+   end : find_first_one
+
+endmodule // rvfindfirst1hot
+
+// Compare data against mask, optionally ignoring a run of low-order bits.
+// The ignored run is bit 0 plus every bit whose lower mask bits are all 1s.
+module rvmaskandmatch #( parameter WIDTH=32 )
+   (
+     input  logic [WIDTH-1:0] mask,     // this will have the mask in the lower bit positions
+     input  logic [WIDTH-1:0] data,     // this is what needs to be matched on the upper bits with the mask's upper bits
+     input  logic             masken,   // when 1 : do mask. 0 : full match
+     output logic             match
+     );
+
+   logic [WIDTH-1:0]          bit_ok;        // per bit: ignored or equal
+   logic                      ignore_low;    // masking requested and mask is not all ones
+   genvar                     b;
+
+   assign ignore_low = masken & ~(&mask[WIDTH-1:0]);
+
+   assign bit_ok[0]  = (mask[0] == data[0]) | ignore_low;
+
+   for ( b = 1; b < WIDTH; b++ )  begin : match_after_first_zero
+      assign bit_ok[b] = (ignore_low & (&mask[b-1:0])) | (mask[b] == data[b]);
+   end : match_after_first_zero
+
+   assign match  = &bit_ok[WIDTH-1:0];    // every bit is either ignored or equal
+
+endmodule // rvmaskandmatch
+
+
+
+
+// Range check against the window that starts at CCM_SADR (in_range) and its 4-bit region (in_region)
+module rvrangecheck  #(CCM_SADR = 32'h0,
+                       CCM_SIZE  = 128) (
+   input  logic [31:0]   addr,                             // Address to be checked for range
+   output logic          in_range,                            // S_ADDR <= start_addr < E_ADDR
+   output logic          in_region
+);
+
+   localparam REGION_BITS = 4;
+   localparam MASK_BITS = 10 + $clog2(CCM_SIZE);   // presumably CCM_SIZE is in KB - TODO confirm
+
+   logic [31:0]          base;
+   logic                 window_hit;     // address bits [31:MASK_BITS] equal those of the base
+   logic                 top_quarter;    // both offset bits just below MASK_BITS are set
+
+   assign base[31:0]  = CCM_SADR;
+   assign window_hit  = (addr[31:MASK_BITS] == base[31:MASK_BITS]);
+   assign top_quarter = &addr[MASK_BITS-1 : MASK_BITS-2];
+
+   assign in_region = (addr[31:(32-REGION_BITS)] == base[31:(32-REGION_BITS)]);
+
+   // a size of 48 is not a power of two: use the rounded-up window minus its top quarter
+   assign in_range  = (CCM_SIZE == 48) ? (window_hit & ~top_quarter) : window_hit;
+endmodule  // rvrangecheck
+
+// Even parity generator over WIDTH bits (16 by default)
+module rveven_paritygen #(WIDTH = 16)  (
+                                         input  logic [WIDTH-1:0]  data_in,         // Data
+                                         output logic              parity_out       // generated even parity
+                                         );
+   // XOR of all data bits: 1 when data_in holds an odd number of ones
+   always_comb parity_out = ^data_in;
+
+endmodule  // rveven_paritygen
+
+module rveven_paritycheck #(WIDTH = 16)  (
+                                           input  logic [WIDTH-1:0]  data_in,         // Data
+                                           input  logic              parity_in,
+                                           output logic              parity_err       // Parity error
+                                           );
+   // Even parity holds when data_in and parity_in together contain an even
+   // number of ones; parity_err is the XOR over all of those bits.
+   always_comb parity_err = ^{parity_in, data_in};
+endmodule  // rveven_paritycheck
+
+module rvecc_encode  (
+                      input [31:0] din,
+                      output [6:0] ecc_out
+                      );
+logic [5:0] ecc_out_temp;
+   // six check bits, each the XOR of a fixed subset of the 32 data bits
+   assign ecc_out_temp[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30];
+   assign ecc_out_temp[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31];
+   assign ecc_out_temp[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31];
+   assign ecc_out_temp[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_out_temp[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_out_temp[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31];
+   // ecc_out[6] is the overall parity across all 32 data bits and the six check bits
+   assign ecc_out[6:0] = {(^din[31:0])^(^ecc_out_temp[5:0]),ecc_out_temp[5:0]};
+
+endmodule // rvecc_encode
+
+module rvecc_decode  (
+                      input         en,
+                      input [31:0]  din,
+                      input [6:0]   ecc_in,
+                      input         sed_ded,    // only do detection and no correction. Used for the I$
+                      output [31:0] dout,
+                      output [6:0]  ecc_out,
+                      output        single_ecc_error,
+                      output        double_ecc_error
+
+                      );
+   // ecc_check: syndrome; error_mask: one-hot flip position; *_plus_parity: 39-bit data+check word
+   logic [6:0]                      ecc_check;
+   logic [38:0]                     error_mask;
+   logic [38:0]                     din_plus_parity, dout_plus_parity;
+
+   // Generate the ecc bits
+   assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30];
+   assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31];
+   assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31];
+   assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31];
+
+   // This is the parity bit
+   assign ecc_check[6] = ((^din[31:0])^(^ecc_in[6:0])) & ~sed_ded;
+   // nonzero syndrome with the parity check set => single error; with it clear => double error
+   assign single_ecc_error = en & (ecc_check[6:0] != 0) & ecc_check[6];   // this will never be on for sed_ded
+   assign double_ecc_error = en & (ecc_check[6:0] != 0) & ~ecc_check[6];  // all errors in the sed_ded case will be recorded as DE
+
+   // Generate the mask for error correction
+   for (genvar i=1; i<40; i++) begin
+      assign error_mask[i-1] = (ecc_check[5:0] == i);
+   end
+
+   // Generate the corrected data
+   assign din_plus_parity[38:0] = {ecc_in[6], din[31:26], ecc_in[5], din[25:11], ecc_in[4], din[10:4], ecc_in[3], din[3:1], ecc_in[2], din[0], ecc_in[1:0]};
+   // on a single error, flip the word bit selected by error_mask, then split data and check bits apart again
+   assign dout_plus_parity[38:0] = single_ecc_error ? (error_mask[38:0] ^ din_plus_parity[38:0]) : din_plus_parity[38:0];
+   assign dout[31:0]             = {dout_plus_parity[37:32], dout_plus_parity[30:16], dout_plus_parity[14:8], dout_plus_parity[6:4], dout_plus_parity[2]};
+   assign ecc_out[6:0]           = {(dout_plus_parity[38] ^ (ecc_check[6:0] == 7'b1000000)), dout_plus_parity[31], dout_plus_parity[15], dout_plus_parity[7], dout_plus_parity[3], dout_plus_parity[1:0]};
+
+endmodule // rvecc_decode
+
+module rvecc_encode_64  (
+                      input [63:0] din,
+                      output [6:0] ecc_out
+                      );
+  assign ecc_out[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63];
+   // seven check bits for a 64-bit word, each the XOR of a fixed subset of the data bits
+   assign ecc_out[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63];
+
+   assign ecc_out[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63];
+
+   assign ecc_out[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_out[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_out[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+   // unlike rvecc_encode, bit 6 here covers only din[63:57]; no overall parity bit is produced
+   assign ecc_out[6] = din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63];
+
+endmodule // rvecc_encode_64
+
+// rvecc_decode_64: detection-only checker for the 7-bit code produced by rvecc_encode_64; it reports a mismatch but has no corrected-data output.
+module rvecc_decode_64  (
+                      input         en,         // gates ecc_error; when low the output is forced to 0
+                      input [63:0]  din,        // data word as read back
+                      input [6:0]   ecc_in,     // check bits stored alongside din
+                      output        ecc_error   // high when en is set and any syndrome bit is non-zero
+                      );
+
+   logic [6:0]                      ecc_check;
+
+   // Syndrome: recompute each check bit from din and XOR it with the stored ecc_in bit (the din bit groups match those in rvecc_encode_64)
+   assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63];
+
+   assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63];
+
+   assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63];
+
+   assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_check[6] = ecc_in[6]^din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63];
+
+   assign ecc_error = en & (ecc_check[6:0] != 0);  // any non-zero syndrome flags an error; this module does not classify it as single- or double-bit
+
+ endmodule // rvecc_decode_64
+
+// Skywater cell
+//sky130_fd_sc_hd__dlclkp_1 CG( .CLK(clk), .GCLK(l1clk), .GATE(en_i | test_en_i));
+
+
+/*module `TEC_RV_ICG 
+  (
+   input logic SE, EN, CK,
+   output Q
+   );
+
+   logic  en_ff;
+   logic  enable;
+
+   assign      enable = EN | SE;
+
+`ifdef VERILATOR
+   always @(negedge CK) begin
+      en_ff <= enable;
+   end
+`else
+   always @(CK, enable) begin
+      if(!CK)
+        en_ff = enable;
+   end
+`endif
+   assign Q = CK & en_ff;
+
+endmodule
+*/
+`ifndef RV_FPGA_OPTIMIZE
+module rvclkhdr
+  (
+   input  logic en,          // gate enable, wired straight to the cell's GATE pin
+   input  logic clk,         // source clock, wired to the cell's CLK pin
+   input  logic scan_mode,   // not referenced inside this module
+   output logic l1clk        // gated clock, taken from the cell's GCLK pin
+   );
+   // Clock-gate wrapper, compiled only when RV_FPGA_OPTIMIZE is undefined. NOTE(review): common_defines.vh in this patch has "define RV_FPGA_OPTIMIZE 0", which still counts as defined - confirm this module is meant to be compiled out.
+   logic   SE;
+   assign       SE = 0;
+   // SE is tied low but is not connected to the cell below (the earlier .* hookup survives only in the trailing comment).
+   `TEC_RV_ICG clkhdr( .CLK(clk), .GCLK(l1clk), .GATE(en)); /*clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk));*/
+   // NOTE(review): TEC_RV_ICG must expand to a cell with CLK/GCLK/GATE pins, presumably the sky130 dlclkp cell named in the comment before this module - confirm where the macro is defined.
+endmodule // rvclkhdr
+`endif
+// rvoclkhdr: clock header that passes clk straight through when RV_FPGA_OPTIMIZE is defined and instantiates the TEC_RV_ICG gating cell otherwise.
+module rvoclkhdr
+  (
+   input  logic en,          // gate enable; only used in the non-FPGA branch
+   input  logic clk,         // source clock
+   input  logic scan_mode,   // not referenced inside this module
+   output logic l1clk        // output clock (ungated or gated, depending on the branch below)
+   );
+
+   logic   SE;
+   assign       SE = 0;
+   // SE is tied low and is not connected to anything in either branch below.
+`ifdef RV_FPGA_OPTIMIZE
+   assign l1clk = clk;   // FPGA branch: en is ignored and the clock is not gated
+`else
+   `TEC_RV_ICG clkhdr( .CLK(clk), .GCLK(l1clk), .GATE(en)); //clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk));
+`endif
+
+endmodule
+
+
+
diff --git a/verilog/rtl/BrqRV_EB1/design/common_defines.vh b/verilog/rtl/BrqRV_EB1/design/common_defines.vh
new file mode 100644
index 0000000..2c9f8c5
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/common_defines.vh
@@ -0,0 +1,247 @@
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+// This is an automatically generated file by hshabbir on 08 June 2021, 08:16:54 PKT
+//
+// cmd:    brqrv -target=default -set build_axi4 
+//
+`define RV_ROOT "/home/hshabbir/caravel_BrqRV_EB1/verilog/rtl/BrqRV_EB1"
+`define RV_RET_STACK_SIZE 8
+`define RV_EXT_ADDRWIDTH 32
+`define RV_STERR_ROLLBACK 0
+`define SDVT_AHB 0
+`define RV_EXT_DATAWIDTH 64
+`define RV_LDERR_ROLLBACK 1
+`define CLOCK_PERIOD 100
+`define RV_ASSERT_ON 
+`define RV_BUILD_AXI4 1
+`define TOP tb_top
+`define RV_BUILD_AXI_NATIVE 1
+`define CPU_TOP `RV_TOP.brqrv
+`define RV_TOP `TOP.rvtop
+`define RV_UNUSED_REGION2 'h70000000
+`define RV_EXTERNAL_DATA 'hd0580000
+`define RV_SERIALIO 'he0580000
+`define RV_UNUSED_REGION7 'h20000000
+`define RV_UNUSED_REGION5 'h40000000
+`define RV_DEBUG_SB_MEM 'hb0580000
+`define RV_EXTERNAL_DATA_1 'hc0000000
+`define RV_UNUSED_REGION0 'h90000000
+`define RV_UNUSED_REGION3 'h60000000
+`define RV_UNUSED_REGION9 'h00000000
+`define RV_UNUSED_REGION8 'h10000000
+`define RV_UNUSED_REGION6 'h30000000
+`define RV_UNUSED_REGION1 'h80000000
+`define RV_UNUSED_REGION4 'h50000000
+`define RV_BHT_ADDR_LO 2
+`define RV_BHT_SIZE 256
+`define RV_BHT_GHR_HASH_1 
+`define RV_BHT_GHR_SIZE 7
+`define RV_BHT_ADDR_HI 8
+`define RV_BHT_HASH_STRING {hashin[7+1:2]^ghr[7-1:0]}// cf2
+`define RV_BHT_ARRAY_DEPTH 128
+`define RV_BHT_GHR_RANGE 6:0
+`define RV_INST_ACCESS_ADDR5 'h00000000
+`define RV_DATA_ACCESS_MASK3 'hffffffff
+`define RV_INST_ACCESS_MASK7 'hffffffff
+`define RV_DATA_ACCESS_MASK0 'hffffffff
+`define RV_INST_ACCESS_ADDR6 'h00000000
+`define RV_INST_ACCESS_ENABLE3 1'h0
+`define RV_INST_ACCESS_MASK6 'hffffffff
+`define RV_DATA_ACCESS_ENABLE6 1'h0
+`define RV_INST_ACCESS_ENABLE5 1'h0
+`define RV_DATA_ACCESS_ENABLE7 1'h0
+`define RV_INST_ACCESS_ENABLE1 1'h0
+`define RV_DATA_ACCESS_ADDR0 'h00000000
+`define RV_DATA_ACCESS_ADDR3 'h00000000
+`define RV_INST_ACCESS_ADDR7 'h00000000
+`define RV_INST_ACCESS_ENABLE0 1'h0
+`define RV_INST_ACCESS_MASK5 'hffffffff
+`define RV_DATA_ACCESS_MASK4 'hffffffff
+`define RV_INST_ACCESS_MASK2 'hffffffff
+`define RV_INST_ACCESS_MASK1 'hffffffff
+`define RV_INST_ACCESS_ADDR2 'h00000000
+`define RV_INST_ACCESS_ENABLE2 1'h0
+`define RV_INST_ACCESS_ADDR1 'h00000000
+`define RV_INST_ACCESS_ENABLE4 1'h0
+`define RV_DATA_ACCESS_ADDR4 'h00000000
+`define RV_DATA_ACCESS_ADDR6 'h00000000
+`define RV_DATA_ACCESS_ENABLE3 1'h0
+`define RV_INST_ACCESS_MASK0 'hffffffff
+`define RV_DATA_ACCESS_MASK7 'hffffffff
+`define RV_INST_ACCESS_MASK3 'hffffffff
+`define RV_DATA_ACCESS_ADDR5 'h00000000
+`define RV_DATA_ACCESS_MASK5 'hffffffff
+`define RV_DATA_ACCESS_ENABLE0 1'h0
+`define RV_INST_ACCESS_ADDR3 'h00000000
+`define RV_DATA_ACCESS_ADDR7 'h00000000
+`define RV_DATA_ACCESS_ENABLE5 1'h0
+`define RV_INST_ACCESS_ENABLE6 1'h0
+`define RV_DATA_ACCESS_ENABLE1 1'h0
+`define RV_INST_ACCESS_ENABLE7 1'h0
+`define RV_INST_ACCESS_ADDR0 'h00000000
+`define RV_DATA_ACCESS_MASK6 'hffffffff
+`define RV_DATA_ACCESS_MASK2 'hffffffff
+`define RV_DATA_ACCESS_MASK1 'hffffffff
+`define RV_INST_ACCESS_MASK4 'hffffffff
+`define RV_INST_ACCESS_ADDR4 'h00000000
+`define RV_DATA_ACCESS_ENABLE4 1'h0
+`define RV_DATA_ACCESS_ADDR2 'h00000000
+`define RV_DATA_ACCESS_ADDR1 'h00000000
+`define RV_DATA_ACCESS_ENABLE2 1'h0
+`define RV_ICCM_BITS 12
+`define RV_ICCM_OFFSET 28'hffff000 // ICCM offset within its region (low 28 bits of RV_ICCM_SADR); sized to 28 bits because the former 10-bit literal truncated the value to 0
+`define RV_ICCM_SIZE_4 
+`define RV_ICCM_BANK_BITS 2
+`define RV_ICCM_ENABLE 1
+`define RV_ICCM_SADR 32'haffff000
+`define RV_ICCM_DATA_CELL ram_256x39
+`define RV_ICCM_EADR 32'hafffffff
+`define RV_ICCM_RESERVED 'h400
+`define RV_ICCM_REGION 4'ha
+`define RV_ICCM_SIZE 4
+`define RV_ICCM_BANK_HI 3
+`define RV_ICCM_BANK_INDEX_LO 4
+`define RV_ICCM_ROWS 256
+`define RV_ICCM_INDEX_BITS 8
+`define RV_ICCM_NUM_BANKS 4
+`define RV_ICCM_NUM_BANKS_4 
+//`define TEC_RV_ICG clockhdr
+`define RV_LSU2DMA 0
+`define RV_LSU_NUM_NBLOAD_WIDTH 2
+`define RV_ICCM_ONLY 1
+`define RV_BITMANIP_ZBC 0
+`define RV_BITMANIP_ZBS 0
+`define RV_FPGA_OPTIMIZE 0 // NOTE(review): ifdef/ifndef RV_FPGA_OPTIMIZE only test whether the macro is defined, so a value of 0 still selects the FPGA-optimized branches - confirm intent
+`define RV_LSU_NUM_NBLOAD 4
+`define RV_DIV_BIT 3
+`define RV_DIV_NEW 1
+`define RV_DMA_BUF_DEPTH 5
+`define RV_FAST_INTERRUPT_REDIRECT 1
+`define RV_BITMANIP_ZBP 0
+`define RV_BITMANIP_ZBA 0
+`define RV_LSU_STBUF_DEPTH 4
+`define RV_BITMANIP_ZBB 0
+`define RV_BITMANIP_ZBR 0
+`define RV_BITMANIP_ZBE 0
+`define RV_TIMER_LEGAL_EN 1
+`define RV_BITMANIP_ZBF 0
+`define REGWIDTH 32
+`define RV_CONFIG_KEY 32'hdeadbeef
+`define RV_BTB_INDEX1_HI 8
+`define RV_BTB_SIZE 256
+`define RV_BTB_BTAG_SIZE 6
+`define RV_BTB_FOLD2_INDEX_HASH 0
+`define RV_BTB_INDEX3_LO 16
+`define RV_BTB_INDEX2_HI 15
+`define RV_BTB_ARRAY_DEPTH 128
+`define RV_BTB_INDEX1_LO 2
+`define RV_BTB_ADDR_LO 2
+`define RV_BTB_INDEX3_HI 22
+`define RV_BTB_ADDR_HI 8
+`define RV_BTB_TOFFSET_SIZE 12
+`define RV_BTB_INDEX2_LO 9
+`define RV_BTB_BTAG_FOLD 0
+`define RV_BTB_ENABLE 1
+`define RV_XLEN 32
+`define RV_IFU_BUS_TAG 3
+`define RV_LSU_BUS_ID 1
+`define RV_IFU_BUS_PRTY 2
+`define RV_LSU_BUS_TAG 3
+`define RV_IFU_BUS_ID 1
+`define RV_SB_BUS_PRTY 2
+`define RV_LSU_BUS_PRTY 2
+`define RV_DMA_BUS_ID 1
+`define RV_SB_BUS_ID 1
+`define RV_BUS_PRTY_DEFAULT 2'h3
+`define RV_DMA_BUS_PRTY 2
+`define RV_SB_BUS_TAG 1
+`define RV_DMA_BUS_TAG 1
+`define RV_ICACHE_TAG_NUM_BYPASS 2
+`define RV_ICACHE_STATUS_BITS 1
+`define RV_ICACHE_BEAT_ADDR_HI 5
+`define RV_ICACHE_SCND_LAST 6
+`define RV_ICACHE_TAG_LO 13
+`define RV_ICACHE_BANK_WIDTH 8
+`define RV_ICACHE_DATA_CELL ram_512x71
+`define RV_ICACHE_NUM_BYPASS_WIDTH 2
+`define RV_ICACHE_WAYPACK 1
+`define RV_ICACHE_LN_SZ 64
+`define RV_ICACHE_NUM_BEATS 8
+`define RV_ICACHE_NUM_LINES_WAY 128
+`define RV_ICACHE_NUM_LINES_BANK 64
+`define RV_ICACHE_TAG_DEPTH 128
+`define RV_ICACHE_DATA_DEPTH 512
+`define RV_ICACHE_DATA_WIDTH 64
+`define RV_ICACHE_TAG_CELL ram_128x25
+`define RV_ICACHE_NUM_BYPASS 2
+`define RV_ICACHE_FDATA_WIDTH 71
+`define RV_ICACHE_NUM_LINES 256
+`define RV_ICACHE_DATA_INDEX_LO 4
+`define RV_ICACHE_BANK_BITS 1
+`define RV_ICACHE_TAG_NUM_BYPASS_WIDTH 2
+`define RV_ICACHE_2BANKS 1
+`define RV_ICACHE_BANKS_WAY 2
+`define RV_ICACHE_BANK_LO 3
+`define RV_ICACHE_ECC 1
+`define RV_ICACHE_INDEX_HI 12
+`define RV_ICACHE_TAG_INDEX_LO 6
+`define RV_ICACHE_TAG_BYPASS_ENABLE 1
+`define RV_ICACHE_BANK_HI 3
+`define RV_ICACHE_BEAT_BITS 3
+`define RV_ICACHE_BYPASS_ENABLE 1
+`define RV_ICACHE_NUM_WAYS 2
+`define RV_ICACHE_SIZE 16
+`define RV_NMI_VEC 'h11110000
+`define RV_DCCM_EADR 32'hf0040fff
+`define RV_DCCM_SIZE 4
+`define RV_DCCM_REGION 4'hf
+`define RV_DCCM_RESERVED 'h400
+`define RV_DCCM_INDEX_BITS 8
+`define RV_DCCM_ROWS 256
+`define RV_DCCM_FDATA_WIDTH 39
+`define RV_DCCM_NUM_BANKS_4 
+`define RV_DCCM_NUM_BANKS 4
+`define RV_DCCM_BITS 12
+`define RV_DCCM_DATA_WIDTH 32
+`define RV_DCCM_SIZE_4 
+`define RV_DCCM_OFFSET 28'h40000
+`define RV_DCCM_WIDTH_BITS 2
+`define RV_DCCM_BYTE_WIDTH 4
+`define RV_DCCM_ENABLE 1
+`define RV_DCCM_ECC_WIDTH 7
+`define RV_DCCM_BANK_BITS 2
+`define RV_DCCM_DATA_CELL ram_256x39
+`define RV_DCCM_SADR 32'hf0040000
+`define RV_LSU_SB_BITS 12
+`define RV_RESET_VEC 'haffff000
+`define RV_PIC_BITS 15
+`define RV_PIC_MEIGWCTRL_OFFSET 'h4000
+`define RV_PIC_MEIGWCTRL_MASK 'h3
+`define RV_PIC_MEIGWCLR_OFFSET 'h5000
+`define RV_PIC_MEIE_MASK 'h1
+`define RV_PIC_MEIP_MASK 'h0
+`define RV_PIC_MEIPT_COUNT 31
+`define RV_PIC_MEIPL_COUNT 31
+`define RV_PIC_MEIPT_MASK 'h0
+`define RV_PIC_BASE_ADDR 32'hf00c0000
+`define RV_PIC_MEIPL_MASK 'hf
+`define RV_PIC_INT_WORDS 1
+`define RV_PIC_MPICCFG_MASK 'h1
+`define RV_PIC_MEIPT_OFFSET 'h3004
+`define RV_PIC_TOTAL_INT_PLUS1 32
+`define RV_PIC_MEIPL_OFFSET 'h0000
+`define RV_PIC_MEIE_COUNT 31
+`define RV_PIC_MEIGWCTRL_COUNT 31
+`define RV_PIC_REGION 4'hf
+`define RV_PIC_MEIGWCLR_MASK 'h0
+`define RV_PIC_SIZE 32
+`define RV_PIC_MEIE_OFFSET 'h2000
+`define RV_PIC_MPICCFG_OFFSET 'h3000
+`define RV_PIC_MPICCFG_COUNT 1
+`define RV_PIC_MEIP_OFFSET 'h1000
+`define RV_PIC_TOTAL_INT 31
+`define RV_PIC_OFFSET 28'hc0000 // PIC offset within its region (low 28 bits of RV_PIC_BASE_ADDR); sized to 28 bits because the former 10-bit literal truncated the value to 0
+`define RV_PIC_MEIGWCLR_COUNT 31
+`define RV_PIC_MEIP_COUNT 1
+`define RV_TARGET default
+`define RV_NUMIREGS 32
+`undef RV_ASSERT_ON
diff --git a/verilog/rtl/BrqRV_EB1/design/dmi_jtag_to_core_sync.v b/verilog/rtl/BrqRV_EB1/design/dmi_jtag_to_core_sync.v
new file mode 100644
index 0000000..25328ea
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/dmi_jtag_to_core_sync.v
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: Apache-2.0

+// Copyright 2018 MERL Corporation or its affiliates.

+// 

+// Licensed under the Apache License, Version 2.0 (the "License");

+// you may not use this file except in compliance with the License.

+// You may obtain a copy of the License at

+// 

+// http://www.apache.org/licenses/LICENSE-2.0

+// 

+// Unless required by applicable law or agreed to in writing, software

+// distributed under the License is distributed on an "AS IS" BASIS,

+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+// See the License for the specific language governing permissions and

+// limitations under the License.

+//------------------------------------------------------------------------------------

+//

+//  Copyright MERL, 2019

+//  Owner : Alex Grobman

+//  Description:  

+//                This module Synchronizes the signals between JTAG (TCK) and

+//                processor (Core_clk)

+//

+//-------------------------------------------------------------------------------------

+

+module dmi_jtag_to_core_sync (

+// JTAG-side inputs; they are sampled below on the core clock

+input       rd_en,      // 1 bit  Read enable from JTAG

+input       wr_en,      // 1 bit  Write enable from JTAG

+

+// Processor Signals

+input       rst_n,      // Core reset, active low, asynchronously clears both shift registers

+input       clk,        // Core clock

+

+output      reg_en,     // 1 bit  Register access pulse to Processor (set for a read or a write)

+output      reg_wr_en   // 1 bit  Write pulse to Processor

+);

+  

+wire        c_rd_en;    // one-clk pulse on a rising edge of the synchronized rd_en

+wire        c_wr_en;    // one-clk pulse on a rising edge of the synchronized wr_en

+reg [2:0]   rden, wren; // 3-bit shift registers; bit 0 holds the newest sample

+ 

+

+// Outputs

+assign reg_en    = c_wr_en | c_rd_en;

+assign reg_wr_en = c_wr_en;

+

+// synchronizers: each enable is shifted through three flops on the core clock.

+// The reset value is a sized literal so this .v file stays plain Verilog ('0 is SystemVerilog-only).

+always @ ( posedge clk or negedge rst_n) begin

+    if(!rst_n) begin

+        rden <= 3'b000;

+        wren <= 3'b000;

+    end

+    else begin

+        rden <= {rden[1:0], rd_en};

+        wren <= {wren[1:0], wr_en};

+    end

+end

+// Edge detect: high for one clk when stage 1 is set and stage 2 (stage 1 from the previous clk) is still clear.

+assign c_rd_en = rden[1] & ~rden[2];

+assign c_wr_en = wren[1] & ~wren[2];

+ 

+

+endmodule

diff --git a/verilog/rtl/BrqRV_EB1/design/dmi_wrapper.v b/verilog/rtl/BrqRV_EB1/design/dmi_wrapper.v
new file mode 100644
index 0000000..3f0682f
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/dmi_wrapper.v
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: Apache-2.0

+// Copyright 2018 MERL Corporation or its affiliates.

+// 

+// Licensed under the Apache License, Version 2.0 (the "License");

+// you may not use this file except in compliance with the License.

+// You may obtain a copy of the License at

+// 

+// http://www.apache.org/licenses/LICENSE-2.0

+// 

+// Unless required by applicable law or agreed to in writing, software

+// distributed under the License is distributed on an "AS IS" BASIS,

+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+// See the License for the specific language governing permissions and

+// limitations under the License.

+//------------------------------------------------------------------------------------

+//

+//  Copyright MERL, 2018

+//  Owner : Anusha Narayanamoorthy

+//  Description:  

+//                Wrapper module for JTAG_TAP and DMI synchronizer

+//

+//-------------------------------------------------------------------------------------

+

+module dmi_wrapper(

+  // Wires the rvjtag_tap instance to the dmi_jtag_to_core_sync instance and exposes both to the core.

+  // JTAG signals

+  input              trst_n,              // JTAG reset

+  input              tck,                 // JTAG clock

+  input              tms,                 // Test mode select

+  input              tdi,                 // Test Data Input

+  output             tdo,                 // Test Data Output

+  output             tdoEnable,           // Test Data Output enable

+

+  // Processor Signals

+  input              core_rst_n,          // Core reset, passed to the synchronizer's rst_n

+  input              core_clk,            // Core clock, passed to the synchronizer's clk

+  input [31:1]       jtag_id,             // JTAG ID, passed to the TAP

+  input [31:0]       rd_data,             // 32 bit Read data from Processor, passed to the TAP

+  output [31:0]      reg_wr_data,         // 32 bit Write data to Processor, connected directly to the TAP

+  output [6:0]       reg_wr_addr,         // 7 bit reg address to Processor, connected directly to the TAP

+  output             reg_en,              // 1 bit  Register access enable to Processor (pulses for reads and writes)

+  output             reg_wr_en,           // 1 bit  Write enable to Processor

+  output             dmi_hard_reset       // connected to the TAP's dmi_hard_reset port

+);

+

+

+  // Only rd_en and wr_en pass through the synchronizer; the data and address buses go straight from the TAP to the ports.

+

+

+  //Wire Declaration

+  wire                     rd_en;       // TAP read enable, input to the synchronizer

+  wire                     wr_en;       // TAP write enable, input to the synchronizer

+  wire                     dmireset;    // connected to the TAP's dmi_reset port; not used elsewhere in this module

+

+ 

+  //jtag_tap instantiation

+ rvjtag_tap i_jtag_tap(

+   .trst(trst_n),                      // dedicated JTAG TRST (active low) pad signal or asynchronous active low power on reset

+   .tck(tck),                          // dedicated JTAG TCK pad signal

+   .tms(tms),                          // dedicated JTAG TMS pad signal

+   .tdi(tdi),                          // dedicated JTAG TDI pad signal

+   .tdo(tdo),                          // dedicated JTAG TDO pad signal

+   .tdoEnable(tdoEnable),              // enable for TDO pad

+   .wr_data(reg_wr_data),              // 32 bit Write data

+   .wr_addr(reg_wr_addr),              // 7 bit Write address

+   .rd_en(rd_en),                      // 1 bit  read enable

+   .wr_en(wr_en),                      // 1 bit  Write enable

+   .rd_data(rd_data),                  // 32 bit Read data

+   .rd_status(2'b0),                    // tied to zero

+   .idle(3'h0),                         // no need to wait to sample data

+   .dmi_stat(2'b0),                     // no need to wait or error possible

+   .version(4'h1),                      // debug spec 0.13 compliant

+   .jtag_id(jtag_id),

+   .dmi_hard_reset(dmi_hard_reset),

+   .dmi_reset(dmireset)

+);

+

+

+  // dmi_jtag_to_core_sync instantiation

+  dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync(

+    .wr_en(wr_en),                          // 1 bit  Write enable

+    .rd_en(rd_en),                          // 1 bit  Read enable

+

+    .rst_n(core_rst_n),

+    .clk(core_clk),

+    .reg_en(reg_en),                          // 1 bit  Register access enable (read or write)

+    .reg_wr_en(reg_wr_en)                          // 1 bit  Write enable

+  );

+

+endmodule

diff --git a/verilog/rtl/BrqRV_EB1/design/dbg/eb1_dbg.sv b/verilog/rtl/BrqRV_EB1/design/eb1_dbg.sv
similarity index 100%
rename from verilog/rtl/BrqRV_EB1/design/dbg/eb1_dbg.sv
rename to verilog/rtl/BrqRV_EB1/design/eb1_dbg.sv
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_dec.sv b/verilog/rtl/BrqRV_EB1/design/eb1_dec.sv
new file mode 100644
index 0000000..3c08145
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_dec.sv
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// dec: decode unit - decode, bypassing, ARF, interrupts
+//
+//********************************************************************************
+// $Id$
+//
+//
+// Function: Decode
+// Comments: Decode, dependency scoreboard, ARF
+//
+//
+// A -> D -> EX1 ... WB
+//
+//********************************************************************************
+
+module eb1_dec
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic clk,                          // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic active_clk,                   // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic free_clk,                     // Clock always.                  Through two clock headers. For flops without second clock header built in.
+   input logic free_l2clk,                   // Clock always.                  Through one clock header.  For flops with    second header built in.
+
+   input logic lsu_fastint_stall_any,        // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle
+
+   output logic dec_extint_stall,            // Stall on external interrupt
+
+   output logic dec_i0_decode_d,             // Valid instruction at D-stage and not blocked
+   output logic dec_pause_state_cg,          // to top for active state clock gating
+
+   output logic dec_tlu_core_empty,
+
+   input logic rst_l,                        // reset, active low
+   input logic [31:1] rst_vec,               // reset vector, from core pins
+
+   input logic        nmi_int,               // NMI pin
+   input logic [31:1] nmi_vec,               // NMI vector, from pins
+
+   input logic  i_cpu_halt_req,              // Asynchronous Halt request to CPU
+   input logic  i_cpu_run_req,               // Asynchronous Restart request to CPU
+
+   output logic o_cpu_halt_status,           // Halt status of core (pmu/fw)
+   output logic o_cpu_halt_ack,              // Halt request ack
+   output logic o_cpu_run_ack,               // Run request ack
+   output logic o_debug_mode_status,         // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sendng a halt or run request
+
+   input logic [31:4] core_id,               // CORE ID
+
+   // external MPC halt/run interface
+   input logic mpc_debug_halt_req,           // Async halt request
+   input logic mpc_debug_run_req,            // Async run request
+   input logic mpc_reset_run_req,            // Run/halt after reset
+   output logic mpc_debug_halt_ack,          // Halt ack
+   output logic mpc_debug_run_ack,           // Run ack
+   output logic debug_brkpt_status,          // debug breakpoint
+
+    input logic       exu_pmu_i0_br_misp,    // slot 0 branch misp
+   input logic       exu_pmu_i0_br_ataken,   // slot 0 branch actual taken
+   input logic       exu_pmu_i0_pc4,         // slot 0 4 byte branch
+
+
+   input logic                                lsu_nonblock_load_valid_m,      // valid nonblock load at m
+   input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0]  lsu_nonblock_load_tag_m,        // -> corresponding tag
+   input logic                                lsu_nonblock_load_inv_r,        // invalidate request for nonblock load r
+   input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0]  lsu_nonblock_load_inv_tag_r,    // -> corresponding tag
+   input logic                                lsu_nonblock_load_data_valid,   // valid nonblock load data back
+   input logic                                lsu_nonblock_load_data_error,   // nonblock load bus error
+   input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0]  lsu_nonblock_load_data_tag,     // -> corresponding tag
+   input logic [31:0]                         lsu_nonblock_load_data,         // nonblock load data
+
+   input logic       lsu_pmu_bus_trxn,           // D side bus transaction
+   input logic       lsu_pmu_bus_misaligned,     // D side bus misaligned
+   input logic       lsu_pmu_bus_error,          // D side bus error
+   input logic       lsu_pmu_bus_busy,           // D side bus busy
+   input logic       lsu_pmu_misaligned_m,       // D side load or store misaligned
+   input logic       lsu_pmu_load_external_m,    // D side bus load
+   input logic       lsu_pmu_store_external_m,   // D side bus store
+   input logic       dma_pmu_dccm_read,          // DMA DCCM read
+   input logic       dma_pmu_dccm_write,         // DMA DCCM write
+   input logic       dma_pmu_any_read,           // DMA read
+   input logic       dma_pmu_any_write,          // DMA write
+
+   input logic [31:1] lsu_fir_addr,          // Fast int address
+   input logic [1:0] lsu_fir_error,          // Fast int lookup error
+
+   input logic       ifu_pmu_instr_aligned,  // aligned instructions
+   input logic       ifu_pmu_fetch_stall,    // fetch unit stalled
+   input logic       ifu_pmu_ic_miss,        // icache miss
+   input logic       ifu_pmu_ic_hit,         // icache hit
+   input logic       ifu_pmu_bus_error,      // Instruction side bus error
+   input logic       ifu_pmu_bus_busy,       // Instruction side bus busy
+   input logic       ifu_pmu_bus_trxn,       // Instruction side bus transaction
+
+   input logic       ifu_ic_error_start,     // IC single bit error
+   input logic       ifu_iccm_rd_ecc_single_err, // ICCM single bit error
+
+   input logic [3:0]  lsu_trigger_match_m,
+   input logic        dbg_cmd_valid,         // debugger abstract command valid
+   input logic        dbg_cmd_write,         // command is a write
+   input logic  [1:0] dbg_cmd_type,          // command type
+   input logic [31:0] dbg_cmd_addr,          // command address
+   input logic  [1:0] dbg_cmd_wrdata,        // command write data, for fence/fence_i
+
+
+   input logic        ifu_i0_icaf,           // icache access fault
+   input logic [1:0]  ifu_i0_icaf_type,      // icache access fault type
+
+   input logic   ifu_i0_icaf_second,         // i0 has access fault on second 2B of 4B inst
+   input logic   ifu_i0_dbecc,               // icache/iccm double-bit error
+
+   input logic lsu_idle_any,                 // lsu idle for halting
+
+   input eb1_br_pkt_t i0_brp,                                  // branch packet
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
+   input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr,            // BP FGHR
+   input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag,           // BP tag
+   input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index,          // Fully associt btb index
+
+   input eb1_lsu_error_pkt_t lsu_error_pkt_r,         // LSU exception/error packet
+   input logic         lsu_single_ecc_error_incr,      // LSU inc SB error counter
+
+   input logic         lsu_imprecise_error_load_any,   // LSU imprecise load bus error
+   input logic         lsu_imprecise_error_store_any,  // LSU imprecise store bus error
+   input logic [31:0]  lsu_imprecise_error_addr_any,   // LSU imprecise bus error address
+
+   input logic [31:0]  exu_div_result,      // final div result
+   input logic         exu_div_wren,        // Divide write enable to GPR
+
+   input logic [31:0] exu_csr_rs1_x,        // rs1 for csr instruction
+
+   input logic [31:0] lsu_result_m,         // load result
+   input logic [31:0] lsu_result_corr_r,    // load result - corrected load data
+
+   input logic        lsu_load_stall_any,   // This is for blocking loads
+   input logic        lsu_store_stall_any,  // This is for blocking stores
+   input logic        dma_dccm_stall_any,   // stall any load/store at decode, pmu event
+   input logic        dma_iccm_stall_any,   // iccm stalled, pmu event
+
+   input logic       iccm_dma_sb_error,     // ICCM DMA single bit error
+
+   input logic exu_flush_final,             // slot0 flush
+
+   input logic [31:1] exu_npc_r,            // next PC
+
+   input logic [31:0] exu_i0_result_x,      // alu result x
+
+
+   input logic         ifu_i0_valid,                  // fetch valids to instruction buffer
+   input logic [31:0]  ifu_i0_instr,                  // fetch inst's to instruction buffer
+   input logic [31:1]  ifu_i0_pc,                     // pc's for instruction buffer
+   input logic         ifu_i0_pc4,                    // indication of 4B or 2B for corresponding inst
+   input logic  [31:1] exu_i0_pc_x,                   // pc's for e1 from the alu's
+
+   input logic mexintpend,                            // External interrupt pending
+   input logic timer_int,                             // Timer interrupt pending (from pin)
+   input logic soft_int,                              // Software interrupt pending (from pin)
+
+   input logic [7:0] pic_claimid,                     // PIC claimid
+   input logic [3:0] pic_pl,                          // PIC priv level
+   input logic       mhwakeup,                        // High priority wakeup
+
+   output logic [3:0] dec_tlu_meicurpl,               // to PIC, Current priv level
+   output logic [3:0] dec_tlu_meipt,                  // to PIC
+
+   input logic [70:0] ifu_ic_debug_rd_data,           // diagnostic icache read data
+   input logic ifu_ic_debug_rd_data_valid,            // diagnostic icache read data valid
+   output eb1_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics
+
+
+// Debug start
+   input logic dbg_halt_req,                 // DM requests a halt
+   input logic dbg_resume_req,               // DM requests a resume
+   input logic ifu_miss_state_idle,          // I-side miss buffer empty
+
+   output logic dec_tlu_dbg_halted,          // Core is halted and ready for debug command
+   output logic dec_tlu_debug_mode,          // Core is in debug mode
+   output logic dec_tlu_resume_ack,          // Resume acknowledge
+   output logic dec_tlu_flush_noredir_r,     // Tell fetch to idle on this flush
+   output logic dec_tlu_mpc_halted_only,     // Core is halted only due to MPC
+   output logic dec_tlu_flush_leak_one_r,    // single step
+   output logic dec_tlu_flush_err_r,         // iside perr/ecc rfpc
+   output logic [31:2] dec_tlu_meihap,       // Fast ext int base
+
+   output logic dec_debug_wdata_rs1_d,       // insert debug write data into rs1 at decode
+
+   output logic [31:0] dec_dbg_rddata,       // debug command read data
+
+   output logic dec_dbg_cmd_done,            // abstract command is done
+   output logic dec_dbg_cmd_fail,            // abstract command failed (illegal reg address)
+
+   output eb1_trigger_pkt_t  [3:0] trigger_pkt_any, // info needed by debug trigger blocks
+
+   output logic dec_tlu_force_halt,          // halt has been forced
+// Debug end
+   // branch info from pipe0 for errors or counter updates
+   input logic [1:0]  exu_i0_br_hist_r,             // history
+   input logic        exu_i0_br_error_r,            // error
+   input logic        exu_i0_br_start_error_r,      // start error
+   input logic        exu_i0_br_valid_r,            // valid
+   input logic        exu_i0_br_mp_r,               // mispredict
+   input logic        exu_i0_br_middle_r,           // middle of bank
+
+   // branch way info from pipe0 for errors or counter updates
+
+   input logic             exu_i0_br_way_r,         // way hit or repl
+
+   output logic         dec_i0_rs1_en_d,            // Qualify GPR RS1 data
+   output logic         dec_i0_rs2_en_d,            // Qualify GPR RS2 data
+   output logic  [31:0] gpr_i0_rs1_d,               // gpr rs1 data
+   output logic  [31:0] gpr_i0_rs2_d,               // gpr rs2 data
+
+   output logic [31:0] dec_i0_immed_d,              // immediate data
+   output logic [12:1] dec_i0_br_immed_d,           // br immediate data
+
+   output        eb1_alu_pkt_t i0_ap,              // alu packet
+
+   output logic          dec_i0_alu_decode_d,       // schedule on D-stage alu
+   output logic          dec_i0_branch_d,           // Branch in D-stage
+
+   output logic          dec_i0_select_pc_d,        // select pc onto rs1 for jal's
+
+   output logic [31:1]  dec_i0_pc_d,                // pc's at decode
+   output logic [3:0]   dec_i0_rs1_bypass_en_d,     // rs1 bypass enable
+   output logic [3:0]   dec_i0_rs2_bypass_en_d,     // rs2 bypass enable
+
+   output logic [31:0]  dec_i0_result_r,            // Result R-stage
+
+   output eb1_lsu_pkt_t    lsu_p,                  // lsu packet
+   output logic             dec_qual_lsu_d,         // LSU instruction at D.  Use to quiet LSU operands
+   output eb1_mul_pkt_t    mul_p,                  // mul packet
+   output eb1_div_pkt_t    div_p,                  // div packet
+   output logic             dec_div_cancel,         // cancel divide operation
+
+   output logic [11:0] dec_lsu_offset_d,            // 12b offset for load/store addresses
+
+   output logic        dec_csr_ren_d,               // CSR read enable
+   output logic [31:0] dec_csr_rddata_d,            // CSR read data
+
+   output logic        dec_tlu_flush_lower_r,       // tlu flush due to late mp, exception, rfpc, or int
+   output logic        dec_tlu_flush_lower_wb,
+   output logic [31:1] dec_tlu_flush_path_r,        // tlu flush target
+   output logic        dec_tlu_i0_kill_writeb_r,    // I0 is flushed, don't writeback any results to arch state
+   output logic        dec_tlu_fence_i_r,           // flush is a fence_i rfnpc, flush icache
+
+   output logic [31:1] pred_correct_npc_x,          // npc if prediction is correct at e2 stage
+
+   output eb1_br_tlu_pkt_t dec_tlu_br0_r_pkt,      // slot 0 branch predictor update packet
+
+   output logic dec_tlu_perfcnt0,                   // toggles when slot0 perf counter 0 has an event inc
+   output logic dec_tlu_perfcnt1,                   // toggles when slot0 perf counter 1 has an event inc
+   output logic dec_tlu_perfcnt2,                   // toggles when slot0 perf counter 2 has an event inc
+   output logic dec_tlu_perfcnt3,                   // toggles when slot0 perf counter 3 has an event inc
+
+   output eb1_predict_pkt_t dec_i0_predict_p_d,                        // prediction packet to alus
+   output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d,                // DEC predict fghr
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d,     // DEC predict index
+   output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d,               // DEC predict branch tag
+
+   output logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative BTB error index
+
+   output logic dec_lsu_valid_raw_d,
+
+   output logic [31:0] dec_tlu_mrac_ff,              // CSR for memory region control
+
+   output logic [1:0] dec_data_en,                   // clock-gate control logic
+   output logic [1:0] dec_ctl_en,
+
+   input logic [15:0] ifu_i0_cinst,                  // 16b compressed instruction
+
+   output eb1_trace_pkt_t  trace_rv_trace_pkt,      // trace packet
+
+   // feature disable from mfdc
+   output logic  dec_tlu_external_ldfwd_disable,     // disable external load forwarding
+   output logic  dec_tlu_sideeffect_posted_disable,  // disable posted stores to side-effect address
+   output logic  dec_tlu_core_ecc_disable,           // disable core ECC
+   output logic  dec_tlu_bpred_disable,              // disable branch prediction
+   output logic  dec_tlu_wb_coalescing_disable,      // disable writebuffer coalescing
+   output logic [2:0]  dec_tlu_dma_qos_prty,         // DMA QoS priority coming from MFDC [18:16]
+
+   // clock gating overrides from mcgc
+   output logic  dec_tlu_misc_clk_override,          // override misc clock domain gating
+   output logic  dec_tlu_ifu_clk_override,           // override fetch clock domain gating
+   output logic  dec_tlu_lsu_clk_override,           // override load/store clock domain gating
+   output logic  dec_tlu_bus_clk_override,           // override bus clock domain gating
+   output logic  dec_tlu_pic_clk_override,           // override PIC clock domain gating
+   output logic  dec_tlu_picio_clk_override,         // override PICIO clock domain gating
+   output logic  dec_tlu_dccm_clk_override,          // override DCCM clock domain gating
+   output logic  dec_tlu_icm_clk_override,           // override ICCM clock domain gating
+
+   output logic  dec_tlu_i0_commit_cmt,              // committed i0 instruction
+   input  logic  scan_mode                           // Flop scan mode control
+ 
+
+   );
+
+
+   logic  dec_tlu_dec_clk_override;      // to and from dec blocks
+   logic  clk_override;
+
+   logic               dec_ib0_valid_d;
+
+   logic               dec_pmu_instr_decoded;
+   logic               dec_pmu_decode_stall;
+   logic               dec_pmu_presync_stall;
+   logic               dec_pmu_postsync_stall;
+
+   logic dec_tlu_wr_pause_r;             // CSR write to pause reg is at R.
+
+   logic [4:0]  dec_i0_rs1_d;
+   logic [4:0]  dec_i0_rs2_d;
+
+   logic [31:0] dec_i0_instr_d;
+
+   logic  dec_tlu_trace_disable;
+   logic  dec_tlu_pipelining_disable;
+
+
+   logic [4:0]  dec_i0_waddr_r;
+   logic        dec_i0_wen_r;
+   logic [31:0] dec_i0_wdata_r;
+   logic        dec_csr_wen_r;           // csr write enable at r
+   logic [11:0] dec_csr_wraddr_r;        // write address for csr
+   logic [31:0] dec_csr_wrdata_r;        // csr write data at r
+
+   logic [11:0] dec_csr_rdaddr_d;        // read address for csr
+   logic        dec_csr_legal_d;         // csr indicates legal operation
+
+   logic        dec_csr_wen_unq_d;       // valid csr with write - for csr legal
+   logic        dec_csr_any_unq_d;       // valid csr - for csr legal
+   logic        dec_csr_stall_int_ff;    // csr is mie/mstatus
+
+   eb1_trap_pkt_t dec_tlu_packet_r;
+
+   logic        dec_i0_pc4_d;
+   logic        dec_tlu_presync_d;
+   logic        dec_tlu_postsync_d;
+   logic        dec_tlu_debug_stall;
+
+   logic [31:0] dec_illegal_inst;
+
+   logic                      dec_i0_icaf_d;
+
+   logic                      dec_i0_dbecc_d;
+   logic                      dec_i0_icaf_second_d;
+   logic [3:0]                dec_i0_trigger_match_d;
+   logic                      dec_debug_fence_d;
+   logic                      dec_nonblock_load_wen;
+   logic [4:0]                dec_nonblock_load_waddr;
+   logic                      dec_tlu_flush_pause_r;
+   eb1_br_pkt_t                   dec_i0_brp;
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index;
+   logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr;
+   logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag;
+   logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index;          // Fully associative BTB index
+
+   logic [31:1]               dec_tlu_i0_pc_r;
+   logic                      dec_tlu_i0_kill_writeb_wb;
+   logic                      dec_tlu_i0_valid_r;
+
+   logic                      dec_pause_state;
+
+   logic [1:0]                dec_i0_icaf_type_d;   // i0 instruction access fault type
+
+   logic                      dec_tlu_flush_extint; // Fast ext int started
+
+   logic [31:0]               dec_i0_inst_wb;
+   logic [31:1]               dec_i0_pc_wb;
+   logic                      dec_tlu_i0_valid_wb1,  dec_tlu_int_valid_wb1;
+   logic [4:0]                dec_tlu_exc_cause_wb1;
+   logic [31:0]               dec_tlu_mtval_wb1;
+   logic                      dec_tlu_i0_exc_valid_wb1;
+
+   logic [4:0]                div_waddr_wb;
+   logic                      dec_div_active;
+
+   logic                      dec_debug_valid_d;
+
+
+// Adding signals for vector
+   
+   //logic stall_scalar;
+   
+   
+   
+   assign clk_override = dec_tlu_dec_clk_override;
+
+
+   assign dec_dbg_rddata[31:0] = dec_i0_wdata_r[31:0];
+
+
+   eb1_dec_ib_ctl #(.pt(pt)) instbuff (.*);
+
+
+   eb1_dec_decode_ctl #(.pt(pt)) decode (.*);
+
+
+   eb1_dec_tlu_ctl #(.pt(pt)) tlu (.*);
+
+
+   eb1_dec_gpr_ctl #(.pt(pt)) arf (.*,
+                    // inputs
+                    .raddr0(dec_i0_rs1_d[4:0]),
+                    .raddr1(dec_i0_rs2_d[4:0]),
+
+                    .wen0(dec_i0_wen_r),          .waddr0(dec_i0_waddr_r[4:0]),          .wd0(dec_i0_wdata_r[31:0]),
+                    .wen1(dec_nonblock_load_wen), .waddr1(dec_nonblock_load_waddr[4:0]), .wd1(lsu_nonblock_load_data[31:0]),
+                    .wen2(exu_div_wren),          .waddr2(div_waddr_wb),                 .wd2(exu_div_result[31:0]),
+
+                    // outputs
+                    .rd0(gpr_i0_rs1_d[31:0]), .rd1(gpr_i0_rs2_d[31:0])
+                    );
+
+
+// Trigger
+
+   eb1_dec_trigger #(.pt(pt)) dec_trigger (.*);
+
+
+
+
+// trace
+   assign trace_rv_trace_pkt.trace_rv_i_insn_ip      =   dec_i0_inst_wb[31:0];
+   assign trace_rv_trace_pkt.trace_rv_i_address_ip   = { dec_i0_pc_wb[31:1], 1'b0};
+
+   assign trace_rv_trace_pkt.trace_rv_i_valid_ip     = dec_tlu_int_valid_wb1 | dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1;
+   assign trace_rv_trace_pkt.trace_rv_i_exception_ip = dec_tlu_int_valid_wb1 |  dec_tlu_i0_exc_valid_wb1;
+   assign trace_rv_trace_pkt.trace_rv_i_ecause_ip    = dec_tlu_exc_cause_wb1[4:0];     // replicate across ports
+   assign trace_rv_trace_pkt.trace_rv_i_interrupt_ip = dec_tlu_int_valid_wb1;
+   assign trace_rv_trace_pkt.trace_rv_i_tval_ip      = dec_tlu_mtval_wb1[31:0];        // replicate across ports
+
+
+
+// end trace
+
+
+endmodule // eb1_dec
+
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_dec_decode_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_dec_decode_ctl.sv
new file mode 100644
index 0000000..b663545
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_dec_decode_ctl.sv
@@ -0,0 +1,1825 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+module eb1_dec_decode_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic dec_tlu_trace_disable,
+   input logic dec_debug_valid_d,
+
+   input logic dec_tlu_flush_extint,         // Flush external interrupt
+
+   input logic dec_tlu_force_halt,           // invalidate nonblock load cam on a force halt event
+
+   output logic dec_extint_stall,            // Stall from external interrupt
+
+   input  logic [15:0] ifu_i0_cinst,         // 16b compressed instruction
+   output logic [31:0] dec_i0_inst_wb,       // 32b instruction at wb+1 for trace encoder
+   output logic [31:1] dec_i0_pc_wb,         // 31b pc at wb+1 for trace encoder
+
+
+   input logic                                lsu_nonblock_load_valid_m,       // valid nonblock load at m
+   input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0]  lsu_nonblock_load_tag_m,         // -> corresponding tag
+   input logic                                lsu_nonblock_load_inv_r,         // invalidate request for nonblock load r
+   input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0]  lsu_nonblock_load_inv_tag_r,     // -> corresponding tag
+   input logic                                lsu_nonblock_load_data_valid,    // valid nonblock load data back
+   input logic                                lsu_nonblock_load_data_error,    // nonblock load bus error
+   input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0]  lsu_nonblock_load_data_tag,      // -> corresponding tag
+
+
+   input logic [3:0] dec_i0_trigger_match_d,          // i0 decode trigger matches
+
+   input logic dec_tlu_wr_pause_r,                    // pause instruction at r
+   input logic dec_tlu_pipelining_disable,            // pipeline disable - presync, i0 decode only
+
+   input logic [3:0]  lsu_trigger_match_m,            // lsu trigger matches
+
+   input logic lsu_pmu_misaligned_m,                  // perf mon: load/store misalign
+   input logic dec_tlu_debug_stall,                   // debug stall decode
+   input logic dec_tlu_flush_leak_one_r,              // leak1 instruction
+
+   input logic dec_debug_fence_d,                     // debug fence instruction
+
+   input logic [1:0] dbg_cmd_wrdata,                  // disambiguate fence, fence_i
+
+   input logic dec_i0_icaf_d,                         // icache access fault
+   input logic dec_i0_icaf_second_d,                  // i0 instruction access fault on second 2B of 4B inst
+   input logic [1:0] dec_i0_icaf_type_d,              // i0 instruction access fault type
+
+   input logic dec_i0_dbecc_d,                        // icache/iccm double-bit error
+
+   input eb1_br_pkt_t dec_i0_brp,                    // branch packet
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index,   // i0 branch index
+   input logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr,  // BP FGHR
+   input logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag
+   input logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index,          // Fully associative BTB index
+
+   input logic lsu_idle_any,                          // lsu idle: if fence instr & ~lsu_idle then stall decode
+
+   input logic lsu_load_stall_any,                    // stall any load at decode
+   input logic lsu_store_stall_any,                   // stall any store at decode
+   input logic dma_dccm_stall_any,                    // stall any load/store at decode
+
+   input logic exu_div_wren,                          // nonblocking divide write enable to GPR.
+
+   input logic dec_tlu_i0_kill_writeb_wb,             // I0 is flushed, don't writeback any results to arch state
+   input logic dec_tlu_flush_lower_wb,                // trap lower flush
+   input logic dec_tlu_i0_kill_writeb_r,              // I0 is flushed, don't writeback any results to arch state
+   input logic dec_tlu_flush_lower_r,                 // trap lower flush
+   input logic dec_tlu_flush_pause_r,                 // don't clear pause state on initial lower flush
+   input logic dec_tlu_presync_d,                     // CSR read needs to be presync'd
+   input logic dec_tlu_postsync_d,                    // CSR ops that need to be postsync'd
+
+   input logic dec_i0_pc4_d,                          // inst is 4B inst else 2B
+
+   input logic [31:0] dec_csr_rddata_d,               // csr read data at wb
+   input logic dec_csr_legal_d,                       // csr indicates legal operation
+
+   input logic [31:0] exu_csr_rs1_x,                  // rs1 for csr instr
+
+   input logic [31:0] lsu_result_m,                   // load result
+   input logic [31:0] lsu_result_corr_r,              // load result - corrected data for writing gpr's, not for bypassing
+
+   input logic exu_flush_final,                       // lower flush or i0 flush at X or D
+
+   input logic [31:1] exu_i0_pc_x,                    // pcs at e1
+
+   input logic [31:0] dec_i0_instr_d,                 // inst at decode
+
+   input logic  dec_ib0_valid_d,                      // inst valid at decode
+
+   input logic [31:0] exu_i0_result_x,                // from primary alu's
+
+   input logic  clk,                                  // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic  active_clk,                           // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic  free_l2clk,                           // Clock always.                  Through one clock header.  For flops with    second header built in.
+
+   input logic  clk_override,                         // Override non-functional clock gating
+   input logic  rst_l,                                // Flop reset
+
+
+
+   output logic        dec_i0_rs1_en_d,               // rs1 enable at decode
+   output logic        dec_i0_rs2_en_d,               // rs2 enable at decode
+
+   output logic [4:0]  dec_i0_rs1_d,                  // rs1 logical source
+   output logic [4:0]  dec_i0_rs2_d,                  // rs2 logical source
+
+   output logic [31:0] dec_i0_immed_d,                // 32b immediate data decode
+
+
+   output logic [12:1] dec_i0_br_immed_d,             // 12b branch immediate
+
+   output eb1_alu_pkt_t i0_ap,                       // alu packets
+
+   output logic        dec_i0_decode_d,               // i0 decode
+
+   output logic        dec_i0_alu_decode_d,           // decode to D-stage alu
+   output logic        dec_i0_branch_d,               // Branch in D-stage
+
+   output logic [4:0]  dec_i0_waddr_r,                // i0 logical source to write to gpr's
+   output logic        dec_i0_wen_r,                  // i0 write enable
+   output logic [31:0] dec_i0_wdata_r,                // i0 write data
+
+   output logic        dec_i0_select_pc_d,            // i0 select pc for rs1 - branches
+
+   output logic [3:0]    dec_i0_rs1_bypass_en_d,      // i0 rs1 bypass enable
+   output logic [3:0]    dec_i0_rs2_bypass_en_d,      // i0 rs2 bypass enable
+   output logic [31:0]   dec_i0_result_r,             // Result R-stage
+
+   output eb1_lsu_pkt_t    lsu_p,                    // load/store packet
+   output logic             dec_qual_lsu_d,           // LSU instruction at D.  Use to quiet LSU operands
+
+   output eb1_mul_pkt_t    mul_p,                    // multiply packet
+
+   output eb1_div_pkt_t    div_p,                    // divide packet
+   output logic [4:0]       div_waddr_wb,             // DIV write address to GPR
+   output logic             dec_div_cancel,           // cancel the divide operation
+
+   output logic        dec_lsu_valid_raw_d,
+   output logic [11:0] dec_lsu_offset_d,
+
+   output logic        dec_csr_ren_d,                 // valid csr decode
+   output logic        dec_csr_wen_unq_d,             // valid csr with write - for csr legal
+   output logic        dec_csr_any_unq_d,             // valid csr - for csr legal
+   output logic [11:0] dec_csr_rdaddr_d,              // read address for csr
+   output logic        dec_csr_wen_r,                 // csr write enable at r
+   output logic [11:0] dec_csr_wraddr_r,              // write address for csr
+   output logic [31:0] dec_csr_wrdata_r,              // csr write data at r
+   output logic        dec_csr_stall_int_ff,          // csr is mie/mstatus
+
+   output              dec_tlu_i0_valid_r,            // i0 valid inst at r
+
+   output eb1_trap_pkt_t   dec_tlu_packet_r,              // trap packet
+
+   output logic [31:1] dec_tlu_i0_pc_r,               // i0 trap pc
+
+   output logic [31:0] dec_illegal_inst,              // illegal inst
+   output logic [31:1] pred_correct_npc_x,            // npc e2 if the prediction is correct
+
+   output eb1_predict_pkt_t dec_i0_predict_p_d,      // i0 predict packet decode
+   output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // i0 predict fghr
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // i0 predict index
+   output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // i0_predict branch tag
+
+   output logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative BTB error index
+
+   output logic [1:0] dec_data_en,                    // clock-gating logic
+   output logic [1:0] dec_ctl_en,
+
+   output logic       dec_pmu_instr_decoded,          // number of instructions decode this cycle encoded
+   output logic       dec_pmu_decode_stall,           // decode is stalled
+   output logic       dec_pmu_presync_stall,          // decode has presync stall
+   output logic       dec_pmu_postsync_stall,         // decode has postsync stall
+
+   output logic       dec_nonblock_load_wen,          // write enable for nonblock load
+   output logic [4:0] dec_nonblock_load_waddr,        // logical write addr for nonblock load
+   output logic       dec_pause_state,                // core in pause state
+   output logic       dec_pause_state_cg,             // pause state for clock-gating
+
+   output logic       dec_div_active,                 // non-block divide is active
+
+   input  logic       scan_mode
+   
+   );
+
+
+
+
+   eb1_dec_pkt_t           i0_dp_raw, i0_dp;
+
+   logic [31:0]        i0;
+   logic               i0_valid_d;
+
+   logic [31:0]        i0_result_r;
+
+   logic [2:0]         i0_rs1bypass, i0_rs2bypass;
+
+   logic               i0_jalimm20;
+   logic               i0_uiimm20;
+
+   logic               lsu_decode_d;
+   logic [31:0]        i0_immed_d;
+   logic               i0_presync;
+   logic               i0_postsync;
+
+   logic               postsync_stall;
+   logic               ps_stall;
+
+   logic               prior_inflight, prior_inflight_wb;
+
+   logic               csr_clr_d, csr_set_d, csr_write_d;
+
+   logic               csr_clr_x,csr_set_x,csr_write_x,csr_imm_x;
+   logic [31:0]        csr_mask_x;
+   logic [31:0]        write_csr_data_x;
+   logic [31:0]        write_csr_data_in;
+   logic [31:0]        write_csr_data;
+   logic               csr_data_wen;
+
+   logic [4:0]         csrimm_x;
+
+   logic [31:0]        csr_rddata_x;
+
+   logic               mul_decode_d;
+   logic               div_decode_d;
+   logic               div_e1_to_r;
+   logic               div_flush;
+   logic               div_active_in;
+   logic               div_active;
+   logic               i0_nonblock_div_stall;
+   logic               i0_div_prior_div_stall;
+   logic               nonblock_div_cancel;
+
+   logic               i0_legal;
+   logic               shift_illegal;
+   logic               illegal_inst_en;
+   logic               illegal_lockout_in, illegal_lockout;
+   logic               i0_legal_decode_d;
+   logic               i0_exulegal_decode_d, i0_exudecode_d, i0_exublock_d;
+
+   logic [12:1]        last_br_immed_d;
+   logic               i0_rs1_depend_i0_x, i0_rs1_depend_i0_r;
+   logic               i0_rs2_depend_i0_x, i0_rs2_depend_i0_r;
+
+   logic               i0_div_decode_d;
+   logic               i0_load_block_d;
+   logic [1:0]         i0_rs1_depth_d, i0_rs2_depth_d;
+
+   logic               i0_load_stall_d;
+   logic               i0_store_stall_d;
+
+   logic               i0_predict_nt, i0_predict_t;
+
+   logic               i0_notbr_error, i0_br_toffset_error;
+   logic               i0_ret_error;
+   logic               i0_br_error;
+   logic               i0_br_error_all;
+   logic [11:0]        i0_br_offset;
+
+   logic [20:1]        i0_pcall_imm;                          // predicted jal's
+   logic               i0_pcall_12b_offset;
+   logic               i0_pcall_raw;
+   logic               i0_pcall_case;
+   logic               i0_pcall;
+
+   logic               i0_pja_raw;
+   logic               i0_pja_case;
+   logic               i0_pja;
+
+   logic               i0_pret_case;
+   logic               i0_pret_raw, i0_pret;
+
+   logic               i0_jal;                                // jal's that are not predicted
+
+
+   logic               i0_predict_br;
+
+   logic               store_data_bypass_d, store_data_bypass_m;
+
+   eb1_class_pkt_t         i0_rs1_class_d, i0_rs2_class_d;
+
+   eb1_class_pkt_t         i0_d_c, i0_x_c, i0_r_c;
+
+
+   logic               i0_ap_pc2, i0_ap_pc4;
+
+   logic               i0_rd_en_d;
+
+   logic               load_ldst_bypass_d;
+
+   logic               leak1_i0_stall_in, leak1_i0_stall;
+   logic               leak1_i1_stall_in, leak1_i1_stall;
+   logic               leak1_mode;
+
+   logic               i0_csr_write_only_d;
+
+   logic               prior_inflight_x, prior_inflight_eff;
+   logic               any_csr_d;
+
+   logic               prior_csr_write;
+
+   logic [3:0]        i0_pipe_en;
+   logic              i0_r_ctl_en,  i0_x_ctl_en,  i0_wb_ctl_en;
+   logic              i0_x_data_en, i0_r_data_en, i0_wb_data_en;
+
+   logic              debug_fence_i;
+   logic              debug_fence;
+
+   logic              i0_csr_write;
+   logic              presync_stall;
+
+   logic              i0_instr_error;
+   logic              i0_icaf_d;
+
+   logic              clear_pause;
+   logic              pause_state_in, pause_state;
+   logic              pause_stall;
+
+   logic              i0_brp_valid;
+   logic              nonblock_load_cancel;
+   logic              lsu_idle;
+   logic              lsu_pmu_misaligned_r;
+   logic              csr_ren_qual_d;
+   logic              csr_read_x;
+   logic              i0_block_d;
+   logic              i0_block_raw_d;  // This is used to create the raw valid
+   logic              ps_stall_in;
+   logic [31:0]       i0_result_x;
+
+   eb1_dest_pkt_t         d_d, x_d, r_d, wbd;
+   eb1_dest_pkt_t         x_d_in, r_d_in;
+
+   eb1_trap_pkt_t         d_t, x_t, x_t_in, r_t_in, r_t;
+
+   logic [3:0]        lsu_trigger_match_r;
+
+   logic [31:1]       dec_i0_pc_r;
+
+   logic csr_read, csr_write;
+   logic i0_br_unpred;
+
+   logic nonblock_load_valid_m_delay;
+   logic i0_wen_r;
+
+   logic tlu_wr_pause_r1;
+   logic tlu_wr_pause_r2;
+
+   logic flush_final_r;
+
+   logic bitmanip_zbb_legal;
+   logic bitmanip_zbs_legal;
+   logic bitmanip_zbe_legal;
+   logic bitmanip_zbc_legal;
+   logic bitmanip_zbp_legal;
+   logic bitmanip_zbr_legal;
+   logic bitmanip_zbf_legal;
+   logic bitmanip_zba_legal;
+   logic bitmanip_zbb_zbp_legal;
+   logic bitmanip_legal;
+
+   logic              data_gate_en;
+   logic              data_gate_clk;
+
+
+   localparam NBLOAD_SIZE     = pt.LSU_NUM_NBLOAD;
+   localparam NBLOAD_SIZE_MSB = int'(pt.LSU_NUM_NBLOAD)-1;
+   localparam NBLOAD_TAG_MSB  = pt.LSU_NUM_NBLOAD_WIDTH-1;
+
+
+   logic                     cam_write, cam_inv_reset, cam_data_reset;
+   logic [NBLOAD_TAG_MSB:0]  cam_write_tag, cam_inv_reset_tag, cam_data_reset_tag;
+   logic [NBLOAD_SIZE_MSB:0] cam_wen;
+
+   logic [NBLOAD_TAG_MSB:0]  load_data_tag;
+   logic [NBLOAD_SIZE_MSB:0] nonblock_load_write;
+
+   eb1_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam;
+   eb1_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_in;
+   eb1_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_raw;
+
+   logic [4:0] nonblock_load_rd;
+   logic i0_nonblock_load_stall;
+   logic i0_nonblock_boundary_stall;
+
+   logic i0_rs1_nonblock_load_bypass_en_d, i0_rs2_nonblock_load_bypass_en_d;
+
+   logic i0_load_kill_wen_r;
+
+   logic found;
+
+   logic [NBLOAD_SIZE_MSB:0] cam_inv_reset_val, cam_data_reset_val;
+
+   logic debug_fence_raw;
+
+   logic [31:0] i0_result_r_raw;
+   logic [31:0] i0_result_corr_r;
+
+   logic [12:1] last_br_immed_x;
+
+   logic [31:0]        i0_inst_d;
+   logic [31:0]        i0_inst_x;
+   logic [31:0]        i0_inst_r;
+   logic [31:0]        i0_inst_wb_in;
+   logic [31:0]        i0_inst_wb;
+
+   logic [31:1]        i0_pc_wb;
+
+   logic               i0_wb_en;
+
+   logic               trace_enable;
+
+   logic               debug_valid_x;
+
+   eb1_inst_pkt_t i0_itype;
+   eb1_reg_pkt_t i0r;
+   
+
+
+   rvdffie  #(8) misc1ff (.*,
+                          .clk(free_l2clk),
+                          .din( {leak1_i1_stall_in,leak1_i0_stall_in,dec_tlu_flush_extint,pause_state_in ,dec_tlu_wr_pause_r, tlu_wr_pause_r1,illegal_lockout_in,ps_stall_in}),
+                          .dout({leak1_i1_stall,   leak1_i0_stall,   dec_extint_stall,    pause_state,       tlu_wr_pause_r1,tlu_wr_pause_r2,illegal_lockout,   ps_stall   })
+                          );
+
+   rvdffie  #(8) misc2ff (.*,
+                          .clk(free_l2clk),
+                          .din( {lsu_trigger_match_m[3:0],lsu_pmu_misaligned_m,div_active_in,exu_flush_final,  dec_debug_valid_d}),
+                          .dout({lsu_trigger_match_r[3:0],lsu_pmu_misaligned_r,div_active,       flush_final_r,    debug_valid_x})
+                          );
+
+if(pt.BTB_ENABLE==1) begin
+// branch prediction
+
+
+   // in leak1_mode, ignore any predictions for i0, treat branch as if we haven't seen it before
+   // in leak1 mode, also ignore branch errors for i0
+   assign i0_brp_valid                        =  dec_i0_brp.valid & ~leak1_mode & ~i0_icaf_d;
+
+   assign dec_i0_predict_p_d.misp        =  '0;
+   assign dec_i0_predict_p_d.ataken      =  '0;
+   assign dec_i0_predict_p_d.boffset     =  '0;
+
+   assign dec_i0_predict_p_d.pcall       =  i0_pcall;  // don't mark as pcall if branch error
+   assign dec_i0_predict_p_d.pja         =  i0_pja;
+   assign dec_i0_predict_p_d.pret        =  i0_pret;
+   assign dec_i0_predict_p_d.prett[31:1] =  dec_i0_brp.prett[31:1];
+   assign dec_i0_predict_p_d.pc4         =  dec_i0_pc4_d;
+   assign dec_i0_predict_p_d.hist[1:0]   =  dec_i0_brp.hist[1:0];
+   assign dec_i0_predict_p_d.valid       =  i0_brp_valid & i0_legal_decode_d;
+   assign i0_notbr_error                 =  i0_brp_valid & ~(i0_dp_raw.condbr | i0_pcall_raw | i0_pja_raw | i0_pret_raw);
+
+   // no toffset error for a pret
+   assign i0_br_toffset_error                               =  i0_brp_valid & dec_i0_brp.hist[1] & (dec_i0_brp.toffset[11:0] != i0_br_offset[11:0]) & ~i0_pret_raw;
+   assign i0_ret_error                                      =  i0_brp_valid & (dec_i0_brp.ret ^ i0_pret_raw);
+   assign i0_br_error                                       =  dec_i0_brp.br_error | i0_notbr_error | i0_br_toffset_error | i0_ret_error;
+   assign dec_i0_predict_p_d.br_error                       =  i0_br_error & i0_legal_decode_d & ~leak1_mode;
+   assign dec_i0_predict_p_d.br_start_error                 =  dec_i0_brp.br_start_error & i0_legal_decode_d & ~leak1_mode;
+   assign i0_predict_index_d[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] =  dec_i0_bp_index;
+
+   assign i0_predict_btag_d[pt.BTB_BTAG_SIZE-1:0]           =  dec_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0];
+   assign i0_br_error_all                                   = (i0_br_error | dec_i0_brp.br_start_error) & ~leak1_mode;
+   assign dec_i0_predict_p_d.toffset[11:0]                  =  i0_br_offset[11:0];
+   assign i0_predict_fghr_d[pt.BHT_GHR_SIZE-1:0]            =  dec_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0];
+   assign dec_i0_predict_p_d.way                            =  dec_i0_brp.way;
+
+
+   if(pt.BTB_FULLYA) begin
+      logic btb_error_found, btb_error_found_f;
+      logic [$clog2(pt.BTB_SIZE)-1:0] fa_error_index_ns;
+      // sticky flag: set on i0_br_error_all, held through btb_error_found_f, cleared by dec_tlu_flush_lower_r
+      assign btb_error_found = (i0_br_error_all | btb_error_found_f) & ~dec_tlu_flush_lower_r;
+      assign fa_error_index_ns = (i0_br_error_all & ~btb_error_found_f) ? dec_i0_bp_fa_index : dec_fa_error_index;
+      // dec_i0_bp_fa_index is captured only for the first error; in every other case the stored index recirculates
+      rvdff #($clog2(pt.BTB_SIZE)+1) btberrorfa_f   (.*, .clk(active_clk),
+                                                         .din({btb_error_found,    fa_error_index_ns}),
+                                                         .dout({btb_error_found_f, dec_fa_error_index}));
+
+
+   end
+   else
+     assign dec_fa_error_index = 'b0;   // BTB_FULLYA == 0: the error index output is tied to zero
+
+
+   //   end
+end // if (pt.BTB_ENABLE==1)
+else begin
+
+   always_comb begin   // BTB disabled: only the decode-derived call / jump / return flags and pc4 are driven
+      dec_i0_predict_p_d = '0;
+      dec_i0_predict_p_d.pcall       =  i0_pcall;  // don't mark as pcall if branch error
+      dec_i0_predict_p_d.pja         =  i0_pja;
+      dec_i0_predict_p_d.pret        =  i0_pret;
+      dec_i0_predict_p_d.pc4         =  dec_i0_pc4_d;
+   end
+   // with no BTB the branch-error, index, btag, fghr and brp-valid signals are all tied to zero
+   assign i0_br_error_all = '0;
+   assign i0_predict_index_d = '0;
+   assign i0_predict_btag_d = '0;
+   assign i0_predict_fghr_d = '0;
+   assign i0_brp_valid = '0;
+end // else: !if(pt.BTB_ENABLE==1)
+
+   // on br error turn anything into a nop
+   // on i0 instruction fetch access fault turn anything into a nop
+   // nop =>   alu rs1 imm12 rd lor
+
+   // an instruction-fetch access fault and a double-bit ecc error are both reported as an i0 instruction error
+   assign i0_icaf_d      = dec_i0_dbecc_d | dec_i0_icaf_d;
+   assign i0_instr_error = i0_icaf_d;
+   // use the raw decode unless a branch error or a fetch error replaces it with the nop-style packet
+   always_comb begin
+      if (i0_br_error_all | i0_instr_error) begin
+         i0_dp          = '0;
+         i0_dp.legal    = 1'b1;
+         i0_dp.postsync = 1'b1;
+         i0_dp.alu      = 1'b1;
+         i0_dp.lor      = 1'b1;
+         i0_dp.rs1      = 1'b1;
+         i0_dp.rs2      = 1'b1;
+      end
+      else i0_dp = i0_dp_raw;
+   end
+
+   assign i0[31:0] = dec_i0_instr_d[31:0];
+   // export the pc flag of the decode packet
+   assign dec_i0_select_pc_d = i0_dp.pc;
+
+   // branches that can be predicted
+   // conditional branches plus the call / jump / return cases detected from the instruction
+   assign i0_predict_br =  i0_dp.condbr | i0_pcall | i0_pja | i0_pret;
+   // predicted taken only when a valid prediction has hist[1] set; every other predictable branch is predicted not-taken
+   assign i0_predict_nt = ~(dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br;
+   assign i0_predict_t  =  (dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br;
+
+   assign i0_ap.add     =  i0_dp.add;   // alu packet: base operation selects are copied 1:1 from the decode packet
+   assign i0_ap.sub     =  i0_dp.sub;
+   assign i0_ap.land    =  i0_dp.land;
+   assign i0_ap.lor     =  i0_dp.lor;
+   assign i0_ap.lxor    =  i0_dp.lxor;
+   assign i0_ap.sll     =  i0_dp.sll;
+   assign i0_ap.srl     =  i0_dp.srl;
+   assign i0_ap.sra     =  i0_dp.sra;
+   assign i0_ap.slt     =  i0_dp.slt;
+   assign i0_ap.unsign  =  i0_dp.unsign;
+   assign i0_ap.beq     =  i0_dp.beq;
+   assign i0_ap.bne     =  i0_dp.bne;
+   assign i0_ap.blt     =  i0_dp.blt;
+   assign i0_ap.bge     =  i0_dp.bge;
+   // bit-manipulation operation selects, also copied 1:1 from the decode packet
+   assign i0_ap.clz     =  i0_dp.clz;
+   assign i0_ap.ctz     =  i0_dp.ctz;
+   assign i0_ap.pcnt    =  i0_dp.pcnt;
+   assign i0_ap.sext_b  =  i0_dp.sext_b;
+   assign i0_ap.sext_h  =  i0_dp.sext_h;
+   assign i0_ap.sh1add  =  i0_dp.sh1add;
+   assign i0_ap.sh2add  =  i0_dp.sh2add;
+   assign i0_ap.sh3add  =  i0_dp.sh3add;
+   assign i0_ap.zba     =  i0_dp.zba;
+   assign i0_ap.slo     =  i0_dp.slo;
+   assign i0_ap.sro     =  i0_dp.sro;
+   assign i0_ap.min     =  i0_dp.min;
+   assign i0_ap.max     =  i0_dp.max;
+   assign i0_ap.pack    =  i0_dp.pack;
+   assign i0_ap.packu   =  i0_dp.packu;
+   assign i0_ap.packh   =  i0_dp.packh;
+   assign i0_ap.rol     =  i0_dp.rol;
+   assign i0_ap.ror     =  i0_dp.ror;
+   assign i0_ap.grev    =  i0_dp.grev;
+   assign i0_ap.gorc    =  i0_dp.gorc;
+   assign i0_ap.zbb     =  i0_dp.zbb;
+   assign i0_ap.sbset   =  i0_dp.sbset;
+   assign i0_ap.sbclr   =  i0_dp.sbclr;
+   assign i0_ap.sbinv   =  i0_dp.sbinv;
+   assign i0_ap.sbext   =  i0_dp.sbext;
+   // csr_write is only flagged for write-only csr accesses; jal excludes the predicted call / jump / return cases
+   assign i0_ap.csr_write =  i0_csr_write_only_d;
+   assign i0_ap.csr_imm   =  i0_dp.csr_imm;
+   assign i0_ap.jal       =  i0_jal;
+   // exactly one of pc2 / pc4 is set, selected by dec_i0_pc4_d
+   assign i0_ap_pc2 = ~dec_i0_pc4_d;
+   assign i0_ap_pc4 =  dec_i0_pc4_d;
+   // prediction direction computed above travels with the alu packet
+   assign i0_ap.predict_nt = i0_predict_nt;
+   assign i0_ap.predict_t  = i0_predict_t;
+
+
+// non block load cam logic
+
+   // Allocate a cam entry for an incoming non-blocking load.
+   // Scanning upward from index 0, the first entry whose valid bit is clear gets
+   // cam_write on its write enable; all other write enables stay at 0.
+   // 'found' records that an entry has already been picked in this scan.
+   always_comb begin
+      cam_wen[NBLOAD_SIZE_MSB:0] = '0;
+      found = 1'b0;
+
+      for (int slot=0; slot<NBLOAD_SIZE; slot++) begin
+         // claim this slot only when nothing was picked earlier and the slot is free;
+         // slots that are not claimed keep the default of 0 assigned above
+         if (~(found | cam[slot].valid)) begin
+            cam_wen[slot] = cam_write;
+            found = 1'b1;
+         end
+      end
+   end
+
+
+   assign cam_write          = lsu_nonblock_load_valid_m;   // a cam entry is written whenever the lsu flags a non-blocking load
+   assign cam_write_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_tag_m[NBLOAD_TAG_MSB:0];
+   // invalidate request and the tag it applies to
+   assign cam_inv_reset          = lsu_nonblock_load_inv_r;
+   assign cam_inv_reset_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_inv_tag_r[NBLOAD_TAG_MSB:0];
+   // returning data, or a data error, releases the entry that carries the returned tag
+   assign cam_data_reset          = lsu_nonblock_load_data_valid | lsu_nonblock_load_data_error;
+   assign cam_data_reset_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_data_tag[NBLOAD_TAG_MSB:0];
+   // destination register stored with a new entry: rd of the x_d packet when it is a load, otherwise 0
+   assign nonblock_load_rd[4:0] = (x_d.i0load) ? x_d.i0rd[4:0] : 5'b0;  // rd data
+
+
+   // checks: data valid and data error never coincide, and each kind of reset matches at most one cam entry
+
+`ifdef RV_ASSERT_ON
+   assert_dec_data_valid_data_error_onehot:    assert #0 ($onehot0({lsu_nonblock_load_data_valid,lsu_nonblock_load_data_error}));
+   assert_dec_cam_inv_reset_onehot:            assert #0 ($onehot0(cam_inv_reset_val[NBLOAD_SIZE_MSB:0]));
+   assert_dec_cam_data_reset_onehot:           assert #0 ($onehot0(cam_data_reset_val[NBLOAD_SIZE_MSB:0]));
+`endif
+
+
+
+    // case of multiple loads to same dest ie. x1 ... you have to invalidate the older one
+
+   for (genvar i=0; i<NBLOAD_SIZE; i++) begin : cam_array
+      // cam_raw[i] = flopped entry, cam[i] = entry after a same-cycle data return, cam_in[i] = next state
+      assign cam_inv_reset_val[i] = cam_inv_reset   & (cam_inv_reset_tag[NBLOAD_TAG_MSB:0]  == cam[i].tag[NBLOAD_TAG_MSB:0]) & cam[i].valid;
+      // the data-return match is made against the flopped entry (cam_raw), not the masked one
+      assign cam_data_reset_val[i] = cam_data_reset & (cam_data_reset_tag[NBLOAD_TAG_MSB:0] == cam_raw[i].tag[NBLOAD_TAG_MSB:0]) & cam_raw[i].valid;
+
+      always_comb begin
+
+         cam[i] = cam_raw[i];
+         // an entry whose data (or error) returns this cycle is seen as not valid by everything that reads cam[i]
+         if (cam_data_reset_val[i])
+           cam[i].valid = 1'b0;
+         // next state, in priority order: new allocation; clear on invalidate or on a register write to the same rd while wb is set; otherwise hold
+         cam_in[i] = '0;
+
+         if (cam_wen[i]) begin
+            cam_in[i].valid    = 1'b1;
+            cam_in[i].wb       = 1'b0;
+            cam_in[i].tag[NBLOAD_TAG_MSB:0] = cam_write_tag[NBLOAD_TAG_MSB:0];
+            cam_in[i].rd[4:0]  = nonblock_load_rd[4:0];
+         end
+         else if ( (cam_inv_reset_val[i]) |
+                   (i0_wen_r & (r_d_in.i0rd[4:0] == cam[i].rd[4:0]) & cam[i].wb) )
+           cam_in[i].valid = 1'b0;
+         else
+           cam_in[i] = cam[i];
+         // wb is set when the delayed load-valid flag is high and this valid entry's tag equals lsu_nonblock_load_inv_tag_r
+         if (nonblock_load_valid_m_delay & (lsu_nonblock_load_inv_tag_r[NBLOAD_TAG_MSB:0]==cam[i].tag[NBLOAD_TAG_MSB:0]) & cam[i].valid)
+           cam_in[i].wb = 1'b1;
+
+         // force debug halt forces cam valids to 0; highest priority
+         if (dec_tlu_force_halt)
+           cam_in[i].valid = 1'b0;
+      end
+
+
+   rvdffie #( $bits(eb1_load_cam_pkt_t) ) cam_ff (.*, .din(cam_in[i]), .dout(cam_raw[i]));
+
+   // this entry is the writeback source when its flopped tag matches the tag of the returning data
+   assign nonblock_load_write[i] = (load_data_tag[NBLOAD_TAG_MSB:0] == cam_raw[i].tag[NBLOAD_TAG_MSB:0]) & cam_raw[i].valid;
+
+
+end : cam_array
+
+
+
+   assign load_data_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_data_tag[NBLOAD_TAG_MSB:0];
+
+`ifdef RV_ASSERT_ON
+   assert_dec_cam_nonblock_load_write_onehot:   assert #0 ($onehot0(nonblock_load_write[NBLOAD_SIZE_MSB:0]));
+`endif
+
+   // a register write this cycle (i0_wen_r) to the same register as the returning load cancels the load's write
+   assign nonblock_load_cancel = ((r_d_in.i0rd[4:0] == dec_nonblock_load_waddr[4:0]) & i0_wen_r);     // cancel if any younger inst (including another nonblock) committing this cycle
+
+   // write the returned data only when it is valid, some cam entry matches its tag, and the write was not cancelled
+   assign dec_nonblock_load_wen = lsu_nonblock_load_data_valid & |nonblock_load_write[NBLOAD_SIZE_MSB:0] & ~nonblock_load_cancel;
+
+   always_comb begin
+      // OR-reduce over the cam: rd of the matching entry, plus a stall when an enabled source equals the rd of any valid entry
+      dec_nonblock_load_waddr[4:0] = '0;
+      i0_nonblock_load_stall = i0_nonblock_boundary_stall;
+
+      for (int i=0; i<NBLOAD_SIZE; i++) begin
+         dec_nonblock_load_waddr[4:0] |= ({5{nonblock_load_write[i]}} & cam[i].rd[4:0]);
+         i0_nonblock_load_stall |= dec_i0_rs1_en_d & cam[i].valid & (cam[i].rd[4:0] == i0r.rs1[4:0]);
+         i0_nonblock_load_stall |= dec_i0_rs2_en_d & cam[i].valid & (cam[i].rd[4:0] == i0r.rs2[4:0]);
+      end
+
+   end
+   // boundary case: a load being written into the cam this cycle is compared directly through nonblock_load_rd
+   assign i0_nonblock_boundary_stall = ((nonblock_load_rd[4:0]==i0r.rs1[4:0]) & lsu_nonblock_load_valid_m & dec_i0_rs1_en_d) |
+                                       ((nonblock_load_rd[4:0]==i0r.rs2[4:0]) & lsu_nonblock_load_valid_m & dec_i0_rs2_en_d);
+
+
+
+// don't writeback a nonblock load
+   // lsu_nonblock_load_valid_m delayed by one enabled cycle of i0_r_ctl_en
+   rvdffs #(1) wbnbloaddelayff (.*, .clk(active_clk), .en(i0_r_ctl_en ), .din(lsu_nonblock_load_valid_m),        .dout(nonblock_load_valid_m_delay) );
+   // kill the normal register write when the delayed flag is set and the r_d packet is a load
+   assign i0_load_kill_wen_r = nonblock_load_valid_m_delay &  r_d.i0load;
+
+
+
+// end non block load cam logic
+
+// pmu start
+
+
+
+
+   assign csr_read = csr_ren_qual_d;
+   assign csr_write = dec_csr_wen_unq_d;
+   // a jal that is not one of the predictable branch forms
+   assign i0_br_unpred = i0_dp.jal & ~i0_predict_br;
+
+   // the classes must be mutually exclusive with one another
+   // (the ifs below are evaluated in order, so if several conditions were true the last one listed would win)
+   always_comb begin
+      i0_itype = NULL;
+      // a type other than NULL is reported only for a legal instruction that decodes this cycle
+      if (i0_legal_decode_d) begin
+         if (i0_dp.mul)                  i0_itype = MUL;
+         if (i0_dp.load)                 i0_itype = LOAD;
+         if (i0_dp.store)                i0_itype = STORE;
+         if (i0_dp.pm_alu)               i0_itype = ALU;
+         if (i0_dp.zbb | i0_dp.zbs |
+             i0_dp.zbe | i0_dp.zbc |
+             i0_dp.zbp | i0_dp.zbr |
+             i0_dp.zbf | i0_dp.zba)
+                                         i0_itype = BITMANIPU;
+         if ( csr_read & ~csr_write)     i0_itype = CSRREAD;
+         if (~csr_read &  csr_write)     i0_itype = CSRWRITE;
+         if ( csr_read &  csr_write)     i0_itype = CSRRW;
+         if (i0_dp.ebreak)               i0_itype = EBREAK;
+         if (i0_dp.ecall)                i0_itype = ECALL;
+         if (i0_dp.fence)                i0_itype = FENCE;
+         if (i0_dp.fence_i)              i0_itype = FENCEI;  // fencei will set this even with fence attribute
+         if (i0_dp.mret)                 i0_itype = MRET;
+         if (i0_dp.condbr)               i0_itype = CONDBR;
+         if (i0_dp.jal)                  i0_itype = JAL;
+      end
+   end
+
+
+
+
+
+// end pmu
+
+
+   eb1_dec_dec_ctl i0_dec (.inst(i0[31:0]),.out(i0_dp_raw));
+   // i0_dp_raw above is the raw decode of the instruction word; i0_dp is derived from it elsewhere in this file
+
+   // registered copy of lsu_idle_any
+   rvdff #(1) lsu_idle_ff (.*, .clk(active_clk), .din(lsu_idle_any), .dout(lsu_idle));
+
+   // NOTE(review): the flops from the *_stall_in signals to leak1_i1_stall / leak1_i0_stall are outside this view
+   // leak1_i1_stall_in: set by dec_tlu_flush_leak_one_r, otherwise held until dec_tlu_flush_lower_r
+   assign leak1_i1_stall_in = (dec_tlu_flush_leak_one_r | (leak1_i1_stall & ~dec_tlu_flush_lower_r));
+
+
+   assign leak1_mode = leak1_i1_stall;
+   // leak1_i0_stall_in: set when an instruction decodes while leak1_i1_stall is high, otherwise held until dec_tlu_flush_lower_r
+   assign leak1_i0_stall_in = ((dec_i0_decode_d & leak1_i1_stall) | (leak1_i0_stall & ~dec_tlu_flush_lower_r));
+
+
+
+
+   // 12b jal's can be predicted - these are calls
+   // i0_pcall_imm is the J-type immediate, bits 20:1, reassembled from the instruction word
+   assign i0_pcall_imm[20:1] = {i0[31],i0[19:12],i0[20],i0[30:21]};
+   // the offset qualifies when bits 20:13 all equal bit 12, i.e. bits 12:1 carry the whole signed value
+   assign i0_pcall_12b_offset = (i0_pcall_imm[12]) ? (i0_pcall_imm[20:13] == 8'hff) : (i0_pcall_imm[20:13] == 8'h0);
+   // rd of x1 or x5 makes it a call (pcall); any other rd makes it a plain jump (pja)
+   assign i0_pcall_case  = i0_pcall_12b_offset & i0_dp_raw.imm20 &  (i0r.rd[4:0] == 5'd1 | i0r.rd[4:0] == 5'd5);
+   assign i0_pja_case    = i0_pcall_12b_offset & i0_dp_raw.imm20 & ~(i0r.rd[4:0] == 5'd1 | i0r.rd[4:0] == 5'd5);
+   // the *_raw versions use the raw decode; the others use i0_dp, which is replaced on branch / fetch errors
+   assign i0_pcall_raw   = i0_dp_raw.jal &   i0_pcall_case;   // NOTE(review): old comment said "this includes ja", but plain jumps are covered by i0_pja_case
+   assign i0_pcall       = i0_dp.jal     &   i0_pcall_case;
+
+   assign i0_pja_raw     = i0_dp_raw.jal &   i0_pja_case;
+   assign i0_pja         = i0_dp.jal     &   i0_pja_case;
+
+   // branch offset: J-type bits 12:1 for a call / jump, otherwise the B-type immediate fields of the instruction
+   assign i0_br_offset[11:0] = (i0_pcall_raw | i0_pja_raw) ? i0_pcall_imm[12:1] : {i0[31],i0[7],i0[30:25],i0[11:8]};
+
+   assign i0_pret_case = (i0_dp_raw.jal & i0_dp_raw.imm12 & (i0r.rd[4:0] == 5'b0) & (i0r.rs1[4:0] == 5'd1 | i0r.rs1[4:0] == 5'd5));  // jalr with rd==0, rs1==1 or rs1==5 is a ret
+
+   assign i0_pret_raw = i0_dp_raw.jal &   i0_pret_case;
+   assign i0_pret     = i0_dp.jal     &   i0_pret_case;
+   // any remaining jal-class instruction that is neither call, jump nor return
+   assign i0_jal      = i0_dp.jal     &  ~i0_pcall_case & ~i0_pja_case & ~i0_pret_case;
+
+   // lsu stuff
+   // load/store mutually exclusive
+   assign dec_lsu_offset_d[11:0] = ({12{ ~dec_extint_stall & i0_dp.lsu & i0_dp.load}} &               i0[31:20]) |
+                                   ({12{ ~dec_extint_stall & i0_dp.lsu & i0_dp.store}} &             {i0[31:25],i0[11:7]});
+   // the offset is 0 during dec_extint_stall; loads take instruction bits 31:20, stores take bits 31:25 and 11:7
+
+   // divide packet
+   assign div_p.valid    =  div_decode_d;
+
+   assign div_p.unsign   =  i0_dp.unsign;
+   assign div_p.rem      =  i0_dp.rem;
+
+   // multiply packet, including the bit-manipulation operation selects copied from the decode packet
+   assign mul_p.valid    =  mul_decode_d;
+
+   assign mul_p.rs1_sign =  i0_dp.rs1_sign;
+   assign mul_p.rs2_sign =  i0_dp.rs2_sign;
+   assign mul_p.low      =  i0_dp.low;
+   assign mul_p.bext     =  i0_dp.bext;
+   assign mul_p.bdep     =  i0_dp.bdep;
+   assign mul_p.clmul    =  i0_dp.clmul;
+   assign mul_p.clmulh   =  i0_dp.clmulh;
+   assign mul_p.clmulr   =  i0_dp.clmulr;
+   assign mul_p.grev     =  i0_dp.grev;
+   assign mul_p.gorc     =  i0_dp.gorc;
+   assign mul_p.shfl     =  i0_dp.shfl;
+   assign mul_p.unshfl   =  i0_dp.unshfl;
+   assign mul_p.crc32_b  =  i0_dp.crc32_b;
+   assign mul_p.crc32_h  =  i0_dp.crc32_h;
+   assign mul_p.crc32_w  =  i0_dp.crc32_w;
+   assign mul_p.crc32c_b =  i0_dp.crc32c_b;
+   assign mul_p.crc32c_h =  i0_dp.crc32c_h;
+   assign mul_p.crc32c_w =  i0_dp.crc32c_w;
+   assign mul_p.bfp      =  i0_dp.bfp;
+
+   always_comb  begin
+      lsu_p = '0;
+      // during dec_extint_stall the packet is forced to a valid word load with fast_int set; every other field stays 0
+      if (dec_extint_stall) begin
+         lsu_p.load = 1'b1;
+         lsu_p.word = 1'b1;
+         lsu_p.fast_int = 1'b1;
+         lsu_p.valid = 1'b1;
+      end
+      else begin
+         lsu_p.valid = lsu_decode_d;
+         // access type and size come straight from the decode packet
+         lsu_p.load                         =  i0_dp.load ;
+         lsu_p.store                        =  i0_dp.store;
+         lsu_p.by                           =  i0_dp.by   ;
+         lsu_p.half                         =  i0_dp.half ;
+         lsu_p.word                         =  i0_dp.word ;
+         lsu_p.stack                        = (i0r.rs1[4:0]==5'd2);   // stack reference
+         // bypass selects computed by the load-dependency logic in this file
+         lsu_p.load_ldst_bypass_d          =  load_ldst_bypass_d ;
+         lsu_p.store_data_bypass_d         =  store_data_bypass_d;
+         lsu_p.store_data_bypass_m         =  store_data_bypass_m;
+
+         lsu_p.unsign  =  i0_dp.unsign;
+      end
+   end
+
+
+   assign  dec_lsu_valid_raw_d    = (i0_valid_d & (i0_dp_raw.load | i0_dp_raw.store) & ~dma_dccm_stall_any & ~i0_block_raw_d) | dec_extint_stall;
+   // raw lsu valid: a valid load / store (raw decode) with no dma stall and no raw block, or an external-interrupt stall
+
+   // register specifier fields of the instruction word
+   assign i0r.rs1[4:0] = i0[19:15];
+   assign i0r.rs2[4:0] = i0[24:20];
+   assign i0r.rd[4:0]  = i0[11:7];
+
+   // a source or destination is enabled only when the decode uses it and the register number is not 0
+   assign dec_i0_rs1_en_d   =  (i0_dp.rs1 & (i0r.rs1[4:0] != 5'd0));  // if rs1_en=0 then read will be all 0's
+   assign dec_i0_rs2_en_d   =  (i0_dp.rs2 & (i0r.rs2[4:0] != 5'd0));
+   assign i0_rd_en_d        =  (i0_dp.rd  & (i0r.rd[4:0]  != 5'd0));
+
+   assign dec_i0_rs1_d[4:0] =  i0r.rs1[4:0];
+   assign dec_i0_rs2_d[4:0] =  i0r.rs2[4:0];
+
+   // imm20 together with jal is the jal offset; imm20 without jal is the upper-immediate form
+   assign i0_jalimm20       =  i0_dp.jal & i0_dp.imm20;   // jal
+   assign i0_uiimm20        = ~i0_dp.jal & i0_dp.imm20;
+
+
+   // csr logic
+   // dec_csr_ren_d and the *_unq_d signals qualify with i0_valid_d only; the other *_d signals require a legal decode
+   assign dec_csr_ren_d  = i0_dp.csr_read & i0_valid_d;
+   assign csr_ren_qual_d = i0_dp.csr_read & i0_legal_decode_d;
+
+   assign csr_clr_d =   i0_dp.csr_clr   & i0_legal_decode_d;
+   assign csr_set_d   = i0_dp.csr_set   & i0_legal_decode_d;
+   assign csr_write_d = i0_csr_write    & i0_legal_decode_d;
+   // write-only csr access: a csr write with no csr read
+   assign i0_csr_write_only_d = i0_csr_write & ~i0_dp.csr_read;
+
+   assign dec_csr_wen_unq_d = (i0_dp.csr_clr | i0_dp.csr_set | i0_csr_write) & i0_valid_d;   // for csr legal, can't write read-only csr
+
+   assign dec_csr_any_unq_d = any_csr_d & i0_valid_d;
+
+   // csr addresses: the read address is instruction bits 31:20 at decode, the write address comes from the r_d packet
+   assign dec_csr_rdaddr_d[11:0] =  {12{dec_csr_any_unq_d}} & i0[31:20];
+   assign dec_csr_wraddr_r[11:0] =  {12{r_d.csrwen & r_d.i0valid}} & r_d.csrwaddr[11:0];
+
+
+   // make sure csr doesn't write same cycle as dec_tlu_flush_lower_wb
+   // also use valid so it's flushable
+   assign dec_csr_wen_r = r_d.csrwen & r_d.i0valid & ~dec_tlu_i0_kill_writeb_r;
+
+   // If we are writing MIE or MSTATUS, hold off the external interrupt for a cycle on the write.
+   assign dec_csr_stall_int_ff = ((r_d.csrwaddr[11:0] == 12'h300) | (r_d.csrwaddr[11:0] == 12'h304)) & r_d.csrwen & r_d.i0valid & ~dec_tlu_i0_kill_writeb_wb;
+
+   // flop the qualified csr read / clr / set / write flags and the csr_imm flag into their *_x copies
+   rvdff #(5) csrmiscff (.*,
+                        .clk (active_clk),
+                        .din ({csr_ren_qual_d, csr_clr_d, csr_set_d, csr_write_d, i0_dp.csr_imm}),
+                        .dout({csr_read_x,     csr_clr_x, csr_set_x, csr_write_x, csr_imm_x})
+                       );
+
+
+
+
+   // perform the update operation if any
+   // capture the csr read data and instruction bits 19:15 (the 5-bit immediate) when a csr instruction advances
+   rvdffe #(37) csr_rddata_x_ff (.*, .en(i0_x_data_en & any_csr_d), .din( {i0[19:15],dec_csr_rddata_d[31:0]}), .dout({csrimm_x[4:0],csr_rddata_x[31:0]}));
+
+   // operand: the zero-extended 5-bit immediate when csr_imm_x is set, otherwise exu_csr_rs1_x
+   assign csr_mask_x[31:0]       = ({32{ csr_imm_x}} & {27'b0,csrimm_x[4:0]}) |
+                                   ({32{~csr_imm_x}} &  exu_csr_rs1_x[31:0] );
+
+   // clear = read data & ~operand, set = read data | operand, write = operand
+   assign write_csr_data_x[31:0] = ({32{csr_clr_x}}   & (csr_rddata_x[31:0] & ~csr_mask_x[31:0])) |
+                                   ({32{csr_set_x}}   & (csr_rddata_x[31:0] |  csr_mask_x[31:0])) |
+                                   ({32{csr_write_x}} & (                      csr_mask_x[31:0]));
+
+
+// pause instruction
+
+   // pause_state_in is raised by dec_tlu_wr_pause_r and held until clear_pause
+   // clear_pause: a lower flush that is not the pause flush, or the count in write_csr_data reaching 0 or 1 while paused
+   // NOTE(review): the flop from pause_state_in to pause_state is outside this view
+   assign clear_pause = (dec_tlu_flush_lower_r & ~dec_tlu_flush_pause_r) |
+                        (pause_state & (write_csr_data[31:1] == 31'b0));        // if 0 or 1 then exit pause state - 1 cycle pause
+
+   assign pause_state_in = (dec_tlu_wr_pause_r | pause_state) & ~clear_pause;
+
+
+
+   assign dec_pause_state = pause_state;
+
+
+   // NOTE(review): tlu_wr_pause_r1 / tlu_wr_pause_r2 are produced outside this view
+      assign dec_pause_state_cg = pause_state & ~tlu_wr_pause_r1 & ~tlu_wr_pause_r2;
+
+// end pause
+
+   // write_csr_data doubles as the pause counter: loaded on dec_tlu_wr_pause_r, decremented by 1 while pause_state is set
+   assign csr_data_wen = ((csr_clr_x | csr_set_x | csr_write_x) & csr_read_x) | dec_tlu_wr_pause_r | pause_state;
+
+   assign write_csr_data_in[31:0] = (pause_state)         ? (write_csr_data[31:0] - 32'b1) :
+                                    (dec_tlu_wr_pause_r) ? dec_csr_wrdata_r[31:0] : write_csr_data_x[31:0];
+
+   // will hold until write-back at which time the CSR will be updated while GPR is possibly written with prior CSR
+   rvdffe #(32) write_csr_ff (.*, .clk(free_l2clk), .en(csr_data_wen), .din(write_csr_data_in[31:0]), .dout(write_csr_data[31:0]));
+
+   assign pause_stall = pause_state;
+
+   // for csr write only data is produced by the alu
+   assign dec_csr_wrdata_r[31:0]  = (r_d.csrwonly & r_d.i0valid) ? i0_result_corr_r[31:0] : write_csr_data[31:0];
+
+
+
+   assign dec_i0_immed_d[31:0] =  i0_immed_d[31:0];
+   // immediate operand: AND-OR mux over the immediate formats, each term gated by its decode flag
+   assign     i0_immed_d[31:0] = ({32{i0_dp.imm12}}                         & { {20{i0[31]}},i0[31:20] }) |  // jalr
+                                 ({32{i0_dp.shimm5}}                        & {  27'b0,      i0[24:20] }) |
+                                 ({32{i0_jalimm20}}                         & { {12{i0[31]}},i0[19:12],i0[20],i0[30:21],1'b0}) |
+                                 ({32{i0_uiimm20}}                          & { i0[31:12],12'b0 }) |
+                                 ({32{i0_csr_write_only_d & i0_dp.csr_imm}} & {  27'b0,      i0[19:15]});  // for csr's that only write csr, dont read csr
+
+
+   // all conditional branches are currently predict_nt
+   // change this to generate the sequential address for all other cases for NPC requirements at commit
+   assign dec_i0_br_immed_d[12:1] = (i0_ap.predict_nt & ~i0_dp.jal) ? i0_br_offset[11:0] : {10'b0,i0_ap_pc4,i0_ap_pc2};
+   // i.e. predicted not-taken and not a jal sends the branch offset; every other case sends the increment (bit 2 for pc4, bit 1 for pc2)
+   // last_br_immed_d selects the increment when predicted not-taken and the branch offset otherwise
+   assign last_br_immed_d[12:1] = ((i0_ap.predict_nt) ? {10'b0,i0_ap_pc4,i0_ap_pc2} : i0_br_offset[11:0] );
+
+   assign i0_valid_d = dec_ib0_valid_d;
+
+   // load_stall includes bus_barrier
+   // loads and stores are each held off by their own lsu stall or by dma_dccm_stall_any
+   assign i0_load_stall_d = (i0_dp.load ) & (lsu_load_stall_any | dma_dccm_stall_any);
+
+   assign i0_store_stall_d =  i0_dp.store & (lsu_store_stall_any | dma_dccm_stall_any);
+
+
+
+// some CSR reads need to be presync'd
+   assign i0_presync = i0_dp.presync | dec_tlu_presync_d | debug_fence_i | debug_fence_raw | dec_tlu_pipelining_disable;  // both fence's presync
+
+// some CSR writes need to be postsync'd
+   assign i0_postsync = i0_dp.postsync | dec_tlu_postsync_d | debug_fence_i | // only fence_i postsync
+                        (i0_csr_write_only_d & (i0[31:20] == 12'h7c2));   // wr_pause must postsync
+
+   // bits 0 and 1 of dbg_cmd_wrdata select the debug fence_i and the debug fence respectively
+// debug fence csr
+   assign debug_fence_i     = dec_debug_fence_d & dbg_cmd_wrdata[0];
+   assign debug_fence_raw   = dec_debug_fence_d & dbg_cmd_wrdata[1];
+
+   assign debug_fence       = debug_fence_raw | debug_fence_i;    // fence_i causes a fence
+   // the decode packet's csr_write flag is ignored while dec_debug_fence_d is set
+   assign i0_csr_write = i0_dp.csr_write & ~dec_debug_fence_d;
+// end debug
+
+
+   // lets make ebreak, ecall, mret postsync, so break sync into pre and post
+   // presync: hold a presync instruction while an older instruction is still in flight
+   assign presync_stall      = (i0_presync & prior_inflight_eff);
+   // for a divide only the X-stage valid counts; for everything else both X and R count
+   assign prior_inflight_eff = (i0_dp.div)  ?  prior_inflight_x  :  prior_inflight;
+   // a divide cannot decode while another divide is active
+   assign i0_div_prior_div_stall = i0_dp.div & div_active;
+
+   // Raw block has everything excepts the stalls coming from the lsu
+   assign i0_block_raw_d = (i0_dp.csr_read & prior_csr_write) |
+                            dec_extint_stall |
+                            pause_stall |
+                            leak1_i0_stall |
+                            dec_tlu_debug_stall |
+                            postsync_stall |
+                            presync_stall  |
+                            ((i0_dp.fence | debug_fence) & ~lsu_idle) |
+                            i0_nonblock_load_stall |
+                            i0_load_block_d |
+                            i0_nonblock_div_stall |
+                            i0_div_prior_div_stall;
+   // the full block adds the lsu load / store stalls; the exu-side block is the raw block only
+   assign i0_block_d    = i0_block_raw_d | i0_store_stall_d | i0_load_stall_d;
+   assign i0_exublock_d = i0_block_raw_d;
+
+
+   // block reads if there is a prior csr write in the pipeline
+   assign prior_csr_write = x_d.csrwonly |
+                            r_d.csrwonly |
+                            wbd.csrwonly;
+
+
+
+   if       (pt.BITMANIP_ZBB == 1)   // each block: extension enabled => always legal, else reject what only that extension provides
+     assign bitmanip_zbb_legal      =  1'b1;
+   else
+     assign bitmanip_zbb_legal      = ~(i0_dp.zbb & ~i0_dp.zbp);
+   // (zbb disabled: only instructions flagged zbb and not zbp are rejected here)
+   if       (pt.BITMANIP_ZBS == 1)
+     assign bitmanip_zbs_legal      =  1'b1;
+   else
+     assign bitmanip_zbs_legal      = ~i0_dp.zbs;
+
+   if       (pt.BITMANIP_ZBE == 1)
+     assign bitmanip_zbe_legal      =  1'b1;
+   else
+     assign bitmanip_zbe_legal      = ~i0_dp.zbe;
+
+   if       (pt.BITMANIP_ZBC == 1)
+     assign bitmanip_zbc_legal      =  1'b1;
+   else
+     assign bitmanip_zbc_legal      = ~i0_dp.zbc;
+
+   if       (pt.BITMANIP_ZBP == 1)
+     assign bitmanip_zbp_legal      =  1'b1;
+   else
+     assign bitmanip_zbp_legal      = ~(i0_dp.zbp & ~i0_dp.zbb);
+   // (zbp disabled: only instructions flagged zbp and not zbb are rejected here)
+   if       (pt.BITMANIP_ZBR == 1)
+     assign bitmanip_zbr_legal      =  1'b1;
+   else
+     assign bitmanip_zbr_legal      = ~i0_dp.zbr;
+
+   if       (pt.BITMANIP_ZBF == 1)
+     assign bitmanip_zbf_legal      =  1'b1;
+   else
+     assign bitmanip_zbf_legal      = ~i0_dp.zbf;
+
+   if (pt.BITMANIP_ZBA == 1)
+     assign bitmanip_zba_legal      =  1'b1;
+   else
+     assign bitmanip_zba_legal      = ~i0_dp.zba;
+   // instructions flagged both zbb and zbp are rejected only when neither extension is enabled
+   if     ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) )
+     assign bitmanip_zbb_zbp_legal  =  1'b1;
+   else
+     assign bitmanip_zbb_zbp_legal  = ~(i0_dp.zbb & i0_dp.zbp);
+
+
+   assign any_csr_d      =  i0_dp.csr_read | i0_csr_write;
+   assign bitmanip_legal =  bitmanip_zbb_legal & bitmanip_zbs_legal & bitmanip_zbe_legal & bitmanip_zbc_legal & bitmanip_zbp_legal & bitmanip_zbr_legal & bitmanip_zbf_legal & bitmanip_zba_legal & bitmanip_zbb_zbp_legal;
+   // legal = decoder says legal, any csr access passes dec_csr_legal_d, and every bit-manipulation check passes
+   assign i0_legal       =  (i0_dp.legal) & (~any_csr_d | dec_csr_legal_d) & bitmanip_legal;
+
+
+
+   // illegal inst handling
+
+   // shift_illegal: an instruction decodes this cycle and is not legal
+   assign shift_illegal      = dec_i0_decode_d & ~i0_legal;
+
+   assign illegal_inst_en    = shift_illegal & ~illegal_lockout;
+   // capture i0_inst_d for the illegal instruction; the lockout blocks further captures
+   rvdffe #(32) illegal_any_ff (.*, .en(illegal_inst_en), .din(i0_inst_d[31:0]), .dout(dec_illegal_inst[31:0]));
+   // lockout next state: set by an illegal decode, held until flush_final_r (the lockout flop is outside this view)
+   assign illegal_lockout_in = (shift_illegal | illegal_lockout) & ~flush_final_r;
+
+
+
+   // allow illegals to flow down the pipe
+   assign dec_i0_decode_d = i0_valid_d & ~i0_block_d    & ~dec_tlu_flush_lower_r & ~flush_final_r;
+   assign i0_exudecode_d  = i0_valid_d & ~i0_exublock_d & ~dec_tlu_flush_lower_r & ~flush_final_r;
+
+   // define i0 legal decode
+   assign i0_legal_decode_d    = dec_i0_decode_d & i0_legal;
+   assign i0_exulegal_decode_d = i0_exudecode_d  & i0_legal;
+
+
+   // performance monitor signals
+   assign dec_pmu_instr_decoded = dec_i0_decode_d;
+   // a decode stall is a valid instruction that does not decode this cycle
+   assign dec_pmu_decode_stall = i0_valid_d & ~dec_i0_decode_d;
+
+   assign dec_pmu_postsync_stall = postsync_stall & i0_valid_d;
+   assign dec_pmu_presync_stall  = presync_stall & i0_valid_d;
+
+
+
+   // illegals will postsync
+   assign ps_stall_in =  ( dec_i0_decode_d & (i0_postsync | ~i0_legal) ) |
+                         ( ps_stall & prior_inflight_x                 );
+   // i.e. set when a postsync or illegal instruction decodes, then held while the X stage is still valid
+   // NOTE(review): the flop from ps_stall_in to ps_stall is outside this view
+
+   assign postsync_stall =  ps_stall;
+
+   // in-flight indicators: valid bits of the x_d and r_d packets
+   assign prior_inflight_x    =  x_d.i0valid;
+   assign prior_inflight_wb   =  r_d.i0valid;
+
+   assign prior_inflight = prior_inflight_x | prior_inflight_wb;
+   // unit decodes: alu / mul / div use the exu-side legal decode, lsu uses the full legal decode
+   assign dec_i0_alu_decode_d = i0_exulegal_decode_d & i0_dp.alu;
+   assign dec_i0_branch_d     = i0_dp.condbr | i0_dp.jal | i0_br_error_all;
+
+   assign lsu_decode_d = i0_legal_decode_d    & i0_dp.lsu;
+   assign mul_decode_d = i0_exulegal_decode_d & i0_dp.mul;
+   assign div_decode_d = i0_exulegal_decode_d & i0_dp.div;
+
+   assign dec_qual_lsu_d = i0_dp.lsu;
+
+
+
+
+
+// scheduling logic for alu
+   // a source depends on the x_d / r_d packet when it is enabled and equals that packet's rd with i0v set
+   assign i0_rs1_depend_i0_x  = dec_i0_rs1_en_d & x_d.i0v & (x_d.i0rd[4:0] == i0r.rs1[4:0]);
+   assign i0_rs1_depend_i0_r  = dec_i0_rs1_en_d & r_d.i0v & (r_d.i0rd[4:0] == i0r.rs1[4:0]);
+
+   assign i0_rs2_depend_i0_x  = dec_i0_rs2_en_d & x_d.i0v & (x_d.i0rd[4:0] == i0r.rs2[4:0]);
+   assign i0_rs2_depend_i0_r  = dec_i0_rs2_en_d & r_d.i0v & (r_d.i0rd[4:0] == i0r.rs2[4:0]);
+
+
+// order the producers as follows: i0_x first (depth 1), then i0_r (depth 2); no match gives class 0 and depth 0
+
+   assign {i0_rs1_class_d, i0_rs1_depth_d[1:0]} = (i0_rs1_depend_i0_x ) ? { i0_x_c,  2'd1  } :
+                                                  (i0_rs1_depend_i0_r ) ? { i0_r_c,  2'd2  } : '0;
+
+   assign {i0_rs2_class_d, i0_rs2_depth_d[1:0]} = (i0_rs2_depend_i0_x ) ? { i0_x_c,  2'd1  } :
+                                                  (i0_rs2_depend_i0_r ) ? { i0_r_c,  2'd2  } : '0;
+
+
+// stores will bypass load data in the lsu pipe
+   // depth bit 0 marks a producer in x_d and depth bit 1 a producer in r_d (see the depth values assigned above)
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin : genblock
+      assign i0_load_block_d = (i0_rs1_class_d.load & i0_rs1_depth_d[0]) |
+                               (i0_rs2_class_d.load & i0_rs2_depth_d[0] & ~i0_dp.store);
+      // block on a load in x_d, except for store data (rs2 of a store), which uses store_data_bypass_m instead
+      assign load_ldst_bypass_d    =  (i0_dp.load | i0_dp.store) & i0_rs1_depth_d[1] & i0_rs1_class_d.load;
+
+      assign store_data_bypass_d =                  i0_dp.store  & i0_rs2_depth_d[1] & i0_rs2_class_d.load;
+
+      assign store_data_bypass_m =                  i0_dp.store  & i0_rs2_depth_d[0] & i0_rs2_class_d.load;
+   end
+   else begin : genblock
+      // without the extra load-to-use cycle there is no load block and the bypasses come from the x_d producer
+      assign i0_load_block_d = 1'b0;
+
+      assign load_ldst_bypass_d    =  (i0_dp.load | i0_dp.store) & i0_rs1_depth_d[0] & i0_rs1_class_d.load;
+
+      assign store_data_bypass_d =                  i0_dp.store  & i0_rs2_depth_d[0] & i0_rs2_class_d.load;
+
+      assign store_data_bypass_m = 1'b0;
+   end
+
+
+
+
+
+
+   assign dec_tlu_i0_valid_r     =  r_d.i0valid & ~dec_tlu_flush_lower_wb;
+
+   // trap packet built at decode (d_t), then flopped to x_t and r_t below
+   assign d_t.legal              =  i0_legal_decode_d;
+   assign d_t.icaf               =  i0_icaf_d & i0_legal_decode_d;                // dbecc is icaf exception
+   assign d_t.icaf_second        =  dec_i0_icaf_second_d & i0_legal_decode_d;     // this includes icaf and dbecc
+   assign d_t.icaf_type[1:0]     =  dec_i0_icaf_type_d[1:0];
+
+   assign d_t.fence_i            = (i0_dp.fence_i | debug_fence_i) & i0_legal_decode_d;
+
+// put pmu info into the trap packet
+   assign d_t.pmu_i0_itype       =  i0_itype;
+   assign d_t.pmu_i0_br_unpred   =  i0_br_unpred;
+   assign d_t.pmu_divide         =  1'b0;
+   assign d_t.pmu_lsu_misaligned =  1'b0;
+   // pmu_divide and pmu_lsu_misaligned are 0 here and filled in further down this block
+   assign d_t.i0trigger[3:0]     =  dec_i0_trigger_match_d[3:0] & {4{dec_i0_decode_d}};
+
+
+
+   rvdfflie #( .WIDTH($bits(eb1_trap_pkt_t)),.LEFT(9) ) trap_xff (.*, .en(i0_x_ctl_en), .din(d_t),  .dout(x_t));
+
+   always_comb begin
+      x_t_in = x_t;
+      x_t_in.i0trigger[3:0] = x_t.i0trigger & ~{4{dec_tlu_flush_lower_wb}};
+   end
+
+   // NOTE(review): enabled by i0_x_ctl_en like trap_xff, while the other R-stage flops in this file use i0_r_ctl_en - confirm intended
+   rvdfflie  #( .WIDTH($bits(eb1_trap_pkt_t)),.LEFT(9) ) trap_r_ff (.*, .en(i0_x_ctl_en), .din(x_t_in),  .dout(r_t));
+
+
+    always_comb begin
+
+      r_t_in                             =  r_t;
+      // lsu trigger matches are merged in only when the r_d packet is a load or a store
+      r_t_in.i0trigger[3:0]              = ({4{(r_d.i0load | r_d.i0store)}} & lsu_trigger_match_r[3:0]) | r_t.i0trigger[3:0];
+      r_t_in.pmu_lsu_misaligned          = lsu_pmu_misaligned_r;   // only valid if a load/store is valid in DC3 stage
+      // a lower flush at wb clears the whole packet
+      if (dec_tlu_flush_lower_wb) r_t_in = '0 ;
+
+   end
+
+
+   always_comb begin
+      // packet handed to the tlu: r_t_in with pmu_divide taken from the r_d packet
+      dec_tlu_packet_r                 =  r_t_in;
+      dec_tlu_packet_r.pmu_divide      =  r_d.i0div & r_d.i0valid;
+
+   end
+
+
+// end tlu stuff
+
+
+   assign i0_d_c.mul                =  i0_dp.mul  & i0_legal_decode_d;
+   assign i0_d_c.load               =  i0_dp.load & i0_legal_decode_d;
+   assign i0_d_c.alu                =  i0_dp.alu  & i0_legal_decode_d;
+   // class packet follows the instruction into i0_x_c and i0_r_c; these feed the dependency logic
+   rvdffs #( $bits(eb1_class_pkt_t) ) i0_x_c_ff   (.*, .en(i0_x_ctl_en),  .clk(active_clk), .din(i0_d_c),  .dout(i0_x_c));
+   rvdffs #( $bits(eb1_class_pkt_t) ) i0_r_c_ff   (.*, .en(i0_r_ctl_en),  .clk(active_clk), .din(i0_x_c),  .dout(i0_r_c));
+
+   // destination packet at decode: i0v requires an enabled rd and a legal decode, i0valid only requires the decode
+   assign d_d.i0rd[4:0]             =  i0r.rd[4:0];
+   assign d_d.i0v                   =  i0_rd_en_d  & i0_legal_decode_d;
+   assign d_d.i0valid               =  dec_i0_decode_d;  // has flush_final_r
+
+   assign d_d.i0load                =  i0_dp.load  & i0_legal_decode_d;
+   assign d_d.i0store               =  i0_dp.store & i0_legal_decode_d;
+   assign d_d.i0div                 =  i0_dp.div   & i0_legal_decode_d;
+
+
+   assign d_d.csrwen                =  dec_csr_wen_unq_d   & i0_legal_decode_d;
+   assign d_d.csrwonly              =  i0_csr_write_only_d & dec_i0_decode_d;
+   assign d_d.csrwaddr[11:0]        =  (d_d.csrwen) ? i0[31:20] : '0;    // csr write address for rd==0 case
+
+   // i0_pipe_en[3] is the decode pulse; bits 2:0 are its one-, two- and three-cycle delayed copies
+   rvdff  #(3) i0cgff               (.*, .clk(active_clk),            .din(i0_pipe_en[3:1]), .dout(i0_pipe_en[2:0]));
+
+   assign i0_pipe_en[3]             =  dec_i0_decode_d;
+   // control enables cover two adjacent pipe_en bits, data enables a single bit; clk_override forces all of them on
+   assign i0_x_ctl_en               = (|i0_pipe_en[3:2] | clk_override);
+   assign i0_r_ctl_en               = (|i0_pipe_en[2:1] | clk_override);
+   assign i0_wb_ctl_en              = (|i0_pipe_en[1:0] | clk_override);
+   assign i0_x_data_en              = ( i0_pipe_en[3]   | clk_override);
+   assign i0_r_data_en              = ( i0_pipe_en[2]   | clk_override);
+   assign i0_wb_data_en             = ( i0_pipe_en[1]   | clk_override);
+
+   assign dec_data_en[1:0]          = {i0_x_data_en, i0_r_data_en};
+   assign dec_ctl_en[1:0]           = {i0_x_ctl_en,  i0_r_ctl_en};
+
+
+
+   rvdfflie #( .WIDTH($bits(eb1_dest_pkt_t)),.LEFT(15) ) e1ff (.*, .en(i0_x_ctl_en), .din(d_d),  .dout(x_d));
+   // x_d -> r_d: either lower flush (r or wb) clears the valid bits on the way
+   always_comb begin
+      x_d_in = x_d;
+
+      x_d_in.i0v         = x_d.i0v     & ~dec_tlu_flush_lower_wb & ~dec_tlu_flush_lower_r;
+      x_d_in.i0valid     = x_d.i0valid & ~dec_tlu_flush_lower_wb & ~dec_tlu_flush_lower_r;
+   end
+
+   rvdfflie #( .WIDTH($bits(eb1_dest_pkt_t)), .LEFT(15) ) r_d_ff (.*, .en(i0_r_ctl_en), .din(x_d_in), .dout(r_d));
+
+   // r_d -> wbd: a lower flush at wb clears the valid, load and store bits
+   always_comb begin
+
+        r_d_in = r_d;
+
+
+      // for the bench
+      r_d_in.i0rd[4:0]   =  r_d.i0rd[4:0];
+
+      r_d_in.i0v         = (r_d.i0v      & ~dec_tlu_flush_lower_wb);
+      r_d_in.i0valid     = (r_d.i0valid  & ~dec_tlu_flush_lower_wb);
+
+      r_d_in.i0load      =  r_d.i0load   & ~dec_tlu_flush_lower_wb;
+      r_d_in.i0store     =  r_d.i0store  & ~dec_tlu_flush_lower_wb;
+
+   end
+
+
+   rvdfflie #(.WIDTH($bits(eb1_dest_pkt_t)), .LEFT(15)) wbff (.*, .en(i0_wb_ctl_en), .din(r_d_in), .dout(wbd));
+
+   assign dec_i0_waddr_r[4:0]       =  r_d_in.i0rd[4:0];
+   // i0_wen_r is the qualified write enable; the exported dec_i0_wen_r also excludes divides and killed loads
+   assign     i0_wen_r              =  r_d_in.i0v & ~dec_tlu_i0_kill_writeb_r;
+   assign dec_i0_wen_r              =  i0_wen_r   & ~r_d_in.i0div & ~i0_load_kill_wen_r;  // don't write a nonblock load 1st time down the pipe
+   assign dec_i0_wdata_r[31:0]      =  i0_result_corr_r[31:0];
+
+
+   // divide stuff
+   assign div_e1_to_r         = (x_d.i0div & x_d.i0valid) |
+                                (r_d.i0div & r_d.i0valid);
+   // div_active next state: set when a divide decodes, held until the divide result is written or the divide is cancelled
+   assign div_active_in = i0_div_decode_d | (div_active & ~exu_div_wren & ~nonblock_div_cancel);
+
+   // NOTE(review): the flop from div_active_in to div_active, and div_waddr_wb, are produced outside this view
+   assign dec_div_active = div_active;
+
+   // nonblocking div scheme
+   // stall an instruction that reads the destination register of the active divide
+   assign i0_nonblock_div_stall  = (dec_i0_rs1_en_d & div_active & (div_waddr_wb[4:0] == i0r.rs1[4:0])) |
+                                   (dec_i0_rs2_en_d & div_active & (div_waddr_wb[4:0] == i0r.rs2[4:0]));
+
+   // flush the divide: rd is 0 while it sits in x_d, or a lower flush hits it in x_d, or a flush with kill-write hits it in r_d
+   assign div_flush              = (x_d.i0div & x_d.i0valid & (x_d.i0rd[4:0]==5'b0)                           ) |
+                                   (x_d.i0div & x_d.i0valid & dec_tlu_flush_lower_r                           ) |
+                                   (r_d.i0div & r_d.i0valid & dec_tlu_flush_lower_r & dec_tlu_i0_kill_writeb_r);
+
+
+   // cancel if any younger inst committing this cycle to same dest as nonblock divide
+   assign nonblock_div_cancel    = (div_active &  div_flush) |
+                                   (div_active & ~div_e1_to_r & (r_d.i0rd[4:0] == div_waddr_wb[4:0]) & i0_wen_r);
+
+   assign dec_div_cancel         =  nonblock_div_cancel;
+
+
+
+   assign i0_div_decode_d            =  i0_legal_decode_d & i0_dp.div;
+
+// for load_to_use_plus1, the load result data is merged in R stage instead of D
+
+   if ( pt.LOAD_TO_USE_PLUS1 == 1 ) begin : genblock1
+      assign i0_result_x[31:0]          = exu_i0_result_x[31:0];
+      assign i0_result_r[31:0]          = (r_d.i0v & r_d.i0load) ? lsu_result_m[31:0] : i0_result_r_raw[31:0];
+   end
+   else begin : genblock1
+      assign i0_result_x[31:0]          = (x_d.i0v & x_d.i0load) ? lsu_result_m[31:0] : exu_i0_result_x[31:0];
+      assign i0_result_r[31:0]          = i0_result_r_raw[31:0];
+   end
+
+
+   rvdffe #(32) i0_result_r_ff       (.*, .en(i0_r_data_en & (x_d.i0v | x_d.csrwen | debug_valid_x)),  .din(i0_result_x[31:0]),       .dout(i0_result_r_raw[31:0]));
+
+   // correct lsu load data - don't use for bypass, do pass down the pipe
+   assign i0_result_corr_r[31:0]     = (r_d.i0v & r_d.i0load) ? lsu_result_corr_r[31:0] : i0_result_r_raw[31:0];
+
+
+   rvdffe #(12) e1brpcff             (.*, .en(i0_x_data_en), .din(last_br_immed_d[12:1] ), .dout(last_br_immed_x[12:1]));
+
+
+
+   assign i0_wb_en                   =  i0_wb_data_en;
+
+   assign i0_inst_wb_in[31:0]        =  i0_inst_r[31:0];
+   assign i0_inst_d[31:0]            = (dec_i0_pc4_d)    ?  i0[31:0]                                  :  {16'b0, ifu_i0_cinst[15:0]};
+
+
+   assign trace_enable = ~dec_tlu_trace_disable;
+
+
+   rvdffe #(.WIDTH(5),.OVERRIDE(1))  i0rdff  (.*, .en(i0_div_decode_d),        .din(i0r.rd[4:0]),             .dout(div_waddr_wb[4:0]));
+
+   rvdffe #(32) i0xinstff            (.*, .en(i0_x_data_en & trace_enable),    .din(i0_inst_d[31:0]),         .dout(i0_inst_x[31:0]));
+   rvdffe #(32) i0cinstff            (.*, .en(i0_r_data_en & trace_enable),    .din(i0_inst_x[31:0]),         .dout(i0_inst_r[31:0]));
+
+   rvdffe #(32) i0wbinstff           (.*, .en(i0_wb_en & trace_enable),        .din(i0_inst_wb_in[31:0]),     .dout(i0_inst_wb[31:0]));
+   rvdffe #(31) i0wbpcff             (.*, .en(i0_wb_en & trace_enable),        .din(dec_tlu_i0_pc_r[31:1]),   .dout(  i0_pc_wb[31:1]));
+
+   assign dec_i0_inst_wb[31:0] = i0_inst_wb[31:0];
+   assign dec_i0_pc_wb[31:1] = i0_pc_wb[31:1];
+
+
+
+   rvdffpcie #(31) i0_pc_r_ff           (.*, .en(i0_r_data_en), .din(exu_i0_pc_x[31:1]), .dout(dec_i0_pc_r[31:1]));
+
+   assign dec_tlu_i0_pc_r[31:1]      = dec_i0_pc_r[31:1];
+
+
+   rvbradder ibradder_correct (
+                     .pc(exu_i0_pc_x[31:1]),
+                     .offset(last_br_immed_x[12:1]),
+                     .dout(pred_correct_npc_x[31:1]));
+
+
+
+   // add nonblock load rs1/rs2 bypass cases
+
+   assign i0_rs1_nonblock_load_bypass_en_d  = dec_i0_rs1_en_d & dec_nonblock_load_wen & (dec_nonblock_load_waddr[4:0] == i0r.rs1[4:0]);
+
+   assign i0_rs2_nonblock_load_bypass_en_d  = dec_i0_rs2_en_d & dec_nonblock_load_wen & (dec_nonblock_load_waddr[4:0] == i0r.rs2[4:0]);
+
+
+
+   // bit 2 is priority match, bit 0 lowest priority, i0_x, i0_r
+
+   assign i0_rs1bypass[2]                =  i0_rs1_depth_d[0] & (i0_rs1_class_d.alu | i0_rs1_class_d.mul                      );
+   assign i0_rs1bypass[1]                =  i0_rs1_depth_d[0] & (                                          i0_rs1_class_d.load);
+   assign i0_rs1bypass[0]                =  i0_rs1_depth_d[1] & (i0_rs1_class_d.alu | i0_rs1_class_d.mul | i0_rs1_class_d.load);
+
+   assign i0_rs2bypass[2]                =  i0_rs2_depth_d[0] & (i0_rs2_class_d.alu | i0_rs2_class_d.mul                      );
+   assign i0_rs2bypass[1]                =  i0_rs2_depth_d[0] & (                                          i0_rs2_class_d.load);
+   assign i0_rs2bypass[0]                =  i0_rs2_depth_d[1] & (i0_rs2_class_d.alu | i0_rs2_class_d.mul | i0_rs2_class_d.load);
+
+
+   assign dec_i0_rs1_bypass_en_d[3]      =  i0_rs1_nonblock_load_bypass_en_d & ~i0_rs1bypass[0] & ~i0_rs1bypass[1] & ~i0_rs1bypass[2];
+   assign dec_i0_rs1_bypass_en_d[2]      =  i0_rs1bypass[2];
+   assign dec_i0_rs1_bypass_en_d[1]      =  i0_rs1bypass[1];
+   assign dec_i0_rs1_bypass_en_d[0]      =  i0_rs1bypass[0];
+
+   assign dec_i0_rs2_bypass_en_d[3]      =  i0_rs2_nonblock_load_bypass_en_d & ~i0_rs2bypass[0] & ~i0_rs2bypass[1] & ~i0_rs2bypass[2];
+   assign dec_i0_rs2_bypass_en_d[2]      =  i0_rs2bypass[2];
+   assign dec_i0_rs2_bypass_en_d[1]      =  i0_rs2bypass[1];
+   assign dec_i0_rs2_bypass_en_d[0]      =  i0_rs2bypass[0];
+
+
+   assign dec_i0_result_r[31:0]          =  i0_result_r[31:0];
+
+
+endmodule // eb1_dec_decode_ctl
+
+
+
+
+
+// The file "decode" is a human-readable file that defines all of the instruction decodes and is part of the git repo;
+// modify that file as needed.
+
+// to generate all the equations below from "decode" except legal equation:
+
+// 1) coredecode -in decode > coredecode.e
+
+// 2) espresso -Dso -oeqntott coredecode.e | addassign -pre out.  > equations
+
+// to generate the legal (32b instruction is legal) equation below:
+
+// 1) coredecode -in decode -legal > legal.e
+
+// 2) espresso -Dso -oeqntott legal.e | addassign -pre out. > legal_equation
+
+module eb1_dec_dec_ctl
+import eb1_pkg::*;
+  (
+   input logic [31:0] inst,        // instruction word to decode
+
+   output eb1_dec_pkt_t out        // decoded fields; one assign per member below
+   );
+   // Purely combinational: every member of out is a sum-of-products over bits of inst.
+   logic [31:0] i;                 // short alias of inst used by all equations
+
+
+   assign i[31:0] = inst[31:0];
+   // The equations below are tool output (see the coredecode/espresso recipe in the comment
+   // above this module); presumably they are regenerated from "decode", not edited by hand.
+
+assign out.alu = (i[30]&i[24]&i[23]&!i[22]&!i[21]&!i[20]&i[14]&!i[5]&i[4]) | (i[29]
+    &!i[27]&!i[24]&i[4]) | (!i[25]&!i[13]&!i[12]&i[4]) | (!i[30]&!i[25]
+    &i[13]&i[12]) | (i[27]&i[25]&i[14]&i[4]) | (i[29]&i[27]&!i[14]&i[4]) | (
+    i[29]&!i[14]&i[5]&i[4]) | (!i[27]&!i[25]&i[14]&i[4]) | (i[30]&!i[29]
+    &!i[13]&i[4]) | (!i[30]&!i[27]&!i[25]&i[4]) | (i[13]&!i[5]&i[4]) | (
+    !i[12]&!i[5]&i[4]) | (i[2]) | (i[6]) | (i[30]&i[24]&i[23]&i[22]&i[21]
+    &i[20]&!i[5]&i[4]) | (!i[30]&i[29]&!i[24]&!i[23]&i[22]&i[21]&i[20]
+    &!i[5]&i[4]) | (!i[30]&i[24]&!i[23]&!i[22]&!i[21]&!i[20]&!i[5]&i[4]);
+
+assign out.rs1 = (!i[14]&!i[13]&!i[2]) | (!i[13]&i[11]&!i[2]) | (i[19]&i[13]&!i[2]) | (
+    !i[13]&i[10]&!i[2]) | (i[18]&i[13]&!i[2]) | (!i[13]&i[9]&!i[2]) | (
+    i[17]&i[13]&!i[2]) | (!i[13]&i[8]&!i[2]) | (i[16]&i[13]&!i[2]) | (
+    !i[13]&i[7]&!i[2]) | (i[15]&i[13]&!i[2]) | (!i[4]&!i[3]) | (!i[6]
+    &!i[2]);
+
+assign out.rs2 = (i[5]&!i[4]&!i[2]) | (!i[6]&i[5]&!i[2]);
+
+assign out.imm12 = (!i[4]&!i[3]&i[2]) | (i[13]&!i[5]&i[4]&!i[2]) | (!i[13]&!i[12]
+    &i[6]&i[4]) | (!i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.rd = (!i[5]&!i[2]) | (i[5]&i[2]) | (i[4]);
+
+assign out.shimm5 = (i[27]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (!i[30]&!i[13]&i[12]
+    &!i[5]&i[4]&!i[2]) | (i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.imm20 = (i[5]&i[3]) | (i[4]&i[2]);
+
+assign out.pc = (!i[5]&!i[3]&i[2]) | (i[5]&i[3]);
+
+assign out.load = (!i[5]&!i[4]&!i[2]);
+
+assign out.store = (!i[6]&i[5]&!i[4]);
+
+assign out.lsu = (!i[6]&!i[4]&!i[2]);
+
+assign out.add = (!i[14]&!i[13]&!i[12]&!i[5]&i[4]) | (!i[5]&!i[3]&i[2]) | (!i[30]
+    &!i[25]&!i[14]&!i[13]&!i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sub = (i[30]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (!i[29]&!i[25]&!i[14]
+    &i[13]&!i[6]&i[4]&!i[2]) | (i[27]&i[25]&i[14]&!i[6]&i[5]&!i[2]) | (
+    !i[14]&i[13]&!i[5]&i[4]&!i[2]) | (i[6]&!i[4]&!i[2]);
+
+assign out.land = (!i[27]&!i[25]&i[14]&i[13]&i[12]&!i[6]&!i[2]) | (i[14]&i[13]&i[12]
+    &!i[5]&!i[2]);
+
+assign out.lor = (!i[6]&i[3]) | (!i[29]&!i[27]&!i[25]&i[14]&i[13]&!i[12]&!i[6]&!i[2]) | (
+    i[5]&i[4]&i[2]) | (!i[13]&!i[12]&i[6]&i[4]) | (i[14]&i[13]&!i[12]
+    &!i[5]&!i[2]);
+
+assign out.lxor = (!i[29]&!i[27]&!i[25]&i[14]&!i[13]&!i[12]&i[4]&!i[2]) | (i[14]
+    &!i[13]&!i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.sll = (!i[29]&!i[27]&!i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sra = (i[30]&!i[29]&!i[27]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.srl = (!i[30]&!i[29]&!i[27]&!i[25]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.slt = (!i[29]&!i[25]&!i[14]&i[13]&!i[6]&i[4]&!i[2]) | (!i[14]&i[13]&!i[5]
+    &i[4]&!i[2]);
+
+assign out.unsign = (!i[27]&i[25]&i[14]&i[12]&!i[6]&i[5]&!i[2]) | (!i[14]&i[13]
+    &i[12]&!i[5]&!i[2]) | (i[13]&i[6]&!i[4]&!i[2]) | (i[14]&!i[5]&!i[4]) | (
+    !i[25]&!i[14]&i[13]&i[12]&!i[6]&!i[2]) | (i[27]&i[25]&i[14]&i[13]
+    &!i[6]&i[5]&!i[2]);
+
+assign out.condbr = (i[6]&!i[4]&!i[2]);
+
+assign out.beq = (!i[14]&!i[12]&i[6]&!i[4]&!i[2]);
+
+assign out.bne = (!i[14]&i[12]&i[6]&!i[4]&!i[2]);
+
+assign out.bge = (i[14]&i[12]&i[5]&!i[4]&!i[2]);
+
+assign out.blt = (i[14]&!i[12]&i[5]&!i[4]&!i[2]);
+
+assign out.jal = (i[6]&i[2]);
+
+assign out.by = (!i[13]&!i[12]&!i[6]&!i[4]&!i[2]);
+
+assign out.half = (i[12]&!i[6]&!i[4]&!i[2]);
+
+assign out.word = (i[13]&!i[6]&!i[4]);
+
+assign out.csr_read = (i[13]&i[6]&i[4]) | (i[7]&i[6]&i[4]) | (i[8]&i[6]&i[4]) | (
+    i[9]&i[6]&i[4]) | (i[10]&i[6]&i[4]) | (i[11]&i[6]&i[4]);
+
+assign out.csr_clr = (i[15]&i[13]&i[12]&i[6]&i[4]) | (i[16]&i[13]&i[12]&i[6]&i[4]) | (
+    i[17]&i[13]&i[12]&i[6]&i[4]) | (i[18]&i[13]&i[12]&i[6]&i[4]) | (
+    i[19]&i[13]&i[12]&i[6]&i[4]);
+
+assign out.csr_set = (i[15]&!i[12]&i[6]&i[4]) | (i[16]&!i[12]&i[6]&i[4]) | (i[17]
+    &!i[12]&i[6]&i[4]) | (i[18]&!i[12]&i[6]&i[4]) | (i[19]&!i[12]&i[6]
+    &i[4]);
+
+assign out.csr_write = (!i[13]&i[12]&i[6]&i[4]);
+
+assign out.csr_imm = (i[14]&!i[13]&i[6]&i[4]) | (i[15]&i[14]&i[6]&i[4]) | (i[16]
+    &i[14]&i[6]&i[4]) | (i[17]&i[14]&i[6]&i[4]) | (i[18]&i[14]&i[6]&i[4]) | (
+    i[19]&i[14]&i[6]&i[4]);
+
+assign out.presync = (!i[5]&i[3]) | (!i[13]&i[7]&i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | (
+    !i[13]&i[9]&i[6]&i[4]) | (!i[13]&i[10]&i[6]&i[4]) | (!i[13]&i[11]
+    &i[6]&i[4]) | (i[15]&i[13]&i[6]&i[4]) | (i[16]&i[13]&i[6]&i[4]) | (
+    i[17]&i[13]&i[6]&i[4]) | (i[18]&i[13]&i[6]&i[4]) | (i[19]&i[13]&i[6]
+    &i[4]);
+
+assign out.postsync = (i[12]&!i[5]&i[3]) | (!i[22]&!i[13]&!i[12]&i[6]&i[4]) | (
+    !i[13]&i[7]&i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | (!i[13]&i[9]&i[6]
+    &i[4]) | (!i[13]&i[10]&i[6]&i[4]) | (!i[13]&i[11]&i[6]&i[4]) | (
+    i[15]&i[13]&i[6]&i[4]) | (i[16]&i[13]&i[6]&i[4]) | (i[17]&i[13]&i[6]
+    &i[4]) | (i[18]&i[13]&i[6]&i[4]) | (i[19]&i[13]&i[6]&i[4]);
+
+assign out.ebreak = (!i[22]&i[20]&!i[13]&!i[12]&i[6]&i[4]);
+
+assign out.ecall = (!i[21]&!i[20]&!i[13]&!i[12]&i[6]&i[4]);
+
+assign out.mret = (i[29]&!i[13]&!i[12]&i[6]&i[4]);
+
+assign out.mul = (!i[30]&i[27]&i[24]&i[20]&i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (
+    i[29]&i[27]&!i[24]&i[23]&i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (
+    i[29]&i[27]&!i[24]&!i[20]&i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (
+    i[27]&!i[25]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (i[30]&i[27]&i[13]
+    &!i[6]&i[5]&i[4]&!i[2]) | (i[29]&i[27]&i[22]&!i[20]&i[14]&!i[13]
+    &i[12]&!i[5]&i[4]&!i[2]) | (i[29]&i[27]&!i[21]&i[20]&i[14]&!i[13]
+    &i[12]&!i[5]&i[4]&!i[2]) | (i[29]&i[27]&!i[22]&i[21]&i[14]&!i[13]
+    &i[12]&!i[5]&i[4]&!i[2]) | (i[30]&i[29]&i[27]&!i[23]&i[14]&!i[13]
+    &i[12]&!i[5]&i[4]&!i[2]) | (!i[30]&i[27]&i[23]&i[14]&!i[13]&i[12]
+    &!i[5]&i[4]&!i[2]) | (!i[30]&!i[29]&i[27]&!i[25]&!i[13]&i[12]&!i[6]
+    &i[4]&!i[2]) | (i[25]&!i[14]&!i[6]&i[5]&i[4]&!i[2]) | (i[30]&!i[27]
+    &i[24]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[29]&i[27]&i[14]
+    &!i[6]&i[5]&!i[2]);
+
+assign out.rs1_sign = (!i[27]&i[25]&!i[14]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (
+    !i[27]&i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.rs2_sign = (!i[27]&i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.low = (i[25]&!i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]);
+
+assign out.div = (!i[27]&i[25]&i[14]&!i[6]&i[5]&!i[2]);
+
+assign out.rem = (!i[27]&i[25]&i[14]&i[13]&!i[6]&i[5]&!i[2]);
+// fence: the same product is the first term of presync above
+assign out.fence = (!i[5]&i[3]);
+// fence_i: the same product is the first term of postsync above
+assign out.fence_i = (i[12]&!i[5]&i[3]);
+// NOTE(review): the fields from here down to zba look like bit-manipulation extension decodes - confirm against "decode"
+assign out.clz = (i[30]&!i[27]&!i[24]&!i[22]&!i[21]&!i[20]&!i[14]&!i[13]&i[12]&!i[5]
+    &i[4]&!i[2]);
+
+assign out.ctz = (i[30]&!i[27]&!i[24]&!i[22]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]
+    &!i[2]);
+
+assign out.pcnt = (i[30]&!i[27]&!i[24]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.sext_b = (i[30]&!i[27]&i[22]&!i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.sext_h = (i[30]&!i[27]&i[22]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.slo = (!i[30]&i[29]&!i[27]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sro = (!i[30]&i[29]&!i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.min = (i[27]&i[25]&i[14]&!i[12]&!i[6]&i[5]&!i[2]);
+
+assign out.max = (i[27]&i[25]&i[14]&i[12]&!i[6]&i[5]&!i[2]);
+
+assign out.pack = (!i[30]&i[27]&!i[25]&!i[13]&!i[12]&i[5]&i[4]&!i[2]);
+
+assign out.packu = (i[30]&i[27]&!i[13]&!i[12]&i[5]&i[4]&!i[2]);
+
+assign out.packh = (!i[30]&i[27]&!i[25]&i[13]&i[12]&!i[6]&i[5]&!i[2]);
+
+assign out.rol = (i[30]&!i[27]&!i[14]&i[12]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.ror = (i[30]&i[29]&!i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.zbb = (i[30]&!i[27]&!i[24]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (
+    !i[30]&i[27]&i[14]&i[13]&i[12]&!i[6]&i[5]&!i[2]) | (i[30]&i[29]&!i[27]
+    &i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[27]&!i[13]&!i[12]&i[5]
+    &i[4]&!i[2]) | (i[30]&i[14]&!i[13]&!i[12]&!i[6]&i[5]&!i[2]) | (i[30]
+    &!i[27]&i[13]&!i[6]&i[5]&i[4]&!i[2]) | (i[30]&i[29]&!i[27]&!i[6]&i[5]
+    &i[4]&!i[2]) | (i[30]&i[29]&i[24]&i[23]&i[22]&i[21]&i[20]&i[14]&!i[13]
+    &i[12]&!i[5]&i[4]&!i[2]) | (!i[30]&i[29]&i[27]&!i[24]&!i[23]&i[22]
+    &i[21]&i[20]&i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (!i[30]&i[27]
+    &i[24]&!i[23]&!i[22]&!i[21]&!i[20]&i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (
+    i[30]&i[29]&i[24]&i[23]&!i[22]&!i[21]&!i[20]&i[14]&!i[13]&i[12]&!i[5]
+    &i[4]&!i[2]) | (i[27]&i[25]&i[14]&!i[6]&i[5]&!i[2]);
+
+assign out.sbset = (!i[30]&i[29]&i[27]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sbclr = (i[30]&!i[29]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sbinv = (i[30]&i[29]&i[27]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sbext = (i[30]&!i[29]&i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.zbs = (i[29]&i[27]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]) | (i[30]&!i[29]
+    &i[27]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.bext = (!i[30]&i[27]&!i[25]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.bdep = (i[30]&i[27]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.zbe = (i[27]&!i[25]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.clmul = (i[27]&i[25]&!i[14]&!i[13]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.clmulh = (i[27]&!i[14]&i[13]&i[12]&!i[6]&i[5]&!i[2]);
+
+assign out.clmulr = (i[27]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.zbc = (i[27]&i[25]&!i[14]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.grev = (i[30]&i[29]&i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.gorc = (!i[30]&i[29]&i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.shfl = (!i[30]&!i[29]&i[27]&!i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.unshfl = (!i[30]&!i[29]&i[27]&!i[25]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.zbp = (!i[30]&i[29]&!i[27]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (!i[30]&!i[29]
+    &i[27]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[30]&!i[27]&i[13]&!i[6]
+    &i[5]&i[4]&!i[2]) | (i[27]&!i[25]&!i[13]&!i[12]&i[5]&i[4]&!i[2]) | (
+    i[30]&i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]) | (i[29]&!i[27]&i[12]&!i[6]
+    &i[5]&i[4]&!i[2]) | (!i[30]&!i[29]&i[27]&!i[25]&i[12]&!i[6]&i[5]&i[4]
+    &!i[2]) | (i[29]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.crc32_b = (i[30]&!i[27]&i[24]&!i[23]&!i[21]&!i[20]&!i[14]&!i[13]&i[12]
+    &!i[5]&i[4]&!i[2]);
+
+assign out.crc32_h = (i[30]&!i[27]&i[24]&!i[23]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]
+    &!i[2]);
+
+assign out.crc32_w = (i[30]&!i[27]&i[24]&!i[23]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4]
+    &!i[2]);
+
+assign out.crc32c_b = (i[30]&!i[27]&i[23]&!i[21]&!i[20]&!i[14]&!i[13]&i[12]&!i[5]
+    &i[4]&!i[2]);
+
+assign out.crc32c_h = (i[30]&!i[27]&i[23]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.crc32c_w = (i[30]&!i[27]&i[23]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.zbr = (i[30]&!i[27]&i[24]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.bfp = (i[30]&i[27]&i[13]&i[12]&!i[6]&i[5]&!i[2]);
+// zbf: identical equation to bfp above
+assign out.zbf = (i[30]&i[27]&i[13]&i[12]&!i[6]&i[5]&!i[2]);
+
+assign out.sh1add = (i[29]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.sh2add = (i[29]&i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]);
+
+assign out.sh3add = (i[29]&i[14]&i[13]&!i[6]&i[5]&!i[2]);
+
+assign out.zba = (i[29]&!i[12]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.pm_alu = (i[28]&i[22]&!i[13]&!i[12]&i[4]) | (!i[30]&!i[29]&!i[27]&!i[25]
+    &!i[6]&i[4]) | (!i[29]&!i[27]&!i[25]&!i[13]&i[12]&!i[6]&i[4]) | (
+    !i[29]&!i[27]&!i[25]&!i[14]&!i[6]&i[4]) | (i[13]&!i[5]&i[4]) | (i[4]
+    &i[2]) | (!i[12]&!i[5]&i[4]);
+
+// legal: 1 when the 32b instruction is legal. Every product term below includes i[1]&i[0].
+assign out.legal = (!i[31]&!i[30]&!i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]
+    &i[22]&!i[21]&i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]
+    &!i[10]&!i[9]&!i[8]&!i[7]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (
+    !i[31]&!i[30]&i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&!i[22]
+    &i[21]&!i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]&!i[10]
+    &!i[9]&!i[8]&!i[7]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31]
+    &!i[30]&!i[29]&!i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&!i[22]&!i[21]
+    &!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]&!i[10]&!i[9]&!i[8]
+    &!i[7]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31]&i[29]&!i[28]&!i[26]
+    &!i[25]&i[24]&!i[22]&!i[20]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (
+    !i[31]&i[29]&!i[28]&!i[26]&!i[25]&i[24]&!i[22]&!i[21]&!i[6]&!i[5]
+    &i[4]&!i[3]&i[1]&i[0]) | (!i[31]&i[29]&!i[28]&!i[26]&!i[25]&!i[23]
+    &!i[22]&!i[20]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&i[29]
+    &!i[28]&!i[26]&!i[25]&!i[24]&!i[23]&!i[21]&!i[6]&!i[5]&i[4]&!i[3]
+    &i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28]&!i[26]&i[25]&i[13]&!i[6]
+    &i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[30]&!i[28]&!i[26]&!i[25]&!i[24]
+    &!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[30]&!i[28]&!i[27]
+    &!i[26]&!i[25]&i[14]&!i[12]&!i[6]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]
+    &!i[30]&!i[28]&!i[27]&!i[26]&!i[25]&i[13]&!i[12]&!i[6]&i[4]&!i[3]
+    &i[1]&i[0]) | (!i[31]&!i[29]&!i[28]&!i[27]&!i[26]&!i[25]&!i[13]&!i[12]
+    &!i[6]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[28]&!i[27]&!i[26]&!i[25]
+    &i[14]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]
+    &!i[28]&!i[26]&!i[13]&i[12]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (
+    !i[31]&!i[30]&!i[29]&!i[28]&!i[26]&i[14]&!i[6]&i[5]&i[4]&!i[3]&i[1]
+    &i[0]) | (!i[31]&i[30]&!i[28]&i[27]&!i[26]&!i[25]&!i[13]&i[12]&!i[6]
+    &i[4]&!i[3]&i[1]&i[0]) | (!i[31]&i[29]&!i[28]&i[27]&!i[26]&!i[25]
+    &!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[30]&!i[28]&!i[27]
+    &!i[26]&!i[25]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[30]
+    &!i[29]&!i[28]&!i[27]&!i[26]&!i[6]&i[5]&i[4]&!i[3]&i[1]&i[0]) | (
+    !i[14]&!i[13]&!i[12]&i[6]&i[5]&!i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[29]
+    &!i[28]&!i[26]&!i[25]&i[14]&!i[6]&i[5]&i[4]&!i[3]&i[1]&i[0]) | (
+    !i[31]&i[29]&!i[28]&!i[26]&!i[25]&!i[13]&i[12]&i[5]&i[4]&!i[3]&!i[2]
+    &i[1]&i[0]) | (i[14]&i[6]&i[5]&!i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[14]
+    &!i[13]&i[5]&!i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[12]&!i[6]&!i[5]&i[4]
+    &!i[3]&i[1]&i[0]) | (!i[13]&i[12]&i[6]&i[5]&!i[3]&!i[2]&i[1]&i[0]) | (
+    !i[31]&!i[30]&!i[29]&!i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&!i[22]
+    &!i[21]&!i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[13]&!i[11]
+    &!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | (
+    !i[31]&!i[30]&!i[29]&!i[28]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]
+    &!i[13]&!i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]&i[3]
+    &i[2]&i[1]&i[0]) | (i[13]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (
+    i[6]&i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | (!i[14]&!i[12]&!i[6]&!i[4]
+    &!i[3]&!i[2]&i[1]&i[0]) | (!i[13]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]
+    &i[0]) | (i[13]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[6]&i[4]&!i[3]
+    &i[2]&i[1]&i[0]);
+
+
+endmodule // eb1_dec_dec_ctl
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_dec_gpr_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_dec_gpr_ctl.sv
new file mode 100644
index 0000000..62ddf2e
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_dec_gpr_ctl.sv
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+module eb1_dec_gpr_ctl
+import eb1_pkg::*;
+#(
+   `include "eb1_param.vh"
+ )  (
+    input logic [4:0]  raddr0,       // read port 0: register index
+    input logic [4:0]  raddr1,       // read port 1: register index
+
+    input logic        wen0,         // write port 0: enable
+    input logic [4:0]  waddr0,       // write port 0: register index
+    input logic [31:0] wd0,          // write port 0: data
+
+    input logic        wen1,         // write port 1: enable
+    input logic [4:0]  waddr1,       // write port 1: register index
+    input logic [31:0] wd1,          // write port 1: data
+
+    input logic        wen2,         // write port 2: enable
+    input logic [4:0]  waddr2,       // write port 2: register index
+    input logic [31:0] wd2,          // write port 2: data
+
+    input logic        clk,
+    input logic        rst_l,
+
+    output logic [31:0] rd0,         // read port 0: data
+    output logic [31:0] rd1,         // read port 1: data
+
+    input  logic        scan_mode
+);
+
+   // 31 x 32-bit general purpose registers: two read ports, three write ports.
+   // Only indices 1..31 are stored; index 0 is never written and reads as zero.
+   logic [31:1] [31:0] gpr_out;      // current register contents
+   logic [31:1] [31:0] gpr_in;       // merged write data per register
+   logic [31:1] w0v,w1v,w2v;         // per-port write select, one bit per register
+   logic [31:1] gpr_wr_en;           // flop enable per register
+
+   // Per-register write decode, write-data merge and storage flop
+   for ( genvar j=1; j<32; j++ )  begin : gpr
+      assign w0v[j]       = wen0 & (waddr0[4:0] == 5'(j));
+      assign w1v[j]       = wen1 & (waddr1[4:0] == 5'(j));
+      assign w2v[j]       = wen2 & (waddr2[4:0] == 5'(j));
+
+      // AND-OR merge; if several ports select this register their data is OR-ed
+      assign gpr_in[j]    = ({32{w0v[j]}} & wd0[31:0]) |
+                            ({32{w1v[j]}} & wd1[31:0]) |
+                            ({32{w2v[j]}} & wd2[31:0]);
+
+      assign gpr_wr_en[j] = w0v[j] | w1v[j] | w2v[j];
+
+      rvdffe #(32) gprff (.*, .en(gpr_wr_en[j]), .din(gpr_in[j][31:0]), .dout(gpr_out[j][31:0]));
+   end : gpr
+
+   // Read ports: AND-OR select across registers 1..31. For address 0 no term
+   // matches, so the 32'b0 default is returned.
+   always_comb begin
+      rd0[31:0] = 32'b0;
+      rd1[31:0] = 32'b0;
+
+      for (int r=1; r<32; r++ )  begin
+         rd0[31:0] = rd0[31:0] | ({32{(raddr0[4:0]== 5'(r))}} & gpr_out[r][31:0]);
+         rd1[31:0] = rd1[31:0] | ({32{(raddr1[4:0]== 5'(r))}} & gpr_out[r][31:0]);
+      end
+   end
+
+`ifdef RV_ASSERT_ON
+
+   // Set when two enabled ports have identical select vectors. The comparison is
+   // also true when both vectors are all zero, i.e. both ports address register 0.
+   logic  write_collision_unused;
+   assign write_collision_unused = ( wen0 & wen1 & (w0v[31:1] == w1v[31:1]) ) |
+                                   ( wen0 & wen2 & (w0v[31:1] == w2v[31:1]) ) |
+                                   ( wen1 & wen2 & (w1v[31:1] == w2v[31:1]) );
+
+   // asserting that no 2 ports will write to the same gpr simultaneously
+   assert_multiple_wen_to_same_gpr: assert #0 (~( write_collision_unused ) );
+
+`endif
+
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_dec_ib_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_dec_ib_ctl.sv
new file mode 100644
index 0000000..0eb95df
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_dec_ib_ctl.sv
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+module eb1_dec_ib_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic                 dbg_cmd_valid,                      // debug command present
+
+   input logic                 dbg_cmd_write,                      // debug command is a write (else a read)
+   input logic [1:0]           dbg_cmd_type,                       // debug command type: 0 = GPR, 1 = CSR, 2 = not handled here
+   input logic [31:0]          dbg_cmd_addr,                       // debug command address; only bits [11:0] are used
+
+   input eb1_br_pkt_t i0_brp,                                     // i0 branch packet from aligner
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index,    // BP index
+   input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr,               // BP FGHR
+   input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag,              // BP tag
+   input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index,          // Fully associative btb index
+
+   input logic       ifu_i0_pc4,                                   // i0 is 4B inst else 2B
+   input logic       ifu_i0_valid,                                 // i0 valid from ifu
+   input logic       ifu_i0_icaf,                                  // i0 instruction access fault
+   input logic [1:0] ifu_i0_icaf_type,                             // i0 instruction access fault type
+
+   input logic   ifu_i0_icaf_second,                               // i0 has access fault on second 2B of 4B inst
+   input logic   ifu_i0_dbecc,                                     // i0 double-bit error
+   input logic [31:0]  ifu_i0_instr,                               // i0 instruction from the aligner
+   input logic [31:1]  ifu_i0_pc,                                  // i0 pc from the aligner
+
+
+   output logic dec_ib0_valid_d,                                   // ib0 valid
+   output logic dec_debug_valid_d,                                 // Debug read or write at D-stage
+
+
+   output logic [31:0] dec_i0_instr_d,                             // i0 inst at decode
+
+   output logic [31:1] dec_i0_pc_d,                                // i0 pc at decode
+
+   output logic dec_i0_pc4_d,                                      // i0 is 4B inst else 2B
+
+   output eb1_br_pkt_t dec_i0_brp,                                // i0 branch packet at decode
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index,   // i0 branch index
+   output logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr,              // BP FGHR
+   output logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag,             // BP tag
+   output logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index,          // Fully associative btb index
+
+   output logic dec_i0_icaf_d,                                     // i0 instruction access fault at decode
+   output logic dec_i0_icaf_second_d,                              // i0 instruction access fault on second 2B of 4B inst
+   output logic [1:0] dec_i0_icaf_type_d,                          // i0 instruction access fault type
+   output logic dec_i0_dbecc_d,                                    // i0 double-bit error at decode
+   output logic dec_debug_wdata_rs1_d,                             // put debug write data onto rs1 source: machine is halted
+
+   output logic dec_debug_fence_d                                  // debug fence inst
+
+   );
+
+   // ---------------------------------------------------------------------------
+   // Purely combinational block (no flops):
+   //   - forwards the aligner's i0 instruction and sideband signals to decode
+   //   - when a GPR/CSR debug command is present, replaces the instruction with
+   //     one synthesized from the command
+   // ---------------------------------------------------------------------------
+
+   logic         dbg_active;                // debug command that is handled here
+   logic         dbg_sel_gpr;               // command type is GPR
+   logic         dbg_sel_csr;               // command type is CSR
+   logic         dbg_gpr_rd, dbg_gpr_wr;    // GPR read / GPR write command
+   logic         dbg_csr_rd, dbg_csr_wr;    // CSR read / CSR write command
+   logic [4:0]   dbg_reg;                   // GPR number taken from the command address
+   logic [11:0]  dbg_csr;                   // CSR number taken from the command address
+   logic [31:0]  dbg_instr;                 // instruction synthesized for the command
+
+   // ---------------------------------------------------------------------------
+   // Aligner sideband: forwarded to the decode stage unchanged
+   // ---------------------------------------------------------------------------
+
+   assign dec_i0_pc_d[31:1]       = ifu_i0_pc[31:1];
+   assign dec_i0_pc4_d            = ifu_i0_pc4;
+
+   assign dec_i0_icaf_d           = ifu_i0_icaf;
+   assign dec_i0_icaf_second_d    = ifu_i0_icaf_second;   // icaf's can only decode as i0
+   assign dec_i0_icaf_type_d[1:0] = ifu_i0_icaf_type[1:0];
+   assign dec_i0_dbecc_d          = ifu_i0_dbecc;
+
+   assign dec_i0_brp              = i0_brp;
+   assign dec_i0_bp_index         = ifu_i0_bp_index;
+   assign dec_i0_bp_fghr          = ifu_i0_bp_fghr;
+   assign dec_i0_bp_btag          = ifu_i0_bp_btag;
+   assign dec_i0_bp_fa_index      = ifu_i0_fa_index;
+
+   // ---------------------------------------------------------------------------
+   // Debug command classification
+   // ---------------------------------------------------------------------------
+
+   // abstract memory command (type 2) not done here
+   assign dbg_active  = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2);
+
+   // target is selected by dbg_cmd_type, direction by dbg_cmd_write
+   assign dbg_sel_gpr = (dbg_cmd_type[1:0] == 2'h0);
+   assign dbg_sel_csr = (dbg_cmd_type[1:0] == 2'h1);
+
+   assign dbg_gpr_rd  = dbg_active & ~dbg_cmd_write & dbg_sel_gpr;
+   assign dbg_gpr_wr  = dbg_active &  dbg_cmd_write & dbg_sel_gpr;
+   assign dbg_csr_rd  = dbg_active & ~dbg_cmd_write & dbg_sel_csr;
+   assign dbg_csr_wr  = dbg_active &  dbg_cmd_write & dbg_sel_csr;
+
+   assign dbg_reg[4:0]  = dbg_cmd_addr[4:0];
+   assign dbg_csr[11:0] = dbg_cmd_addr[11:0];
+
+   // ---------------------------------------------------------------------------
+   // Instruction synthesized for the debug command
+   //
+   //   reg = dbg_cmd_addr[4:0], csr = dbg_cmd_addr[11:0]
+   //
+   //   GPR read  : or    %x0,  %reg, %x0    reg to read is put on rs1
+   //   GPR write : or    %reg, %x0,  %x0    write data is put on rs1
+   //   CSR read  : csrrs %x0,  %csr, %x0
+   //   CSR write : csrrw %x0,  %csr, %x0    write data is put on rs1
+   //
+   // Each word is assembled as {[31:20], [19:15], [14:12], [11:7], [6:0]}.
+   // If none of the four commands is selected the word is all zero.
+   // ---------------------------------------------------------------------------
+
+   assign dbg_instr[31:0] =
+      ({32{dbg_gpr_rd}} & {12'b0,         dbg_reg[4:0], 3'b110, 5'b0,         7'b0110011}) |
+      ({32{dbg_gpr_wr}} & {12'b0,         5'b0,         3'b110, dbg_reg[4:0], 7'b0110011}) |
+      ({32{dbg_csr_rd}} & {dbg_csr[11:0], 5'b0,         3'b010, 5'b0,         7'b1110011}) |
+      ({32{dbg_csr_wr}} & {dbg_csr[11:0], 5'b0,         3'b001, 5'b0,         7'b1110011});
+
+   // ---------------------------------------------------------------------------
+   // Outputs to decode
+   // ---------------------------------------------------------------------------
+
+   // a handled debug command replaces the instruction coming from the aligner
+   assign dec_i0_instr_d[31:0]  = (dbg_active) ? dbg_instr[31:0] : ifu_i0_instr[31:0];
+
+   // valid when either the aligner or a handled debug command supplies an instruction
+   assign dec_ib0_valid_d       = ifu_i0_valid | dbg_active;
+
+   // Debug read or write at D-stage
+   assign dec_debug_valid_d     = dbg_active;
+
+   // machine is in halted state, pipe empty, write will always happen next cycle
+   assign dec_debug_wdata_rs1_d = dbg_gpr_wr | dbg_csr_wr;
+
+   // special fence csr for use only in debug mode
+   assign dec_debug_fence_d     = dbg_csr_wr & (dbg_csr[11:0] == 12'h7c4);
+
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_dec_tlu_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_dec_tlu_ctl.sv
new file mode 100644
index 0000000..83f0a9d
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_dec_tlu_ctl.sv
@@ -0,0 +1,2947 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+//********************************************************************************
+// eb1_dec_tlu_ctl.sv
+//
+//
+// Function: CSRs, Commit/WB, flushing, exceptions, interrupts
+// Comments:
+//
+//********************************************************************************
+
+module eb1_dec_tlu_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic clk,
+   input logic free_clk,
+   input logic free_l2clk,
+   input logic rst_l,
+   input logic scan_mode,
+
+   input logic [31:1] rst_vec, // reset vector, from core pins
+   input logic        nmi_int, // nmi pin
+   input logic [31:1] nmi_vec, // nmi vector
+   input logic  i_cpu_halt_req,    // Asynchronous Halt request to CPU
+   input logic  i_cpu_run_req,     // Asynchronous Restart request to CPU
+
+   input logic lsu_fastint_stall_any,   // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle
+
+
+   // perf counter inputs
+   input logic       ifu_pmu_instr_aligned,   // aligned instructions
+   input logic       ifu_pmu_fetch_stall, // fetch unit stalled
+   input logic       ifu_pmu_ic_miss, // icache miss
+   input logic       ifu_pmu_ic_hit, // icache hit
+   input logic       ifu_pmu_bus_error, // Instruction side bus error
+   input logic       ifu_pmu_bus_busy, // Instruction side bus busy
+   input logic       ifu_pmu_bus_trxn, // Instruction side bus transaction
+   input logic       dec_pmu_instr_decoded, // decoded instructions
+   input logic       dec_pmu_decode_stall, // decode stall
+   input logic       dec_pmu_presync_stall, // decode stall due to presync'd inst
+   input logic       dec_pmu_postsync_stall,// decode stall due to postsync'd inst
+   input logic       lsu_store_stall_any,    // SB or WB is full, stall decode
+   input logic       dma_dccm_stall_any,     // DMA stall of lsu
+   input logic       dma_iccm_stall_any,     // DMA stall of ifu
+   input logic       exu_pmu_i0_br_misp,     // pipe 0 branch misp
+   input logic       exu_pmu_i0_br_ataken,   // pipe 0 branch actual taken
+   input logic       exu_pmu_i0_pc4,         // pipe 0 4 byte branch
+   input logic       lsu_pmu_bus_trxn,       // D side bus transaction
+   input logic       lsu_pmu_bus_misaligned, // D side bus misaligned
+   input logic       lsu_pmu_bus_error,      // D side bus error
+   input logic       lsu_pmu_bus_busy,       // D side bus busy
+   input logic       lsu_pmu_load_external_m, // D side bus load
+   input logic       lsu_pmu_store_external_m, // D side bus store
+   input logic       dma_pmu_dccm_read,          // DMA DCCM read
+   input logic       dma_pmu_dccm_write,         // DMA DCCM write
+   input logic       dma_pmu_any_read,           // DMA read
+   input logic       dma_pmu_any_write,          // DMA write
+
+   input logic [31:1] lsu_fir_addr, // Fast int address
+   input logic [1:0] lsu_fir_error, // Fast int lookup error
+
+   input logic       iccm_dma_sb_error,      // I side dma single bit error
+
+   input    eb1_lsu_error_pkt_t lsu_error_pkt_r, // lsu precise exception/error packet
+   input logic         lsu_single_ecc_error_incr, // LSU inc SB error counter
+
+   input logic dec_pause_state, // Pause counter not zero
+   input logic         lsu_imprecise_error_store_any,      // store bus error
+   input logic         lsu_imprecise_error_load_any,      // load bus error
+   input logic [31:0]  lsu_imprecise_error_addr_any, // store bus error address
+
+   input logic        dec_csr_wen_unq_d,       // valid csr with write - for csr legal
+   input logic        dec_csr_any_unq_d,       // valid csr - for csr legal
+   input logic [11:0] dec_csr_rdaddr_d,      // read address for csr
+
+   input logic        dec_csr_wen_r,      // csr write enable at wb
+   input logic [11:0] dec_csr_wraddr_r,      // write address for csr
+   input logic [31:0] dec_csr_wrdata_r,   // csr write data at wb
+
+   input logic        dec_csr_stall_int_ff, // csr is mie/mstatus
+
+   input logic dec_tlu_i0_valid_r, // pipe 0 op at e4 is valid
+
+   input logic [31:1] exu_npc_r, // for NPC tracking
+
+   input logic [31:1] dec_tlu_i0_pc_r, // for PC/NPC tracking
+
+   input eb1_trap_pkt_t dec_tlu_packet_r, // exceptions known at decode
+
+   input logic [31:0] dec_illegal_inst, // For mtval
+   input logic        dec_i0_decode_d,  // decode valid, used for clean icache diagnostics
+
+   // branch info from pipe0 for errors or counter updates
+   input logic [1:0]  exu_i0_br_hist_r, // history
+   input logic        exu_i0_br_error_r, // error
+   input logic        exu_i0_br_start_error_r, // start error
+   input logic        exu_i0_br_valid_r, // valid
+   input logic        exu_i0_br_mp_r, // mispredict
+   input logic        exu_i0_br_middle_r, // middle of bank
+
+   // branch info from pipe1 for errors or counter updates
+
+   input logic             exu_i0_br_way_r, // way hit or repl
+
+   output logic dec_tlu_core_empty,  // core is empty
+   // Debug start
+   output logic dec_dbg_cmd_done, // abstract command done
+   output logic dec_dbg_cmd_fail, // abstract command failed
+   output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command
+   output logic dec_tlu_debug_mode, // Core is in debug mode
+   output logic dec_tlu_resume_ack, // Resume acknowledge
+   output logic dec_tlu_debug_stall, // stall decode while waiting on core to empty
+
+   output logic dec_tlu_flush_noredir_r , // Tell fetch to idle on this flush
+   output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC
+   output logic dec_tlu_flush_leak_one_r, // single step
+   output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc. This is the D stage of the error
+
+   output logic dec_tlu_flush_extint, // fast ext int started
+   output logic [31:2] dec_tlu_meihap, // meihap for fast int
+
+   input  logic dbg_halt_req, // DM requests a halt
+   input  logic dbg_resume_req, // DM requests a resume
+   input  logic ifu_miss_state_idle, // I-side miss buffer empty
+   input  logic lsu_idle_any, // lsu is idle
+   input  logic dec_div_active, // oop div is active
+   output eb1_trigger_pkt_t  [3:0] trigger_pkt_any, // trigger info for trigger blocks
+
+   input logic  ifu_ic_error_start,     // IC single bit error
+   input logic  ifu_iccm_rd_ecc_single_err, // ICCM single bit error
+
+
+   input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data
+   input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid
+   output eb1_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics
+   // Debug end
+
+   input logic [7:0] pic_claimid, // pic claimid for csr
+   input logic [3:0] pic_pl, // pic priv level for csr
+   input logic       mhwakeup, // high priority external int, wakeup if halted
+
+   input logic mexintpend, // external interrupt pending
+   input logic timer_int, // timer interrupt pending
+   input logic soft_int, // software interrupt pending
+
+   output logic o_cpu_halt_status, // PMU interface, halted
+   output logic o_cpu_halt_ack, // halt req ack
+   output logic o_cpu_run_ack, // run req ack
+   output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sending a halt or run request
+
+   input logic [31:4] core_id, // Core ID
+
+   // external MPC halt/run interface
+   input logic mpc_debug_halt_req, // Async halt request
+   input logic mpc_debug_run_req, // Async run request
+   input logic mpc_reset_run_req, // Run/halt after reset
+   output logic mpc_debug_halt_ack, // Halt ack
+   output logic mpc_debug_run_ack, // Run ack
+   output logic debug_brkpt_status, // debug breakpoint
+
+   output logic [3:0] dec_tlu_meicurpl, // to PIC
+   output logic [3:0] dec_tlu_meipt, // to PIC
+
+
+   output logic [31:0] dec_csr_rddata_d,      // csr read data at wb
+   output logic dec_csr_legal_d,              // csr indicates legal operation
+
+   output eb1_br_tlu_pkt_t dec_tlu_br0_r_pkt, // branch pkt to bp
+
+   output logic dec_tlu_i0_kill_writeb_wb,    // I0 is flushed, don't writeback any results to arch state
+   output logic dec_tlu_flush_lower_wb,       // commit has a flush (exception, int, mispredict at e4)
+   output logic dec_tlu_i0_commit_cmt,        // committed an instruction
+
+   output logic dec_tlu_i0_kill_writeb_r,    // I0 is flushed, don't writeback any results to arch state
+   output logic dec_tlu_flush_lower_r,       // commit has a flush (exception, int)
+   output logic [31:1] dec_tlu_flush_path_r, // flush pc
+   output logic dec_tlu_fence_i_r,           // flush is a fence_i rfnpc, flush icache
+   output logic dec_tlu_wr_pause_r,           // CSR write to pause reg is at R.
+   output logic dec_tlu_flush_pause_r,        // Flush is due to pause
+
+   output logic dec_tlu_presync_d,            // CSR read needs to be presync'd
+   output logic dec_tlu_postsync_d,           // CSR needs to be postsync'd
+
+
+   output logic [31:0] dec_tlu_mrac_ff,        // CSR for memory region control
+
+   output logic dec_tlu_force_halt, // halt has been forced
+
+   output logic dec_tlu_perfcnt0, // toggles when pipe0 perf counter 0 has an event inc
+   output logic dec_tlu_perfcnt1, // toggles when pipe0 perf counter 1 has an event inc
+   output logic dec_tlu_perfcnt2, // toggles when pipe0 perf counter 2 has an event inc
+   output logic dec_tlu_perfcnt3, // toggles when pipe0 perf counter 3 has an event inc
+
+   output logic dec_tlu_i0_exc_valid_wb1, // pipe 0 exception valid
+   output logic dec_tlu_i0_valid_wb1,  // pipe 0 valid
+   output logic dec_tlu_int_valid_wb1, // pipe 2 int valid
+   output logic [4:0] dec_tlu_exc_cause_wb1, // exception or int cause
+   output logic [31:0] dec_tlu_mtval_wb1, // MTVAL value
+
+   // feature disable from mfdc
+   output logic  dec_tlu_external_ldfwd_disable, // disable external load forwarding
+   output logic  dec_tlu_sideeffect_posted_disable,  // disable posted stores to side-effect address
+   output logic  dec_tlu_core_ecc_disable, // disable core ECC
+   output logic  dec_tlu_bpred_disable,           // disable branch prediction
+   output logic  dec_tlu_wb_coalescing_disable,   // disable writebuffer coalescing
+   output logic  dec_tlu_pipelining_disable,      // disable pipelining
+   output logic  dec_tlu_trace_disable,           // disable trace
+   output logic [2:0]  dec_tlu_dma_qos_prty,    // DMA QoS priority coming from MFDC [18:16]
+
+   // clock gating overrides from mcgc
+   output logic  dec_tlu_misc_clk_override, // override misc clock domain gating
+   output logic  dec_tlu_dec_clk_override,  // override decode clock domain gating
+   output logic  dec_tlu_ifu_clk_override,  // override fetch clock domain gating
+   output logic  dec_tlu_lsu_clk_override,  // override load/store clock domain gating
+   output logic  dec_tlu_bus_clk_override,  // override bus clock domain gating
+   output logic  dec_tlu_pic_clk_override,  // override PIC clock domain gating
+   output logic  dec_tlu_picio_clk_override,// override PICIO clock domain gating
+   output logic  dec_tlu_dccm_clk_override, // override DCCM clock domain gating
+   output logic  dec_tlu_icm_clk_override   // override ICCM clock domain gating
+   );
+
+   logic         clk_override, e4e5_int_clk, nmi_fir_type, nmi_lsu_load_type, nmi_lsu_store_type, nmi_int_detected_f, nmi_lsu_load_type_f,
+                 nmi_lsu_store_type_f, allow_dbg_halt_csr_write, dbg_cmd_done_ns, i_cpu_run_req_d1_raw, debug_mode_status, lsu_single_ecc_error_r_d1,
+                 sel_npc_r, sel_npc_resume, ce_int,
+                 nmi_in_debug_mode, dpc_capture_npc, dpc_capture_pc, tdata_load, tdata_opcode, tdata_action, perfcnt_halted, tdata_chain,
+                 tdata_kill_write;
+
+
+   logic reset_delayed, reset_detect, reset_detected;
+   logic wr_mstatus_r, wr_mtvec_r, wr_mcyclel_r, wr_mcycleh_r,
+         wr_minstretl_r, wr_minstreth_r, wr_mscratch_r, wr_mepc_r, wr_mcause_r, wr_mscause_r, wr_mtval_r,
+         wr_mrac_r, wr_meihap_r, wr_meicurpl_r, wr_meipt_r, wr_dcsr_r,
+         wr_dpc_r, wr_meicidpl_r, wr_meivt_r, wr_meicpct_r, wr_micect_r, wr_miccmect_r, wr_mfdht_r, wr_mfdhs_r,
+         wr_mdccmect_r,wr_mhpme3_r, wr_mhpme4_r, wr_mhpme5_r, wr_mhpme6_r;
+   logic wr_mpmc_r;
+   logic [1:1] mpmc_b_ns, mpmc, mpmc_b;
+   logic set_mie_pmu_fw_halt, fw_halted_ns, fw_halted;
+   logic wr_mcountinhibit_r;
+   logic [6:0] mcountinhibit;
+   logic wr_mtsel_r, wr_mtdata1_t0_r, wr_mtdata1_t1_r, wr_mtdata1_t2_r, wr_mtdata1_t3_r, wr_mtdata2_t0_r, wr_mtdata2_t1_r, wr_mtdata2_t2_r, wr_mtdata2_t3_r;
+   logic [31:0] mtdata2_t0, mtdata2_t1, mtdata2_t2, mtdata2_t3, mtdata2_tsel_out, mtdata1_tsel_out;
+   logic [9:0]  mtdata1_t0_ns, mtdata1_t0, mtdata1_t1_ns, mtdata1_t1, mtdata1_t2_ns, mtdata1_t2, mtdata1_t3_ns, mtdata1_t3;
+   logic [9:0] tdata_wrdata_r;
+   logic [1:0] mtsel_ns, mtsel;
+   logic tlu_i0_kill_writeb_r;
+   logic [1:0]  mstatus_ns, mstatus;
+   logic [1:0] mfdhs_ns, mfdhs;
+   logic [31:0] force_halt_ctr, force_halt_ctr_f;
+   logic        force_halt;
+   logic [5:0]  mfdht, mfdht_ns;
+   logic mstatus_mie_ns;
+   logic [30:0] mtvec_ns, mtvec;
+   logic [15:2] dcsr_ns, dcsr;
+   logic [5:0] mip_ns, mip;
+   logic [5:0] mie_ns, mie;
+   logic [31:0] mcyclel_ns, mcyclel;
+   logic [31:0] mcycleh_ns, mcycleh;
+   logic [31:0] minstretl_ns, minstretl;
+   logic [31:0] minstreth_ns, minstreth;
+   logic [31:0] micect_ns, micect, miccmect_ns, miccmect, mdccmect_ns, mdccmect;
+   logic [26:0] micect_inc, miccmect_inc, mdccmect_inc;
+   logic [31:0] mscratch;
+   logic [31:0] mhpmc3, mhpmc3_ns, mhpmc4, mhpmc4_ns, mhpmc5, mhpmc5_ns, mhpmc6, mhpmc6_ns;
+   logic [31:0] mhpmc3h, mhpmc3h_ns, mhpmc4h, mhpmc4h_ns, mhpmc5h, mhpmc5h_ns, mhpmc6h, mhpmc6h_ns;
+   logic [9:0]  mhpme3, mhpme4, mhpme5, mhpme6;
+   logic [31:0] mrac;
+   logic [9:2] meihap;
+   logic [31:10] meivt;
+   logic [3:0] meicurpl_ns, meicurpl;
+   logic [3:0] meicidpl_ns, meicidpl;
+   logic [3:0] meipt_ns, meipt;
+   logic [31:0] mdseac;
+   logic mdseac_locked_ns, mdseac_locked_f, mdseac_en, nmi_lsu_detected;
+   logic [31:1] mepc_ns, mepc;
+   logic [31:1] dpc_ns, dpc;
+   logic [31:0] mcause_ns, mcause;
+   logic [3:0] mscause_ns, mscause, mscause_type;
+   logic [31:0] mtval_ns, mtval;
+   logic dec_pause_state_f, dec_tlu_wr_pause_r_d1, pause_expired_r, pause_expired_wb;
+   logic        tlu_flush_lower_r, tlu_flush_lower_r_d1;
+   logic [31:1] tlu_flush_path_r,  tlu_flush_path_r_d1;
+   logic i0_valid_wb;
+   logic tlu_i0_commit_cmt;
+   logic [31:1] vectored_path, interrupt_path;
+   logic [16:0] dicawics_ns, dicawics;
+   logic        wr_dicawics_r, wr_dicad0_r, wr_dicad1_r, wr_dicad0h_r;
+   logic [31:0] dicad0_ns, dicad0, dicad0h_ns, dicad0h;
+
+   logic [6:0]  dicad1_ns, dicad1_raw;
+   logic [31:0] dicad1;
+   logic        ebreak_r, ebreak_to_debug_mode_r, ecall_r, illegal_r, mret_r, inst_acc_r, fence_i_r,
+                ic_perr_r, iccm_sbecc_r, ebreak_to_debug_mode_r_d1, kill_ebreak_count_r, inst_acc_second_r;
+   logic ce_int_ready, ext_int_ready, timer_int_ready, soft_int_ready, int_timer0_int_ready, int_timer1_int_ready, mhwakeup_ready,
+         take_ext_int, take_ce_int, take_timer_int, take_soft_int, take_int_timer0_int, take_int_timer1_int, take_nmi, take_nmi_r_d1, int_timer0_int_possible, int_timer1_int_possible;
+   logic i0_exception_valid_r, interrupt_valid_r, i0_exception_valid_r_d1, interrupt_valid_r_d1, exc_or_int_valid_r, exc_or_int_valid_r_d1, mdccme_ce_req, miccme_ce_req, mice_ce_req;
+   logic synchronous_flush_r;
+   logic [4:0]  exc_cause_r, exc_cause_wb;
+   logic        mcyclel_cout, mcyclel_cout_f, mcyclela_cout;
+   logic [31:0] mcyclel_inc;
+   logic [31:0] mcycleh_inc;
+
+   logic        minstretl_cout, minstretl_cout_f, minstret_enable, minstretl_cout_ns, minstretl_couta;
+
+   logic [31:0] minstretl_inc, minstretl_read;
+   logic [31:0] minstreth_inc, minstreth_read;
+   logic [31:1] pc_r, pc_r_d1, npc_r, npc_r_d1;
+   logic valid_csr;
+   logic rfpc_i0_r;
+   logic lsu_i0_rfnpc_r;
+   logic dec_tlu_br0_error_r, dec_tlu_br0_start_error_r, dec_tlu_br0_v_r;
+   logic lsu_i0_exc_r, lsu_i0_exc_r_raw, lsu_exc_ma_r, lsu_exc_acc_r, lsu_exc_st_r,
+         lsu_exc_valid_r, lsu_exc_valid_r_raw, lsu_exc_valid_r_d1, lsu_i0_exc_r_d1, block_interrupts;
+   logic i0_trigger_eval_r;
+
+   logic request_debug_mode_r, request_debug_mode_r_d1, request_debug_mode_done, request_debug_mode_done_f;
+   logic take_halt, halt_taken, halt_taken_f, internal_dbg_halt_mode, dbg_tlu_halted_f, take_reset,
+         dbg_tlu_halted, core_empty, lsu_idle_any_f, ifu_miss_state_idle_f, resume_ack_ns,
+         debug_halt_req_f, debug_resume_req_f_raw, debug_resume_req_f, enter_debug_halt_req, dcsr_single_step_done, dcsr_single_step_done_f,
+         debug_halt_req_d1, debug_halt_req_ns, dcsr_single_step_running, dcsr_single_step_running_f, internal_dbg_halt_timers;
+
+   logic [3:0] i0_trigger_r, trigger_action, trigger_enabled,
+               i0_trigger_chain_masked_r;
+   logic       i0_trigger_hit_r, i0_trigger_hit_raw_r, i0_trigger_action_r,
+               trigger_hit_r_d1,
+               mepc_trigger_hit_sel_pc_r;
+   logic [3:0] update_hit_bit_r, i0_iside_trigger_has_pri_r,i0trigger_qual_r, i0_lsu_trigger_has_pri_r;
+   logic cpu_halt_status, cpu_halt_ack, cpu_run_ack, ext_halt_pulse, i_cpu_halt_req_d1, i_cpu_run_req_d1;
+
+   logic inst_acc_r_raw, trigger_hit_dmode_r, trigger_hit_dmode_r_d1;
+   logic [9:0] mcgc, mcgc_ns, mcgc_int;
+   logic [18:0] mfdc;
+   logic i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual, pmu_fw_halt_req_ns, pmu_fw_halt_req_f, int_timer_stalled,
+         fw_halt_req, enter_pmu_fw_halt_req, pmu_fw_tlu_halted, pmu_fw_tlu_halted_f, internal_pmu_fw_halt_mode,
+         internal_pmu_fw_halt_mode_f, int_timer0_int_hold, int_timer1_int_hold, int_timer0_int_hold_f, int_timer1_int_hold_f;
+   logic nmi_int_delayed, nmi_int_detected;
+   logic [3:0] trigger_execute, trigger_data, trigger_store;
+   logic dec_tlu_pmu_fw_halted;
+
+   logic mpc_run_state_ns, debug_brkpt_status_ns, mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns, dbg_halt_state_ns, dbg_run_state_ns,
+         dbg_halt_state_f, mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f, mpc_halt_state_f, mpc_halt_state_ns, mpc_run_state_f, debug_brkpt_status_f,
+         mpc_debug_halt_ack_f, mpc_debug_run_ack_f, dbg_run_state_f, mpc_debug_halt_req_sync_pulse,
+         mpc_debug_run_req_sync_pulse, debug_brkpt_valid, debug_halt_req, debug_resume_req, dec_tlu_mpc_halted_only_ns;
+   logic take_ext_int_start, ext_int_freeze, take_ext_int_start_d1, take_ext_int_start_d2,
+         take_ext_int_start_d3, ext_int_freeze_d1, csr_meicpct, ignore_ext_int_due_to_lsu_stall;
+   logic mcause_sel_nmi_store, mcause_sel_nmi_load, mcause_sel_nmi_ext, fast_int_meicpct;
+   logic [1:0] mcause_fir_error_type;
+   logic dbg_halt_req_held_ns, dbg_halt_req_held, dbg_halt_req_final;
+   logic iccm_repair_state_ns, iccm_repair_state_d1, iccm_repair_state_rfnpc;
+
+
+   // internal timer, isolated for size reasons
+   logic [31:0] dec_timer_rddata_d;
+   logic dec_timer_read_d, dec_timer_t0_pulse, dec_timer_t1_pulse;
+   logic csr_mitctl0;
+   logic csr_mitctl1;
+   logic csr_mitb0;
+   logic csr_mitb1;
+   logic csr_mitcnt0;
+   logic csr_mitcnt1;
+
+   logic nmi_int_sync, timer_int_sync, soft_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync, mpc_debug_run_req_sync, mpc_debug_halt_req_sync_raw;
+   logic csr_wr_clk;
+   logic e4e5_clk, e4_valid, e5_valid, e4e5_valid, internal_dbg_halt_mode_f, internal_dbg_halt_mode_f2;
+   logic lsu_pmu_load_external_r, lsu_pmu_store_external_r;
+   logic dec_tlu_flush_noredir_r_d1, dec_tlu_flush_pause_r_d1;
+   logic lsu_single_ecc_error_r;
+   logic [31:0] lsu_error_pkt_addr_r;
+   logic mcyclel_cout_in;
+   logic i0_valid_no_ebreak_ecall_r;
+   logic minstret_enable_f;
+   logic sel_exu_npc_r, sel_flush_npc_r, sel_hold_npc_r;
+   logic pc0_valid_r;
+   logic [15:0] mfdc_int, mfdc_ns;
+   logic [31:0] mrac_in;
+   logic [31:27] csr_sat;
+   logic [8:6] dcsr_cause;
+   logic enter_debug_halt_req_le, dcsr_cause_upgradeable;
+   logic icache_rd_valid, icache_wr_valid, icache_rd_valid_f, icache_wr_valid_f;
+   logic [3:0]      mhpmc_inc_r, mhpmc_inc_r_d1;
+
+   logic [3:0][9:0] mhpme_vec;
+   logic            mhpmc3_wr_en0, mhpmc3_wr_en1, mhpmc3_wr_en;
+   logic            mhpmc4_wr_en0, mhpmc4_wr_en1, mhpmc4_wr_en;
+   logic            mhpmc5_wr_en0, mhpmc5_wr_en1, mhpmc5_wr_en;
+   logic            mhpmc6_wr_en0, mhpmc6_wr_en1, mhpmc6_wr_en;
+   logic            mhpmc3h_wr_en0, mhpmc3h_wr_en;
+   logic            mhpmc4h_wr_en0, mhpmc4h_wr_en;
+   logic            mhpmc5h_wr_en0, mhpmc5h_wr_en;
+   logic            mhpmc6h_wr_en0, mhpmc6h_wr_en;
+   logic [63:0]     mhpmc3_incr, mhpmc4_incr, mhpmc5_incr, mhpmc6_incr;
+   logic perfcnt_halted_d1, zero_event_r;
+   logic [3:0] perfcnt_during_sleep;
+   logic [9:0] event_r;
+
+   eb1_inst_pkt_t pmu_i0_itype_qual;
+
+   logic csr_mfdht;
+   logic csr_mfdhs;
+   logic csr_misa;
+   logic csr_mvendorid;
+   logic csr_marchid;
+   logic csr_mimpid;
+   logic csr_mhartid;
+   logic csr_mstatus;
+   logic csr_mtvec;
+   logic csr_mip;
+   logic csr_mie;
+   logic csr_mcyclel;
+   logic csr_mcycleh;
+   logic csr_minstretl;
+   logic csr_minstreth;
+   logic csr_mscratch;
+   logic csr_mepc;
+   logic csr_mcause;
+   logic csr_mscause;
+   logic csr_mtval;
+   logic csr_mrac;
+   logic csr_dmst;
+   logic csr_mdseac;
+   logic csr_meihap;
+   logic csr_meivt;
+   logic csr_meipt;
+   logic csr_meicurpl;
+   logic csr_meicidpl;
+   logic csr_dcsr;
+   logic csr_mcgc;
+   logic csr_mfdc;
+   logic csr_dpc;
+   logic csr_mtsel;
+   logic csr_mtdata1;
+   logic csr_mtdata2;
+   logic csr_mhpmc3;
+   logic csr_mhpmc4;
+   logic csr_mhpmc5;
+   logic csr_mhpmc6;
+   logic csr_mhpmc3h;
+   logic csr_mhpmc4h;
+   logic csr_mhpmc5h;
+   logic csr_mhpmc6h;
+   logic csr_mhpme3;
+   logic csr_mhpme4;
+   logic csr_mhpme5;
+   logic csr_mhpme6;
+   logic csr_mcountinhibit;
+   logic csr_mpmc;
+   logic csr_micect;
+   logic csr_miccmect;
+   logic csr_mdccmect;
+   logic csr_dicawics;
+   logic csr_dicad0h;
+   logic csr_dicad0;
+   logic csr_dicad1;
+   logic csr_dicago;
+   logic presync;
+   logic postsync;
+   logic legal;
+   logic dec_csr_wen_r_mod;
+
+   logic flush_clkvalid;
+   logic sel_fir_addr;
+   logic wr_mie_r;
+   logic mtval_capture_pc_r;
+   logic mtval_capture_pc_plus2_r;
+   logic mtval_capture_inst_r;
+   logic mtval_capture_lsu_r;
+   logic mtval_clear_r;
+   logic wr_mcgc_r;
+   logic wr_mfdc_r;
+   logic wr_mdeau_r;
+   logic trigger_hit_for_dscr_cause_r_d1;
+   logic conditionally_illegal;
+
+   logic  [3:0] ifu_mscause ;
+   logic        ifu_ic_error_start_f, ifu_iccm_rd_ecc_single_err_f;
+
+   eb1_dec_timer_ctl  #(.pt(pt)) int_timers(.*);
+   // end of internal timers
+
+   assign clk_override = dec_tlu_dec_clk_override;
+
+   // Async inputs to the core have to be sync'd to the core clock.
+   rvsyncss #(7) syncro_ff(.*,
+                           .clk(free_clk),
+                           .din ({nmi_int,      timer_int,      soft_int,      i_cpu_halt_req,      i_cpu_run_req,      mpc_debug_halt_req,          mpc_debug_run_req}),
+                           .dout({nmi_int_sync, timer_int_sync, soft_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync_raw, mpc_debug_run_req_sync}));
+
+   // for CSRs that have inpipe writes only
+
+   rvoclkhdr csrwr_r_cgc   ( .en(dec_csr_wen_r_mod | clk_override), .l1clk(csr_wr_clk), .* );
+
+   assign e4_valid = dec_tlu_i0_valid_r;
+   assign e4e5_valid = e4_valid | e5_valid;
+   assign flush_clkvalid = internal_dbg_halt_mode_f | i_cpu_run_req_d1 | interrupt_valid_r | interrupt_valid_r_d1 |
+                           reset_delayed | pause_expired_r | pause_expired_wb | ic_perr_r | iccm_sbecc_r |
+                           clk_override;
+   rvoclkhdr e4e5_cgc     ( .en(e4e5_valid | clk_override), .l1clk(e4e5_clk), .* );
+   rvoclkhdr e4e5_int_cgc ( .en(e4e5_valid | flush_clkvalid), .l1clk(e4e5_int_clk), .* );
+
+   rvdffie #(11)  freeff (.*, .clk(free_l2clk),
+                          .din ({ifu_ic_error_start, ifu_iccm_rd_ecc_single_err, iccm_repair_state_ns, e4_valid, internal_dbg_halt_mode,
+                                 lsu_pmu_load_external_m, lsu_pmu_store_external_m, tlu_flush_lower_r,  tlu_i0_kill_writeb_r,
+                                 internal_dbg_halt_mode_f, force_halt}),
+                          .dout({ifu_ic_error_start_f, ifu_iccm_rd_ecc_single_err_f, iccm_repair_state_d1, e5_valid, internal_dbg_halt_mode_f,
+                                 lsu_pmu_load_external_r, lsu_pmu_store_external_r, tlu_flush_lower_r_d1, dec_tlu_i0_kill_writeb_wb,
+                                 internal_dbg_halt_mode_f2, dec_tlu_force_halt}));
+
+   assign dec_tlu_i0_kill_writeb_r = tlu_i0_kill_writeb_r;
+
+   assign nmi_int_detected = (nmi_int_sync & ~nmi_int_delayed) | nmi_lsu_detected | (nmi_int_detected_f & ~take_nmi_r_d1) | nmi_fir_type;
+   // if the first nmi is a lsu type, note it. If there's already an nmi pending, ignore. Simultaneous with FIR, drop.
+   assign nmi_lsu_load_type  = (nmi_lsu_detected & lsu_imprecise_error_load_any &  ~(nmi_int_detected_f & ~take_nmi_r_d1)) |
+                               (nmi_lsu_load_type_f  & ~take_nmi_r_d1);
+   assign nmi_lsu_store_type = (nmi_lsu_detected & lsu_imprecise_error_store_any & ~(nmi_int_detected_f & ~take_nmi_r_d1)) |
+                               (nmi_lsu_store_type_f & ~take_nmi_r_d1);
+
+   assign nmi_fir_type = ~nmi_int_detected_f & take_ext_int_start_d3 & |lsu_fir_error[1:0];
+
+   // Filter subsequent bus errors after the first, until the lock on MDSEAC is cleared
+   assign nmi_lsu_detected = ~mdseac_locked_f & (lsu_imprecise_error_load_any | lsu_imprecise_error_store_any) & ~nmi_fir_type;
+
+
+localparam MSTATUS_MIE   = 0;
+localparam MIP_MCEIP     = 5;
+localparam MIP_MITIP0    = 4;
+localparam MIP_MITIP1    = 3;
+localparam MIP_MEIP      = 2;
+localparam MIP_MTIP      = 1;
+localparam MIP_MSIP      = 0;
+
+localparam MIE_MCEIE     = 5;
+localparam MIE_MITIE0    = 4;
+localparam MIE_MITIE1    = 3;
+localparam MIE_MEIE      = 2;
+localparam MIE_MTIE      = 1;
+localparam MIE_MSIE      = 0;
+
+localparam DCSR_EBREAKM  = 15;
+localparam DCSR_STEPIE   = 11;
+localparam DCSR_STOPC    = 10;
+localparam DCSR_STEP     = 2;
+
+
+   assign reset_delayed = reset_detect ^ reset_detected;
+
+   // ----------------------------------------------------------------------
+   // MPC halt
+   // - can interact with debugger halt and vice versa
+
+   // fast ints in progress have priority
+   assign mpc_debug_halt_req_sync = mpc_debug_halt_req_sync_raw & ~ext_int_freeze_d1;
+
+    rvdffie #(16)  mpvhalt_ff (.*, .clk(free_l2clk),
+                                 .din({1'b1, reset_detect,
+                                       nmi_int_sync, nmi_int_detected, nmi_lsu_load_type, nmi_lsu_store_type,
+                                       mpc_debug_halt_req_sync, mpc_debug_run_req_sync,
+                                       mpc_halt_state_ns, mpc_run_state_ns, debug_brkpt_status_ns,
+                                       mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns,
+                                       dbg_halt_state_ns, dbg_run_state_ns,
+                                       dec_tlu_mpc_halted_only_ns}),
+                                .dout({reset_detect, reset_detected,
+                                       nmi_int_delayed, nmi_int_detected_f, nmi_lsu_load_type_f, nmi_lsu_store_type_f,
+                                       mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f,
+                                       mpc_halt_state_f, mpc_run_state_f, debug_brkpt_status_f,
+                                       mpc_debug_halt_ack_f, mpc_debug_run_ack_f,
+                                       dbg_halt_state_f, dbg_run_state_f,
+                                       dec_tlu_mpc_halted_only}));
+
+   // turn level sensitive requests into pulses
+   assign mpc_debug_halt_req_sync_pulse = mpc_debug_halt_req_sync & ~mpc_debug_halt_req_sync_f;
+   assign mpc_debug_run_req_sync_pulse = mpc_debug_run_req_sync & ~mpc_debug_run_req_sync_f;
+
+   // states
+   assign mpc_halt_state_ns = (mpc_halt_state_f | mpc_debug_halt_req_sync_pulse | (reset_delayed & ~mpc_reset_run_req)) & ~mpc_debug_run_req_sync;
+   assign mpc_run_state_ns = (mpc_run_state_f | (mpc_debug_run_req_sync_pulse & ~mpc_debug_run_ack_f)) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f);
+
+   // note, MPC halt can allow the jtag debugger to just start sending commands. When that happens, set the internal debugger halt state to prevent
+   // MPC run from starting the core.
+   assign dbg_halt_state_ns = (dbg_halt_state_f | (dbg_halt_req_final | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1)) & ~dbg_resume_req;
+   assign dbg_run_state_ns = (dbg_run_state_f | dbg_resume_req) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f);
+
+   // tell dbg we are only MPC halted
+   assign dec_tlu_mpc_halted_only_ns = ~dbg_halt_state_f & mpc_halt_state_f;
+
+   // Breakpoint status: debug_brkpt_valid is the OR of the _d1 ebreak-to-debug-mode and dmode-trigger-hit terms.
+   assign debug_brkpt_valid = ebreak_to_debug_mode_r_d1 | trigger_hit_dmode_r_d1;
+   assign debug_brkpt_status_ns = (debug_brkpt_valid | debug_brkpt_status_f) & (internal_dbg_halt_mode & ~dcsr_single_step_running_f);
+   // The status re-feeds itself through debug_brkpt_status_f and is forced low when debug halt mode drops or a single step is running.
+   // acks back to interface (the _f versions are produced outside this section)
+   assign mpc_debug_halt_ack_ns = mpc_halt_state_f & internal_dbg_halt_mode_f & mpc_debug_halt_req_sync & core_empty;
+   assign mpc_debug_run_ack_ns = (mpc_debug_run_req_sync & ~dbg_halt_state_ns & ~mpc_debug_halt_req_sync) | (mpc_debug_run_ack_f & mpc_debug_run_req_sync) ;
+
+   // Pins: outputs are driven from the _f copies
+   assign mpc_debug_halt_ack = mpc_debug_halt_ack_f;
+   assign mpc_debug_run_ack = mpc_debug_run_ack_f;
+   assign debug_brkpt_status = debug_brkpt_status_f;
+
+   // DBG halt req is a pulse, fast ext int in progress has priority: hold it while ext_int_freeze_d1 is set, release it afterwards
+   assign dbg_halt_req_held_ns = (dbg_halt_req | dbg_halt_req_held) & ext_int_freeze_d1;
+   assign dbg_halt_req_final = (dbg_halt_req | dbg_halt_req_held) & ~ext_int_freeze_d1;
+
+   // combine MPC and DBG halt requests (and reset_delayed without mpc_reset_run_req); masked when already in debug halt mode or during ext_int_freeze_d1
+   assign debug_halt_req = (dbg_halt_req_final | mpc_debug_halt_req_sync | (reset_delayed & ~mpc_reset_run_req)) & ~internal_dbg_halt_mode_f & ~ext_int_freeze_d1;
+
+   assign debug_resume_req = ~debug_resume_req_f &  // squash back to back resumes
+                             ((mpc_run_state_ns & ~dbg_halt_state_ns) |  // MPC run req
+                              (dbg_run_state_ns & ~mpc_halt_state_ns)); // dbg request is a pulse
+
+
+   // HALT
+   // dbg/pmu/fw requests halt, service as soon as lsu is not blocking interrupts
+   assign take_halt = (debug_halt_req_f | pmu_fw_halt_req_f) & ~synchronous_flush_r & ~mret_r & ~halt_taken_f & ~dec_tlu_flush_noredir_r_d1 & ~take_reset;
+
+   // hold after we take a halt, so we don't keep taking halts; the hold term drops on either halted flag (_f) or interrupt_valid_r_d1
+   assign halt_taken = (dec_tlu_flush_noredir_r_d1 & ~dec_tlu_flush_pause_r_d1 & ~take_ext_int_start_d1) | (halt_taken_f & ~dbg_tlu_halted_f & ~pmu_fw_tlu_halted_f & ~interrupt_valid_r_d1);
+
+   // After doing halt flush (RFNPC) wait until core is idle before asserting a particular halt mode
+   // It takes a cycle for mb_empty to assert after a fetch, take_halt covers that cycle
+   assign core_empty = force_halt |
+                       (lsu_idle_any & lsu_idle_any_f & ifu_miss_state_idle & ifu_miss_state_idle_f & ~debug_halt_req & ~debug_halt_req_d1 & ~dec_div_active);
+   // force_halt overrides every idle condition above; core_empty is exported unchanged
+   assign dec_tlu_core_empty = core_empty;
+
+//--------------------------------------------------------------------------------
+// Debug start
+// Next-state terms of the debug halt / resume / single-step handshake (the matching flops are in halt_ff)
+   // Enter debug halt on a new halt request outside debug mode, a finished single step, or a flopped dmode trigger / ebreak-to-debug
+   assign enter_debug_halt_req = (~internal_dbg_halt_mode_f & debug_halt_req) | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1;
+
+   // dbg halt state active from request until non-step resume
+   assign internal_dbg_halt_mode = debug_halt_req_ns | (internal_dbg_halt_mode_f & ~(debug_resume_req_f & ~dcsr[DCSR_STEP]));
+   // dbg halt can access csrs as long as we are not stepping
+   assign allow_dbg_halt_csr_write = internal_dbg_halt_mode_f & ~dcsr_single_step_running_f;
+
+
+   // hold debug_halt_req_ns high until we enter debug halt
+   assign debug_halt_req_ns = enter_debug_halt_req | (debug_halt_req_f & ~dbg_tlu_halted);
+   // halted: pending request + empty core + halt flush taken; stays set until debug_resume_req_f
+   assign dbg_tlu_halted = (debug_halt_req_f & core_empty & halt_taken) | (dbg_tlu_halted_f & ~debug_resume_req_f);
+   // resume ack: resume request seen while halted and dbg_run_state_ns is set
+   assign resume_ack_ns = (debug_resume_req_f & dbg_tlu_halted_f & dbg_run_state_ns);
+   // single step done: a valid i0 (dec_tlu_i0_valid_r) while not halted, DCSR[STEP] set, and no rfpc
+   assign dcsr_single_step_done = dec_tlu_i0_valid_r & ~dec_tlu_dbg_halted & dcsr[DCSR_STEP] & ~rfpc_i0_r;
+   // single step running: starts on a resume with DCSR[STEP] set, ends when the flopped step-done is seen
+   assign dcsr_single_step_running = (debug_resume_req_f & dcsr[DCSR_STEP]) | (dcsr_single_step_running_f & ~dcsr_single_step_done_f);
+   // command done: a valid i0 while the core is debug halted
+   assign dbg_cmd_done_ns = dec_tlu_i0_valid_r & dec_tlu_dbg_halted;
+
+   // used to hold off commits after an in-pipe debug mode request (triggers, DCSR)
+   assign request_debug_mode_r = (trigger_hit_dmode_r | ebreak_to_debug_mode_r) | (request_debug_mode_r_d1 & ~dec_tlu_flush_lower_wb);
+   // remembers that a debug mode request was seen until dbg_tlu_halted_f is set
+   assign request_debug_mode_done = (request_debug_mode_r_d1 | request_debug_mode_done_f) & ~dbg_tlu_halted_f;
+
+    rvdffie #(18)  halt_ff (.*, .clk(free_l2clk), // 18-bit bundle on free_l2clk; rvdffie is defined elsewhere (presumably a flop bank - confirm)
+                          .din({dec_tlu_flush_noredir_r, halt_taken, lsu_idle_any, ifu_miss_state_idle, dbg_tlu_halted, // din entry i pairs with dout entry i by position
+                                resume_ack_ns, debug_halt_req_ns, debug_resume_req, trigger_hit_dmode_r,
+                                dcsr_single_step_done, debug_halt_req, dec_tlu_wr_pause_r, dec_pause_state,
+                                request_debug_mode_r, request_debug_mode_done, dcsr_single_step_running, dec_tlu_flush_pause_r,
+                                dbg_halt_req_held_ns}),
+                          .dout({dec_tlu_flush_noredir_r_d1, halt_taken_f, lsu_idle_any_f, ifu_miss_state_idle_f, dbg_tlu_halted_f,
+                                 dec_tlu_resume_ack , debug_halt_req_f, debug_resume_req_f_raw, trigger_hit_dmode_r_d1, // note: debug_halt_req_ns -> debug_halt_req_f, debug_resume_req -> debug_resume_req_f_raw
+                                 dcsr_single_step_done_f, debug_halt_req_d1, dec_tlu_wr_pause_r_d1, dec_pause_state_f,  // note: debug_halt_req -> debug_halt_req_d1
+                                 request_debug_mode_r_d1, request_debug_mode_done_f, dcsr_single_step_running_f, dec_tlu_flush_pause_r_d1,
+                                 dbg_halt_req_held}));
+
+   // MPC run collides with DBG halt, fix it here: a dbg_halt_req in the same cycle suppresses the flopped resume request
+   assign debug_resume_req_f = debug_resume_req_f_raw & ~dbg_halt_req;
+   // Debug state outputs, driven from the flopped state bits
+   assign dec_tlu_debug_stall = debug_halt_req_f;
+   assign dec_tlu_dbg_halted = dbg_tlu_halted_f;
+   assign dec_tlu_debug_mode = internal_dbg_halt_mode_f;
+   assign dec_tlu_pmu_fw_halted = pmu_fw_tlu_halted_f;
+
+   // kill fetch redirection on flush if going to halt, or if there's a fence during db-halt (also: pause flush, dmode trigger hit, ext int start)
+   assign dec_tlu_flush_noredir_r = take_halt | (fence_i_r & internal_dbg_halt_mode) | dec_tlu_flush_pause_r | (i0_trigger_hit_r & trigger_hit_dmode_r) | take_ext_int_start;
+
+   assign dec_tlu_flush_extint = take_ext_int_start;
+
+   // 1 cycle after writing the PAUSE counter, flush with noredir to idle F1-D (unless an interrupt or ext int start is taken this cycle).
+   assign dec_tlu_flush_pause_r = dec_tlu_wr_pause_r_d1 & ~interrupt_valid_r & ~take_ext_int_start;
+
+   // detect end of pause counter and rfpc: falling edge of dec_pause_state with no interrupt ready/held/pending and no halt request or halt in progress
+   assign pause_expired_r = ~dec_pause_state & dec_pause_state_f & ~(ext_int_ready | ce_int_ready | timer_int_ready | soft_int_ready | int_timer0_int_hold_f | int_timer1_int_hold_f | nmi_int_detected | ext_int_freeze_d1) & ~interrupt_valid_r_d1 & ~debug_halt_req_f & ~pmu_fw_halt_req_f & ~halt_taken_f;
+   // flush qualifiers: leak_one needs DCSR[STEP] with a resume ack or running step and no noredir flush; err flags a flush caused by ic_perr_r / iccm_sbecc_r
+   assign dec_tlu_flush_leak_one_r = dec_tlu_flush_lower_r  & dcsr[DCSR_STEP] & (dec_tlu_resume_ack | dcsr_single_step_running) & ~dec_tlu_flush_noredir_r;
+   assign dec_tlu_flush_err_r = dec_tlu_flush_lower_r & (ic_perr_r | iccm_sbecc_r);
+
+   // If DM attempts to access an illegal CSR, send cmd_fail back (cmd_fail = illegal_r qualified with cmd_done)
+   assign dec_dbg_cmd_done = dbg_cmd_done_ns;
+   assign dec_dbg_cmd_fail = illegal_r & dec_dbg_cmd_done;
+
+
+   //--------------------------------------------------------------------------------
+   // Triggers
+   //
+   // Bit positions inside each mtdata1_tN vector, as indexed by the trigger logic of this module.
+localparam MTDATA1_DMODE     = 9,   // ANDed with ACTION to form trigger_action
+           MTDATA1_SEL       = 7,   // collected into trigger_data
+           MTDATA1_ACTION    = 6,   // used by trigger_enabled and trigger_action
+           MTDATA1_CHAIN     = 5,   // used for chain masking of trigger pairs
+           MTDATA1_MATCH     = 4,   // not referenced in this section
+           MTDATA1_M_ENABLED = 3,   // used by trigger_enabled
+           MTDATA1_EXE       = 2,   // collected into trigger_execute
+           MTDATA1_ST        = 1,   // collected into trigger_store
+           MTDATA1_LD        = 0;   // not referenced in this section
+
+   // Prioritize trigger hits with other exceptions.
+   //
+   // Trigger should have highest priority except:
+   // - trigger is an execute-data and there is an inst_access exception (lsu triggers won't fire, inst. is nop'd by decode)
+   // - trigger is a store-data and there is a lsu_acc_exc or lsu_ma_exc.
+   assign trigger_execute[3:0] = {mtdata1_t3[MTDATA1_EXE], mtdata1_t2[MTDATA1_EXE], mtdata1_t1[MTDATA1_EXE], mtdata1_t0[MTDATA1_EXE]};
+   assign trigger_data[3:0] = {mtdata1_t3[MTDATA1_SEL], mtdata1_t2[MTDATA1_SEL], mtdata1_t1[MTDATA1_SEL], mtdata1_t0[MTDATA1_SEL]};
+   assign trigger_store[3:0] = {mtdata1_t3[MTDATA1_ST], mtdata1_t2[MTDATA1_ST], mtdata1_t1[MTDATA1_ST], mtdata1_t0[MTDATA1_ST]};
+   // In all 4-bit vectors here, bit N belongs to trigger N (mtdata1_tN).
+   // MSTATUS[MIE] needs to be on to take triggers unless the action is trigger to debug mode.
+   assign trigger_enabled[3:0] = {(mtdata1_t3[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t3[MTDATA1_M_ENABLED],
+                                  (mtdata1_t2[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t2[MTDATA1_M_ENABLED],
+                                  (mtdata1_t1[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t1[MTDATA1_M_ENABLED],
+                                  (mtdata1_t0[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t0[MTDATA1_M_ENABLED]};
+
+   // iside exceptions are always in i0; a branch error masks all four triggers, inst_acc masks only exe-data triggers
+   assign i0_iside_trigger_has_pri_r[3:0]  = ~( (trigger_execute[3:0] & trigger_data[3:0] & {4{inst_acc_r_raw}}) | // exe-data with inst_acc
+                                                ({4{exu_i0_br_error_r | exu_i0_br_start_error_r}}));               // branch error in i0
+
+   // lsu excs have to line up with their respective triggers since the lsu op can be i0
+   assign i0_lsu_trigger_has_pri_r[3:0] = ~(trigger_store[3:0] & trigger_data[3:0] & {4{lsu_i0_exc_r_raw}});
+
+   // trigger hits have to be eval'd to cancel side effect lsu ops even though the pipe is already frozen
+   assign i0_trigger_eval_r = dec_tlu_i0_valid_r;
+   // raw hits from the packet, qualified by eval, both priority masks and the per-trigger enable
+   assign i0trigger_qual_r[3:0] = {4{i0_trigger_eval_r}} & dec_tlu_packet_r.i0trigger[3:0] & i0_iside_trigger_has_pri_r[3:0] & i0_lsu_trigger_has_pri_r[3:0] & trigger_enabled[3:0];
+
+   // Qual trigger hits: suppressed during dec_tlu_flush_lower_wb or while debug halted
+   assign i0_trigger_r[3:0] = ~{4{dec_tlu_flush_lower_wb | dec_tlu_dbg_halted}} & i0trigger_qual_r[3:0];
+
+   // chaining can mask raw trigger info: t2's CHAIN bit pairs triggers 3/2, t0's CHAIN bit pairs triggers 1/0; a chained trigger only counts if its partner hit too
+   assign i0_trigger_chain_masked_r[3:0]  = {i0_trigger_r[3] & (~mtdata1_t2[MTDATA1_CHAIN] | i0_trigger_r[2]),
+                                             i0_trigger_r[2] & (~mtdata1_t2[MTDATA1_CHAIN] | i0_trigger_r[3]),
+                                             i0_trigger_r[1] & (~mtdata1_t0[MTDATA1_CHAIN] | i0_trigger_r[0]),
+                                             i0_trigger_r[0] & (~mtdata1_t0[MTDATA1_CHAIN] | i0_trigger_r[1])};
+
+   // This is the highest priority by this point.
+   assign i0_trigger_hit_raw_r = |i0_trigger_chain_masked_r[3:0];
+   // no further qualification is applied: the final hit is the raw hit
+   assign i0_trigger_hit_r = i0_trigger_hit_raw_r;
+
+   // Actions include breakpoint, or dmode. Dmode is only possible if the DMODE bit is set.
+   // Otherwise, take a breakpoint. For triggers 2 and 0 the action is additionally dropped when their CHAIN bit is set.
+   assign trigger_action[3:0] = {mtdata1_t3[MTDATA1_ACTION] & mtdata1_t3[MTDATA1_DMODE],
+                                 mtdata1_t2[MTDATA1_ACTION] & mtdata1_t2[MTDATA1_DMODE] & ~mtdata1_t2[MTDATA1_CHAIN],
+                                 mtdata1_t1[MTDATA1_ACTION] & mtdata1_t1[MTDATA1_DMODE],
+                                 mtdata1_t0[MTDATA1_ACTION] & mtdata1_t0[MTDATA1_DMODE] & ~mtdata1_t0[MTDATA1_CHAIN]};
+
+   // this is needed to set the HIT bit in the triggers; bits 3/1 use the chain-masked hit, bits 2/0 the unmasked hit; blocked by rfpc_i0_r
+   assign update_hit_bit_r[3:0] = ({4{|i0_trigger_r[3:0] & ~rfpc_i0_r}} & {i0_trigger_chain_masked_r[3], i0_trigger_r[2], i0_trigger_chain_masked_r[1], i0_trigger_r[0]});
+
+   // action, 1 means dmode. Simultaneous triggers with at least 1 set for dmode force entire action to dmode.
+   assign i0_trigger_action_r = |(i0_trigger_chain_masked_r[3:0] & trigger_action[3:0]);
+   // a hit whose action is dmode goes to debug mode ...
+   assign trigger_hit_dmode_r = (i0_trigger_hit_r & i0_trigger_action_r);
+   // ... any other hit is flagged via mepc_trigger_hit_sel_pc_r
+   assign mepc_trigger_hit_sel_pc_r = i0_trigger_hit_r & ~trigger_hit_dmode_r;
+
+
+//
+// Debug end
+//--------------------------------------------------------------------------------
+
+   //----------------------------------------------------------------------
+   //
+   // Commit
+   //
+   //----------------------------------------------------------------------
+
+
+
+   //--------------------------------------------------------------------------------
+   // External halt (not debug halt)
+   // - Fully interlocked handshake
+   // i_cpu_halt_req  ____|--------------|_______________
+   // core_empty      ---------------|___________
+   // o_cpu_halt_ack  _________________|----|__________
+   // o_cpu_halt_status _______________|---------------------|_________
+   // i_cpu_run_req                              ______|----------|____
+   // o_cpu_run_ack                              ____________|------|________
+   //
+
+
+   // debug mode has priority, ignore PMU/FW halt/run while in debug mode (and during ext_int_freeze_d1); run is only accepted while pmu/fw halted
+   assign i_cpu_halt_req_sync_qual = i_cpu_halt_req_sync & ~dec_tlu_debug_mode & ~ext_int_freeze_d1;
+   assign i_cpu_run_req_sync_qual = i_cpu_run_req_sync & ~dec_tlu_debug_mode & pmu_fw_tlu_halted_f & ~ext_int_freeze_d1;
+   // 10-bit bundle on free_l2clk; din and dout entries pair up by position
+   rvdffie #(10) exthaltff (.*, .clk(free_l2clk), .din({i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual,   cpu_halt_status,
+                                                   cpu_halt_ack,   cpu_run_ack, internal_pmu_fw_halt_mode,
+                                                   pmu_fw_halt_req_ns, pmu_fw_tlu_halted,
+                                                   int_timer0_int_hold, int_timer1_int_hold}),
+                                            .dout({i_cpu_halt_req_d1,        i_cpu_run_req_d1_raw,      o_cpu_halt_status,
+                                                   o_cpu_halt_ack, o_cpu_run_ack, internal_pmu_fw_halt_mode_f,
+                                                   pmu_fw_halt_req_f, pmu_fw_tlu_halted_f,
+                                                   int_timer0_int_hold_f, int_timer1_int_hold_f}));
+
+   // only happens if we aren't in dbg_halt: rising edge of the qualified external halt request
+   assign ext_halt_pulse = i_cpu_halt_req_sync_qual & ~i_cpu_halt_req_d1;
+   // a PMU/FW halt starts from the external pulse or from fw_halt_req
+   assign enter_pmu_fw_halt_req =  ext_halt_pulse | fw_halt_req;
+   // request is held until pmu_fw_tlu_halted; a pending debug halt request (debug_halt_req_f) cancels it
+   assign pmu_fw_halt_req_ns = (enter_pmu_fw_halt_req | (pmu_fw_halt_req_f & ~pmu_fw_tlu_halted)) & ~debug_halt_req_f;
+   // halt mode lasts from the request until i_cpu_run_req_d1 or a debug halt request
+   assign internal_pmu_fw_halt_mode = pmu_fw_halt_req_ns | (internal_pmu_fw_halt_mode_f & ~i_cpu_run_req_d1 & ~debug_halt_req_f);
+
+   // debug halt has priority
+   assign pmu_fw_tlu_halted = ((pmu_fw_halt_req_f & core_empty & halt_taken & ~enter_debug_halt_req) | (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1)) & ~debug_halt_req_f;
+   // next-state values of the halt/run handshake outputs (registered by exthaltff)
+   assign cpu_halt_ack = (i_cpu_halt_req_d1 & pmu_fw_tlu_halted_f) | (o_cpu_halt_ack & i_cpu_halt_req_sync);
+   assign cpu_halt_status = (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1) | (o_cpu_halt_status & ~i_cpu_run_req_d1 & ~internal_dbg_halt_mode_f);
+   assign cpu_run_ack = (~pmu_fw_tlu_halted_f & i_cpu_run_req_sync) | (o_cpu_halt_status & i_cpu_run_req_d1_raw) | (o_cpu_run_ack & i_cpu_run_req_sync);
+   assign debug_mode_status = internal_dbg_halt_mode_f;
+   assign o_debug_mode_status = debug_mode_status;
+
+`ifdef RV_ASSERT_ON
+  assert_commit_while_halted: assert #0 (~(tlu_i0_commit_cmt  & o_cpu_halt_status)) else $display("ERROR: Commiting while cpu_halt_status asserted!");
+  assert_flush_while_fastint: assert #0 (~((take_ext_int_start_d1 | take_ext_int_start_d2) & dec_tlu_flush_lower_r)) else $display("ERROR: TLU Flushing inside fast interrupt procedure!");
+`endif
+
+   // high priority interrupts can wakeup from external halt, so can unmasked timer interrupts (only while o_cpu_halt_status is set and no halt request is flopped)
+   assign i_cpu_run_req_d1 = i_cpu_run_req_d1_raw | ((nmi_int_detected | timer_int_ready | soft_int_ready | int_timer0_int_hold_f | int_timer1_int_hold_f | (mhwakeup & mhwakeup_ready)) & o_cpu_halt_status & ~i_cpu_halt_req_d1);
+
+   //--------------------------------------------------------------------------------
+   //--------------------------------------------------------------------------------
+
+   assign lsu_single_ecc_error_r = lsu_single_ecc_error_incr; // plain rename of the incoming single-ECC increment signal
+
+   assign lsu_error_pkt_addr_r[31:0] = lsu_error_pkt_r.addr[31:0];
+
+   // LSU exception qualification: raw valid -> dropped on dec_tlu_flush_lower_wb -> dropped on trigger hit or rfpc
+   assign lsu_exc_valid_r_raw = lsu_error_pkt_r.exc_valid & ~dec_tlu_flush_lower_wb;
+
+   assign lsu_i0_exc_r_raw =  lsu_error_pkt_r.exc_valid;
+
+   assign lsu_i0_exc_r = lsu_i0_exc_r_raw & lsu_exc_valid_r_raw & ~i0_trigger_hit_r & ~rfpc_i0_r;
+
+   assign lsu_exc_valid_r = lsu_i0_exc_r;
+   // decode of the qualified exception: exc_type 0 -> ma, 1 -> acc; inst_type 1 -> store (see the exc_cause_r encoding)
+   assign lsu_exc_ma_r  =  lsu_i0_exc_r & ~lsu_error_pkt_r.exc_type;
+   assign lsu_exc_acc_r =  lsu_i0_exc_r & lsu_error_pkt_r.exc_type;
+   assign lsu_exc_st_r  =  lsu_i0_exc_r & lsu_error_pkt_r.inst_type;
+
+   // Single bit ECC errors on loads are RFNPC corrected, with the corrected data written to the GPR.
+   // LSU turns the load into a store and patches the data in the DCCM
+   assign lsu_i0_rfnpc_r = dec_tlu_i0_valid_r & ~i0_trigger_hit_r &
+                           (~lsu_error_pkt_r.inst_type & lsu_error_pkt_r.single_ecc_error);
+
+   //  Final commit valids: a valid i0 commits unless it is refetched, takes an exception, hits a trigger, or the core is halted / heading to debug mode
+   assign tlu_i0_commit_cmt = dec_tlu_i0_valid_r &
+                              ~rfpc_i0_r &
+                              ~lsu_i0_exc_r &
+                              ~inst_acc_r &
+                              ~dec_tlu_dbg_halted &
+                              ~request_debug_mode_r_d1 &
+                              ~i0_trigger_hit_r;
+
+   // unified place to manage the killing of arch state writebacks
+   assign tlu_i0_kill_writeb_r = rfpc_i0_r | lsu_i0_exc_r | inst_acc_r | (illegal_r & dec_tlu_dbg_halted) | i0_trigger_hit_r;
+   assign dec_tlu_i0_commit_cmt = tlu_i0_commit_cmt;
+
+
+   // refetch PC, microarch flush
+   // ic errors only in pipe0
+   assign rfpc_i0_r =  ((dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & (exu_i0_br_error_r | exu_i0_br_start_error_r)) | // inst commit with rfpc
+                        ((ic_perr_r | iccm_sbecc_r) & ~ext_int_freeze_d1)) & // ic/iccm without inst commit
+                       ~i0_trigger_hit_r & // unless there's a trigger. Err signal to ic/iccm will assert anyway to clear the error.
+                       ~lsu_i0_rfnpc_r;
+
+   // From the indication of a iccm single bit error until the first commit or flush, maintain a repair state. In the repair state, rfnpc i0 commits.
+   assign iccm_repair_state_ns = iccm_sbecc_r | (iccm_repair_state_d1 & ~dec_tlu_flush_lower_r);
+
+   // CSR address compared against dec_csr_wraddr_r below
+   localparam MCPC          = 12'h7c2;
+
+   // this is a flush of last resort, meaning only assert it if there is no other flush happening.
+   assign iccm_repair_state_rfnpc = tlu_i0_commit_cmt & iccm_repair_state_d1 &
+                                    ~(ebreak_r | ecall_r | mret_r | take_reset | illegal_r | (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCPC)));
+
+if (pt.BTB_ENABLE == 1) begin
+   // Branch results are reported only for a valid i0 that is not behind a flush from the previous cycle.
+   // Errors are repaired on any flush, not just the rfpc flush; valid is dropped for a mispredicted, actually-taken branch.
+   assign dec_tlu_br0_v_r           = dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & exu_i0_br_valid_r & ~(exu_i0_br_mp_r & exu_pmu_i0_br_ataken);
+   assign dec_tlu_br0_error_r       = dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & exu_i0_br_error_r;
+   assign dec_tlu_br0_start_error_r = dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & exu_i0_br_start_error_r;
+
+   assign dec_tlu_br0_r_pkt.valid          = dec_tlu_br0_v_r;
+   assign dec_tlu_br0_r_pkt.br_error       = dec_tlu_br0_error_r;
+   assign dec_tlu_br0_r_pkt.br_start_error = dec_tlu_br0_start_error_r;
+   assign dec_tlu_br0_r_pkt.hist[1:0]      = exu_i0_br_hist_r[1:0];
+   assign dec_tlu_br0_r_pkt.way            = exu_i0_br_way_r;
+   assign dec_tlu_br0_r_pkt.middle         = exu_i0_br_middle_r;
+end
+else begin // BTB disabled: tie every branch output off
+   assign dec_tlu_br0_r_pkt         = '0;
+   assign dec_tlu_br0_v_r           = '0;
+   assign dec_tlu_br0_error_r       = '0;
+   assign dec_tlu_br0_start_error_r = '0;
+end
+
+
+   // only expect these in pipe 0; each is qualified with a valid i0, no trigger hit and no rfpc. ebreak_r additionally requires DCSR[EBREAKM] clear.
+   assign       ebreak_r     =  (dec_tlu_packet_r.pmu_i0_itype == EBREAK)  & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~dcsr[DCSR_EBREAKM] & ~rfpc_i0_r;
+   assign       ecall_r      =  (dec_tlu_packet_r.pmu_i0_itype == ECALL)   & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r;
+   assign       illegal_r    =  ~dec_tlu_packet_r.legal   & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r;
+   assign       mret_r       =  (dec_tlu_packet_r.pmu_i0_itype == MRET)    & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r;
+   // fence_i includes debug only fence_i's. The ic/iccm error terms below are ignored during ext_int_freeze_d1, in debug halt mode (unless single stepping) and in pmu/fw halt mode.
+   assign       fence_i_r    =  (dec_tlu_packet_r.fence_i & dec_tlu_i0_valid_r & ~i0_trigger_hit_r) & ~rfpc_i0_r;
+   assign       ic_perr_r    =  ifu_ic_error_start_f & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f;
+   assign       iccm_sbecc_r =  ifu_iccm_rd_ecc_single_err_f & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f;
+   assign       inst_acc_r_raw  =  dec_tlu_packet_r.icaf & dec_tlu_i0_valid_r;
+   assign       inst_acc_r = inst_acc_r_raw & ~rfpc_i0_r & ~i0_trigger_hit_r;
+   assign       inst_acc_second_r = dec_tlu_packet_r.icaf_second;
+   // same decode as ebreak_r but with DCSR[EBREAKM] set: the EBREAK goes to debug mode instead of raising an exception
+   assign       ebreak_to_debug_mode_r = (dec_tlu_packet_r.pmu_i0_itype == EBREAK)  & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & dcsr[DCSR_EBREAKM] & ~rfpc_i0_r;
+   // one-bit register on e4e5_clk producing the _d1 version used by the debug halt logic
+   rvdff #(1)  exctype_wb_ff (.*, .clk(e4e5_clk),
+                                .din (ebreak_to_debug_mode_r   ),
+                                .dout(ebreak_to_debug_mode_r_d1));
+
+   assign dec_tlu_fence_i_r = fence_i_r;
+   //
+   // Exceptions
+   //
+   // - MEPC <- PC
+   // - PC <- MTVEC, assert flush_lower
+   // - MCAUSE <- cause
+   // - MSCAUSE <- secondary cause
+   // - MTVAL <-
+   // - MPIE <- MIE
+   // - MIE <- 0
+   //
+   assign i0_exception_valid_r = (ebreak_r | ecall_r | illegal_r | inst_acc_r) & ~rfpc_i0_r & ~dec_tlu_dbg_halted;
+
+   // Cause encoding built below (one-hot style AND/OR mux, forced to 0 when take_nmi is set):
+   //   interrupts : ext 0x0b, timer 0x07, soft 0x03, int_timer0 0x1d, int_timer1 0x1c, ce 0x1e
+   //   exceptions : inst_acc 0x01, illegal 0x02, ebreak or trigger hit (breakpoint) 0x03, ecall (Environment call M-mode) 0x0b
+   //   lsu        : ma non-store 0x04, acc non-store 0x05, ma store 0x06, acc store 0x07
+   //                (ma / acc presumably mean misaligned / access fault - confirm against the LSU error packet definition)
+
+
+   assign exc_cause_r[4:0] =  ( ({5{take_ext_int}}        & 5'h0b) |
+                                ({5{take_timer_int}}      & 5'h07) |
+                                ({5{take_soft_int}}       & 5'h03) |
+                                ({5{take_int_timer0_int}} & 5'h1d) |
+                                ({5{take_int_timer1_int}} & 5'h1c) |
+                                ({5{take_ce_int}}         & 5'h1e) |
+                                ({5{illegal_r}}           & 5'h02) |
+                                ({5{ecall_r}}             & 5'h0b) |
+                                ({5{inst_acc_r}}          & 5'h01) |
+                                ({5{ebreak_r | i0_trigger_hit_r}}   & 5'h03) |
+                                ({5{lsu_exc_ma_r & ~lsu_exc_st_r}}  & 5'h04) |
+                                ({5{lsu_exc_acc_r & ~lsu_exc_st_r}} & 5'h05) |
+                                ({5{lsu_exc_ma_r & lsu_exc_st_r}}   & 5'h06) |
+                                ({5{lsu_exc_acc_r & lsu_exc_st_r}}  & 5'h07)
+                                ) & ~{5{take_nmi}};
+
+   //
+   // Interrupts
+   //
+   // exceptions that are committed have already happened and will cause an int at E4 to wait a cycle
+   // or more if MSTATUS[MIE] is cleared.
+   //
+   // -in priority order, highest to lowest
+   // -single cycle window where a csr write to MIE/MSTATUS is at E4 when the other conditions for externals are met.
+   //  Hold off externals for a cycle to make sure we are consistent with what was just written
+   assign mhwakeup_ready =  ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MEIP]   & mie_ns[MIE_MEIE];
+   assign ext_int_ready   = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MEIP]   & mie_ns[MIE_MEIE] & ~ignore_ext_int_due_to_lsu_stall;
+   assign ce_int_ready    = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MCEIP]  & mie_ns[MIE_MCEIE];
+   assign soft_int_ready  = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MSIP]   & mie_ns[MIE_MSIE];
+   assign timer_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MTIP]   & mie_ns[MIE_MTIE];
+   // (mhwakeup_ready is ext_int_ready without the LSU stall qualifier)
+   // MIP for internal timers pulses for 1 clock, resets the timer counter. Mip won't hold past the various stall conditions.
+   assign int_timer0_int_possible = mstatus_mie_ns & mie_ns[MIE_MITIE0];
+   assign int_timer0_int_ready = mip[MIP_MITIP0] & int_timer0_int_possible;
+   assign int_timer1_int_possible = mstatus_mie_ns & mie_ns[MIE_MITIE1];
+   assign int_timer1_int_ready = mip[MIP_MITIP1] & int_timer1_int_possible;
+
+   // Internal timers pulse and reset. If core is PMU/FW halted, the pulse will cause an exit from halt, but won't stick around
+   // Make it sticky, also for 1 cycle stall conditions.
+   assign int_timer_stalled = dec_csr_stall_int_ff | synchronous_flush_r | exc_or_int_valid_r_d1 | mret_r;
+   // hold: set when the pulse arrives while halted or stalled; kept while still enabled and until an interrupt / ext int start is taken or debug halt mode is entered
+   assign int_timer0_int_hold = (int_timer0_int_ready & (pmu_fw_tlu_halted_f | int_timer_stalled)) | (int_timer0_int_possible & int_timer0_int_hold_f & ~interrupt_valid_r & ~take_ext_int_start & ~internal_dbg_halt_mode_f);
+   assign int_timer1_int_hold = (int_timer1_int_ready & (pmu_fw_tlu_halted_f | int_timer_stalled)) | (int_timer1_int_possible & int_timer1_int_hold_f & ~interrupt_valid_r & ~take_ext_int_start & ~internal_dbg_halt_mode_f);
+
+   // set in debug halt mode unless a single step is running
+   assign internal_dbg_halt_timers = internal_dbg_halt_mode_f & ~dcsr_single_step_running;
+
+   // Any one of the conditions below blocks the maskable interrupts this cycle
+   assign block_interrupts = ( (internal_dbg_halt_mode & (~dcsr_single_step_running | dec_tlu_i0_valid_r)) | // No ints in db-halt unless we are single stepping
+                               internal_pmu_fw_halt_mode | i_cpu_halt_req_d1 |// No ints in PMU/FW halt. First we exit halt
+                               take_nmi | // NMI is top priority
+                               ebreak_to_debug_mode_r | // Heading to debug mode, hold off ints
+                               synchronous_flush_r | // exception flush this cycle
+                               exc_or_int_valid_r_d1 | // ext/int past cycle (need time for MIE to update)
+                               mret_r |    // mret in progress, for cases where ISR enables ints before mret
+                               ext_int_freeze_d1 // Fast interrupt in progress (optional)
+                               );
+
+
+if (pt.FAST_INTERRUPT_REDIRECT) begin
+   // Fast redirect: an unblocked external interrupt first raises take_ext_int_start; ext_int_freeze spans
+   // that cycle and its _d1/_d2/_d3 versions, and the interrupt is taken from _d3 if no lsu_fir_error bit is set.
+   assign ignore_ext_int_due_to_lsu_stall = lsu_fastint_stall_any;
+   assign take_ext_int_start = ~block_interrupts & ext_int_ready;
+   assign ext_int_freeze = take_ext_int_start_d3 | take_ext_int_start_d2 | take_ext_int_start_d1 | take_ext_int_start;
+   assign take_ext_int = ~(|lsu_fir_error[1:0]) & take_ext_int_start_d3;
+   // MEICPCT becomes illegal if fast ints are enabled
+   assign fast_int_meicpct = dec_csr_any_unq_d & csr_meicpct;
+
+end
+else begin
+   // No fast redirect: the external interrupt is taken directly and every fast-interrupt signal is tied low
+   assign take_ext_int = ~block_interrupts & ext_int_ready;
+   assign ignore_ext_int_due_to_lsu_stall = 1'b0;
+   assign fast_int_meicpct = 1'b0;
+   assign ext_int_freeze = 1'b0;
+   assign ext_int_freeze_d1 = 1'b0;
+   assign take_ext_int_start = 1'b0;
+   assign take_ext_int_start_d1 = 1'b0;
+   assign take_ext_int_start_d2 = 1'b0;
+   assign take_ext_int_start_d3 = 1'b0;
+end
+   // Interrupt selection: each take_* is masked by every higher-priority *_ready term (ext > ce > soft > timer > int_timer0 > int_timer1) and by block_interrupts
+   assign take_ce_int  = ce_int_ready & ~ext_int_ready & ~block_interrupts;
+   assign take_soft_int = soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts;
+   assign take_timer_int = timer_int_ready & ~soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts;
+   assign take_int_timer0_int = (int_timer0_int_ready | int_timer0_int_hold_f) & int_timer0_int_possible & ~dec_csr_stall_int_ff &
+                                ~timer_int_ready & ~soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts;
+   assign take_int_timer1_int = (int_timer1_int_ready | int_timer1_int_hold_f) & int_timer1_int_possible & ~dec_csr_stall_int_ff &
+                                ~(int_timer0_int_ready | int_timer0_int_hold_f) & ~timer_int_ready & ~soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts;
+   // NMI: not in pmu/fw halt mode; in debug halt mode only during a single step with DCSR[STEPIE] and no valid / just-finished instruction; also taken at _d3 of a fast interrupt with an lsu_fir_error
+   assign take_reset = reset_delayed & mpc_reset_run_req;
+   assign take_nmi = nmi_int_detected & ~internal_pmu_fw_halt_mode & (~internal_dbg_halt_mode | (dcsr_single_step_running_f & dcsr[DCSR_STEPIE] & ~dec_tlu_i0_valid_r & ~dcsr_single_step_done_f)) &
+                     ~synchronous_flush_r & ~mret_r & ~take_reset & ~ebreak_to_debug_mode_r & (~ext_int_freeze_d1 | (take_ext_int_start_d3 & |lsu_fir_error[1:0]));
+   // any interrupt (including NMI) being taken this cycle
+   assign interrupt_valid_r = take_ext_int | take_timer_int | take_soft_int | take_nmi | take_ce_int | take_int_timer0_int | take_int_timer1_int;
+
+
+   // Compute interrupt path (addresses are kept as [31:1], so {cause, 1'b0} adds 4 * CAUSE bytes):
+   // If vectored async is set in mtvec, flush path for interrupts is MTVEC + (4 * CAUSE); NMI always goes to nmi_vec
+   assign vectored_path[31:1]  = {mtvec[30:1], 1'b0} + {25'b0, exc_cause_r[4:0], 1'b0};
+   assign interrupt_path[31:1] = take_nmi ? nmi_vec[31:1] : ((mtvec[0] == 1'b1) ? vectored_path[31:1] : {mtvec[30:1], 1'b0});
+   // selects for flushing to the next PC (npc_r) and for resuming at npc_r_d1 after pmu/fw halt or pause
+   assign sel_npc_r  = lsu_i0_rfnpc_r | fence_i_r | iccm_repair_state_rfnpc | (i_cpu_run_req_d1 & ~interrupt_valid_r) | (rfpc_i0_r & ~dec_tlu_i0_valid_r);
+   assign sel_npc_resume = (i_cpu_run_req_d1 & pmu_fw_tlu_halted_f) | pause_expired_r;
+   // select lsu_fir_addr at _d3 of the fast interrupt sequence when no lsu_fir_error bit is set
+   assign sel_fir_addr = take_ext_int_start_d3 & ~|lsu_fir_error[1:0];
+
+   assign synchronous_flush_r  = i0_exception_valid_r | // exception
+                                 rfpc_i0_r | // rfpc
+                                 lsu_exc_valid_r |  // lsu exception in either pipe 0 or pipe 1
+                                 fence_i_r |  // fence, a rfnpc
+                                 lsu_i0_rfnpc_r | // lsu dccm sb ecc
+                                 iccm_repair_state_rfnpc | // Iccm sb ecc
+                                 debug_resume_req_f | // resume from debug halt, fetch the dpc
+                                 sel_npc_resume |  // resume from pmu/fw halt, or from pause and fetch the NPC
+                                 dec_tlu_wr_pause_r_d1 | // flush at start of pause
+                                 i0_trigger_hit_r; // trigger hit, ebreak or goto debug mode
+   // every source of a lower-pipe flush
+   assign tlu_flush_lower_r = interrupt_valid_r | mret_r | synchronous_flush_r | take_halt | take_reset | take_ext_int_start;
+   // Flush target: reset vector on take_reset, otherwise an AND/OR mux of the candidate addresses (most terms are masked by take_nmi so that the NMI path wins)
+   assign tlu_flush_path_r[31:1] = take_reset ? rst_vec[31:1] :
+
+                                    ( ({31{sel_fir_addr}} & lsu_fir_addr[31:1]) |
+                                      ({31{~take_nmi & sel_npc_r}} & npc_r[31:1]) |
+                                      ({31{~take_nmi & rfpc_i0_r & dec_tlu_i0_valid_r & ~sel_npc_r}} & dec_tlu_i0_pc_r[31:1]) |
+                                      ({31{interrupt_valid_r & ~sel_fir_addr}} & interrupt_path[31:1]) |
+                                      ({31{(i0_exception_valid_r | lsu_exc_valid_r |
+                                            (i0_trigger_hit_r & ~trigger_hit_dmode_r)) & ~interrupt_valid_r & ~sel_fir_addr}} & {mtvec[30:1],1'b0}) |
+                                      ({31{~take_nmi & mret_r}} & mepc[31:1]) |
+                                      ({31{~take_nmi & debug_resume_req_f}} & dpc[31:1]) |
+                                      ({31{~take_nmi & sel_npc_resume}} & npc_r_d1[31:1]) );
+   // capture the flush path whenever a flush is issued (en = tlu_flush_lower_r)
+   rvdffpcie #(31)  flush_lower_ff (.*, .en(tlu_flush_lower_r),
+                                 .din({tlu_flush_path_r[31:1]}),
+                                 .dout({tlu_flush_path_r_d1[31:1]}));
+   // exported flush controls: _wb is the _d1 version of the flush, _r the current-cycle flush and path
+   assign dec_tlu_flush_lower_wb = tlu_flush_lower_r_d1;
+   assign dec_tlu_flush_lower_r = tlu_flush_lower_r;
+   assign dec_tlu_flush_path_r[31:1] = tlu_flush_path_r[31:1];
+
+
+   // this is used to capture mepc, etc.: any exception, interrupt, or trigger hit that is not going to debug mode
+   assign exc_or_int_valid_r = lsu_exc_valid_r | i0_exception_valid_r | interrupt_valid_r | (i0_trigger_hit_r & ~trigger_hit_dmode_r);
+
+   // 12-bit bundle of exception/interrupt info; din and dout entries pair up by position (exc_cause is 5 bits wide)
+   rvdffie #(12)  excinfo_wb_ff (.*,
+                                 .din({interrupt_valid_r, i0_exception_valid_r, exc_or_int_valid_r,
+                                       exc_cause_r[4:0], tlu_i0_commit_cmt & ~illegal_r, i0_trigger_hit_r, // i0_valid_wb is a commit that is not illegal
+                                       take_nmi, pause_expired_r }),
+                                 .dout({interrupt_valid_r_d1, i0_exception_valid_r_d1, exc_or_int_valid_r_d1,
+                                        exc_cause_wb[4:0], i0_valid_wb, trigger_hit_r_d1,
+                                        take_nmi_r_d1, pause_expired_wb}));
+
+   //----------------------------------------------------------------------
+   //
+   // CSRs
+   //
+   //----------------------------------------------------------------------
+
+
+   // ----------------------------------------------------------------------
+   // MISA (RO)
+   //  [31:30] XLEN - implementation width, 2'b01 - 32 bits
+   //  [12]    M    - integer mul/div
+   //  [8]     I    - RV32I
+   //  [2]     C    - Compressed extension
+   // 12-bit CSR address of MISA
+   localparam MISA = 12'h301;
+   // 12-bit CSR addresses of the identification registers
+   localparam MVENDORID = 12'hf11,
+              MARCHID   = 12'hf12,
+              MIMPID    = 12'hf13,
+              MHARTID   = 12'hf14;
+
+
+   // ----------------------------------------------------------------------
+   // MSTATUS (RW)
+   // [12:11] MPP  : Prior priv level, always 2'b11, not flopped
+   // [7]     MPIE : Int enable previous [1]
+   // [3]     MIE  : Int enable          [0]
+   localparam MSTATUS       = 12'h300;
+   // (the trailing [1] / [0] above are the positions of MPIE / MIE inside the 2-bit mstatus vector)
+
+   //When executing a MRET instruction, supposing MPP holds the value 3, MIE
+   //is set to MPIE; the privilege mode is changed to 3; MPIE is set to 1; and MPP is set to 3
+   // CSR writes are dropped on a trigger hit or rfpc
+   assign dec_csr_wen_r_mod = dec_csr_wen_r & ~i0_trigger_hit_r & ~rfpc_i0_r;
+   assign wr_mstatus_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSTATUS);
+
+   // set this even if we don't go to fwhalt due to debug halt. We committed the inst, so ...
+   assign set_mie_pmu_fw_halt = ~mpmc_b_ns[1] & fw_halt_req;
+   // Next {MPIE, MIE}, term by term: trap -> {MIE, 0}; trap with MSTATUS write -> {wrdata[3], 0}; mret -> {1, MPIE}; fw halt -> {MPIE, 1}; write -> {wrdata[7], wrdata[3]}; else hold
+   assign mstatus_ns[1:0] = ( ({2{~wr_mstatus_r & exc_or_int_valid_r}} & {mstatus[MSTATUS_MIE], 1'b0}) |
+                              ({2{ wr_mstatus_r & exc_or_int_valid_r}} & {dec_csr_wrdata_r[3], 1'b0}) |
+                              ({2{mret_r & ~exc_or_int_valid_r}} & {1'b1, mstatus[1]}) |
+                              ({2{set_mie_pmu_fw_halt}} & {mstatus[1], 1'b1}) |
+                              ({2{wr_mstatus_r & ~exc_or_int_valid_r}} & {dec_csr_wrdata_r[7], dec_csr_wrdata_r[3]}) |
+                              ({2{~wr_mstatus_r & ~exc_or_int_valid_r & ~mret_r & ~set_mie_pmu_fw_halt}} & mstatus[1:0]) );
+
+   // gate MIE if we are single stepping and DCSR[STEPIE] is off
+   assign mstatus_mie_ns = mstatus[MSTATUS_MIE] & (~dcsr_single_step_running_f | dcsr[DCSR_STEPIE]);
+
+   // ----------------------------------------------------------------------
+   // MTVEC (RW)
+   // [31:2] BASE : Trap vector base address
+   // [1] - Reserved, not implemented, reads zero
+   // [0]  MODE : 0 = Direct, 1 = Asyncs are vectored to BASE + (4 * CAUSE)
+   localparam MTVEC         = 12'h305;
+   // Only 31 bits are stored: mtvec[30:1] = BASE[31:2], mtvec[0] = MODE (write data bit 1 is dropped).
+   assign wr_mtvec_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTVEC);
+   assign mtvec_ns[30:0] = {dec_csr_wrdata_r[31:2], dec_csr_wrdata_r[0]} ;
+   rvdffe #(31)  mtvec_ff (.*, .en(wr_mtvec_r), .din(mtvec_ns[30:0]), .dout(mtvec[30:0]));
+
+   // ----------------------------------------------------------------------
+   // MIP (RW)
+   //
+   // [30] MCEIP  : (RO) M-Mode Correctable Error interrupt pending
+   // [29] MITIP0 : (RO) M-Mode Internal Timer0 interrupt pending
+   // [28] MITIP1 : (RO) M-Mode Internal Timer1 interrupt pending
+   // [11] MEIP   : (RO) M-Mode external interrupt pending
+   // [7]  MTIP   : (RO) M-Mode timer interrupt pending
+   // [3]  MSIP   : (RO) M-Mode software interrupt pending
+   localparam MIP           = 12'h344;
+   // Correctable-error interrupt request: OR of the DCCM, ICCM and I-cache error-threshold requests.
+   assign ce_int = (mdccme_ce_req | miccme_ce_req | mice_ce_req);
+   // Next-state pending bits, packed in the order of the bit table above (MCEIP first, MSIP last).
+   assign mip_ns[5:0] = {ce_int, dec_timer_t0_pulse, dec_timer_t1_pulse, mexintpend, timer_int_sync, soft_int_sync};
+
+   // ----------------------------------------------------------------------
+   // MIE (RW)
+   // [30] MCEIE  : (RW) M-Mode Correctable Error interrupt enable
+   // [29] MITIE0 : (RW) M-Mode Internal Timer0 interrupt enable
+   // [28] MITIE1 : (RW) M-Mode Internal Timer1 interrupt enable
+   // [11] MEIE   : (RW) M-Mode external interrupt enable
+   // [7]  MTIE   : (RW) M-Mode timer interrupt enable
+   // [3]  MSIE   : (RW) M-Mode software interrupt enable
+   localparam MIE           = 12'h304;
+   // mie[5:0] = {MCEIE, MITIE0, MITIE1, MEIE, MTIE, MSIE}; all six bits are taken from the write data.
+   assign wr_mie_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MIE);
+   assign mie_ns[5:0] = wr_mie_r ? {dec_csr_wrdata_r[30:28], dec_csr_wrdata_r[11], dec_csr_wrdata_r[7], dec_csr_wrdata_r[3]} : mie[5:0];
+   rvdff #(6)  mie_ff (.*, .clk(csr_wr_clk), .din(mie_ns[5:0]), .dout(mie[5:0]));
+
+
+   // ----------------------------------------------------------------------
+   // MCYCLEL (RW)
+   // [31:0] : Lower Cycle count
+
+   localparam MCYCLEL       = 12'hb00;
+   // Suppress counting for an ebreak that enters debug mode when DCSR[STOPC] is set.
+   assign kill_ebreak_count_r = ebreak_to_debug_mode_r & dcsr[DCSR_STOPC];
+
+   assign wr_mcyclel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCYCLEL);
+   // Despite its name this is the count ENABLE: low while halted with STOPC, fw-halted, or mcountinhibit[0] is set.
+   assign mcyclel_cout_in = ~(kill_ebreak_count_r | (dec_tlu_dbg_halted & dcsr[DCSR_STOPC]) | dec_tlu_pmu_fw_halted | mcountinhibit[0]);
+
+   // split for power: the upper 24 bits are only enabled on a carry out of the low byte (or on a write)
+   assign {mcyclela_cout, mcyclel_inc[7:0]}  = mcyclel[7:0] +  {7'b0, 1'b1};
+   assign {mcyclel_cout,  mcyclel_inc[31:8]} = mcyclel[31:8] + {23'b0, mcyclela_cout};
+   // A CSR write overrides the increment.
+   assign mcyclel_ns[31:0] = wr_mcyclel_r ? dec_csr_wrdata_r[31:0] : mcyclel_inc[31:0];
+
+   rvdffe #(24) mcyclel_bff      (.*, .clk(free_l2clk), .en(wr_mcyclel_r | (mcyclela_cout & mcyclel_cout_in)),    .din(mcyclel_ns[31:8]), .dout(mcyclel[31:8]));
+   rvdffe #(8)  mcyclel_aff      (.*, .clk(free_l2clk), .en(wr_mcyclel_r | mcyclel_cout_in),  .din(mcyclel_ns[7:0]),  .dout(mcyclel[7:0]));
+
+   // ----------------------------------------------------------------------
+   // MCYCLEH (RW)
+   // [63:32] : Higher Cycle count
+   // Chained with mcyclel. Note: mcyclel overflow due to a mcycleh write gets ignored.
+
+   localparam MCYCLEH       = 12'hb80;
+
+   assign wr_mcycleh_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCYCLEH);
+   // mcyclel_cout_f is not defined in this section; presumably a registered copy of mcyclel_cout - TODO confirm.
+   assign mcycleh_inc[31:0] = mcycleh[31:0] + {31'b0, mcyclel_cout_f};
+   assign mcycleh_ns[31:0]  = wr_mcycleh_r ? dec_csr_wrdata_r[31:0] : mcycleh_inc[31:0];
+   // Updated only on a CSR write or when a carry arrives from the low word.
+   rvdffe #(32)  mcycleh_ff (.*, .clk(free_l2clk), .en(wr_mcycleh_r | mcyclel_cout_f), .din(mcycleh_ns[31:0]), .dout(mcycleh[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MINSTRETL (RW)
+   // [31:0] : Lower Instruction retired count
+   // From the spec "Some CSRs, such as the instructions retired counter, instret, may be modified as side effects
+   // of instruction execution. In these cases, if a CSR access instruction reads a CSR, it reads the
+   // value prior to the execution of the instruction. If a CSR access instruction writes a CSR, the
+   // update occurs after the execution of the instruction. In particular, a value written to instret by
+   // one instruction will be the value read by the following instruction (i.e., the increment of instret
+   // caused by the first instruction retiring happens before the write of the new value)."
+   localparam MINSTRETL     = 12'hb02;
+   // Counted retire: valid i0 that is not ebreak/ecall/ebreak-to-debug/illegal, with mcountinhibit[2] clear.
+   assign i0_valid_no_ebreak_ecall_r = dec_tlu_i0_valid_r & ~(ebreak_r | ecall_r | ebreak_to_debug_mode_r | illegal_r | mcountinhibit[2]);
+
+   assign wr_minstretl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MINSTRETL);
+   // Incrementer split into a low byte and the upper 24 bits, chained through minstretl_couta.
+   assign {minstretl_couta, minstretl_inc[7:0]} = minstretl[7:0] + {7'b0,1'b1};
+   assign {minstretl_cout, minstretl_inc[31:8]} = minstretl[31:8] + {23'b0, minstretl_couta};
+   // Low byte updates on a counted, committed retire or on a CSR write.
+   assign minstret_enable = (i0_valid_no_ebreak_ecall_r & tlu_i0_commit_cmt) | wr_minstretl_r;
+   // Carry toward minstreth; dropped on a minstreth write, when nothing counted retires, or while debug-halted.
+   assign minstretl_cout_ns = minstretl_cout & ~wr_minstreth_r & i0_valid_no_ebreak_ecall_r & ~dec_tlu_dbg_halted;
+
+   assign minstretl_ns[31:0] = wr_minstretl_r ? dec_csr_wrdata_r[31:0] : minstretl_inc[31:0];
+   rvdffe #(24)  minstretl_bff (.*, .en(wr_minstretl_r | (minstretl_couta & minstret_enable)),
+                                .din(minstretl_ns[31:8]), .dout(minstretl[31:8]));
+   rvdffe #(8)   minstretl_aff (.*, .en(minstret_enable),
+                                .din(minstretl_ns[7:0]),  .dout(minstretl[7:0]));
+
+   // Read value is the flopped count, unmodified.
+   assign minstretl_read[31:0] = minstretl[31:0];
+   // ----------------------------------------------------------------------
+   // MINSTRETH (RW)
+   // [63:32] : Higher Instret count
+   // Chained with minstretl. Note: minstretl overflow due to a minstreth write gets ignored.
+
+   localparam MINSTRETH     = 12'hb82;
+
+   assign wr_minstreth_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MINSTRETH);
+   // minstretl_cout_f / minstret_enable_f are not defined in this section; presumably registered copies - TODO confirm.
+   assign minstreth_inc[31:0] = minstreth[31:0] + {31'b0, minstretl_cout_f};
+   assign minstreth_ns[31:0]  = wr_minstreth_r ? dec_csr_wrdata_r[31:0] : minstreth_inc[31:0];
+   rvdffe #(32)  minstreth_ff (.*, .en((minstret_enable_f & minstretl_cout_f) | wr_minstreth_r), .din(minstreth_ns[31:0]), .dout(minstreth[31:0]));
+   // Read value is the incremented value, so a carry not yet flopped into minstreth is already visible.
+   assign minstreth_read[31:0] = minstreth_inc[31:0];
+
+   // ----------------------------------------------------------------------
+   // MSCRATCH (RW)
+   // [31:0] : Scratch register
+   localparam MSCRATCH      = 12'h340;
+
+   assign wr_mscratch_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSCRATCH);
+   // Plain 32-bit register: loaded with the full write data, no other update source.
+   rvdffe #(32)  mscratch_ff (.*, .en(wr_mscratch_r), .din(dec_csr_wrdata_r[31:0]), .dout(mscratch[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MEPC (RW)
+   // [31:1] : Exception PC
+   localparam MEPC          = 12'h341;
+
+   // NPC
+   // Next-PC source select: EXU npc for a valid i0 without a prior-cycle flush, flush path after a redirecting flush, else hold.
+   assign sel_exu_npc_r = ~dec_tlu_dbg_halted & ~tlu_flush_lower_r_d1 & dec_tlu_i0_valid_r;
+   assign sel_flush_npc_r = ~dec_tlu_dbg_halted & tlu_flush_lower_r_d1 & ~dec_tlu_flush_noredir_r_d1;
+   assign sel_hold_npc_r = ~sel_exu_npc_r & ~sel_flush_npc_r;
+
+   assign npc_r[31:1] =  ( ({31{sel_exu_npc_r}} & exu_npc_r[31:1]) |
+                           ({31{~mpc_reset_run_req & reset_delayed}} & rst_vec[31:1]) | // init to reset vector for mpc halt on reset case
+                           ({31{(sel_flush_npc_r)}} & tlu_flush_path_r_d1[31:1]) |
+                           ({31{(sel_hold_npc_r)}} & npc_r_d1[31:1]) );
+
+   rvdffpcie #(31)  npwbc_ff (.*, .en(sel_exu_npc_r | sel_flush_npc_r | reset_delayed), .din(npc_r[31:1]), .dout(npc_r_d1[31:1]));
+
+   // PC has to be captured for exceptions and interrupts. For MRET, we could execute it and then take an
+   // interrupt before the next instruction.
+   assign pc0_valid_r = ~dec_tlu_dbg_halted & dec_tlu_i0_valid_r;
+   // pc_r: PC of the current valid i0, otherwise the last captured PC.
+   assign pc_r[31:1]  = ( ({31{ pc0_valid_r}} & dec_tlu_i0_pc_r[31:1]) |
+                          ({31{~pc0_valid_r}} & pc_r_d1[31:1]));
+
+   rvdffpcie #(31)  pwbc_ff (.*, .en(pc0_valid_r), .din(pc_r[31:1]), .dout(pc_r_d1[31:1]));
+
+   assign wr_mepc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEPC);
+   // mepc next state: exceptions (and trigger hits selecting the PC) capture pc_r, interrupts capture npc_r, else CSR write or hold.
+   assign mepc_ns[31:1] = ( ({31{i0_exception_valid_r | lsu_exc_valid_r | mepc_trigger_hit_sel_pc_r}} & pc_r[31:1]) |
+                            ({31{interrupt_valid_r}} & npc_r[31:1]) |
+                            ({31{wr_mepc_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[31:1]) |
+                            ({31{~wr_mepc_r & ~exc_or_int_valid_r}} & mepc[31:1]) );
+
+
+   rvdffe #(31)  mepc_ff (.*, .en(i0_exception_valid_r | lsu_exc_valid_r | mepc_trigger_hit_sel_pc_r | interrupt_valid_r | wr_mepc_r), .din(mepc_ns[31:1]), .dout(mepc[31:1]));
+
+   // ----------------------------------------------------------------------
+   // MCAUSE (RW)
+   // [31:0] : Exception Cause
+   localparam MCAUSE        = 12'h342;
+   // NMI cause selects: LSU store error, LSU load error, or an external-interrupt fast-path (FIR) error.
+   assign wr_mcause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCAUSE);
+   assign mcause_sel_nmi_store = exc_or_int_valid_r & take_nmi & nmi_lsu_store_type;
+   assign mcause_sel_nmi_load = exc_or_int_valid_r & take_nmi & nmi_lsu_load_type;
+   assign mcause_sel_nmi_ext = exc_or_int_valid_r & take_nmi & take_ext_int_start_d3 & |lsu_fir_error[1:0] & ~nmi_int_detected_f;
+   // FIR value decoder (lsu_fir_error -> mcause written on the NMI)
+   // 0 - no error
+   // 1 - uncorrectable ecc  => f000_1000
+   // 2 - dccm region access error => f000_1001
+   // 3 - non dccm region access error => f000_1002
+   assign mcause_fir_error_type[1:0] = {&lsu_fir_error[1:0], lsu_fir_error[1] & ~lsu_fir_error[0]};
+   // AND-OR mux: NMI causes, then regular trap cause {interrupt flag, 26'b0, 5-bit cause}, then CSR write, then hold.
+   assign mcause_ns[31:0] = ( ({32{mcause_sel_nmi_store}} & {32'hf000_0000}) |
+                              ({32{mcause_sel_nmi_load}} & {32'hf000_0001}) |
+                              ({32{mcause_sel_nmi_ext}} & {28'hf000_100, 2'b0, mcause_fir_error_type[1:0]}) |
+                              ({32{exc_or_int_valid_r & ~take_nmi}} & {interrupt_valid_r, 26'b0, exc_cause_r[4:0]}) |
+                              ({32{wr_mcause_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[31:0]) |
+                              ({32{~wr_mcause_r & ~exc_or_int_valid_r}} & mcause[31:0]) );
+
+   rvdffe #(32)  mcause_ff (.*, .en(exc_or_int_valid_r | wr_mcause_r), .din(mcause_ns[31:0]), .dout(mcause[31:0]));
+   // ----------------------------------------------------------------------
+   // MSCAUSE (RW)
+   // [3:0] : Secondary exception Cause
+   localparam MSCAUSE       = 12'h7ff;
+
+   assign wr_mscause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSCAUSE);
+   // Instruction-access-fault secondary cause: icaf_type 2'b00 maps to 4'b1001, otherwise icaf_type zero-extended.
+   assign ifu_mscause[3:0]  =  (dec_tlu_packet_r.icaf_type[1:0] == 2'b00) ? 4'b1001 :
+                               {2'b00 , dec_tlu_packet_r.icaf_type[1:0]} ;
+   // Secondary cause by trap source: LSU exception, trigger hit (1), ebreak (2), instruction access fault.
+   assign mscause_type[3:0] = ( ({4{lsu_i0_exc_r}} & lsu_error_pkt_r.mscause[3:0]) |
+                                ({4{i0_trigger_hit_r}} & 4'b0001) |
+                                ({4{ebreak_r}} & 4'b0010) |
+                                ({4{inst_acc_r}} & ifu_mscause[3:0])
+                                );
+   // A trap takes priority over a CSR write; otherwise write or hold.
+   assign mscause_ns[3:0] = ( ({4{exc_or_int_valid_r}} & mscause_type[3:0]) |
+                              ({4{ wr_mscause_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[3:0]) |
+                              ({4{~wr_mscause_r & ~exc_or_int_valid_r}} & mscause[3:0])
+                             );
+
+   rvdff #(4)  mscause_ff (.*, .clk(e4e5_int_clk), .din(mscause_ns[3:0]), .dout(mscause[3:0]));
+   // ----------------------------------------------------------------------
+   // MTVAL (RW)
+   // [31:0] : Exception address if relevant
+   localparam MTVAL         = 12'h343;
+   // Capture selects (all suppressed when an NMI is taken): PC, PC+2, illegal instruction bits, LSU error address.
+   assign wr_mtval_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTVAL);
+   assign mtval_capture_pc_r = exc_or_int_valid_r & (ebreak_r | (inst_acc_r & ~inst_acc_second_r) | mepc_trigger_hit_sel_pc_r) & ~take_nmi;
+   assign mtval_capture_pc_plus2_r = exc_or_int_valid_r & (inst_acc_r & inst_acc_second_r) & ~take_nmi;
+   assign mtval_capture_inst_r = exc_or_int_valid_r & illegal_r & ~take_nmi;
+   assign mtval_capture_lsu_r = exc_or_int_valid_r & lsu_exc_valid_r & ~take_nmi;
+   assign mtval_clear_r = exc_or_int_valid_r & ~mtval_capture_pc_r & ~mtval_capture_inst_r & ~mtval_capture_lsu_r & ~mepc_trigger_hit_sel_pc_r;
+
+   // mtval_clear_r only disables the hold term (last line); with no other term selected mtval_ns evaluates to 0.
+   assign mtval_ns[31:0] = (({32{mtval_capture_pc_r}} & {pc_r[31:1], 1'b0}) |
+                            ({32{mtval_capture_pc_plus2_r}} & {pc_r[31:1] + 31'b1, 1'b0}) |
+                            ({32{mtval_capture_inst_r}} & dec_illegal_inst[31:0]) |
+                            ({32{mtval_capture_lsu_r}} & lsu_error_pkt_addr_r[31:0]) |
+                            ({32{wr_mtval_r & ~interrupt_valid_r}} & dec_csr_wrdata_r[31:0]) |
+                            ({32{~take_nmi & ~wr_mtval_r & ~mtval_capture_pc_r & ~mtval_capture_inst_r & ~mtval_clear_r & ~mtval_capture_lsu_r}} & mtval[31:0]) );
+
+
+   rvdffe #(32)  mtval_ff (.*, .en(tlu_flush_lower_r | wr_mtval_r), .din(mtval_ns[31:0]), .dout(mtval[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MCGC (RW) Clock gating control
+   // [31:10]: Reserved, reads 0x0
+   // [9]    : picio_clk_override
+   // [8]    : misc_clk_override
+   // [7]    : dec_clk_override
+   // [6]    : Unused
+   // [5]    : ifu_clk_override
+   // [4]    : lsu_clk_override
+   // [3]    : bus_clk_override
+   // [2]    : pic_clk_override
+   // [1]    : dccm_clk_override
+   // [0]    : icm_clk_override
+   localparam MCGC          = 12'h7f8;
+   assign wr_mcgc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCGC);
+   // Bit 9 is stored inverted and re-inverted on read; presumably so it reads 1 when the flop holds its reset value - TODO confirm reset value.
+   assign mcgc_ns[9:0] = wr_mcgc_r ? {~dec_csr_wrdata_r[9], dec_csr_wrdata_r[8:0]} : mcgc_int[9:0];
+   rvdffe #(10)  mcgc_ff (.*, .en(wr_mcgc_r), .din(mcgc_ns[9:0]), .dout(mcgc_int[9:0]));
+
+   assign mcgc[9:0] = {~mcgc_int[9], mcgc_int[8:0]};
+   // Fan out the architectural (re-inverted) value to the per-unit clock override outputs.
+   assign dec_tlu_picio_clk_override= mcgc[9];
+   assign dec_tlu_misc_clk_override = mcgc[8];
+   assign dec_tlu_dec_clk_override  = mcgc[7];
+   //assign dec_tlu_exu_clk_override  = mcgc[6];
+   assign dec_tlu_ifu_clk_override  = mcgc[5];
+   assign dec_tlu_lsu_clk_override  = mcgc[4];
+   assign dec_tlu_bus_clk_override  = mcgc[3];
+   assign dec_tlu_pic_clk_override  = mcgc[2];
+   assign dec_tlu_dccm_clk_override = mcgc[1];
+   assign dec_tlu_icm_clk_override  = mcgc[0];
+
+   // ----------------------------------------------------------------------
+   // MFDC (RW) Feature Disable Control
+   // [31:19] : Reserved, reads 0x0
+   // [18:16] : DMA QoS Prty
+   // [15:13] : Reserved, reads 0x0
+   // [12]   : Disable trace
+   // [11]   : Disable external load forwarding
+   // [10]   : Disable dual issue
+   // [9]    : Disable pic multiple ints
+   // [8]    : Disable core ecc (not wired here: dec_tlu_core_ecc_disable is tied to 1'b1 below)
+   // [7]    : Disable secondary alu's
+   // [6]    : Disable side-effect posted writes (drives dec_tlu_sideeffect_posted_disable)
+   // [5]    : Disable non-blocking loads/divides
+   // [4]    : Disable fast divide
+   // [3]    : Disable branch prediction and return stack
+   // [2]    : Disable write buffer coalescing
+   // [1]    : Disable load misses that bypass the write buffer
+   // [0]    : Disable pipelining - Enable single instruction execution
+   //
+   localparam MFDC          = 12'h7f9;
+
+   assign wr_mfdc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDC);
+   // 16 flops: mfdc_int[15:13] hold MFDC[18:16] (inverted), mfdc_int[12:0] hold MFDC[12:0].
+   rvdffe #(16)  mfdc_ff (.*, .en(wr_mfdc_r), .din({mfdc_ns[15:0]}), .dout(mfdc_int[15:0]));
+
+   // flip poweron value of bit 6 for AXI build
+   if(pt.BUILD_AXI4==1) begin : axi4
+      // bits [18:16] and bit 6 are stored inverted and re-inverted on read (flips their poweron value)
+         assign mfdc_ns[15:0] = {~dec_csr_wrdata_r[18:16], dec_csr_wrdata_r[12], dec_csr_wrdata_r[11:7], ~dec_csr_wrdata_r[6], dec_csr_wrdata_r[5:0]};
+         assign mfdc[18:0] = {~mfdc_int[15:13], 3'b0, mfdc_int[12], mfdc_int[11:7], ~mfdc_int[6], mfdc_int[5:0]};
+   end
+   else begin
+      // only bits [18:16] are stored inverted and re-inverted on read in this build
+         assign mfdc_ns[15:0] = {~dec_csr_wrdata_r[18:16],dec_csr_wrdata_r[12:0]};
+         assign mfdc[18:0] = {~mfdc_int[15:13], 3'b0, mfdc_int[12:0]};
+   end
+
+   // Feature-disable outputs taken from the architectural mfdc value.
+   assign dec_tlu_dma_qos_prty[2:0] = mfdc[18:16];
+   assign dec_tlu_trace_disable = mfdc[12];
+   assign dec_tlu_external_ldfwd_disable = mfdc[11];
+   assign dec_tlu_core_ecc_disable = 1'b1;//mfdc[8]; tied high, so MFDC[8] does not control this output
+   assign dec_tlu_sideeffect_posted_disable = mfdc[6];
+   assign dec_tlu_bpred_disable = mfdc[3];
+   assign dec_tlu_wb_coalescing_disable = mfdc[2];
+   assign dec_tlu_pipelining_disable = mfdc[0];
+
+   // ----------------------------------------------------------------------
+   // MCPC (RW) Pause counter
+   // [31:0] : Reads 0x0, decs in the wb register in decode_ctl
+   // Write strobe only (the MCPC address constant is declared outside this section); blocked by an interrupt or ext-int start.
+   assign dec_tlu_wr_pause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCPC) & ~interrupt_valid_r & ~take_ext_int_start;
+
+   // ----------------------------------------------------------------------
+   // MRAC (RW)
+   // [31:0] : Region Access Control Register, 16 regions, {side_effect, cacheable} pairs
+   localparam MRAC          = 12'h7c0;
+
+   assign wr_mrac_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MRAC);
+
+   // prevent pairs of 2'b11 (side_effect and cacheable both set): the cacheable bit is forced to 0 when side_effect is 1
+   assign mrac_in[31:0] = {dec_csr_wrdata_r[31], dec_csr_wrdata_r[30] & ~dec_csr_wrdata_r[31],
+                           dec_csr_wrdata_r[29], dec_csr_wrdata_r[28] & ~dec_csr_wrdata_r[29],
+                           dec_csr_wrdata_r[27], dec_csr_wrdata_r[26] & ~dec_csr_wrdata_r[27],
+                           dec_csr_wrdata_r[25], dec_csr_wrdata_r[24] & ~dec_csr_wrdata_r[25],
+                           dec_csr_wrdata_r[23], dec_csr_wrdata_r[22] & ~dec_csr_wrdata_r[23],
+                           dec_csr_wrdata_r[21], dec_csr_wrdata_r[20] & ~dec_csr_wrdata_r[21],
+                           dec_csr_wrdata_r[19], dec_csr_wrdata_r[18] & ~dec_csr_wrdata_r[19],
+                           dec_csr_wrdata_r[17], dec_csr_wrdata_r[16] & ~dec_csr_wrdata_r[17],
+                           dec_csr_wrdata_r[15], dec_csr_wrdata_r[14] & ~dec_csr_wrdata_r[15],
+                           dec_csr_wrdata_r[13], dec_csr_wrdata_r[12] & ~dec_csr_wrdata_r[13],
+                           dec_csr_wrdata_r[11], dec_csr_wrdata_r[10] & ~dec_csr_wrdata_r[11],
+                           dec_csr_wrdata_r[9], dec_csr_wrdata_r[8] & ~dec_csr_wrdata_r[9],
+                           dec_csr_wrdata_r[7], dec_csr_wrdata_r[6] & ~dec_csr_wrdata_r[7],
+                           dec_csr_wrdata_r[5], dec_csr_wrdata_r[4] & ~dec_csr_wrdata_r[5],
+                           dec_csr_wrdata_r[3], dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[3],
+                           dec_csr_wrdata_r[1], dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[1]};
+
+   rvdffe #(32)  mrac_ff (.*, .en(wr_mrac_r), .din(mrac_in[31:0]), .dout(mrac[31:0]));
+
+   // drive to LSU/IFU
+   assign dec_tlu_mrac_ff[31:0] = mrac[31:0];
+
+   // ----------------------------------------------------------------------
+   // MDEAU (WAR0)
+   // [31:0] : Dbus Error Address Unlock register
+   // No storage: a write to this address only produces the strobe that releases the MDSEAC lock.
+   localparam MDEAU         = 12'hbc0;
+
+   assign wr_mdeau_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MDEAU);
+
+
+   // ----------------------------------------------------------------------
+   // MDSEAC (R)
+   // [31:0] : Dbus Store Error Address Capture register
+   //
+   localparam MDSEAC        = 12'hfc0;
+
+   // only capture error bus if the MDSEAC reg is not locked
+   // lock is set by a capture and held until an MDEAU write (wr_mdeau_r) releases it
+   assign mdseac_locked_ns = mdseac_en | (mdseac_locked_f & ~wr_mdeau_r);
+   // capture on an imprecise load or store error, unless an NMI is already detected or the register is locked
+   assign mdseac_en = (lsu_imprecise_error_store_any | lsu_imprecise_error_load_any) & ~nmi_int_detected_f & ~mdseac_locked_f;
+
+   rvdffe #(32)  mdseac_ff (.*, .en(mdseac_en), .din(lsu_imprecise_error_addr_any[31:0]), .dout(mdseac[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MPMC (R0W1)
+   // [0] : FW halt
+   // [1] : Set MSTATUS[MIE] on halt
+   // Bit 1 is stored inverted in mpmc_b[1]; mpmc[1] is the re-inverted architectural value.
+   localparam MPMC          = 12'h7c6;
+
+   assign wr_mpmc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MPMC);
+
+   // allow the cycle of the dbg halt flush that contains the wr_mpmc_r to
+   // set the mstatus bit potentially, use delayed version of internal dbg halt.
+   assign fw_halt_req = wr_mpmc_r & dec_csr_wrdata_r[0] & ~internal_dbg_halt_mode_f2 & ~ext_int_freeze_d1;
+   // fw_halted is set by a halt request and cleared by set_mie_pmu_fw_halt.
+   assign fw_halted_ns = (fw_halt_req | fw_halted) & ~set_mie_pmu_fw_halt;
+   assign mpmc_b_ns[1] = wr_mpmc_r ? ~dec_csr_wrdata_r[1] : ~mpmc[1];
+   rvdff #(1)  mpmc_ff (.*, .clk(csr_wr_clk), .din(mpmc_b_ns[1]), .dout(mpmc_b[1]));
+   assign mpmc[1] = ~mpmc_b[1];
+
+   // ----------------------------------------------------------------------
+   // MICECT (I-Cache error counter/threshold)
+   // [31:27] : Icache parity error threshold
+   // [26:0]  : Icache parity error count
+   localparam MICECT        = 12'h7f0;
+   // Written threshold is saturated at 26; csr_sat is shared by the MICECT, MICCMECT and MDCCMECT writes.
+   assign csr_sat[31:27] = (dec_csr_wrdata_r[31:27] > 5'd26) ? 5'd26 : dec_csr_wrdata_r[31:27];
+
+   assign wr_micect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MICECT);
+   assign micect_inc[26:0] = micect[26:0] + {26'b0, ic_perr_r};
+   assign micect_ns =  wr_micect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {micect[31:27], micect_inc[26:0]};
+
+   rvdffe #(32)  micect_ff (.*, .en(wr_micect_r | ic_perr_r), .din(micect_ns[31:0]), .dout(micect[31:0]));
+   // Request asserts when any count bit at or above position 'threshold' is set, i.e. count >= 2**threshold.
+   assign mice_ce_req = |({32'hffffffff << micect[31:27]} & {5'b0, micect[26:0]});
+
+   // ----------------------------------------------------------------------
+   // MICCMECT (ICCM error counter/threshold)
+   // [31:27] : ICCM parity error threshold
+   // [26:0]  : ICCM parity error count
+   localparam MICCMECT      = 12'h7f1;
+   // Count increments on iccm_sbecc_r or iccm_dma_sb_error; a CSR write loads {saturated threshold, count}.
+   assign wr_miccmect_r     = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MICCMECT);
+   assign miccmect_inc[26:0] = miccmect[26:0] + {26'b0, iccm_sbecc_r | iccm_dma_sb_error};
+   assign miccmect_ns        = wr_miccmect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {miccmect[31:27], miccmect_inc[26:0]};
+
+   rvdffe #(32)  miccmect_ff (.*, .clk(free_l2clk), .en(wr_miccmect_r | iccm_sbecc_r | iccm_dma_sb_error), .din(miccmect_ns[31:0]), .dout(miccmect[31:0]));
+   // Request asserts when count >= 2**threshold (any count bit at or above bit 'threshold' is set).
+   assign miccme_ce_req = |({32'hffffffff << miccmect[31:27]} & {5'b0, miccmect[26:0]});
+
+   // ----------------------------------------------------------------------
+   // MDCCMECT (DCCM error counter/threshold)
+   // [31:27] : DCCM parity error threshold
+   // [26:0]  : DCCM parity error count
+   localparam MDCCMECT      = 12'h7f2;
+   // Count increments on lsu_single_ecc_error_r_d1; a CSR write loads {saturated threshold, count}.
+   assign wr_mdccmect_r     = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MDCCMECT);
+   assign mdccmect_inc[26:0] = mdccmect[26:0] + {26'b0, lsu_single_ecc_error_r_d1};
+   assign mdccmect_ns        = wr_mdccmect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {mdccmect[31:27], mdccmect_inc[26:0]};
+
+   rvdffe #(32)  mdccmect_ff (.*, .clk(free_l2clk), .en(wr_mdccmect_r | lsu_single_ecc_error_r_d1), .din(mdccmect_ns[31:0]), .dout(mdccmect[31:0]));
+   // Request asserts when count >= 2**threshold (any count bit at or above bit 'threshold' is set).
+   assign mdccme_ce_req = |({32'hffffffff << mdccmect[31:27]} & {5'b0, mdccmect[26:0]});
+
+
+   // ----------------------------------------------------------------------
+   // MFDHT (Force Debug Halt Threshold)
+   // [5:1] : Halt timeout threshold (power of 2)
+   //   [0] : Halt timeout enabled
+   localparam MFDHT         = 12'h7ce;
+
+   assign wr_mfdht_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDHT);
+   // Write-or-hold mux; the flop is additionally enabled only on a write.
+   assign mfdht_ns[5:0] = wr_mfdht_r ? dec_csr_wrdata_r[5:0] : mfdht[5:0];
+
+   rvdffs #(6)  mfdht_ff (.*, .clk(csr_wr_clk), .en(wr_mfdht_r), .din(mfdht_ns[5:0]), .dout(mfdht[5:0]));
+
+    // ----------------------------------------------------------------------
+   // MFDHS(RW)
+   // [1] : LSU operation pending when debug halt threshold reached
+   // [0] : IFU operation pending when debug halt threshold reached
+
+   localparam MFDHS         = 12'h7cf;
+
+   assign wr_mfdhs_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDHS);
+   // CSR write wins; otherwise, on the rising edge of dbg_tlu_halted, record {LSU not idle, IFU miss state not idle}.
+   assign mfdhs_ns[1:0] = wr_mfdhs_r ? dec_csr_wrdata_r[1:0] : ((dbg_tlu_halted & ~dbg_tlu_halted_f) ? {~lsu_idle_any_f, ~ifu_miss_state_idle_f} : mfdhs[1:0]);
+
+   rvdffs #(2)  mfdhs_ff (.*, .clk(free_clk), .en(wr_mfdhs_r | dbg_tlu_halted), .din(mfdhs_ns[1:0]), .dout(mfdhs[1:0]));
+   // Timeout counter: increments while debug_halt_req_f is set, clears once halted, otherwise holds; only clocked when MFDHT[0] is set.
+   assign force_halt_ctr[31:0] = debug_halt_req_f ? (force_halt_ctr_f[31:0] + 32'b1) : (dbg_tlu_halted_f ? 32'b0 : force_halt_ctr_f[31:0]);
+
+   rvdffe #(32)  forcehaltctr_ff (.*, .en(mfdht[0]), .din(force_halt_ctr[31:0]), .dout(force_halt_ctr_f[31:0]));
+   // Force the halt when enabled and the counter has reached 2**mfdht[5:1].
+   assign force_halt = mfdht[0] & |(force_halt_ctr_f[31:0] & (32'hffffffff << mfdht[5:1]));
+
+
+   // ----------------------------------------------------------------------
+   // MEIVT (External Interrupt Vector Table (R/W))
+   // [31:10]: Base address (R/W)
+   // [9:0]  : Reserved, reads 0x0
+   localparam MEIVT         = 12'hbc8;
+
+   assign wr_meivt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEIVT);
+   // Only the 22 base-address bits [31:10] are stored.
+   rvdffe #(22)  meivt_ff (.*, .en(wr_meivt_r), .din(dec_csr_wrdata_r[31:10]), .dout(meivt[31:10]));
+
+
+   // ----------------------------------------------------------------------
+   // MEIHAP (External Interrupt Handler Access Pointer (R))
+   // [31:10]: Base address (R/W)
+   // [9:2]  : ClaimID (R)
+   // [1:0]  : Reserved, 0x0
+   localparam MEIHAP        = 12'hfc8;
+   // The claim ID is captured on the same strobe as MEICPCT (CSR write to MEICPCT or take_ext_int_start).
+   assign wr_meihap_r = wr_meicpct_r;
+
+   rvdffe #(8)  meihap_ff (.*, .en(wr_meihap_r), .din(pic_claimid[7:0]), .dout(meihap[9:2]));
+   // Pointer bits [31:2] = {MEIVT base, captured claim ID}.
+   assign dec_tlu_meihap[31:2] = {meivt[31:10], meihap[9:2]};
+   // ----------------------------------------------------------------------
+   // MEICURPL (R/W)
+   // [31:4] : Reserved (read 0x0)
+   // [3:0]  : CURRPRI - Priority level of current interrupt service routine (R/W)
+   localparam MEICURPL      = 12'hbcc;
+
+   assign wr_meicurpl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICURPL);
+   assign meicurpl_ns[3:0] = wr_meicurpl_r ? dec_csr_wrdata_r[3:0] : meicurpl[3:0];
+   // Write-or-hold register; only the low 4 bits of the write data are kept.
+   rvdff #(4)  meicurpl_ff (.*, .clk(csr_wr_clk), .din(meicurpl_ns[3:0]), .dout(meicurpl[3:0]));
+
+   // PIC needs this reg
+   assign dec_tlu_meicurpl[3:0] = meicurpl[3:0];
+
+
+   // ----------------------------------------------------------------------
+   // MEICIDPL (R/W)
+   // [31:4] : Reserved (read 0x0)
+   // [3:0]  : External Interrupt Claim ID's Priority Level Register
+   localparam MEICIDPL      = 12'hbcb;
+
+   assign wr_meicidpl_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICIDPL)) | take_ext_int_start;
+   // Capture strobe (wr_meicpct_r) has priority and loads pic_pl; otherwise CSR write data, otherwise hold.
+   assign meicidpl_ns[3:0] = wr_meicpct_r ? pic_pl[3:0] : (wr_meicidpl_r ? dec_csr_wrdata_r[3:0] : meicidpl[3:0]);
+
+
+   // ----------------------------------------------------------------------
+   // MEICPCT (Capture CLAIMID in MEIHAP and PL in MEICIDPL)
+   // [31:1] : Reserved (read 0x0)
+   // [0]    : Capture (W1, Read 0)
+   localparam MEICPCT       = 12'hbca;
+   // No storage: the strobe fires on any CSR write to this address (write data is not examined here) or on take_ext_int_start.
+   assign wr_meicpct_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICPCT)) | take_ext_int_start;
+
+   // ----------------------------------------------------------------------
+   // MEIPT (External Interrupt Priority Threshold)
+   // [31:4] : Reserved (read 0x0)
+   // [3:0]  : PRITHRESH
+   localparam MEIPT         = 12'hbc9;
+
+   assign wr_meipt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEIPT);
+   assign meipt_ns[3:0] = wr_meipt_r ? dec_csr_wrdata_r[3:0] : meipt[3:0];
+   // Write-or-hold register; only the low 4 bits of the write data are kept.
+   rvdff #(4)  meipt_ff (.*, .clk(csr_wr_clk), .din(meipt_ns[3:0]), .dout(meipt[3:0]));
+
+   // to PIC
+   assign dec_tlu_meipt[3:0] = meipt[3:0];
+   // ----------------------------------------------------------------------
+   // DCSR (R/W) (Only accessible in debug mode)
+   // [31:28] : xdebugver (hard coded to 0x4) RO
+   // [27:16] : 0x0, reserved
+   // [15]    : ebreakm
+   // [14]    : 0x0, reserved
+   // [13]    : ebreaks (0x0 for this core)
+   // [12]    : ebreaku (0x0 for this core)
+   // [11]    : stepie
+   // [10]    : stopcount
+   // [9]     : 0x0 //stoptime
+   // [8:6]   : cause (RO)
+   // [5:4]   : 0x0, reserved
+   // [3]     : nmip
+   // [2]     : step
+   // [1:0]   : prv (0x3 for this core)
+   //
+   localparam DCSR          = 12'h7b0;
+
+   // RV has clarified that 'priority 4' in the spec means top priority.
+   // 4. single step. 3. Debugger request. 2. Ebreak. 1. Trigger.
+
+   // RV debug spec indicates a cause priority change for trigger hits during single step.
+   assign trigger_hit_for_dscr_cause_r_d1 = trigger_hit_dmode_r_d1 | (trigger_hit_r_d1 & dcsr_single_step_done_f);
+   // As coded the terms are mutually exclusive: trigger (3'b010) > ebreak (3'b001) > debug_halt_req (3'b011) > single step (3'b100).
+   assign dcsr_cause[8:6] = ( ({3{dcsr_single_step_done_f & ~ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1 & ~debug_halt_req}} & 3'b100) |
+                              ({3{debug_halt_req & ~ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1}} &  3'b011) |
+                              ({3{ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1}} &  3'b001) |
+                              ({3{trigger_hit_for_dscr_cause_r_d1}} & 3'b010));
+
+   assign wr_dcsr_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DCSR);
+
+
+
+  // Multiple halt enter requests can happen before we are halted.
+  // We have to continue to upgrade based on dcsr_cause priority but we can't downgrade.
+   assign dcsr_cause_upgradeable = internal_dbg_halt_mode_f & (dcsr[8:6] == 3'b011);
+   assign enter_debug_halt_req_le = enter_debug_halt_req & (~dbg_tlu_halted | dcsr_cause_upgradeable);
+   // Halt entry updates only the cause; a CSR write updates ebreakm/stepie/stopcount/step and keeps cause; nmip is sticky on a write, else follows nmi_in_debug_mode.
+   assign nmi_in_debug_mode = nmi_int_detected_f & internal_dbg_halt_mode_f;
+   assign dcsr_ns[15:2] = enter_debug_halt_req_le ? {dcsr[15:9], dcsr_cause[8:6], dcsr[5:2]} :
+                          (wr_dcsr_r ? {dec_csr_wrdata_r[15], 3'b0, dec_csr_wrdata_r[11:10], 1'b0, dcsr[8:6], 2'b00, nmi_in_debug_mode | dcsr[3], dec_csr_wrdata_r[2]} :
+                           {dcsr[15:4], nmi_in_debug_mode, dcsr[2]});
+
+   rvdffe #(14)  dcsr_ff (.*, .clk(free_l2clk), .en(enter_debug_halt_req_le | wr_dcsr_r | internal_dbg_halt_mode | take_nmi), .din(dcsr_ns[15:2]), .dout(dcsr[15:2]));
+
+   // ----------------------------------------------------------------------
+   // DPC (R/W) (Only accessible in debug mode)
+   // [31:0] : Debug PC
+   localparam DPC           = 12'h7b1;
+   // Capture npc_r on the rising edge of dbg_tlu_halted (unless request_debug_mode_done); capture pc_r on request_debug_mode_r.
+   assign wr_dpc_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DPC);
+   assign dpc_capture_npc = dbg_tlu_halted & ~dbg_tlu_halted_f & ~request_debug_mode_done;
+   assign dpc_capture_pc = request_debug_mode_r;
+   // Priority: PC capture, then NPC capture, then CSR write.
+   assign dpc_ns[31:1] = ( ({31{~dpc_capture_pc & ~dpc_capture_npc & wr_dpc_r}} & dec_csr_wrdata_r[31:1]) |
+                           ({31{dpc_capture_pc}} & pc_r[31:1]) |
+                           ({31{~dpc_capture_pc & dpc_capture_npc}} & npc_r[31:1]) );
+
+   rvdffe #(31)  dpc_ff (.*, .en(wr_dpc_r | dpc_capture_pc | dpc_capture_npc), .din(dpc_ns[31:1]), .dout(dpc[31:1]));
+
+   // ----------------------------------------------------------------------
+   // DICAWICS (R/W) (Only accessible in debug mode)
+   // [31:25] : Reserved
+   // [24]    : Array select, 0 is data, 1 is tag
+   // [23:22] : Reserved
+   // [21:20] : Way select
+   // [19:17] : Reserved
+   // [16:3]  : Index
+   // [2:0]   : Reserved
+   localparam DICAWICS      = 12'h7c8;
+   // Only the implemented fields are stored: dicawics[16] = array select, [15:14] = way, [13:0] = index.
+   assign dicawics_ns[16:0] = {dec_csr_wrdata_r[24], dec_csr_wrdata_r[21:20], dec_csr_wrdata_r[16:3]};
+   assign wr_dicawics_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAWICS);
+
+   rvdffe #(17)  dicawics_ff (.*, .en(wr_dicawics_r), .din(dicawics_ns[16:0]), .dout(dicawics[16:0]));
+
+   // ----------------------------------------------------------------------
+   // DICAD0 (R/W) (Only accessible in debug mode)
+   //
+   // If dicawics[array] is 0
+   // [31:0]  : inst data
+   //
+   // If dicawics[array] is 1
+   // [31:16] : Tag
+   // [15:7]  : Reserved
+   // [6:4]   : LRU
+   // [3:1]   : Reserved
+   // [0]     : Valid
+   localparam DICAD0        = 12'h7c9;
+   // Loaded by a CSR write, or with ifu_ic_debug_rd_data[31:0] when ifu_ic_debug_rd_data_valid is set; the write wins if both occur.
+   assign dicad0_ns[31:0] = wr_dicad0_r ? dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[31:0];
+
+   assign wr_dicad0_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD0);
+
+   rvdffe #(32)  dicad0_ff (.*, .en(wr_dicad0_r | ifu_ic_debug_rd_data_valid), .din(dicad0_ns[31:0]), .dout(dicad0[31:0]));
+
+   // ----------------------------------------------------------------------
+   // DICAD0H (R/W) (Only accessible in debug mode)
+   //
+   // If dicawics[array] is 0
+   // [63:32]  : inst data
+   //
+   localparam DICAD0H       = 12'h7cc;
+
+   // Write strobe: CSR write that targets DICAD0H, qualified by allow_dbg_halt_csr_write
+   assign wr_dicad0h_r = (dec_csr_wraddr_r[11:0] == DICAD0H) & dec_csr_wen_r_mod & allow_dbg_halt_csr_write;
+   // A CSR write wins the data mux; otherwise bits [63:32] of the IFU debug read data are loaded
+   assign dicad0h_ns[31:0] = wr_dicad0h_r ? dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[63:32];
+   rvdffe #(32)  dicad0h_ff (.*, .din(dicad0h_ns[31:0]), .en(ifu_ic_debug_rd_data_valid | wr_dicad0h_r), .dout(dicad0h[31:0]));
+
+
+if (pt.ICACHE_ECC == 1) begin
+   // ----------------------------------------------------------------------
+   // DICAD1 (R/W) (Only accessible in debug mode)
+   // [6:0]     : ECC (bits [31:7] read as zero)
+   localparam DICAD1        = 12'h7ca;
+   // A CSR write wins the data mux; otherwise bits [70:64] of the IFU debug read data are loaded
+   assign dicad1_ns[6:0] = wr_dicad1_r ? dec_csr_wrdata_r[6:0] : ifu_ic_debug_rd_data[70:64];
+
+   assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD1);
+
+   rvdffe #(.WIDTH(7), .OVERRIDE(1))  dicad1_ff (.*, .en(wr_dicad1_r | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[6:0]), .dout(dicad1_raw[6:0]));
+   // Zero-extend the 7 stored bits to the 32-bit CSR value
+   assign dicad1[31:0] = {25'b0, dicad1_raw[6:0]};
+
+end
+else begin
+   // ----------------------------------------------------------------------
+   // DICAD1 (R/W) (Only accessible in debug mode)
+   // [3:0]     : Parity (bits [31:4] read as zero)
+   localparam DICAD1        = 12'h7ca;
+   // A CSR write wins the data mux; otherwise bits [67:64] of the IFU debug read data are loaded
+   assign dicad1_ns[3:0] = wr_dicad1_r ? dec_csr_wrdata_r[3:0] : ifu_ic_debug_rd_data[67:64];
+
+   assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD1);
+
+   rvdffs #(4)  dicad1_ff (.*, .clk(free_clk), .en(wr_dicad1_r | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[3:0]), .dout(dicad1_raw[3:0]));
+   // Zero-extend the 4 stored bits to the 32-bit CSR value
+   assign dicad1[31:0] = {28'b0, dicad1_raw[3:0]};
+end
+   // ----------------------------------------------------------------------
+   // DICAGO (R/W) (Only accessible in debug mode)
+   // [0]     : Go
+   localparam DICAGO        = 12'h7cb;
+   // Diagnostic write data is always 71 bits: the parity configuration pads its 4 DICAD1 bits with 3 zeros
+if (pt.ICACHE_ECC == 1)
+   assign dec_tlu_ic_diag_pkt.icache_wrdata[70:0] = {      dicad1[6:0], dicad0h[31:0], dicad0[31:0]};
+else
+   assign dec_tlu_ic_diag_pkt.icache_wrdata[70:0] = {3'b0, dicad1[3:0], dicad0h[31:0], dicad0[31:0]};
+
+
+   assign dec_tlu_ic_diag_pkt.icache_dicawics[16:0] = dicawics[16:0];
+   // The DICAGO access itself is the trigger: a non-writing CSR access at decode requests a read, a CSR write requests a write (write data is not examined)
+   assign icache_rd_valid = allow_dbg_halt_csr_write & dec_csr_any_unq_d & dec_i0_decode_d & ~dec_csr_wen_unq_d & (dec_csr_rdaddr_d[11:0] == DICAGO);
+   assign icache_wr_valid = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAGO);
+
+   // The packet is driven from the registered (_f) copies of the two strobes
+   assign dec_tlu_ic_diag_pkt.icache_rd_valid = icache_rd_valid_f;
+   assign dec_tlu_ic_diag_pkt.icache_wr_valid = icache_wr_valid_f;
+
+   // ----------------------------------------------------------------------
+   // MTSEL (R/W)
+   // [1:0] : Trigger select : picks which of the four triggers (t0..t3) is accessed through MTDATA1/MTDATA2
+   localparam MTSEL         = 12'h7a0;
+   // Hold mux: mtsel keeps its value until the next CSR write to MTSEL
+   assign wr_mtsel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTSEL);
+   assign mtsel_ns[1:0] = wr_mtsel_r ? {dec_csr_wrdata_r[1:0]} : mtsel[1:0];
+
+   rvdff #(2)  mtsel_ff (.*, .clk(csr_wr_clk), .din(mtsel_ns[1:0]), .dout(mtsel[1:0]));
+
+   // ----------------------------------------------------------------------
+   // MTDATA1 (R/W)
+   // [31:0] : Trigger Data 1
+   localparam MTDATA1       = 12'h7a1;
+
+   // for triggers 0, 1, 2 and 3 aka Match Control
+   // [31:28] : type, hard coded to 0x2
+   // [27]    : dmode
+   // [26:21] : hard coded to 0x1f
+   // [20]    : hit
+   // [19]    : select (0 - address, 1 - data)
+   // [18]    : timing, always 'before', reads 0x0
+   // [17:12] : action, bits  [17:13] not implemented and reads 0x0
+   // [11]    : chain
+   // [10:7]  : match, bits [10:8] not implemented and reads 0x0
+   // [6]     : M
+   // [5:3]   : not implemented, reads 0x0
+   // [2]     : execute
+   // [1]     : store
+   // [0]     : load
+   //
+   // decoder ring: CSR bit => bit position in the 10-bit stored mtdata1_t* vector
+   // [27]    : => 9
+   // [20]    : => 8
+   // [19]    : => 7
+   // [12]    : => 6
+   // [11]    : => 5
+   // [7]     : => 4
+   // [6]     : => 3
+   // [2]     : => 2
+   // [1]     : => 1
+   // [0]     : => 0
+
+
+   // load is forced to 0 when select (bit 19) is 1, i.e. load combined with data match cannot be set
+   assign tdata_load = dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[19];
+   // execute is forced to 0 when select (bit 19) is 1, i.e. execute combined with data match cannot be set
+   assign tdata_opcode = dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[19];
+   // action (bit 12) is only accepted as 1 when dmode is being written as 1 while dbg_tlu_halted_f is set
+   assign tdata_action = (dec_csr_wrdata_r[27] & dbg_tlu_halted_f) & dec_csr_wrdata_r[12];
+
+   // Chain bit has conditions: WARL for triggers without chains. Force to zero if dmode is 0 but next trigger dmode is 1.
+   assign tdata_chain = mtsel[0] ? 1'b0 : // triggers 1 and 3 chain bit is always zero
+                        mtsel[1] ?  dec_csr_wrdata_r[11] & ~(mtdata1_t3[MTDATA1_DMODE] & ~dec_csr_wrdata_r[27]) : // trigger 2
+                                    dec_csr_wrdata_r[11] & ~(mtdata1_t1[MTDATA1_DMODE] & ~dec_csr_wrdata_r[27]);  // trigger 0
+
+   // Kill mtdata1 write if dmode=1 but prior trigger has dmode=0/chain=1. Only applies to T1 and T3
+   assign tdata_kill_write = mtsel[1] ? dec_csr_wrdata_r[27] & (~mtdata1_t2[MTDATA1_DMODE] & mtdata1_t2[MTDATA1_CHAIN]) : // trigger 3
+                                        dec_csr_wrdata_r[27] & (~mtdata1_t0[MTDATA1_DMODE] & mtdata1_t0[MTDATA1_CHAIN]) ; // trigger 1
+
+   // Pack the sanitized fields into the 10-bit stored format; dmode [9] can only be set while dbg_tlu_halted_f is set
+   assign tdata_wrdata_r[9:0]  = {dec_csr_wrdata_r[27] & dbg_tlu_halted_f,
+                                   dec_csr_wrdata_r[20:19],
+                                   tdata_action,
+                                   tdata_chain,
+                                   dec_csr_wrdata_r[7:6],
+                                   tdata_opcode,
+                                   dec_csr_wrdata_r[1],
+                                   tdata_load};
+
+   // If the DMODE bit is set, tdata1 can only be updated in debug_mode; T1/T3 writes are also blocked by tdata_kill_write
+   assign wr_mtdata1_t0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[MTDATA1_DMODE] | dbg_tlu_halted_f);
+   assign mtdata1_t0_ns[9:0] = wr_mtdata1_t0_r ? tdata_wrdata_r[9:0] :
+                                {mtdata1_t0[9], update_hit_bit_r[0] | mtdata1_t0[8], mtdata1_t0[7:0]};
+
+   assign wr_mtdata1_t1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[MTDATA1_DMODE] | dbg_tlu_halted_f) & ~tdata_kill_write;
+   assign mtdata1_t1_ns[9:0] = wr_mtdata1_t1_r ? tdata_wrdata_r[9:0] :
+                                {mtdata1_t1[9], update_hit_bit_r[1] | mtdata1_t1[8], mtdata1_t1[7:0]};
+
+   assign wr_mtdata1_t2_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[MTDATA1_DMODE] | dbg_tlu_halted_f);
+   assign mtdata1_t2_ns[9:0] = wr_mtdata1_t2_r ? tdata_wrdata_r[9:0] :
+                                {mtdata1_t2[9], update_hit_bit_r[2] | mtdata1_t2[8], mtdata1_t2[7:0]};
+
+   assign wr_mtdata1_t3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[MTDATA1_DMODE] | dbg_tlu_halted_f) & ~tdata_kill_write;
+   assign mtdata1_t3_ns[9:0] = wr_mtdata1_t3_r ? tdata_wrdata_r[9:0] :
+                                {mtdata1_t3[9], update_hit_bit_r[3] | mtdata1_t3[8], mtdata1_t3[7:0]};
+
+   // When not written, a trigger keeps its value except the hit bit [8], which is OR-ed with update_hit_bit_r[n] (sticky until rewritten)
+   // Each flop is enabled by a CSR write to that trigger or by trigger_enabled[n]
+   rvdffe #(10)  mtdata1_t0_ff (.*, .en(trigger_enabled[0] | wr_mtdata1_t0_r), .din(mtdata1_t0_ns[9:0]), .dout(mtdata1_t0[9:0]));
+   rvdffe #(10)  mtdata1_t1_ff (.*, .en(trigger_enabled[1] | wr_mtdata1_t1_r), .din(mtdata1_t1_ns[9:0]), .dout(mtdata1_t1[9:0]));
+   rvdffe #(10)  mtdata1_t2_ff (.*, .en(trigger_enabled[2] | wr_mtdata1_t2_r), .din(mtdata1_t2_ns[9:0]), .dout(mtdata1_t2[9:0]));
+   rvdffe #(10)  mtdata1_t3_ff (.*, .en(trigger_enabled[3] | wr_mtdata1_t3_r), .din(mtdata1_t3_ns[9:0]), .dout(mtdata1_t3[9:0]));
+   // MTDATA1 read view: expand the MTSEL-selected trigger's 10 stored bits to the CSR layout (type = 0x2, [26:21] = 0x1f, unimplemented bits = 0)
+   assign mtdata1_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & {4'h2, mtdata1_t0[9], 6'b011111, mtdata1_t0[8:7], 6'b0, mtdata1_t0[6:5], 3'b0, mtdata1_t0[4:3], 3'b0, mtdata1_t0[2:0]}) |
+                                     ({32{(mtsel[1:0] == 2'b01)}} & {4'h2, mtdata1_t1[9], 6'b011111, mtdata1_t1[8:7], 6'b0, mtdata1_t1[6:5], 3'b0, mtdata1_t1[4:3], 3'b0, mtdata1_t1[2:0]}) |
+                                     ({32{(mtsel[1:0] == 2'b10)}} & {4'h2, mtdata1_t2[9], 6'b011111, mtdata1_t2[8:7], 6'b0, mtdata1_t2[6:5], 3'b0, mtdata1_t2[4:3], 3'b0, mtdata1_t2[2:0]}) |
+                                     ({32{(mtsel[1:0] == 2'b11)}} & {4'h2, mtdata1_t3[9], 6'b011111, mtdata1_t3[8:7], 6'b0, mtdata1_t3[6:5], 3'b0, mtdata1_t3[4:3], 3'b0, mtdata1_t3[2:0]}));
+   // Trigger 0: export the stored control bits in the trigger packet
+   assign trigger_pkt_any[0].load = mtdata1_t0[MTDATA1_LD];
+   assign trigger_pkt_any[0].store = mtdata1_t0[MTDATA1_ST];
+   assign trigger_pkt_any[0].execute = mtdata1_t0[MTDATA1_EXE];
+   assign trigger_pkt_any[0].m = mtdata1_t0[MTDATA1_M_ENABLED];
+   assign trigger_pkt_any[0].match = mtdata1_t0[MTDATA1_MATCH];
+   assign trigger_pkt_any[0].select = mtdata1_t0[MTDATA1_SEL];
+   // Trigger 1
+   assign trigger_pkt_any[1].load = mtdata1_t1[MTDATA1_LD];
+   assign trigger_pkt_any[1].store = mtdata1_t1[MTDATA1_ST];
+   assign trigger_pkt_any[1].execute = mtdata1_t1[MTDATA1_EXE];
+   assign trigger_pkt_any[1].m = mtdata1_t1[MTDATA1_M_ENABLED];
+   assign trigger_pkt_any[1].match = mtdata1_t1[MTDATA1_MATCH];
+   assign trigger_pkt_any[1].select = mtdata1_t1[MTDATA1_SEL];
+   // Trigger 2
+   assign trigger_pkt_any[2].load = mtdata1_t2[MTDATA1_LD];
+   assign trigger_pkt_any[2].store = mtdata1_t2[MTDATA1_ST];
+   assign trigger_pkt_any[2].execute = mtdata1_t2[MTDATA1_EXE];
+   assign trigger_pkt_any[2].m = mtdata1_t2[MTDATA1_M_ENABLED];
+   assign trigger_pkt_any[2].match = mtdata1_t2[MTDATA1_MATCH];
+   assign trigger_pkt_any[2].select = mtdata1_t2[MTDATA1_SEL];
+   // Trigger 3
+   assign trigger_pkt_any[3].load = mtdata1_t3[MTDATA1_LD];
+   assign trigger_pkt_any[3].store = mtdata1_t3[MTDATA1_ST];
+   assign trigger_pkt_any[3].execute = mtdata1_t3[MTDATA1_EXE];
+   assign trigger_pkt_any[3].m = mtdata1_t3[MTDATA1_M_ENABLED];
+   assign trigger_pkt_any[3].match = mtdata1_t3[MTDATA1_MATCH];
+   assign trigger_pkt_any[3].select = mtdata1_t3[MTDATA1_SEL];
+
+
+
+
+
+   // ----------------------------------------------------------------------
+   // MTDATA2 (R/W)
+   // [31:0] : Trigger Data 2, one 32-bit register per trigger, accessed through MTSEL
+   localparam MTDATA2       = 12'h7a2;
+
+   // Write strobes: a trigger whose DMODE bit is set only accepts tdata2 updates in debug_mode
+   assign wr_mtdata2_t0_r = (mtsel[1:0] == 2'b00) & (dec_csr_wraddr_r[11:0] == MTDATA2) & dec_csr_wen_r_mod & (dbg_tlu_halted_f | ~mtdata1_t0[MTDATA1_DMODE]);
+   assign wr_mtdata2_t1_r = (mtsel[1:0] == 2'b01) & (dec_csr_wraddr_r[11:0] == MTDATA2) & dec_csr_wen_r_mod & (dbg_tlu_halted_f | ~mtdata1_t1[MTDATA1_DMODE]);
+   assign wr_mtdata2_t2_r = (mtsel[1:0] == 2'b10) & (dec_csr_wraddr_r[11:0] == MTDATA2) & dec_csr_wen_r_mod & (dbg_tlu_halted_f | ~mtdata1_t2[MTDATA1_DMODE]);
+   assign wr_mtdata2_t3_r = (mtsel[1:0] == 2'b11) & (dec_csr_wraddr_r[11:0] == MTDATA2) & dec_csr_wen_r_mod & (dbg_tlu_halted_f | ~mtdata1_t3[MTDATA1_DMODE]);
+
+   rvdffe #(32)  mtdata2_t0_ff (.*, .din(dec_csr_wrdata_r[31:0]), .en(wr_mtdata2_t0_r), .dout(mtdata2_t0[31:0]));
+   rvdffe #(32)  mtdata2_t1_ff (.*, .din(dec_csr_wrdata_r[31:0]), .en(wr_mtdata2_t1_r), .dout(mtdata2_t1[31:0]));
+   rvdffe #(32)  mtdata2_t2_ff (.*, .din(dec_csr_wrdata_r[31:0]), .en(wr_mtdata2_t2_r), .dout(mtdata2_t2[31:0]));
+   rvdffe #(32)  mtdata2_t3_ff (.*, .din(dec_csr_wrdata_r[31:0]), .en(wr_mtdata2_t3_r), .dout(mtdata2_t3[31:0]));
+   // CSR read value: the register of the trigger currently selected by mtsel
+   assign mtdata2_tsel_out[31:0] = ( (mtdata2_t0[31:0] & {32{(mtsel[1:0] == 2'b00)}}) |
+                                     (mtdata2_t1[31:0] & {32{(mtsel[1:0] == 2'b01)}}) |
+                                     (mtdata2_t2[31:0] & {32{(mtsel[1:0] == 2'b10)}}) |
+                                     (mtdata2_t3[31:0] & {32{(mtsel[1:0] == 2'b11)}}));
+   // Every trigger's tdata2 is exported in the trigger packet regardless of mtsel
+   assign trigger_pkt_any[0].tdata2[31:0] = mtdata2_t0[31:0];
+   assign trigger_pkt_any[1].tdata2[31:0] = mtdata2_t1[31:0];
+   assign trigger_pkt_any[2].tdata2[31:0] = mtdata2_t2[31:0];
+   assign trigger_pkt_any[3].tdata2[31:0] = mtdata2_t3[31:0];
+
+
+   //----------------------------------------------------------------------
+   // Performance Monitor Counters section starts (event codes 29, 33 and 51-53 are unassigned)
+   //----------------------------------------------------------------------
+   localparam MHPME_NOEVENT             = 10'd0;
+   localparam MHPME_CLK_ACTIVE          = 10'd1; // OOP - out of pipe
+   localparam MHPME_ICACHE_HIT          = 10'd2; // OOP
+   localparam MHPME_ICACHE_MISS         = 10'd3; // OOP
+   localparam MHPME_INST_COMMIT         = 10'd4;
+   localparam MHPME_INST_COMMIT_16B     = 10'd5;
+   localparam MHPME_INST_COMMIT_32B     = 10'd6;
+   localparam MHPME_INST_ALIGNED        = 10'd7; // OOP
+   localparam MHPME_INST_DECODED        = 10'd8; // OOP
+   localparam MHPME_INST_MUL            = 10'd9;
+   localparam MHPME_INST_DIV            = 10'd10;
+   localparam MHPME_INST_LOAD           = 10'd11;
+   localparam MHPME_INST_STORE          = 10'd12;
+   localparam MHPME_INST_MALOAD         = 10'd13;
+   localparam MHPME_INST_MASTORE        = 10'd14;
+   localparam MHPME_INST_ALU            = 10'd15;
+   localparam MHPME_INST_CSRREAD        = 10'd16;
+   localparam MHPME_INST_CSRRW          = 10'd17;
+   localparam MHPME_INST_CSRWRITE       = 10'd18;
+   localparam MHPME_INST_EBREAK         = 10'd19;
+   localparam MHPME_INST_ECALL          = 10'd20;
+   localparam MHPME_INST_FENCE          = 10'd21;
+   localparam MHPME_INST_FENCEI         = 10'd22;
+   localparam MHPME_INST_MRET           = 10'd23;
+   localparam MHPME_INST_BRANCH         = 10'd24;
+   localparam MHPME_BRANCH_MP           = 10'd25;
+   localparam MHPME_BRANCH_TAKEN        = 10'd26;
+   localparam MHPME_BRANCH_NOTP         = 10'd27;
+   localparam MHPME_FETCH_STALL         = 10'd28; // OOP
+   localparam MHPME_DECODE_STALL        = 10'd30; // OOP
+   localparam MHPME_POSTSYNC_STALL      = 10'd31; // OOP
+   localparam MHPME_PRESYNC_STALL       = 10'd32; // OOP
+   localparam MHPME_LSU_SB_WB_STALL     = 10'd34; // OOP
+   localparam MHPME_DMA_DCCM_STALL      = 10'd35; // OOP
+   localparam MHPME_DMA_ICCM_STALL      = 10'd36; // OOP
+   localparam MHPME_EXC_TAKEN           = 10'd37;
+   localparam MHPME_TIMER_INT_TAKEN     = 10'd38;
+   localparam MHPME_EXT_INT_TAKEN       = 10'd39;
+   localparam MHPME_FLUSH_LOWER         = 10'd40;
+   localparam MHPME_BR_ERROR            = 10'd41;
+   localparam MHPME_IBUS_TRANS          = 10'd42; // OOP
+   localparam MHPME_DBUS_TRANS          = 10'd43; // OOP
+   localparam MHPME_DBUS_MA_TRANS       = 10'd44; // OOP
+   localparam MHPME_IBUS_ERROR          = 10'd45; // OOP
+   localparam MHPME_DBUS_ERROR          = 10'd46; // OOP
+   localparam MHPME_IBUS_STALL          = 10'd47; // OOP
+   localparam MHPME_DBUS_STALL          = 10'd48; // OOP
+   localparam MHPME_INT_DISABLED        = 10'd49; // OOP
+   localparam MHPME_INT_STALLED         = 10'd50; // OOP
+   localparam MHPME_INST_BITMANIP       = 10'd54;
+   localparam MHPME_DBUS_LOAD           = 10'd55;
+   localparam MHPME_DBUS_STORE          = 10'd56;
+   // Counts even during sleep state: these codes have bit [9] set, which is what perfcnt_during_sleep tests
+   localparam MHPME_SLEEP_CYC           = 10'd512; // OOP
+   localparam MHPME_DMA_READ_ALL        = 10'd513; // OOP
+   localparam MHPME_DMA_WRITE_ALL       = 10'd514; // OOP
+   localparam MHPME_DMA_READ_DCCM       = 10'd515; // OOP
+   localparam MHPME_DMA_WRITE_DCCM      = 10'd516; // OOP
+
+   // Pack the event selects into a vector for genvar: mhpme_vec[n] is the event select of counter MHPMC(n+3)
+   assign mhpme_vec[0][9:0] = mhpme3[9:0];
+   assign mhpme_vec[1][9:0] = mhpme4[9:0];
+   assign mhpme_vec[2][9:0] = mhpme5[9:0];
+   assign mhpme_vec[3][9:0] = mhpme6[9:0];
+
+   // only consider committed itypes: the itype field is AND-ed with tlu_i0_commit_cmt on every bit
+   //logic [3:0] pmu_i0_itype_qual;
+   assign pmu_i0_itype_qual[3:0] = dec_tlu_packet_r.pmu_i0_itype[3:0] & {4{tlu_i0_commit_cmt}};
+
+   // Generate the muxed incs for all counters based on event type; counter i is gated off by mcountinhibit[i+3]
+   for (genvar i=0 ; i < 4; i++) begin
+      assign mhpmc_inc_r[i] =  {{~mcountinhibit[i+3]}} &
+           (
+             ({1{(mhpme_vec[i][9:0] == MHPME_CLK_ACTIVE      )}} & 1'b1) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_ICACHE_HIT      )}} & {ifu_pmu_ic_hit}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_ICACHE_MISS     )}} & {ifu_pmu_ic_miss}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_COMMIT     )}} & {tlu_i0_commit_cmt & ~illegal_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_COMMIT_16B )}} & {tlu_i0_commit_cmt & ~exu_pmu_i0_pc4 & ~illegal_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_COMMIT_32B )}} & {tlu_i0_commit_cmt &  exu_pmu_i0_pc4 & ~illegal_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_ALIGNED    )}} & ifu_pmu_instr_aligned)  |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_DECODED    )}} & dec_pmu_instr_decoded)  |
+             // MHPME_DECODE_STALL is selected exactly once, further down with the other stall events
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_MUL        )}} & {(pmu_i0_itype_qual == MUL)})     |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_DIV        )}} & {dec_tlu_packet_r.pmu_divide  & tlu_i0_commit_cmt & ~illegal_r})     |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_LOAD       )}} & {(pmu_i0_itype_qual == LOAD)})    |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_STORE      )}} & {(pmu_i0_itype_qual == STORE)})   |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_MALOAD     )}} & {(pmu_i0_itype_qual == LOAD)} &
+                                                                      {1{dec_tlu_packet_r.pmu_lsu_misaligned}})    |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_MASTORE    )}} & {(pmu_i0_itype_qual == STORE)} &
+                                                                      {1{dec_tlu_packet_r.pmu_lsu_misaligned}})    |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_ALU        )}} & {(pmu_i0_itype_qual == ALU)})     |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_CSRREAD    )}} & {(pmu_i0_itype_qual == CSRREAD)}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_CSRWRITE   )}} & {(pmu_i0_itype_qual == CSRWRITE)})|
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_CSRRW      )}} & {(pmu_i0_itype_qual == CSRRW)})   |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_EBREAK     )}} & {(pmu_i0_itype_qual == EBREAK)})  |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_ECALL      )}} & {(pmu_i0_itype_qual == ECALL)})   |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_FENCE      )}} & {(pmu_i0_itype_qual == FENCE)})   |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_FENCEI     )}} & {(pmu_i0_itype_qual == FENCEI)})  |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_MRET       )}} & {(pmu_i0_itype_qual == MRET)})    |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_BRANCH     )}} & {
+                                                                     ((pmu_i0_itype_qual == CONDBR) | (pmu_i0_itype_qual == JAL))})   |
+             ({1{(mhpme_vec[i][9:0] == MHPME_BRANCH_MP       )}} & {exu_pmu_i0_br_misp & tlu_i0_commit_cmt & ~illegal_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_BRANCH_TAKEN    )}} & {exu_pmu_i0_br_ataken & tlu_i0_commit_cmt & ~illegal_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_BRANCH_NOTP     )}} & {dec_tlu_packet_r.pmu_i0_br_unpred & tlu_i0_commit_cmt & ~illegal_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_FETCH_STALL     )}} & { ifu_pmu_fetch_stall}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DECODE_STALL    )}} & { dec_pmu_decode_stall}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_POSTSYNC_STALL  )}} & {dec_pmu_postsync_stall}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_PRESYNC_STALL   )}} & {dec_pmu_presync_stall}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_LSU_SB_WB_STALL )}} & { lsu_store_stall_any}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DMA_DCCM_STALL  )}} & { dma_dccm_stall_any}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DMA_ICCM_STALL  )}} & { dma_iccm_stall_any}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_EXC_TAKEN       )}} & { (i0_exception_valid_r | i0_trigger_hit_r | lsu_exc_valid_r)}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_TIMER_INT_TAKEN )}} & { take_timer_int | take_int_timer0_int | take_int_timer1_int}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_EXT_INT_TAKEN   )}} & { take_ext_int}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_FLUSH_LOWER     )}} & { tlu_flush_lower_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_BR_ERROR        )}} & {(dec_tlu_br0_error_r | dec_tlu_br0_start_error_r) & rfpc_i0_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_IBUS_TRANS      )}} & {ifu_pmu_bus_trxn}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_TRANS      )}} & {lsu_pmu_bus_trxn}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_MA_TRANS   )}} & {lsu_pmu_bus_misaligned}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_IBUS_ERROR      )}} & {ifu_pmu_bus_error}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_ERROR      )}} & {lsu_pmu_bus_error}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_IBUS_STALL      )}} & {ifu_pmu_bus_busy}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_STALL      )}} & {lsu_pmu_bus_busy}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INT_DISABLED    )}} & {~mstatus[MSTATUS_MIE]}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INT_STALLED     )}} & {~mstatus[MSTATUS_MIE] & |(mip[5:0] & mie[5:0])}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_INST_BITMANIP     )}} & {(pmu_i0_itype_qual == BITMANIPU)}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_LOAD       )}} & {tlu_i0_commit_cmt & lsu_pmu_load_external_r & ~illegal_r}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_STORE      )}} & {tlu_i0_commit_cmt & lsu_pmu_store_external_r & ~illegal_r}) |
+             // These count even during sleep
+             ({1{(mhpme_vec[i][9:0] == MHPME_SLEEP_CYC       )}} & {dec_tlu_pmu_fw_halted}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DMA_READ_ALL    )}} & {dma_pmu_any_read}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DMA_WRITE_ALL   )}} & {dma_pmu_any_write}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DMA_READ_DCCM   )}} & {dma_pmu_dccm_read}) |
+             ({1{(mhpme_vec[i][9:0] == MHPME_DMA_WRITE_DCCM  )}} & {dma_pmu_dccm_write})
+             );
+   end
+
+
+   if(pt.FAST_INTERRUPT_REDIRECT) // 31-bit bank: the 27 bits of the else-variant plus 4 take_ext_int_start*/ext_int_freeze bits
+   rvdffie #(31)  mstatus_ff (.*, .clk(free_l2clk),
+                             .din({mdseac_locked_ns, lsu_single_ecc_error_r, lsu_exc_valid_r, lsu_i0_exc_r,
+                                   take_ext_int_start,    take_ext_int_start_d1, take_ext_int_start_d2, ext_int_freeze,
+                                   mip_ns[5:0], mcyclel_cout & ~wr_mcycleh_r & mcyclel_cout_in,
+                                   minstret_enable, minstretl_cout_ns, fw_halted_ns,
+                                   meicidpl_ns[3:0], icache_rd_valid, icache_wr_valid, mhpmc_inc_r[3:0], perfcnt_halted,
+                                   mstatus_ns[1:0]}),
+                             .dout({mdseac_locked_f, lsu_single_ecc_error_r_d1, lsu_exc_valid_r_d1, lsu_i0_exc_r_d1,
+                                    take_ext_int_start_d1, take_ext_int_start_d2, take_ext_int_start_d3, ext_int_freeze_d1,
+                                    mip[5:0], mcyclel_cout_f, minstret_enable_f, minstretl_cout_f,
+                                    fw_halted, meicidpl[3:0], icache_rd_valid_f, icache_wr_valid_f,
+                                    mhpmc_inc_r_d1[3:0], perfcnt_halted_d1,
+                                    mstatus[1:0]}));
+   // Same shared flop bank without the external-interrupt sequencing bits; din and dout must stay position-matched
+   else
+   rvdffie #(27)  mstatus_ff (.*, .clk(free_l2clk),
+                             .din({mdseac_locked_ns, lsu_single_ecc_error_r, lsu_exc_valid_r, lsu_i0_exc_r,
+                                   mip_ns[5:0], mcyclel_cout & ~wr_mcycleh_r & mcyclel_cout_in,
+                                   minstret_enable, minstretl_cout_ns, fw_halted_ns,
+                                   meicidpl_ns[3:0], icache_rd_valid, icache_wr_valid, mhpmc_inc_r[3:0], perfcnt_halted,
+                                   mstatus_ns[1:0]}),
+                             .dout({mdseac_locked_f, lsu_single_ecc_error_r_d1, lsu_exc_valid_r_d1, lsu_i0_exc_r_d1,
+                                    mip[5:0], mcyclel_cout_f, minstret_enable_f, minstretl_cout_f,
+                                    fw_halted, meicidpl[3:0], icache_rd_valid_f, icache_wr_valid_f,
+                                    mhpmc_inc_r_d1[3:0], perfcnt_halted_d1,
+                                    mstatus[1:0]}));
+   // Counters are halted while dec_tlu_pmu_fw_halted is set, or while debug-halted with dcsr[DCSR_STOPC] set
+   assign perfcnt_halted = (dec_tlu_pmu_fw_halted | (dcsr[DCSR_STOPC] & dec_tlu_dbg_halted));
+   assign perfcnt_during_sleep[3:0] = {mhpme_vec[3][9],mhpme_vec[2][9],mhpme_vec[1][9],mhpme_vec[0][9]} & {4{~(dcsr[DCSR_STOPC] & dec_tlu_dbg_halted)}};
+   // A delayed increment is passed on unless the counter was halted and its event is not a count-during-sleep event
+   assign dec_tlu_perfcnt0 = (perfcnt_during_sleep[0] | ~perfcnt_halted_d1) & mhpmc_inc_r_d1[0];
+   assign dec_tlu_perfcnt1 = (perfcnt_during_sleep[1] | ~perfcnt_halted_d1) & mhpmc_inc_r_d1[1];
+   assign dec_tlu_perfcnt2 = (perfcnt_during_sleep[2] | ~perfcnt_halted_d1) & mhpmc_inc_r_d1[2];
+   assign dec_tlu_perfcnt3 = (perfcnt_during_sleep[3] | ~perfcnt_halted_d1) & mhpmc_inc_r_d1[3];
+
+   // ----------------------------------------------------------------------
+   // MHPMC3 (RW) / MHPMC3H (RW)
+   // 64-bit Hardware Performance Monitor Counter 3: MHPMC3 holds [31:0], MHPMC3H holds [63:32]
+   localparam MHPMC3        = 12'hB03;
+   localparam MHPMC3H       = 12'hB83;
+   // Low word: wr_en0 is a CSR write, wr_en1 is a qualified event increment; the CSR write wins the data mux
+   assign mhpmc3_wr_en1 = mhpmc_inc_r[0] & (perfcnt_during_sleep[0] | ~perfcnt_halted);
+   assign mhpmc3_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC3) & dec_csr_wen_r_mod;
+   assign mhpmc3_wr_en  = mhpmc3_wr_en1 | mhpmc3_wr_en0;
+   assign mhpmc3_incr[63:0] = {mhpmc3h[31:0], mhpmc3[31:0]} + 64'd1;
+   assign mhpmc3_ns[31:0] = mhpmc3_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc3_incr[31:0];
+   rvdffe #(32)  mhpmc3_ff (.*, .clk(free_l2clk), .din(mhpmc3_ns[31:0]), .en(mhpmc3_wr_en), .dout(mhpmc3[31:0]));
+   // High word: loaded by its own CSR write, otherwise takes the upper half of the 64-bit increment
+   assign mhpmc3h_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC3H) & dec_csr_wen_r_mod;
+   assign mhpmc3h_wr_en  = mhpmc3_wr_en1 | mhpmc3h_wr_en0;
+   assign mhpmc3h_ns[31:0] = mhpmc3h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc3_incr[63:32];
+   rvdffe #(32)  mhpmc3h_ff (.*, .clk(free_l2clk), .din(mhpmc3h_ns[31:0]), .en(mhpmc3h_wr_en), .dout(mhpmc3h[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MHPMC4 (RW) / MHPMC4H (RW)
+   // 64-bit Hardware Performance Monitor Counter 4: MHPMC4 holds [31:0], MHPMC4H holds [63:32]
+   localparam MHPMC4        = 12'hB04;
+   localparam MHPMC4H       = 12'hB84;
+   // Low word: wr_en0 is a CSR write, wr_en1 is a qualified event increment; the CSR write wins the data mux
+   assign mhpmc4_wr_en1 = mhpmc_inc_r[1] & (perfcnt_during_sleep[1] | ~perfcnt_halted);
+   assign mhpmc4_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC4) & dec_csr_wen_r_mod;
+   assign mhpmc4_wr_en  = mhpmc4_wr_en1 | mhpmc4_wr_en0;
+   assign mhpmc4_incr[63:0] = {mhpmc4h[31:0], mhpmc4[31:0]} + 64'd1;
+   assign mhpmc4_ns[31:0] = mhpmc4_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc4_incr[31:0];
+   rvdffe #(32)  mhpmc4_ff (.*, .clk(free_l2clk), .din(mhpmc4_ns[31:0]), .en(mhpmc4_wr_en), .dout(mhpmc4[31:0]));
+   // High word: loaded by its own CSR write, otherwise takes the upper half of the 64-bit increment
+   assign mhpmc4h_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC4H) & dec_csr_wen_r_mod;
+   assign mhpmc4h_wr_en  = mhpmc4_wr_en1 | mhpmc4h_wr_en0;
+   assign mhpmc4h_ns[31:0] = mhpmc4h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc4_incr[63:32];
+   rvdffe #(32)  mhpmc4h_ff (.*, .clk(free_l2clk), .din(mhpmc4h_ns[31:0]), .en(mhpmc4h_wr_en), .dout(mhpmc4h[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MHPMC5 (RW) / MHPMC5H (RW)
+   // 64-bit Hardware Performance Monitor Counter 5: MHPMC5 holds [31:0], MHPMC5H holds [63:32]
+   localparam MHPMC5        = 12'hB05;
+   localparam MHPMC5H       = 12'hB85;
+   // Low word: wr_en0 is a CSR write, wr_en1 is a qualified event increment; the CSR write wins the data mux
+   assign mhpmc5_wr_en1 = mhpmc_inc_r[2] & (perfcnt_during_sleep[2] | ~perfcnt_halted);
+   assign mhpmc5_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC5) & dec_csr_wen_r_mod;
+   assign mhpmc5_wr_en  = mhpmc5_wr_en1 | mhpmc5_wr_en0;
+   assign mhpmc5_incr[63:0] = {mhpmc5h[31:0], mhpmc5[31:0]} + 64'd1;
+   assign mhpmc5_ns[31:0] = mhpmc5_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc5_incr[31:0];
+   rvdffe #(32)  mhpmc5_ff (.*, .clk(free_l2clk), .din(mhpmc5_ns[31:0]), .en(mhpmc5_wr_en), .dout(mhpmc5[31:0]));
+   // High word: loaded by its own CSR write, otherwise takes the upper half of the 64-bit increment
+   assign mhpmc5h_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC5H) & dec_csr_wen_r_mod;
+   assign mhpmc5h_wr_en  = mhpmc5_wr_en1 | mhpmc5h_wr_en0;
+   assign mhpmc5h_ns[31:0] = mhpmc5h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc5_incr[63:32];
+   rvdffe #(32)  mhpmc5h_ff (.*, .clk(free_l2clk), .din(mhpmc5h_ns[31:0]), .en(mhpmc5h_wr_en), .dout(mhpmc5h[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MHPMC6 (RW) / MHPMC6H (RW)
+   // 64-bit Hardware Performance Monitor Counter 6: MHPMC6 holds [31:0], MHPMC6H holds [63:32]
+   localparam MHPMC6        = 12'hB06;
+   localparam MHPMC6H       = 12'hB86;
+   // Low word: wr_en0 is a CSR write, wr_en1 is a qualified event increment; the CSR write wins the data mux
+   assign mhpmc6_wr_en1 = mhpmc_inc_r[3] & (perfcnt_during_sleep[3] | ~perfcnt_halted);
+   assign mhpmc6_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC6) & dec_csr_wen_r_mod;
+   assign mhpmc6_wr_en  = mhpmc6_wr_en1 | mhpmc6_wr_en0;
+   assign mhpmc6_incr[63:0] = {mhpmc6h[31:0], mhpmc6[31:0]} + 64'd1;
+   assign mhpmc6_ns[31:0] = mhpmc6_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc6_incr[31:0];
+   rvdffe #(32)  mhpmc6_ff (.*, .clk(free_l2clk), .din(mhpmc6_ns[31:0]), .en(mhpmc6_wr_en), .dout(mhpmc6[31:0]));
+   // High word: loaded by its own CSR write, otherwise takes the upper half of the 64-bit increment
+   assign mhpmc6h_wr_en0 = (dec_csr_wraddr_r[11:0] == MHPMC6H) & dec_csr_wen_r_mod;
+   assign mhpmc6h_wr_en  = mhpmc6_wr_en1 | mhpmc6h_wr_en0;
+   assign mhpmc6h_ns[31:0] = mhpmc6h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc6_incr[63:32];
+   rvdffe #(32)  mhpmc6h_ff (.*, .clk(free_l2clk), .din(mhpmc6h_ns[31:0]), .en(mhpmc6h_wr_en), .dout(mhpmc6h[31:0]));
+
+   // ----------------------------------------------------------------------
+   // MHPME3 (RW)
+   // [9:0] : event select for Hardware Performance Monitor Counter 3
+   localparam MHPME3        = 12'h323;
+
+   // WARL filter shared by MHPME3..6: any value that is not an implemented event code is stored as zero
+   assign zero_event_r = ( (|dec_csr_wrdata_r[31:10]) |
+                           (dec_csr_wrdata_r[9:0] == 10'd29) |
+                           (dec_csr_wrdata_r[9:0] == 10'd33) |
+                           ((dec_csr_wrdata_r[9:0] > 10'd50) & (dec_csr_wrdata_r[9:0] < 10'd54)) |
+                           ((dec_csr_wrdata_r[9:0] > 10'd56) & (dec_csr_wrdata_r[9:0] < 10'd512)) |
+                           (dec_csr_wrdata_r[9:0] > 10'd516)
+                           );
+
+   assign event_r[9:0] = zero_event_r ? '0 : dec_csr_wrdata_r[9:0];
+
+   assign wr_mhpme3_r = (dec_csr_wraddr_r[11:0] == MHPME3) & dec_csr_wen_r_mod;
+   rvdffe #(10)  mhpme3_ff (.*, .din(event_r[9:0]), .en(wr_mhpme3_r), .dout(mhpme3[9:0]));
+   // ----------------------------------------------------------------------
+   // MHPME4 (RW)
+   // [9:0] : event select for Hardware Performance Monitor Counter 4
+   localparam MHPME4        = 12'h324;
+
+   assign wr_mhpme4_r = (dec_csr_wraddr_r[11:0] == MHPME4) & dec_csr_wen_r_mod;
+   rvdffe #(10)  mhpme4_ff (.*, .din(event_r[9:0]), .en(wr_mhpme4_r), .dout(mhpme4[9:0]));
+   // ----------------------------------------------------------------------
+   // MHPME5 (RW)
+   // [9:0] : event select for Hardware Performance Monitor Counter 5
+   localparam MHPME5        = 12'h325;
+
+   assign wr_mhpme5_r = (dec_csr_wraddr_r[11:0] == MHPME5) & dec_csr_wen_r_mod;
+   rvdffe #(10)  mhpme5_ff (.*, .din(event_r[9:0]), .en(wr_mhpme5_r), .dout(mhpme5[9:0]));
+   // ----------------------------------------------------------------------
+   // MHPME6 (RW)
+   // [9:0] : event select for Hardware Performance Monitor Counter 6
+   localparam MHPME6        = 12'h326;
+
+   assign wr_mhpme6_r = (dec_csr_wraddr_r[11:0] == MHPME6) & dec_csr_wen_r_mod;
+   rvdffe #(10)  mhpme6_ff (.*, .din(event_r[9:0]), .en(wr_mhpme6_r), .dout(mhpme6[9:0]));
+
+   //----------------------------------------------------------------------
+   // Performance Monitor Counters section ends
+   //----------------------------------------------------------------------
+   // ----------------------------------------------------------------------
+
+   // MCOUNTINHIBIT(RW)
+   // [31:7] : Reserved, read 0x0
+   // [6]    : HPM6 disable
+   // [5]    : HPM5 disable
+   // [4]    : HPM4 disable
+   // [3]    : HPM3 disable
+   // [2]    : MINSTRET disable
+   // [1]    : reserved, read 0x0
+   // [0]    : MCYCLE disable
+
+   localparam MCOUNTINHIBIT             = 12'h320;
+   // Only bits [6:2] and [0] are stored (6 flops); bit [1] is tied to zero by the assign that follows the flop.
+   assign wr_mcountinhibit_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCOUNTINHIBIT);
+   rvdffs #(6)  mcountinhibit_ff (.*, .clk(csr_wr_clk), .en(wr_mcountinhibit_r), .din({dec_csr_wrdata_r[6:2], dec_csr_wrdata_r[0]}), .dout({mcountinhibit[6:2], mcountinhibit[0]}));
+   assign mcountinhibit[1] = 1'b0;
+
+   //--------------------------------------------------------------------------------
+   // trace
+   //--------------------------------------------------------------------------------
+   logic [4:0] dec_tlu_exc_cause_wb1_raw, dec_tlu_exc_cause_wb2;
+   logic       dec_tlu_int_valid_wb1_raw, dec_tlu_int_valid_wb2;
+   // The four trace fields (1+1+5+1 = 8 bits) are all forced to zero while dec_tlu_trace_disable is set.
+   assign {dec_tlu_i0_valid_wb1,
+           dec_tlu_i0_exc_valid_wb1,
+           dec_tlu_exc_cause_wb1_raw[4:0],
+           dec_tlu_int_valid_wb1_raw}  =   {8{~dec_tlu_trace_disable}} & {i0_valid_wb,
+                                                                          i0_exception_valid_r_d1 | lsu_i0_exc_r_d1 | (trigger_hit_r_d1 & ~trigger_hit_dmode_r_d1),
+                                                                          exc_cause_wb[4:0],
+                                                                          interrupt_valid_r_d1};
+
+
+
+   // skid buffer for ints, reduces trace port count by 1: traceskidff captures the raw cause and interrupt-valid into the *_wb2 copies
+   rvdffie #(.WIDTH(6), .OVERRIDE(1))  traceskidff (.*,  .clk(clk),
+                        .din ({dec_tlu_exc_cause_wb1_raw[4:0],
+                               dec_tlu_int_valid_wb1_raw}),
+                        .dout({dec_tlu_exc_cause_wb2[4:0],
+                               dec_tlu_int_valid_wb2}));
+   // skid for ints: when the buffered interrupt-valid is set, report the buffered cause; otherwise pass the raw cause straight through
+   assign dec_tlu_exc_cause_wb1[4:0] =  dec_tlu_int_valid_wb2 ? dec_tlu_exc_cause_wb2[4:0] : dec_tlu_exc_cause_wb1_raw[4:0];
+   assign dec_tlu_int_valid_wb1 = dec_tlu_int_valid_wb2;
+
+   assign dec_tlu_mtval_wb1  = mtval[31:0];
+
+   // end trace
+   //--------------------------------------------------------------------------------
+
+
+   // ----------------------------------------------------------------------
+   // CSR read mux
+   // ----------------------------------------------------------------------
+
+// file "csrdecode" is human readable file that has all of the CSR decodes defined and is part of git repo
+// modify this file as needed
+
+// to generate all the equations below from "csrdecode" except legal equation:
+
+// 1) coredecode -in csrdecode > corecsrdecode.e
+
+// 2) espresso -Dso -oeqntott corecsrdecode.e | addassign  > csrequations
+
+// to generate the legal CSR equation below:
+
+// 1) coredecode -in csrdecode -legal > csrlegal.e
+
+// 2) espresso -Dso -oeqntott csrlegal.e | addassign  > csrlegal_equation
+// coredecode -in csrdecode > corecsrdecode.e; espresso -Dso -oeqntott corecsrdecode.e | addassign  > csrequations; coredecode -in csrdecode -legal > csrlegal.e; espresso -Dso -oeqntott csrlegal.e | addassign  > csrlegal_equation
+
+assign csr_misa = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]);
+
+assign csr_mvendorid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign csr_marchid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mimpid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign csr_mhartid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7]
+    &dec_csr_rdaddr_d[2]);
+
+assign csr_mstatus = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mtvec = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]);
+
+assign csr_mip = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2]);
+
+assign csr_mie = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mcyclel = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &!dec_csr_rdaddr_d[1]);
+
+assign csr_mcycleh = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]);
+
+assign csr_minstretl = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_minstreth = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mscratch = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mepc = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mcause = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mscause = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[2]);
+
+assign csr_mtval = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[1]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mrac = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]);
+
+assign csr_dmst = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]
+    &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]);
+
+assign csr_mdseac = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]);
+
+assign csr_meihap = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]
+    &dec_csr_rdaddr_d[3]);
+
+assign csr_meivt = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]
+    &!dec_csr_rdaddr_d[0]);
+
+assign csr_meipt = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_meicurpl = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[2]);
+
+assign csr_meicidpl = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign csr_dcsr = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mcgc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[0]);
+
+assign csr_mfdc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign csr_dpc = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[0]);
+
+assign csr_mtsel = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mtdata1 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]);
+
+assign csr_mtdata2 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[1]);
+
+assign csr_mhpmc3 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mhpmc4 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]
+    &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mhpmc5 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mhpmc6 = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mhpmc3h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mhpmc4h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]
+    &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mhpmc5h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mhpmc6h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mhpme3 = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mhpme4 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]
+    &!dec_csr_rdaddr_d[0]);
+
+assign csr_mhpme5 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_mhpme6 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]
+    &!dec_csr_rdaddr_d[0]);
+
+assign csr_mcountinhibit = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &!dec_csr_rdaddr_d[0]);
+
+assign csr_mitctl0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mitctl1 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3]
+    &dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign csr_mitb0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]);
+
+assign csr_mitb1 = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mitcnt0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mitcnt1 = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2]
+    &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign csr_mpmc = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]
+    &dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]);
+
+assign csr_meicpct = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_micect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_miccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]);
+
+assign csr_mdccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mfdht = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_mfdhs = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[0]);
+
+assign csr_dicawics = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]
+    &!dec_csr_rdaddr_d[0]);
+
+assign csr_dicad0h = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3]
+    &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]);
+
+assign csr_dicad0 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4]
+    &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign csr_dicad1 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]);
+
+assign csr_dicago = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]);
+
+assign presync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[7]
+    &dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[11]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]
+    &!dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (
+    dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]);
+
+assign postsync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]
+    &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[10]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]) | (
+    !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7]
+    &dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]);
+
+assign legal = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]
+    &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]) | (
+    !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (
+    dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]
+    &!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]
+    &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]
+    &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]
+    &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]) | (
+    dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]
+    &dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]
+    &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]) | (!dec_csr_rdaddr_d[11]
+    &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[2]) | (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]
+    &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (
+    !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]
+    &dec_csr_rdaddr_d[1]) | (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]
+    &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]
+    &dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]) | (dec_csr_rdaddr_d[11]
+    &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]) | (
+    !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (
+    dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]) | (
+    dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]
+    &!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]
+    &!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]
+    &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]) | (!dec_csr_rdaddr_d[11]
+    &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[1]) | (
+    !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[0]) | (
+    !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]
+    &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]
+    &!dec_csr_rdaddr_d[2]) | (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]
+    &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]
+    &!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | (
+    dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]
+    &dec_csr_rdaddr_d[1]) | (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]
+    &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]
+    &dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]) | (!dec_csr_rdaddr_d[11]
+    &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]
+    &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (
+    !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[3]) | (dec_csr_rdaddr_d[11]
+    &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[3]) | (
+    !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]
+    &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]
+    &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]) | (dec_csr_rdaddr_d[11]
+    &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]
+    &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]);
+
+
+
+assign dec_tlu_presync_d = presync & dec_csr_any_unq_d & ~dec_csr_wen_unq_d; // presync is requested only for CSR accesses that are not writes
+assign dec_tlu_postsync_d = postsync & dec_csr_any_unq_d;
+
+   // allow individual configuration of these features: the six internal-timer CSRs are illegal unless pt.TIMER_LEGAL_EN is set
+assign conditionally_illegal = ((csr_mitcnt0 | csr_mitcnt1 | csr_mitb0 | csr_mitb1 | csr_mitctl0 | csr_mitctl1) & !pt.TIMER_LEGAL_EN);
+   // dcsr, dpc, dmst and the dica* CSRs are valid only while dbg_tlu_halted_f is set; fast_int_meicpct and conditionally_illegal always invalidate
+assign valid_csr = ( legal & (~(csr_dcsr | csr_dpc | csr_dmst | csr_dicawics | csr_dicad0 | csr_dicad0h | csr_dicad1 | csr_dicago) | dbg_tlu_halted_f)
+                     & ~fast_int_meicpct & ~conditionally_illegal);
+
+assign dec_csr_legal_d = ( dec_csr_any_unq_d &
+                           valid_csr &          // of a valid CSR
+                           ~(dec_csr_wen_unq_d & (csr_mvendorid | csr_marchid | csr_mimpid | csr_mhartid | csr_mdseac | csr_meihap)) // that's not a write to a RO CSR
+                           );
+   // CSR read mux: each decoded csr_* select gates that CSR's 32-bit read value, and the gated values are OR-ed together
+assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}}      & 32'h40201104) | // NOTE(review): bit 21 is set, which is the 'V' letter position in the misa extension field - confirm this is intended
+                                  ({32{csr_mvendorid}} & 32'h00000045) |
+                                  ({32{csr_marchid}}   & 32'h00000010) |
+                                  ({32{csr_mimpid}}    & 32'h3) |
+                                  ({32{csr_mhartid}}   & {core_id[31:4], 4'b0}) |
+                                  ({32{csr_mstatus}}   & {{15{1'b0}}, 2'b01, 2'b00, 2'b11, 3'b0, mstatus[1], 3'b0, mstatus[0], 3'b0}) |
+                                  ({32{csr_mtvec}}     & {mtvec[30:1], 1'b0, mtvec[0]}) |
+                                  ({32{csr_mip}}       & {1'b0, mip[5:3], 16'b0, mip[2], 3'b0, mip[1], 3'b0, mip[0], 3'b0}) |
+                                  ({32{csr_mie}}       & {1'b0, mie[5:3], 16'b0, mie[2], 3'b0, mie[1], 3'b0, mie[0], 3'b0}) |
+                                  ({32{csr_mcyclel}}   & mcyclel[31:0]) |
+                                  ({32{csr_mcycleh}}   & mcycleh_inc[31:0]) |
+                                  ({32{csr_minstretl}} & minstretl_read[31:0]) |
+                                  ({32{csr_minstreth}} & minstreth_read[31:0]) |
+                                  ({32{csr_mscratch}}  & mscratch[31:0]) |
+                                  ({32{csr_mepc}}      & {mepc[31:1], 1'b0}) |
+                                  ({32{csr_mcause}}    & mcause[31:0]) |
+                                  ({32{csr_mscause}}   & {28'b0, mscause[3:0]}) |
+                                  ({32{csr_mtval}}     & mtval[31:0]) |
+                                  ({32{csr_mrac}}      & mrac[31:0]) |
+                                  ({32{csr_mdseac}}    & mdseac[31:0]) |
+                                  ({32{csr_meivt}}     & {meivt[31:10], 10'b0}) |
+                                  ({32{csr_meihap}}    & {meivt[31:10], meihap[9:2], 2'b0}) |
+                                  ({32{csr_meicurpl}}  & {28'b0, meicurpl[3:0]}) |
+                                  ({32{csr_meicidpl}}  & {28'b0, meicidpl[3:0]}) |
+                                  ({32{csr_meipt}}     & {28'b0, meipt[3:0]}) |
+                                  ({32{csr_mcgc}}      & {22'b0, mcgc[9:0]}) |
+                                  ({32{csr_mfdc}}      & {13'b0, mfdc[18:0]}) |
+                                  ({32{csr_dcsr}}      & {16'h4000, dcsr[15:2], 2'b11}) |
+                                  ({32{csr_dpc}}       & {dpc[31:1], 1'b0}) |
+                                  ({32{csr_dicad0}}    & dicad0[31:0]) |
+                                  ({32{csr_dicad0h}}   & dicad0h[31:0]) |
+                                  ({32{csr_dicad1}}    & dicad1[31:0]) |
+                                  ({32{csr_dicawics}}  & {7'b0, dicawics[16], 2'b0, dicawics[15:14], 3'b0, dicawics[13:0], 3'b0}) |
+                                  ({32{csr_mtsel}}     & {30'b0, mtsel[1:0]}) |
+                                  ({32{csr_mtdata1}}   & {mtdata1_tsel_out[31:0]}) |
+                                  ({32{csr_mtdata2}}   & {mtdata2_tsel_out[31:0]}) |
+                                  ({32{csr_micect}}    & {micect[31:0]}) |
+                                  ({32{csr_miccmect}}  & {miccmect[31:0]}) |
+                                  ({32{csr_mdccmect}}  & {mdccmect[31:0]}) |
+                                  ({32{csr_mhpmc3}}    & mhpmc3[31:0]) |
+                                  ({32{csr_mhpmc4}}    & mhpmc4[31:0]) |
+                                  ({32{csr_mhpmc5}}    & mhpmc5[31:0]) |
+                                  ({32{csr_mhpmc6}}    & mhpmc6[31:0]) |
+                                  ({32{csr_mhpmc3h}}   & mhpmc3h[31:0]) |
+                                  ({32{csr_mhpmc4h}}   & mhpmc4h[31:0]) |
+                                  ({32{csr_mhpmc5h}}   & mhpmc5h[31:0]) |
+                                  ({32{csr_mhpmc6h}}   & mhpmc6h[31:0]) |
+                                  ({32{csr_mfdht}}     & {26'b0, mfdht[5:0]}) |
+                                  ({32{csr_mfdhs}}     & {30'b0, mfdhs[1:0]}) |
+                                  ({32{csr_mhpme3}}    & {22'b0,mhpme3[9:0]}) |
+                                  ({32{csr_mhpme4}}    & {22'b0,mhpme4[9:0]}) |
+                                  ({32{csr_mhpme5}}    & {22'b0,mhpme5[9:0]}) |
+                                  ({32{csr_mhpme6}}    & {22'b0,mhpme6[9:0]}) |
+                                  ({32{csr_mcountinhibit}} & {25'b0, mcountinhibit[6:0]}) |
+                                  ({32{csr_mpmc}}      & {30'b0, mpmc[1], 1'b0}) |
+                                  ({32{dec_timer_read_d}} & dec_timer_rddata_d[31:0])
+                                  );
+
+
+
+endmodule // eb1_dec_tlu_ctl
+
+module eb1_dec_timer_ctl #( // Two 32-bit internal timers: counters MITCNT0/1, bounds MITB0/1, controls MITCTL0/1, plus their CSR read-back mux
+`include "eb1_param.vh"
+ )
+  (
+   input logic clk,
+   input logic free_l2clk,
+   input logic csr_wr_clk,
+   input logic rst_l,
+   input logic        dec_csr_wen_r_mod,      // csr write enable at wb
+   input logic [11:0] dec_csr_wraddr_r,      // write address for csr
+   input logic [31:0] dec_csr_wrdata_r,   // csr write data at wb
+
+   input logic csr_mitctl0, // csr_mit*: per-CSR read selects, used only by the read mux at the end of this module
+   input logic csr_mitctl1,
+   input logic csr_mitb0,
+   input logic csr_mitb1,
+   input logic csr_mitcnt0,
+   input logic csr_mitcnt1,
+
+
+   input logic dec_pause_state, // Paused
+   input logic dec_tlu_pmu_fw_halted, // pmu/fw halted
+   input logic internal_dbg_halt_timers, // debug halted
+
+   output logic [31:0] dec_timer_rddata_d, // timer CSR read data
+   output logic        dec_timer_read_d, // timer CSR address match
+   output logic        dec_timer_t0_pulse, // timer0 int
+   output logic        dec_timer_t1_pulse, // timer1 int
+
+   input  logic        scan_mode
+   );
+   localparam MITCTL_ENABLE             = 0;
+   localparam MITCTL_ENABLE_HALTED      = 1;
+   localparam MITCTL_ENABLE_PAUSED      = 2;
+
+   logic [31:0] mitcnt0_ns, mitcnt0, mitcnt1_ns, mitcnt1, mitb0, mitb1, mitb0_b, mitb1_b, mitcnt0_inc, mitcnt1_inc;
+   logic [2:0] mitctl0_ns, mitctl0;
+   logic [3:0] mitctl1_ns, mitctl1;
+   logic wr_mitcnt0_r, wr_mitcnt1_r, wr_mitb0_r, wr_mitb1_r, wr_mitctl0_r, wr_mitctl1_r;
+   logic mitcnt0_inc_ok, mitcnt1_inc_ok;
+   logic mitcnt0_inc_cout, mitcnt1_inc_cout;
+ logic mit0_match_ns;
+ logic mit1_match_ns;
+ logic mitctl0_0_b_ns;
+ logic mitctl0_0_b;
+ logic mitctl1_0_b_ns;
+ logic mitctl1_0_b;
+   // A timer matches once its counter is >= its bound; the match drives the interrupt pulse and also clears the counter (see mitcnt*_ns)
+   assign mit0_match_ns = (mitcnt0[31:0] >= mitb0[31:0]);
+   assign mit1_match_ns = (mitcnt1[31:0] >= mitb1[31:0]);
+
+   assign dec_timer_t0_pulse = mit0_match_ns;
+   assign dec_timer_t1_pulse = mit1_match_ns;
+   // ----------------------------------------------------------------------
+   // MITCNT0 (RW)
+   // [31:0] : Internal Timer Counter 0
+
+   localparam MITCNT0       = 12'h7d2;
+
+   assign wr_mitcnt0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCNT0);
+   // Count only when enabled; pause and pmu/fw halt stop counting unless the matching MITCTL0 bit is set; debug halt always stops it
+   assign mitcnt0_inc_ok = mitctl0[MITCTL_ENABLE] & (~dec_pause_state | mitctl0[MITCTL_ENABLE_PAUSED]) & (~dec_tlu_pmu_fw_halted | mitctl0[MITCTL_ENABLE_HALTED]) & ~internal_dbg_halt_timers;
+   // The +1 is split into a low byte and the upper 24 bits so the upper flops are enabled only on a low-byte carry, a write or a match
+   assign {mitcnt0_inc_cout, mitcnt0_inc[7:0]} = mitcnt0[7:0] + {7'b0, 1'b1};
+   assign mitcnt0_inc[31:8] = mitcnt0[31:8] + {23'b0, mitcnt0_inc_cout};
+   // Next-value priority: CSR write data, then clear on match, then the incremented count
+   assign mitcnt0_ns[31:0]  = wr_mitcnt0_r ? dec_csr_wrdata_r[31:0] : mit0_match_ns ? 'b0 : mitcnt0_inc[31:0];
+
+   rvdffe #(24) mitcnt0_ffb      (.*, .clk(free_l2clk), .en(wr_mitcnt0_r | (mitcnt0_inc_ok & mitcnt0_inc_cout) | mit0_match_ns), .din(mitcnt0_ns[31:8]), .dout(mitcnt0[31:8]));
+   rvdffe #(8)  mitcnt0_ffa      (.*, .clk(free_l2clk), .en(wr_mitcnt0_r | mitcnt0_inc_ok | mit0_match_ns),                       .din(mitcnt0_ns[7:0]), .dout(mitcnt0[7:0]));
+
+   // ----------------------------------------------------------------------
+   // MITCNT1 (RW)
+   // [31:0] : Internal Timer Counter 1
+
+   localparam MITCNT1       = 12'h7d5;
+
+   assign wr_mitcnt1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCNT1);
+
+   assign mitcnt1_inc_ok = mitctl1[MITCTL_ENABLE] &
+                           (~dec_pause_state | mitctl1[MITCTL_ENABLE_PAUSED]) &
+                           (~dec_tlu_pmu_fw_halted | mitctl1[MITCTL_ENABLE_HALTED]) &
+                           ~internal_dbg_halt_timers &
+                           (~mitctl1[3] | mit0_match_ns);
+
+   // per the last term above: MITCNT1 counts freely when not cascaded (mitctl1[3] = 0), otherwise only in cycles where timer 0 matches
+   assign {mitcnt1_inc_cout, mitcnt1_inc[7:0]} = mitcnt1[7:0] + {7'b0, 1'b1};
+   assign mitcnt1_inc[31:8] = mitcnt1[31:8] + {23'b0, mitcnt1_inc_cout};
+
+   assign mitcnt1_ns[31:0]  = wr_mitcnt1_r ? dec_csr_wrdata_r[31:0] : mit1_match_ns ? 'b0 : mitcnt1_inc[31:0];
+
+   rvdffe #(24) mitcnt1_ffb      (.*, .clk(free_l2clk), .en(wr_mitcnt1_r | (mitcnt1_inc_ok & mitcnt1_inc_cout) | mit1_match_ns), .din(mitcnt1_ns[31:8]), .dout(mitcnt1[31:8]));
+   rvdffe #(8)  mitcnt1_ffa      (.*, .clk(free_l2clk), .en(wr_mitcnt1_r | mitcnt1_inc_ok | mit1_match_ns),                       .din(mitcnt1_ns[7:0]), .dout(mitcnt1[7:0]));
+
+
+   // ----------------------------------------------------------------------
+   // MITB0 (RW)
+   // [31:0] : Internal Timer Bound 0
+
+   localparam MITB0         = 12'h7d3;
+
+   assign wr_mitb0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITB0);
+   // The bound is stored inverted and re-inverted on the way out. NOTE(review): presumably so a zero flop reset yields an all-ones bound - confirm against rvdffe
+   rvdffe #(32) mitb0_ff      (.*, .en(wr_mitb0_r), .din(~dec_csr_wrdata_r[31:0]), .dout(mitb0_b[31:0]));
+   assign mitb0[31:0] = ~mitb0_b[31:0];
+
+   // ----------------------------------------------------------------------
+   // MITB1 (RW)
+   // [31:0] : Internal Timer Bound 1
+
+   localparam MITB1         = 12'h7d6;
+
+   assign wr_mitb1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITB1);
+   // Stored inverted and re-inverted on the way out, same scheme as MITB0
+   rvdffe #(32) mitb1_ff      (.*, .en(wr_mitb1_r), .din(~dec_csr_wrdata_r[31:0]), .dout(mitb1_b[31:0]));
+   assign mitb1[31:0] = ~mitb1_b[31:0];
+
+   // ----------------------------------------------------------------------
+   // MITCTL0 (RW) Internal Timer Ctl 0
+   // [31:3] : Reserved, reads 0x0
+   // [2]    : Enable while PAUSEd
+   // [1]    : Enable while HALTed
+   // [0]    : Enable (resets to 0x1)
+
+   localparam MITCTL0       = 12'h7d4;
+
+   assign wr_mitctl0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCTL0);
+   assign mitctl0_ns[2:0] = wr_mitctl0_r ? {dec_csr_wrdata_r[2:0]} : {mitctl0[2:0]};
+   // Bit 0 is stored inverted (mitctl0_0_b) and re-inverted below. NOTE(review): presumably how the enable resets to 1 - confirm rvdffs reset value
+   assign mitctl0_0_b_ns = ~mitctl0_ns[0];
+   rvdffs #(3) mitctl0_ff      (.*, .clk(csr_wr_clk), .en(wr_mitctl0_r), .din({mitctl0_ns[2:1], mitctl0_0_b_ns}), .dout({mitctl0[2:1], mitctl0_0_b}));
+   assign mitctl0[0] = ~mitctl0_0_b;
+
+   // ----------------------------------------------------------------------
+   // MITCTL1 (RW) Internal Timer Ctl 1
+   // [31:4] : Reserved, reads 0x0
+   // [3]    : Cascade
+   // [2]    : Enable while PAUSEd
+   // [1]    : Enable while HALTed
+   // [0]    : Enable (resets to 0x1)
+
+   localparam MITCTL1       = 12'h7d7;
+
+   assign wr_mitctl1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCTL1);
+   assign mitctl1_ns[3:0] = wr_mitctl1_r ? {dec_csr_wrdata_r[3:0]} : {mitctl1[3:0]};
+   // Bit 0 is stored inverted (mitctl1_0_b) and re-inverted below, same scheme as MITCTL0
+   assign mitctl1_0_b_ns = ~mitctl1_ns[0];
+   rvdffs #(4) mitctl1_ff      (.*, .clk(csr_wr_clk), .en(wr_mitctl1_r), .din({mitctl1_ns[3:1], mitctl1_0_b_ns}), .dout({mitctl1[3:1], mitctl1_0_b}));
+   assign mitctl1[0] = ~mitctl1_0_b;
+   assign dec_timer_read_d = csr_mitcnt1 | csr_mitcnt0 | csr_mitb1 | csr_mitb0 | csr_mitctl0 | csr_mitctl1; // set when any of the six timer CSRs is selected
+   assign dec_timer_rddata_d[31:0] = ( ({32{csr_mitcnt0}}      & mitcnt0[31:0]) |
+                                       ({32{csr_mitcnt1}}      & mitcnt1[31:0]) |
+                                       ({32{csr_mitb0}}        & mitb0[31:0]) |
+                                       ({32{csr_mitb1}}        & mitb1[31:0]) |
+                                       ({32{csr_mitctl0}}      & {29'b0, mitctl0[2:0]}) |
+                                       ({32{csr_mitctl1}}      & {28'b0, mitctl1[3:0]})
+                                       );
+
+
+endmodule // eb1_dec_timer_ctl
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_dec_trigger.sv b/verilog/rtl/BrqRV_EB1/design/eb1_dec_trigger.sv
new file mode 100644
index 0000000..05f9d12
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_dec_trigger.sv
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: DEC Trigger Logic
+// Comments:
+//
+//********************************************************************************
+module eb1_dec_trigger   // PC-match logic for four trigger slots: continuous assigns plus one rvmaskandmatch instance per slot; no clock or reset ports
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+
+   input eb1_trigger_pkt_t [3:0] trigger_pkt_any,           // Packet from tlu. 'select': 0-pc, 1-opcode. 'execute' needs to be set for dec triggers to fire. 'match': 1 - do mask, 0 - full match
+   input logic [31:1]  dec_i0_pc_d,                          // i0 pc (bit 0 is not carried on this port)
+
+   output logic [3:0] dec_i0_trigger_match_d                 // Trigger match, one bit per trigger slot
+);
+   // Internal per-slot signals: the 32-bit compare operand and the raw compare result returned by rvmaskandmatch
+   logic [3:0][31:0]  dec_i0_match_data;
+   logic [3:0]        dec_i0_trigger_data_match;
+   // One compare slice per trigger slot. The generate block is unlabeled, so tools give it an implicit name (genblk<n>).
+   for (genvar i=0; i<4; i++) begin
+      assign dec_i0_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]});      // select=0 and execute=1: operand is {PC[31:1], tdata2[0]}; otherwise all zeros. Bit 0 is copied from tdata2, presumably so it can never mismatch - confirm against rvmaskandmatch
+      // tdata2 drives .mask and the packet's 'match' bit drives .masken; the compare semantics live in rvmaskandmatch, which is not visible here
+      rvmaskandmatch trigger_i0_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(dec_i0_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(dec_i0_trigger_data_match[i]));
+      // A slot reports a match only when its 'execute' and 'm' bits are set and the compare result is high
+      assign dec_i0_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i0_trigger_data_match[i];
+   end
+
+endmodule // eb1_dec_trigger
+
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_def.sv b/verilog/rtl/BrqRV_EB1/design/eb1_def.sv
new file mode 100644
index 0000000..15df549
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_def.sv
@@ -0,0 +1,405 @@
+// eb1_pkg: shared packet (packed struct) and enum type definitions used by the eb1 core modules
+//`ifndef eb1_DEF_SV
+//`define eb1_DEF_SV
+package eb1_pkg;   // Shared struct/enum typedefs for the eb1 core; modules pull them in with "import eb1_pkg::*"
+// NOTE: every struct below is 'packed': the first-listed member occupies the most-significant bits, so member order and widths are part of the bit layout.
+typedef struct packed {   // Instruction trace packet, 104 bits total
+                       logic  trace_rv_i_valid_ip;
+                       logic [31:0] trace_rv_i_insn_ip;
+                       logic [31:0] trace_rv_i_address_ip;
+                       logic  trace_rv_i_exception_ip;
+                       logic [4:0] trace_rv_i_ecause_ip;
+                       logic  trace_rv_i_interrupt_ip;
+                       logic [31:0] trace_rv_i_tval_ip;
+                       } eb1_trace_pkt_t;
+
+
+typedef enum logic [3:0] {   // 4-bit instruction-type code; all 16 encodings are assigned. Used as eb1_trap_pkt_t.pmu_i0_itype
+                          NULL     = 4'b0000,
+                          MUL      = 4'b0001,
+                          LOAD     = 4'b0010,
+                          STORE    = 4'b0011,
+                          ALU      = 4'b0100,
+                          CSRREAD  = 4'b0101,
+                          CSRWRITE = 4'b0110,
+                          CSRRW    = 4'b0111,
+                          EBREAK   = 4'b1000,
+                          ECALL    = 4'b1001,
+                          FENCE    = 4'b1010,
+                          FENCEI   = 4'b1011,
+                          MRET     = 4'b1100,
+                          CONDBR   = 4'b1101,
+                          JAL      = 4'b1110,
+                          BITMANIPU = 4'b1111
+                          } eb1_inst_pkt_t;
+
+typedef struct packed {   // 10-bit entry: valid, wb, 3-bit tag, 5-bit rd (name suggests a load CAM entry; consumer not visible here)
+                       logic valid;
+                       logic wb;
+                       logic [2:0] tag;
+                       logic [4:0] rd;
+                       } eb1_load_cam_pkt_t;
+
+typedef struct packed {   // Three 1-bit pc0_* flags: call, ret, pc4 (consumer not visible here)
+                       logic pc0_call;
+                       logic pc0_ret;
+                       logic pc0_pc4;
+                       } eb1_rets_pkt_t;
+typedef struct packed {   // Branch packet: carries a 12-bit toffset, 2-bit hist and a 31-bit predicted return target
+                       logic valid;
+                       logic [11:0] toffset;
+                       logic [1:0] hist;
+                       logic br_error;
+                       logic br_start_error;
+                       logic  bank;
+                       logic [31:1] prett;  // predicted ret target
+                       logic way;
+                       logic ret;
+                       } eb1_br_pkt_t;
+
+typedef struct packed {   // Reduced branch packet: valid, 2-bit hist, the two error flags, way, middle
+                       logic valid;
+                       logic [1:0] hist;
+                       logic br_error;
+                       logic br_start_error;
+                       logic way;
+                       logic middle;
+                       } eb1_br_tlu_pkt_t;
+
+typedef struct packed {   // Branch predict packet; eb1_exu receives it from DEC (dec_i0_predict_p_d) and outputs it as exu_mp_pkt
+                       logic misp;
+                       logic ataken;
+                       logic boffset;
+                       logic pc4;
+                       logic [1:0] hist;
+                       logic [11:0] toffset;
+                       logic valid;
+                       logic br_error;
+                       logic br_start_error;
+                       logic pcall;
+                       logic pja;
+                       logic way;
+                       logic pret;
+                       // for power use the pret bit to clock the prett field
+                       logic [31:1] prett;
+                       } eb1_predict_pkt_t;
+
+typedef struct packed {   // Trap packet; members are grouped by how often they are expected to change (see the markers below)
+                       // unlikely to change
+                       logic icaf;
+                       logic icaf_second;
+                       logic [1:0] icaf_type;
+                       logic fence_i;
+                       logic [3:0] i0trigger;
+                       logic pmu_i0_br_unpred;     // pmu
+                       logic pmu_divide;
+                       // likely to change
+                       logic legal;
+                       logic pmu_lsu_misaligned;
+                       eb1_inst_pkt_t pmu_i0_itype;        // pmu - instruction type
+                       } eb1_trap_pkt_t;
+
+typedef struct packed {   // Destination packet; same "unlikely / likely to change" grouping as eb1_trap_pkt_t
+                       // unlikely to change
+                       logic i0div;
+                       logic csrwen;
+                       logic csrwonly;
+                       logic [11:0] csrwaddr;
+                       // likely to change
+                       logic [4:0] i0rd;
+                       logic i0load;
+                       logic i0store;
+                       logic i0v;
+                       logic i0valid;
+                       } eb1_dest_pkt_t;
+
+typedef struct packed {   // Three 1-bit class flags: mul, load, alu
+                       logic mul;
+                       logic load;
+                       logic alu;
+                       } eb1_class_pkt_t;
+
+typedef struct packed {   // Three 5-bit register indices: rs1, rs2, rd (15 bits total)
+                       logic [4:0] rs1;
+                       logic [4:0] rs2;
+                       logic [4:0] rd;
+                       } eb1_reg_pkt_t;
+
+
+typedef struct packed {   // ALU control packet, one 1-bit flag per member; eb1_exu receives it on port i0_ap
+                       logic clz;
+                       logic ctz;
+                       logic pcnt;
+                       logic sext_b;
+                       logic sext_h;
+                       logic slo;
+                       logic sro;
+                       logic min;
+                       logic max;
+                       logic pack;
+                       logic packu;
+                       logic packh;
+                       logic rol;
+                       logic ror;
+                       logic grev;
+                       logic gorc;
+                       logic zbb;
+                       logic sbset;
+                       logic sbclr;
+                       logic sbinv;
+                       logic sbext;
+                       logic sh1add;
+                       logic sh2add;
+                       logic sh3add;
+                       logic zba;
+                       logic land;
+                       logic lor;
+                       logic lxor;
+                       logic sll;
+                       logic srl;
+                       logic sra;
+                       logic beq;
+                       logic bne;
+                       logic blt;
+                       logic bge;
+                       logic add;
+                       logic sub;
+                       logic slt;
+                       logic unsign;
+                       logic jal;
+                       logic predict_t;
+                       logic predict_nt;
+                       logic csr_write;
+                       logic csr_imm;
+                       } eb1_alu_pkt_t;
+
+typedef struct packed {   // LSU control packet, one 1-bit flag per member
+                       logic fast_int;
+/* verilator lint_off SYMRSVDWORD */
+                       logic stack;
+/* verilator lint_on SYMRSVDWORD */
+                       logic by;
+                       logic half;
+                       logic word;
+                       logic dword;  // for dma
+                       logic load;
+                       logic store;
+                       logic unsign;
+                       logic dma;    // dma pkt
+                       logic store_data_bypass_d;
+                       logic load_ldst_bypass_d;
+                       logic store_data_bypass_m;
+                       logic valid;
+                       } eb1_lsu_pkt_t;
+
+typedef struct packed {   // LSU error packet: type flags, 4-bit mscause, 32-bit addr, ECC and valid flags
+                      logic inst_type;   //0: Load, 1: Store
+                      //logic dma_valid;
+                      logic exc_type;    //0: MisAligned, 1: Access Fault
+                      logic [3:0] mscause;
+                      logic [31:0] addr;
+                      logic single_ecc_error;
+                      logic exc_valid;
+                      } eb1_lsu_error_pkt_t;
+
+typedef struct packed {   // Decode packet, one 1-bit flag per member
+                       logic clz;
+                       logic ctz;
+                       logic pcnt;
+                       logic sext_b;
+                       logic sext_h;
+                       logic slo;
+                       logic sro;
+                       logic min;
+                       logic max;
+                       logic pack;
+                       logic packu;
+                       logic packh;
+                       logic rol;
+                       logic ror;
+                       logic grev;
+                       logic gorc;
+                       logic zbb;
+                       logic sbset;
+                       logic sbclr;
+                       logic sbinv;
+                       logic sbext;
+                       logic zbs;
+                       logic bext;
+                       logic bdep;
+                       logic zbe;
+                       logic clmul;
+                       logic clmulh;
+                       logic clmulr;
+                       logic zbc;
+                       logic shfl;
+                       logic unshfl;
+                       logic zbp;
+                       logic crc32_b;
+                       logic crc32_h;
+                       logic crc32_w;
+                       logic crc32c_b;
+                       logic crc32c_h;
+                       logic crc32c_w;
+                       logic zbr;
+                       logic bfp;
+                       logic zbf;
+                       logic sh1add;
+                       logic sh2add;
+                       logic sh3add;
+                       logic zba;
+                       logic alu;
+                       logic rs1;
+                       logic rs2;
+                       logic imm12;
+                       logic rd;
+                       logic shimm5;
+                       logic imm20;
+                       logic pc;
+                       logic load;
+                       logic store;
+                       logic lsu;
+                       logic add;
+                       logic sub;
+                       logic land;
+                       logic lor;
+                       logic lxor;
+                       logic sll;
+                       logic sra;
+                       logic srl;
+                       logic slt;
+                       logic unsign;
+                       logic condbr;
+                       logic beq;
+                       logic bne;
+                       logic bge;
+                       logic blt;
+                       logic jal;
+                       logic by;
+                       logic half;
+                       logic word;
+                       logic csr_read;
+                       logic csr_clr;
+                       logic csr_set;
+                       logic csr_write;
+                       logic csr_imm;
+                       logic presync;
+                       logic postsync;
+                       logic ebreak;
+                       logic ecall;
+                       logic mret;
+                       logic mul;
+                       logic rs1_sign;
+                       logic rs2_sign;
+                       logic low;
+                       logic div;
+                       logic rem;
+                       logic fence;
+                       logic fence_i;
+                       logic pm_alu;
+                       logic legal;
+                       } eb1_dec_pkt_t;
+
+
+typedef struct packed {   // Multiply packet, one 1-bit flag per member; eb1_exu receives it on port mul_p
+                       logic valid;
+                       logic rs1_sign;
+                       logic rs2_sign;
+                       logic low;
+                       logic bext;
+                       logic bdep;
+                       logic clmul;
+                       logic clmulh;
+                       logic clmulr;
+                       logic grev;
+                       logic gorc;
+                       logic shfl;
+                       logic unshfl;
+                       logic crc32_b;
+                       logic crc32_h;
+                       logic crc32_w;
+                       logic crc32c_b;
+                       logic crc32c_h;
+                       logic crc32c_w;
+                       logic bfp;
+                       } eb1_mul_pkt_t;
+
+typedef struct packed {   // Divide packet: valid, unsign, rem; eb1_exu receives it on port div_p
+                       logic valid;
+                       logic unsign;
+                       logic rem;
+                       } eb1_div_pkt_t;
+
+typedef struct packed {   // 12-bit control packet (ccm); member meanings are not documented in this file
+                       logic        TEST1;
+                       logic        RME;
+                       logic [3:0]  RM;
+
+                       logic        LS;
+                       logic        DS;
+                       logic        SD;
+                       logic        TEST_RNM;
+                       logic        BC1;
+                       logic        BC2;
+                      } eb1_ccm_ext_in_pkt_t;
+
+typedef struct packed {   // 12-bit control packet (dccm); member list identical to eb1_ccm_ext_in_pkt_t
+                       logic        TEST1;
+                       logic        RME;
+                       logic [3:0]  RM;
+                       logic        LS;
+                       logic        DS;
+                       logic        SD;
+                       logic        TEST_RNM;
+                       logic        BC1;
+                       logic        BC2;
+                      } eb1_dccm_ext_in_pkt_t;
+
+
+typedef struct packed {   // 12-bit control packet (ic_data); member list identical to eb1_ccm_ext_in_pkt_t
+                       logic        TEST1;
+                       logic        RME;
+                       logic [3:0]  RM;
+                       logic        LS;
+                       logic        DS;
+                       logic        SD;
+                       logic        TEST_RNM;
+                       logic        BC1;
+                       logic        BC2;
+                      } eb1_ic_data_ext_in_pkt_t;
+
+
+typedef struct packed {   // 12-bit control packet (ic_tag); member list identical to eb1_ccm_ext_in_pkt_t
+                       logic        TEST1;
+                       logic        RME;
+                       logic [3:0]  RM;
+                       logic        LS;
+                       logic        DS;
+                       logic        SD;
+                       logic        TEST_RNM;
+                       logic        BC1;
+                       logic        BC2;
+                      } eb1_ic_tag_ext_in_pkt_t;
+
+
+
+typedef struct packed {   // Trigger packet, 38 bits; eb1_dec_trigger takes a 4-entry array of these (trigger_pkt_any)
+                        logic        select;    // 0: pc, 1: opcode (per the eb1_dec_trigger port comment)
+                        logic        match;     // 1: do mask, 0: full match (per the eb1_dec_trigger port comment)
+                        logic        store;
+                        logic        load;
+                        logic        execute;   // must be set for dec triggers to fire (per the eb1_dec_trigger port comment)
+                        logic        m;
+                        logic [31:0] tdata2;
+            } eb1_trigger_pkt_t;
+
+
+typedef struct packed {   // I-cache debug packet: 71-bit write data, 17-bit dicawics field, read/write valid flags
+                        logic [70:0]  icache_wrdata; // {dicad1[1:0], dicad0h[31:0], dicad0[31:0]}  NOTE(review): the listed fields total 66 bits but this member is 71 bits wide - confirm the dicad1 slice
+                        logic [16:0]  icache_dicawics; // Arraysel:24, Waysel:21:20, Index:16:3  NOTE(review): these bit numbers exceed [16:0]; presumably positions in the source register, not in this member - confirm
+                        logic         icache_rd_valid;
+                        logic         icache_wr_valid;
+            } eb1_cache_debug_pkt_t;
+//`endif
+
+endpackage // eb1_pkg
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_exu.sv b/verilog/rtl/BrqRV_EB1/design/eb1_exu.sv
new file mode 100644
index 0000000..5df586a
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_exu.sv
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+module eb1_exu
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+)
+  (
+   input logic          clk,                                           // Top level clock
+   input logic          rst_l,                                         // Reset
+   input logic          scan_mode,                                     // Scan control
+
+   input logic  [1:0]   dec_data_en,                                   // Clock enable {x,r}, one cycle pulse
+   input logic  [1:0]   dec_ctl_en,                                    // Clock enable {x,r}, two cycle pulse
+   input logic  [31:0]  dbg_cmd_wrdata,                                // Debug data   to primary I0 RS1
+   input eb1_alu_pkt_t i0_ap,                                         // DEC alu {valid,predecodes}
+
+   input logic          dec_debug_wdata_rs1_d,                         // Debug select to primary I0 RS1
+
+   input eb1_predict_pkt_t dec_i0_predict_p_d,                        // DEC branch predict packet
+   input logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d,                // DEC predict fghr
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d,     // DEC predict index
+   input logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d,               // DEC predict branch tag
+
+   input logic  [31:0]  lsu_result_m,                                  // Load result M-stage
+   input logic  [31:0]  lsu_nonblock_load_data,                        // nonblock load data
+   input logic          dec_i0_rs1_en_d,                               // Qualify GPR RS1 data
+   input logic          dec_i0_rs2_en_d,                               // Qualify GPR RS2 data
+   input logic  [31:0]  gpr_i0_rs1_d,                                  // DEC data gpr
+   input logic  [31:0]  gpr_i0_rs2_d,                                  // DEC data gpr
+   input logic  [31:0]  dec_i0_immed_d,                                // DEC data immediate
+   input logic  [31:0]  dec_i0_result_r,                               // DEC result in R-stage
+   input logic  [12:1]  dec_i0_br_immed_d,                             // Branch immediate
+   input logic          dec_i0_alu_decode_d,                           // Valid to X-stage ALU
+   input logic          dec_i0_branch_d,                               // Branch in D-stage
+   input logic          dec_i0_select_pc_d,                            // PC select to RS1
+   input logic  [31:1]  dec_i0_pc_d,                                   // Instruction PC
+   input logic  [3:0]   dec_i0_rs1_bypass_en_d,                        // DEC bypass select  1 - X-stage, 0 - dec bypass data
+   input logic  [3:0]   dec_i0_rs2_bypass_en_d,                        // DEC bypass select  1 - X-stage, 0 - dec bypass data
+   input logic          dec_csr_ren_d,                                 // CSR read select
+   input logic  [31:0]  dec_csr_rddata_d,                              // CSR read data
+
+   input logic          dec_qual_lsu_d,                                // LSU instruction at D.  Use to quiet LSU operands
+   input eb1_mul_pkt_t mul_p,                                         // DEC {valid, operand signs, low, operand bypass}
+   input eb1_div_pkt_t div_p,                                         // DEC {valid, unsigned, rem}
+   input logic          dec_div_cancel,                                // Cancel the divide operation
+
+   input logic  [31:1]  pred_correct_npc_x,                            // DEC NPC for correctly predicted branch
+
+   input logic          dec_tlu_flush_lower_r,                         // Flush divide and secondary ALUs
+   input logic  [31:1]  dec_tlu_flush_path_r,                          // Redirect target
+
+
+   input logic         dec_extint_stall,                               // External stall mux select
+   input logic [31:2]  dec_tlu_meihap,                                 // External stall mux data
+
+
+   output logic [31:0]  exu_lsu_rs1_d,                                 // LSU operand
+   output logic [31:0]  exu_lsu_rs2_d,                                 // LSU operand
+
+   output logic         exu_flush_final,                               // Pipe is being flushed this cycle
+   output logic [31:1]  exu_flush_path_final,                          // Target for the oldest flush source
+
+   output logic [31:0]  exu_i0_result_x,                               // Primary ALU result to DEC
+   output logic [31:1]  exu_i0_pc_x,                                   // Primary PC  result to DEC
+   output logic [31:0]  exu_csr_rs1_x,                                 // RS1 source for a CSR instruction
+
+   output logic [31:1]  exu_npc_r,                                     // Divide NPC
+   output logic [1:0]   exu_i0_br_hist_r,                              // to DEC  I0 branch history
+   output logic         exu_i0_br_error_r,                             // to DEC  I0 branch error
+   output logic         exu_i0_br_start_error_r,                       // to DEC  I0 branch start error
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r,     // to DEC  I0 branch index
+   output logic         exu_i0_br_valid_r,                             // to DEC  I0 branch valid
+   output logic         exu_i0_br_mp_r,                                // to DEC  I0 branch mispredict
+   output logic         exu_i0_br_middle_r,                            // to DEC  I0 branch middle
+   output logic [pt.BHT_GHR_SIZE-1:0]  exu_i0_br_fghr_r,               // to DEC  I0 branch fghr
+   output logic         exu_i0_br_way_r,                               // to DEC  I0 branch way
+
+   output eb1_predict_pkt_t exu_mp_pkt,                               // Mispredict branch packet
+   output logic [pt.BHT_GHR_SIZE-1:0]  exu_mp_eghr,                    // Mispredict global history
+   output logic [pt.BHT_GHR_SIZE-1:0]  exu_mp_fghr,                    // Mispredict fghr
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  exu_mp_index,         // Mispredict index
+   output logic [pt.BTB_BTAG_SIZE-1:0]  exu_mp_btag,                   // Mispredict btag
+
+
+   output logic         exu_pmu_i0_br_misp,                            // to PMU - I0 E4 branch mispredict
+   output logic         exu_pmu_i0_br_ataken,                          // to PMU - I0 E4 taken
+   output logic         exu_pmu_i0_pc4,                                // to PMU - I0 E4 PC
+
+
+   output logic [31:0]  exu_div_result,                                // Divide result
+   output logic         exu_div_wren                                   // Divide write enable to GPR
+  );
+
+
+
+
+   logic [31:0]                i0_rs1_bypass_data_d;
+   logic [31:0]                i0_rs2_bypass_data_d;
+   logic                       i0_rs1_bypass_en_d;
+   logic                       i0_rs2_bypass_en_d;
+   logic [31:0]                i0_rs1_d,  i0_rs2_d;
+   logic [31:0]                muldiv_rs1_d;
+   logic [31:1]                pred_correct_npc_r;
+   logic                       i0_pred_correct_upper_r;
+   logic [31:1]                i0_flush_path_upper_r;
+   logic                       x_data_en, x_data_en_q1, x_data_en_q2, r_data_en, r_data_en_q2;
+   logic                       x_ctl_en,  r_ctl_en;
+
+   logic [pt.BHT_GHR_SIZE-1:0] ghr_d_ns, ghr_d;
+   logic [pt.BHT_GHR_SIZE-1:0] ghr_x_ns, ghr_x;
+   logic                       i0_taken_d;
+   logic                       i0_taken_x;
+   logic                       i0_valid_d;
+   logic                       i0_valid_x;
+   logic [pt.BHT_GHR_SIZE-1:0] after_flush_eghr;
+
+   eb1_predict_pkt_t          final_predict_mp;
+   eb1_predict_pkt_t          i0_predict_newp_d;
+
+   logic                       flush_in_d;
+   logic [31:0]                alu_result_x;
+
+   logic                       mul_valid_x;
+   logic [31:0]                mul_result_x;
+
+   eb1_predict_pkt_t          i0_pp_r;
+
+   logic                       i0_flush_upper_d;
+   logic [31:1]                i0_flush_path_d;
+   eb1_predict_pkt_t          i0_predict_p_d;
+   logic                       i0_pred_correct_upper_d;
+
+   logic                       i0_flush_upper_x;
+   logic [31:1]                i0_flush_path_x;
+   eb1_predict_pkt_t          i0_predict_p_x;
+   logic                       i0_pred_correct_upper_x;
+   logic                       i0_branch_x;
+
+   localparam PREDPIPESIZE = pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1+pt.BHT_GHR_SIZE+pt.BTB_BTAG_SIZE;
+   logic [PREDPIPESIZE-1:0]    predpipe_d, predpipe_x, predpipe_r, final_predpipe_mp;
+
+
+
+
+   rvdffpcie #(31)                       i_flush_path_x_ff    (.*, .clk(clk),        .en ( x_data_en     ),  .din ( i0_flush_path_d[31:1]         ),  .dout( i0_flush_path_x[31:1]      ) );
+   rvdffe #(32)                          i_csr_rs1_x_ff       (.*, .clk(clk),        .en ( x_data_en_q1  ),  .din ( i0_rs1_d[31:0]                ),  .dout( exu_csr_rs1_x[31:0]        ) );
+   rvdffppe #($bits(eb1_predict_pkt_t)) i_predictpacket_x_ff (.*, .clk(clk),        .en ( x_data_en     ),  .din ( i0_predict_p_d                ),  .dout( i0_predict_p_x             ) );
+   rvdffe #(PREDPIPESIZE)                i_predpipe_x_ff      (.*, .clk(clk),        .en ( x_data_en_q2  ),  .din ( predpipe_d                    ),  .dout( predpipe_x                 ) );
+   rvdffe #(PREDPIPESIZE)                i_predpipe_r_ff      (.*, .clk(clk),        .en ( r_data_en_q2  ),  .din ( predpipe_x                    ),  .dout( predpipe_r                 ) );
+
+   rvdffe #(4+pt.BHT_GHR_SIZE)          i_x_ff               (.*, .clk(clk),        .en ( x_ctl_en      ),  .din ({i0_valid_d,i0_taken_d,i0_flush_upper_d,i0_pred_correct_upper_d,ghr_x_ns[pt.BHT_GHR_SIZE-1:0]} ),
+                                                                                                            .dout({i0_valid_x,i0_taken_x,i0_flush_upper_x,i0_pred_correct_upper_x,ghr_x[pt.BHT_GHR_SIZE-1:0]}    ) );
+
+   rvdffppe #($bits(eb1_predict_pkt_t)+1) i_r_ff0         (.*, .clk(clk),        .en ( r_ctl_en      ),  .din ({i0_pred_correct_upper_x, i0_predict_p_x}),
+                                                                                                          .dout({i0_pred_correct_upper_r, i0_pp_r       }) );
+
+   rvdffpcie #(31)                      i_flush_r_ff         (.*, .clk(clk),        .en ( r_data_en     ),  .din ( i0_flush_path_x[31:1]         ),  .dout( i0_flush_path_upper_r[31:1]) );
+   rvdffpcie #(31)                      i_npc_r_ff           (.*, .clk(clk),        .en ( r_data_en     ),  .din ( pred_correct_npc_x[31:1]      ),  .dout( pred_correct_npc_r[31:1]   ) );
+
+   rvdffie #(pt.BHT_GHR_SIZE+2,1)       i_misc_ff            (.*, .clk(clk),                                .din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0], mul_p.valid, dec_i0_branch_d}),
+                                                                                                            .dout({ghr_d[pt.BHT_GHR_SIZE-1:0]   , mul_valid_x, i0_branch_x}) );
+
+
+
+
+
+   assign predpipe_d[PREDPIPESIZE-1:0]
+                                   = {i0_predict_fghr_d, i0_predict_index_d, i0_predict_btag_d};
+
+
+   assign i0_rs1_bypass_en_d       = dec_i0_rs1_bypass_en_d[0] | dec_i0_rs1_bypass_en_d[1] | dec_i0_rs1_bypass_en_d[2] | dec_i0_rs1_bypass_en_d[3];
+   assign i0_rs2_bypass_en_d       = dec_i0_rs2_bypass_en_d[0] | dec_i0_rs2_bypass_en_d[1] | dec_i0_rs2_bypass_en_d[2] | dec_i0_rs2_bypass_en_d[3];
+
+   assign i0_rs1_bypass_data_d[31:0]=({32{dec_i0_rs1_bypass_en_d[0]}} & dec_i0_result_r[31:0]       ) |
+                                     ({32{dec_i0_rs1_bypass_en_d[1]}} & lsu_result_m[31:0]          ) |
+                                     ({32{dec_i0_rs1_bypass_en_d[2]}} & exu_i0_result_x[31:0]       ) |
+                                     ({32{dec_i0_rs1_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
+
+   assign i0_rs2_bypass_data_d[31:0]=({32{dec_i0_rs2_bypass_en_d[0]}} & dec_i0_result_r[31:0]       ) |
+                                     ({32{dec_i0_rs2_bypass_en_d[1]}} & lsu_result_m[31:0]          ) |
+                                     ({32{dec_i0_rs2_bypass_en_d[2]}} & exu_i0_result_x[31:0]       ) |
+                                     ({32{dec_i0_rs2_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
+
+
+   assign i0_rs1_d[31:0]           = ({32{ i0_rs1_bypass_en_d                                           }}             & i0_rs1_bypass_data_d[31:0]) |    // bypass data; every other term is gated off by ~bypass_en
+                                     ({32{~i0_rs1_bypass_en_d &  dec_i0_select_pc_d                     }}             & {dec_i0_pc_d[31:1],1'b0}  ) |    // for jal's
+                                     ({32{~i0_rs1_bypass_en_d &  dec_debug_wdata_rs1_d                  }}             & dbg_cmd_wrdata[31:0]      ) |    // debug command write data
+                                     ({32{~i0_rs1_bypass_en_d & ~dec_debug_wdata_rs1_d & dec_i0_rs1_en_d}}             & gpr_i0_rs1_d[31:0]        );     // register-file read value
+   // B operand (ALU b_in, MUL rs2_in, DIV divisor): bypass data when enabled, otherwise the GPR value (when rs2 is read) OR'd with the immediate
+   assign i0_rs2_d[31:0]           = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}}                                      & gpr_i0_rs2_d[31:0]        ) |
+                                     ({32{~i0_rs2_bypass_en_d                  }}                                      & dec_i0_immed_d[31:0]      ) |
+                                     ({32{ i0_rs2_bypass_en_d                  }}                                      & i0_rs2_bypass_data_d[31:0]);
+
+   // LSU operands: every term is qualified by dec_qual_lsu_d; while dec_extint_stall is set rs1 carries {dec_tlu_meihap[31:2],2'b0} instead
+   assign exu_lsu_rs1_d[31:0]      = ({32{~i0_rs1_bypass_en_d & ~dec_extint_stall & dec_i0_rs1_en_d & dec_qual_lsu_d}} & gpr_i0_rs1_d[31:0]        ) |
+                                     ({32{ i0_rs1_bypass_en_d & ~dec_extint_stall                   & dec_qual_lsu_d}} & i0_rs1_bypass_data_d[31:0]) |
+                                     ({32{                       dec_extint_stall                   & dec_qual_lsu_d}} & {dec_tlu_meihap[31:2],2'b0});
+
+   assign exu_lsu_rs2_d[31:0]      = ({32{~i0_rs2_bypass_en_d & ~dec_extint_stall & dec_i0_rs2_en_d & dec_qual_lsu_d}} & gpr_i0_rs2_d[31:0]        ) |
+                                     ({32{ i0_rs2_bypass_en_d & ~dec_extint_stall                   & dec_qual_lsu_d}} & i0_rs2_bypass_data_d[31:0]);
+
+   // MUL/DIV A operand: GPR or bypass only (no PC or debug-data terms, unlike i0_rs1_d)
+   assign muldiv_rs1_d[31:0]       = ({32{~i0_rs1_bypass_en_d & dec_i0_rs1_en_d}}                                      & gpr_i0_rs1_d[31:0]        ) |
+                                     ({32{ i0_rs1_bypass_en_d                  }}                                      & i0_rs1_bypass_data_d[31:0]);
+
+   // Stage enables: bit [1] of dec_data_en / dec_ctl_en drives the x_* enables, bit [0] the r_* enables
+   assign x_data_en                =  dec_data_en[1];
+   assign x_data_en_q1             =  dec_data_en[1] & dec_csr_ren_d;
+   assign x_data_en_q2             =  dec_data_en[1] & dec_i0_branch_d;
+   assign r_data_en                =  dec_data_en[0];
+   assign r_data_en_q2             =  dec_data_en[0] & i0_branch_x;
+   assign x_ctl_en                 =  dec_ctl_en[1];
+   assign r_ctl_en                 =  dec_ctl_en[0];
+
+
+
+
+   eb1_exu_alu_ctl #(.pt(pt)) i_alu  (.*,                                      // ports not listed here connect by name through .*
+                          .enable            ( x_data_en                   ),   // I
+                          .pp_in             ( i0_predict_newp_d           ),   // I
+                          .valid_in          ( dec_i0_alu_decode_d         ),   // I
+                          .flush_upper_x     ( i0_flush_upper_x            ),   // I
+                          .flush_lower_r     ( dec_tlu_flush_lower_r       ),   // I
+                          .a_in              ( i0_rs1_d[31:0]              ),   // I
+                          .b_in              ( i0_rs2_d[31:0]              ),   // I
+                          .pc_in             ( dec_i0_pc_d[31:1]           ),   // I
+                          .brimm_in          ( dec_i0_br_immed_d[12:1]     ),   // I
+                          .ap                ( i0_ap                       ),   // I
+                          .csr_ren_in        ( dec_csr_ren_d               ),   // I
+                          .csr_rddata_in     ( dec_csr_rddata_d[31:0]      ),   // I
+                          .result_ff         ( alu_result_x[31:0]          ),   // O
+                          .flush_upper_out   ( i0_flush_upper_d            ),   // O
+                          .flush_final_out   ( exu_flush_final             ),   // O
+                          .flush_path_out    ( i0_flush_path_d[31:1]       ),   // O
+                          .predict_p_out     ( i0_predict_p_d              ),   // O
+                          .pred_correct_out  ( i0_pred_correct_upper_d     ),   // O
+                          .pc_ff             ( exu_i0_pc_x[31:1]           ));  // O
+
+
+   // Multiplier: the packet and both operands are forced to zero unless mul_p.valid is set
+   eb1_exu_mul_ctl #(.pt(pt)) i_mul   (.*,
+                          .mul_p             ( mul_p              & {$bits(eb1_mul_pkt_t){mul_p.valid}} ),   // I
+                          .rs1_in            ( muldiv_rs1_d[31:0] & {32{mul_p.valid}}                    ),   // I
+                          .rs2_in            ( i0_rs2_d[31:0]     & {32{mul_p.valid}}                    ),   // I
+                          .result_x          ( mul_result_x[31:0]                                        ));  // O
+
+
+   // Divider: takes the same muldiv_rs1_d / i0_rs2_d operands, ungated
+   eb1_exu_div_ctl #(.pt(pt)) i_div   (.*,
+                          .cancel            ( dec_div_cancel              ),   // I
+                          .dp                ( div_p                       ),   // I
+                          .dividend          ( muldiv_rs1_d[31:0]          ),   // I
+                          .divisor           ( i0_rs2_d[31:0]              ),   // I
+                          .finish_dly        ( exu_div_wren                ),   // O
+                          .out               ( exu_div_result[31:0]        ));  // O
+
+
+   // X-stage result: multiplier output when mul_valid_x is set, otherwise the registered ALU result
+   assign exu_i0_result_x[31:0]    =  (mul_valid_x)  ?  mul_result_x[31:0]  :  alu_result_x[31:0];
+
+
+
+
+   always_comb begin                                                    // builds the prediction packet passed to i_alu as pp_in
+      i0_predict_newp_d            =  dec_i0_predict_p_d;               // start from the decode-stage packet
+      i0_predict_newp_d.boffset    =  dec_i0_pc_d[1];  // from the start of inst (boffset is overwritten with PC bit 1)
+   end
+
+   // Performance-monitor taps, taken from the i0_pp_r prediction packet
+   assign exu_pmu_i0_br_misp       =  i0_pp_r.misp;
+   assign exu_pmu_i0_br_ataken     =  i0_pp_r.ataken;
+   assign exu_pmu_i0_pc4           =  i0_pp_r.pc4;
+
+   // D-stage branch valid / taken, from the ALU's predict_p_out; valid is suppressed during a lower-pipe flush
+   assign i0_valid_d               =  i0_predict_p_d.valid  & dec_i0_alu_decode_d & ~dec_tlu_flush_lower_r;
+   assign i0_taken_d               = (i0_predict_p_d.ataken & dec_i0_alu_decode_d);
+
+if(pt.BTB_ENABLE==1) begin
+   // maintain GHR at D: shift in i0_taken_d on a valid D-stage branch, hold otherwise, reload from ghr_x on a lower-pipe flush
+   assign ghr_d_ns[pt.BHT_GHR_SIZE-1:0]
+                                   = ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r &  i0_valid_d}} & {ghr_d[pt.BHT_GHR_SIZE-2:0], i0_taken_d}) |
+                                     ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & ~i0_valid_d}} &  ghr_d[pt.BHT_GHR_SIZE-1:0]             ) |
+                                     ({pt.BHT_GHR_SIZE{ dec_tlu_flush_lower_r              }} &  ghr_x[pt.BHT_GHR_SIZE-1:0]             );
+
+   // maintain GHR at X: shift in i0_taken_x when i0_valid_x, hold otherwise
+   assign ghr_x_ns[pt.BHT_GHR_SIZE-1:0]
+                                   = ({pt.BHT_GHR_SIZE{ i0_valid_x}} & {ghr_x[pt.BHT_GHR_SIZE-2:0], i0_taken_x}) |
+                                     ({pt.BHT_GHR_SIZE{~i0_valid_x}} &  ghr_x[pt.BHT_GHR_SIZE-1:0]             ) ;
+
+   // R-stage branch outcome outputs, copied from the i0_pp_r packet (hist is zeroed when the packet is not valid)
+   assign exu_i0_br_valid_r                                 =  i0_pp_r.valid;
+   assign exu_i0_br_mp_r                                    =  i0_pp_r.misp;
+   assign exu_i0_br_way_r                                   =  i0_pp_r.way;
+   assign exu_i0_br_hist_r[1:0]                             =  {2{i0_pp_r.valid}} & i0_pp_r.hist[1:0];
+   assign exu_i0_br_error_r                                 =  i0_pp_r.br_error;
+   assign exu_i0_br_middle_r                                =  i0_pp_r.pc4 ^ i0_pp_r.boffset;
+   assign exu_i0_br_start_error_r                           =  i0_pp_r.br_start_error;
+   // predpipe_r bits above the low BTB_BTAG_SIZE bits unpack into {fghr, index}
+   assign {exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0],
+           exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]}=  predpipe_r[PREDPIPESIZE-1:pt.BTB_BTAG_SIZE];
+
+   // mispredict packet and predpipe are forced to zero unless i0_flush_upper_x is set
+   assign final_predict_mp                                  = (i0_flush_upper_x)  ?  i0_predict_p_x  :  '0;
+
+   assign final_predpipe_mp[PREDPIPESIZE-1:0]               = (i0_flush_upper_x)  ?  predpipe_x      :  '0;
+   // GHR reported with the flush: ghr_d for an upper flush with no lower-pipe flush, ghr_x in every other case
+   assign after_flush_eghr[pt.BHT_GHR_SIZE-1:0]             = (i0_flush_upper_x & ~dec_tlu_flush_lower_r)  ?  ghr_d[pt.BHT_GHR_SIZE-1:0]  :  ghr_x[pt.BHT_GHR_SIZE-1:0];
+
+   // field-by-field copy of the gated packet into exu_mp_pkt
+   assign exu_mp_pkt.valid                                  =  final_predict_mp.valid;
+   assign exu_mp_pkt.way                                    =  final_predict_mp.way;
+   assign exu_mp_pkt.misp                                   =  final_predict_mp.misp;
+   assign exu_mp_pkt.pcall                                  =  final_predict_mp.pcall;
+   assign exu_mp_pkt.pja                                    =  final_predict_mp.pja;
+   assign exu_mp_pkt.pret                                   =  final_predict_mp.pret;
+   assign exu_mp_pkt.ataken                                 =  final_predict_mp.ataken;
+   assign exu_mp_pkt.boffset                                =  final_predict_mp.boffset;
+   assign exu_mp_pkt.pc4                                    =  final_predict_mp.pc4;
+   assign exu_mp_pkt.hist[1:0]                              =  final_predict_mp.hist[1:0];
+   assign exu_mp_pkt.toffset[11:0]                          =  final_predict_mp.toffset[11:0];
+
+   assign exu_mp_fghr[pt.BHT_GHR_SIZE-1:0]                  =  after_flush_eghr[pt.BHT_GHR_SIZE-1:0];
+   // bits of final_predpipe_mp below the top BHT_GHR_SIZE bits unpack into {index, btag}
+   assign {exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],
+           exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]}               =  final_predpipe_mp[PREDPIPESIZE-pt.BHT_GHR_SIZE-1:0];
+
+   assign exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]                  =  final_predpipe_mp[PREDPIPESIZE-1:pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+pt.BTB_BTAG_SIZE+1]; // mp ghr for bht write
+end // if (pt.BTB_ENABLE==1)
+else begin   // pt.BTB_ENABLE != 1: GHR next-state values and all exu_mp_* / exu_i0_br_* outputs are tied to zero
+   assign ghr_d_ns = '0;
+   assign ghr_x_ns = '0;
+   assign exu_mp_pkt = '0;
+   assign exu_mp_eghr = '0;
+   assign exu_mp_fghr = '0;
+   assign exu_mp_index = '0;
+   assign exu_mp_btag = '0;
+   assign exu_i0_br_hist_r = '0;
+   assign exu_i0_br_error_r = '0;
+   assign exu_i0_br_start_error_r = '0;
+   assign exu_i0_br_index_r = '0;
+   assign exu_i0_br_valid_r = '0;
+   assign exu_i0_br_mp_r = '0;
+   assign exu_i0_br_middle_r = '0;
+   assign exu_i0_br_fghr_r = '0;
+   assign exu_i0_br_way_r = '0;
+end // else: !if(pt.BTB_ENABLE==1)
+
+   assign exu_flush_path_final[31:1] = ( {31{ dec_tlu_flush_lower_r                   }} & dec_tlu_flush_path_r[31:1] ) |    // lower-pipe flush path takes priority
+                                       ( {31{~dec_tlu_flush_lower_r & i0_flush_upper_d}} & i0_flush_path_d[31:1]      );     // else the ALU (upper) flush path; zero when neither flush is active
+   // next PC at R: pred_correct_npc_r when i0_pred_correct_upper_r is set, otherwise i0_flush_path_upper_r
+   assign exu_npc_r[31:1]            = (i0_pred_correct_upper_r)  ?  pred_correct_npc_r[31:1]    :  i0_flush_path_upper_r[31:1];
+
+
+endmodule // eb1_exu
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_exu_alu_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_exu_alu_ctl.sv
new file mode 100644
index 0000000..9d05f43
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_exu_alu_ctl.sv
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+module eb1_exu_alu_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+)
+  (
+   input  logic                  clk,                // Top level clock
+   input  logic                  rst_l,              // Reset
+   input  logic                  scan_mode,          // Scan control
+
+   input  logic                  flush_upper_x,      // Branch flush from previous cycle
+   input  logic                  flush_lower_r,      // Master flush of entire pipeline
+   input  logic                  enable,             // Clock enable (pc_ff flop; result_ff also needs valid_in)
+   input  logic                  valid_in,           // Valid
+   input  eb1_alu_pkt_t         ap,                 // predecodes
+   input  logic                  csr_ren_in,         // CSR select
+   input  logic        [31:0]    csr_rddata_in,      // CSR data
+   input  logic signed [31:0]    a_in,               // A operand
+   input  logic        [31:0]    b_in,               // B operand
+   input  logic        [31:1]    pc_in,              // for pc=pc+2,4 calculations
+   input  eb1_predict_pkt_t     pp_in,              // Predicted branch structure
+   input  logic        [12:1]    brimm_in,           // Branch offset
+
+
+   output logic        [31:0]    result_ff,          // final result
+   output logic                  flush_upper_out,    // Branch flush
+   output logic                  flush_final_out,    // Branch flush or flush entire pipeline
+   output logic        [31:1]    flush_path_out,     // Branch flush PC
+   output logic        [31:1]    pc_ff,              // flopped PC
+   output logic                  pred_correct_out,   // NPC control
+   output eb1_predict_pkt_t     predict_p_out       // Predicted branch structure
+  );
+   // ALU and branch-resolution unit. All arithmetic / logic / shift / bit-manip results are combinational and OR-merged into 'result';
+   // 'result' is registered into result_ff with en = (enable & valid_in), and pc_in is registered into pc_ff with en = enable.
+   logic               [31:0]    zba_a_in;
+   logic               [31:0]    aout;
+   logic                         cout,ov,neg;
+   logic               [31:0]    lout;
+   logic               [31:0]    sout;
+   logic                         sel_shift;
+   logic                         sel_adder;
+   logic                         slt_one;
+   logic                         actual_taken;
+   logic               [31:1]    pcout;
+   logic                         cond_mispredict;
+   logic                         target_mispredict;
+   logic                         eq, ne, lt, ge;
+   logic                         any_jal;
+   logic               [1:0]     newhist;
+   logic                         sel_pc;
+   logic               [31:0]    csr_write_data;
+   logic               [31:0]    result;
+
+
+
+
+   // *** Start - BitManip ***
+   // Local ap_* selects: each follows the matching ap.* field when its extension parameter is 1, and is tied to 0 otherwise
+   // Zbb
+   logic                  ap_clz;
+   logic                  ap_ctz;
+   logic                  ap_pcnt;
+   logic                  ap_sext_b;
+   logic                  ap_sext_h;
+   logic                  ap_min;
+   logic                  ap_max;
+   logic                  ap_pack;
+   logic                  ap_packu;
+   logic                  ap_packh;
+   logic                  ap_rol;
+   logic                  ap_ror;
+   logic                  ap_rev;
+   logic                  ap_rev8;
+   logic                  ap_orc_b;
+   logic                  ap_orc16;
+   logic                  ap_zbb;
+
+   // Zbs
+   logic                  ap_sbset;
+   logic                  ap_sbclr;
+   logic                  ap_sbinv;
+   logic                  ap_sbext;
+
+   // Zbp  (slo/sro are enabled by pt.BITMANIP_ZBP below)
+   logic                  ap_slo;
+   logic                  ap_sro;
+
+   // Zba
+   logic                  ap_sh1add;
+   logic                  ap_sh2add;
+   logic                  ap_sh3add;
+   logic                  ap_zba;
+
+
+
+   if (pt.BITMANIP_ZBB == 1)
+     begin
+       assign ap_clz          =  ap.clz;
+       assign ap_ctz          =  ap.ctz;
+       assign ap_pcnt         =  ap.pcnt;
+       assign ap_sext_b       =  ap.sext_b;
+       assign ap_sext_h       =  ap.sext_h;
+       assign ap_min          =  ap.min;
+       assign ap_max          =  ap.max;
+     end
+   else
+     begin
+       assign ap_clz          =  1'b0;
+       assign ap_ctz          =  1'b0;
+       assign ap_pcnt         =  1'b0;
+       assign ap_sext_b       =  1'b0;
+       assign ap_sext_h       =  1'b0;
+       assign ap_min          =  1'b0;
+       assign ap_max          =  1'b0;
+     end
+
+   // Enabled by either Zbb or Zbp; rev/rev8 are ap.grev and orc_b/orc16 are ap.gorc qualified by a fixed b_in[4:0] value
+   if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) )
+     begin
+       assign ap_pack         =  ap.pack;
+       assign ap_packu        =  ap.packu;
+       assign ap_packh        =  ap.packh;
+       assign ap_rol          =  ap.rol;
+       assign ap_ror          =  ap.ror;
+       assign ap_rev          =  ap.grev & (b_in[4:0] == 5'b11111);
+       assign ap_rev8         =  ap.grev & (b_in[4:0] == 5'b11000);
+       assign ap_orc_b        =  ap.gorc & (b_in[4:0] == 5'b00111);
+       assign ap_orc16        =  ap.gorc & (b_in[4:0] == 5'b10000);
+       assign ap_zbb          =  ap.zbb;
+     end
+   else
+     begin
+       assign ap_pack         =  1'b0;
+       assign ap_packu        =  1'b0;
+       assign ap_packh        =  1'b0;
+       assign ap_rol          =  1'b0;
+       assign ap_ror          =  1'b0;
+       assign ap_rev          =  1'b0;
+       assign ap_rev8         =  1'b0;
+       assign ap_orc_b        =  1'b0;
+       assign ap_orc16        =  1'b0;
+       assign ap_zbb          =  1'b0;
+     end
+
+
+   if (pt.BITMANIP_ZBS == 1)
+     begin
+       assign ap_sbset        =  ap.sbset;
+       assign ap_sbclr        =  ap.sbclr;
+       assign ap_sbinv        =  ap.sbinv;
+       assign ap_sbext        =  ap.sbext;
+     end
+   else
+     begin
+       assign ap_sbset        =  1'b0;
+       assign ap_sbclr        =  1'b0;
+       assign ap_sbinv        =  1'b0;
+       assign ap_sbext        =  1'b0;
+     end
+
+
+   if (pt.BITMANIP_ZBP == 1)
+     begin
+       assign ap_slo          =  ap.slo;
+       assign ap_sro          =  ap.sro;
+     end
+   else
+     begin
+       assign ap_slo          =  1'b0;
+       assign ap_sro          =  1'b0;
+     end
+
+
+   if (pt.BITMANIP_ZBA == 1)
+     begin
+       assign ap_sh1add       =  ap.sh1add;
+       assign ap_sh2add       =  ap.sh2add;
+       assign ap_sh3add       =  ap.sh3add;
+       assign ap_zba          =  ap.zba;
+     end
+   else
+     begin
+       assign ap_sh1add       =  1'b0;
+       assign ap_sh2add       =  1'b0;
+       assign ap_sh3add       =  1'b0;
+       assign ap_zba          =  1'b0;
+     end
+
+
+
+
+   // *** End   - BitManip ***
+
+
+
+
+   rvdffpcie #(31) i_pc_ff      (.*, .clk(clk), .en(enable),              .din(pc_in[31:1]),    .dout(pc_ff[31:1]));   // any PC is run through here - doesn't have to be alu
+   rvdffe    #(32) i_result_ff  (.*, .clk(clk), .en(enable & valid_in),   .din(result[31:0]),   .dout(result_ff[31:0]));
+
+
+
+   // immediates are just muxed into rs2
+   // NOTE(review): lctl/sctl/bctl below are not referenced in this module; the logic uses ap.land/lor/lxor, ap.sll/srl/sra and ap.beq/bne/blt/bge
+   // add    =>  add=1;
+   // sub    =>  add=1; sub=1;
+
+   // and    =>  lctl=3
+   // or     =>  lctl=2
+   // xor    =>  lctl=1
+
+   // sll    =>  sctl=3
+   // srl    =>  sctl=2
+   // sra    =>  sctl=1
+
+   // slt    =>  slt
+
+   // lui    =>  lctl=2; or x0, imm20 previously << 12
+   // auipc  =>  add;   add pc, imm20 previously << 12
+
+   // beq    =>  bctl=4; add; add x0, pc, sext(offset[12:1])
+   // bne    =>  bctl=3; add; add x0, pc, sext(offset[12:1])
+   // blt    =>  bctl=2; add; add x0, pc, sext(offset[12:1])
+   // bge    =>  bctl=1; add; add x0, pc, sext(offset[12:1])
+
+   // jal    =>  rs1=pc {pc[31:1],1'b0},  rs2=sext(offset[20:1]);   rd=pc+[2,4]
+   // jalr   =>  rs1=rs1,                 rs2=sext(offset[20:1]);   rd=pc+[2,4]
+
+
+   // Zba shNadd: the A operand is pre-shifted left by 1/2/3 before the adder; it passes through unshifted when ap_zba is 0
+   assign zba_a_in[31:0]      = ( {32{ ap_sh1add}} & {a_in[30:0],1'b0} ) |
+                                ( {32{ ap_sh2add}} & {a_in[29:0],2'b0} ) |
+                                ( {32{ ap_sh3add}} & {a_in[28:0],3'b0} ) |
+                                ( {32{~ap_zba   }} &  a_in[31:0]       );
+
+   logic        [31:0]    bm;
+
+   assign bm[31:0]            = ( ap.sub )  ?  ~b_in[31:0]  :  b_in[31:0];
+   // subtract is a + ~b + 1: ap.sub selects the inverted B above and supplies the carry-in below
+   assign {cout, aout[31:0]}  = {1'b0, zba_a_in[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub};
+   // signed overflow: a_in and bm have the same sign and the sum's sign differs
+   assign ov                  = (~a_in[31] & ~bm[31] &  aout[31]) |
+                                ( a_in[31] &  bm[31] & ~aout[31] );
+   // signed less-than = neg ^ ov; unsigned less-than = no carry out (assumes the adder is subtracting - confirm ap.sub is set by decode for compares)
+   assign lt                  = (~ap.unsign & (neg ^ ov)) |
+                                ( ap.unsign & ~cout);
+
+   assign eq                  = (a_in[31:0] == b_in[31:0]);
+   assign ne                  = ~eq;
+   assign neg                 =  aout[31];
+   assign ge                  = ~lt;
+
+
+   // Logic unit: CSR read data, and/or/xor, and (when ap_zbb is set) the same three ops with B inverted
+   assign lout[31:0]          =  ( {32{csr_ren_in       }} &  csr_rddata_in[31:0]       ) |
+                                 ( {32{ap.land & ~ap_zbb}} &  a_in[31:0] &  b_in[31:0]  ) |
+                                 ( {32{ap.lor  & ~ap_zbb}} & (a_in[31:0] |  b_in[31:0]) ) |
+                                 ( {32{ap.lxor & ~ap_zbb}} & (a_in[31:0] ^  b_in[31:0]) ) |
+                                 ( {32{ap.land &  ap_zbb}} &  a_in[31:0] & ~b_in[31:0]  ) |
+                                 ( {32{ap.lor  &  ap_zbb}} & (a_in[31:0] | ~b_in[31:0]) ) |
+                                 ( {32{ap.lxor &  ap_zbb}} & (a_in[31:0] ^ ~b_in[31:0]) );
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  SLO,SRO      * * * * * * * * * * * * * * * * * *
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  ROL,ROR      * * * * * * * * * * * * * * * * * *
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  ZBEXT        * * * * * * * * * * * * * * * * * *
+   // One shared right shifter: a left shift by n is a right shift by (32-n) of {a[30:0],a[31:0]}, then the low n bits are masked
+   logic        [5:0]     shift_amount;
+   logic        [31:0]    shift_mask;
+   logic        [62:0]    shift_extend;
+   logic        [62:0]    shift_long;
+
+
+   assign shift_amount[5:0]            = ( { 6{ap.sll}}   & (6'd32 - {1'b0,b_in[4:0]}) ) |   // [5] unused
+                                         ( { 6{ap.srl}}   &          {1'b0,b_in[4:0]}  ) |
+                                         ( { 6{ap.sra}}   &          {1'b0,b_in[4:0]}  ) |
+                                         ( { 6{ap_rol}}   & (6'd32 - {1'b0,b_in[4:0]}) ) |
+                                         ( { 6{ap_ror}}   &          {1'b0,b_in[4:0]}  ) |
+                                         ( { 6{ap_slo}}   & (6'd32 - {1'b0,b_in[4:0]}) ) |
+                                         ( { 6{ap_sro}}   &          {1'b0,b_in[4:0]}  ) |
+                                         ( { 6{ap_sbext}} &          {1'b0,b_in[4:0]}  );
+
+   // mask clears the low b_in[4:0] bits for sll/slo only; it is all ones for every other operation
+   assign shift_mask[31:0]             = ( 32'hffffffff << ({5{ap.sll | ap_slo}} & b_in[4:0]) );
+
+
+   assign shift_extend[31:0]           =  a_in[31:0];
+   // upper half = bits shifted in from the left: sign copies for sra, a[30:0] for sll/rol/ror/slo, ones for sro, zeros otherwise
+   assign shift_extend[62:32]          = ( {31{ap.sra}} & {31{a_in[31]}} ) |
+                                         ( {31{ap.sll}} &     a_in[30:0] ) |
+                                         ( {31{ap_rol}} &     a_in[30:0] ) |
+                                         ( {31{ap_ror}} &     a_in[30:0] ) |
+                                         ( {31{ap_slo}} &     a_in[30:0] ) |
+                                         ( {31{ap_sro}} & {31{  1'b1  }} );
+
+
+   assign shift_long[62:0]    = ( shift_extend[62:0] >> shift_amount[4:0] );   // 62-32 unused
+   // for slo the masked-off low bits are filled with ones
+   assign sout[31:0]          = ( shift_long[31:0] & shift_mask[31:0] ) | ( {32{ap_slo}} & ~shift_mask[31:0] );
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  CLZ,CTZ      * * * * * * * * * * * * * * * * * *
+
+   logic                  bitmanip_clz_ctz_sel;
+   logic        [31:0]    bitmanip_a_reverse_ff;
+   logic        [31:0]    bitmanip_lzd_in;
+   logic        [5:0]     bitmanip_dw_lzd_enc;
+   logic        [5:0]     bitmanip_clz_ctz_result;
+
+   assign bitmanip_clz_ctz_sel         =  ap_clz | ap_ctz;
+   // ctz is computed as the leading-zero count of the bit-reversed operand
+   assign bitmanip_a_reverse_ff[31:0]  = {a_in[0],  a_in[1],  a_in[2],  a_in[3],  a_in[4],  a_in[5],  a_in[6],  a_in[7],
+                                          a_in[8],  a_in[9],  a_in[10], a_in[11], a_in[12], a_in[13], a_in[14], a_in[15],
+                                          a_in[16], a_in[17], a_in[18], a_in[19], a_in[20], a_in[21], a_in[22], a_in[23],
+                                          a_in[24], a_in[25], a_in[26], a_in[27], a_in[28], a_in[29], a_in[30], a_in[31]};
+
+   assign bitmanip_lzd_in[31:0]        = ( {32{ap_clz}} & a_in[31:0]                 ) |
+                                         ( {32{ap_ctz}} & bitmanip_a_reverse_ff[31:0]);
+
+   logic        [31:0]    bitmanip_lzd_os;
+   // (the loop index is declared by the for statement itself; the unused module-scope 'integer i' was removed)
+   logic                  found;
+   // Leading-zero detect: shift left and count until the MSB is 1; an all-zero input yields 32
+   always_comb
+     begin
+        bitmanip_lzd_os[31:0]   =  bitmanip_lzd_in[31:0];
+        bitmanip_dw_lzd_enc[5:0]=  6'b0;
+        found = 1'b0;
+
+        for (int i=0; i<32; i++) begin   // fixed trip count so the loop is statically unrollable; 'found' freezes the count
+           if (!found && (bitmanip_lzd_os[31] == 1'b0)) begin
+              bitmanip_dw_lzd_enc[5:0]=  bitmanip_dw_lzd_enc[5:0] + 6'b00_0001;
+              bitmanip_lzd_os[31:0]   =  bitmanip_lzd_os[31:0] << 1;
+           end
+           else
+              found=1'b1;
+        end
+     end
+
+
+   // zeroed unless clz/ctz is selected; when the count is 32 (bit 5 set) the low five bits are forced to 0
+   assign bitmanip_clz_ctz_result[5:0] = {6{bitmanip_clz_ctz_sel}} & {bitmanip_dw_lzd_enc[5],( {5{~bitmanip_dw_lzd_enc[5]}} & bitmanip_dw_lzd_enc[4:0] )};
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  PCNT         * * * * * * * * * * * * * * * * * *
+
+   logic        [5:0]     bitmanip_pcnt;
+   logic        [5:0]     bitmanip_pcnt_result;
+
+
+   integer                bitmanip_pcnt_i;
+   // population count: sum of the 32 bits of a_in
+   always_comb
+     begin
+       bitmanip_pcnt[5:0]               =  6'b0;
+
+       for (bitmanip_pcnt_i=0; bitmanip_pcnt_i<32; bitmanip_pcnt_i++)
+         begin
+            bitmanip_pcnt[5:0]          =  bitmanip_pcnt[5:0] + {5'b0,a_in[bitmanip_pcnt_i]};
+         end      // FOR    bitmanip_pcnt_i
+     end          // ALWAYS_COMB
+
+
+   assign bitmanip_pcnt_result[5:0]    =  {6{ap_pcnt}} & bitmanip_pcnt[5:0];
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  SEXT_B,SEXT_H  * * * * * * * * * * * * * * * * *
+
+   logic       [31:0]     bitmanip_sext_result;
+
+   assign bitmanip_sext_result[31:0]   = ( {32{ap_sext_b}} & { {24{a_in[7]}} ,a_in[7:0]  } ) |
+                                         ( {32{ap_sext_h}} & { {16{a_in[15]}},a_in[15:0] } );
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  MIN,MAX,MINU,MAXU  * * * * * * * * * * * * * * *
+
+   logic                  bitmanip_minmax_sel;
+   logic        [31:0]    bitmanip_minmax_result;
+
+   assign bitmanip_minmax_sel          =  ap_min | ap_max;
+
+
+   logic                  bitmanip_minmax_sel_a;
+   // pick A when (max and a >= b) or (min and a < b); otherwise pick B
+   assign bitmanip_minmax_sel_a        =  ge  ^ ap_min;
+
+   assign bitmanip_minmax_result[31:0] = ({32{bitmanip_minmax_sel &  bitmanip_minmax_sel_a}}  &  a_in[31:0]) |
+                                         ({32{bitmanip_minmax_sel & ~bitmanip_minmax_sel_a}}  &  b_in[31:0]);
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  PACK, PACKU, PACKH * * * * * * * * * * * * * * *
+
+   logic        [31:0]    bitmanip_pack_result;
+   logic        [31:0]    bitmanip_packu_result;
+   logic        [31:0]    bitmanip_packh_result;
+
+   assign bitmanip_pack_result[31:0]   = {32{ap_pack}}  & {b_in[15:0], a_in[15:0]};
+   assign bitmanip_packu_result[31:0]  = {32{ap_packu}} & {b_in[31:16],a_in[31:16]};
+   assign bitmanip_packh_result[31:0]  = {32{ap_packh}} & {16'b0,b_in[7:0],a_in[7:0]};
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  REV, REV8, ORC_B * * * * * * * * * * * * * * * *
+
+   logic        [31:0]    bitmanip_rev_result;
+   logic        [31:0]    bitmanip_rev8_result;
+   logic        [31:0]    bitmanip_orc_b_result;
+   logic        [31:0]    bitmanip_orc16_result;
+
+   assign bitmanip_rev_result[31:0]    = {32{ap_rev}}   &
+                                         {a_in[00],a_in[01],a_in[02],a_in[03],a_in[04],a_in[05],a_in[06],a_in[07],
+                                          a_in[08],a_in[09],a_in[10],a_in[11],a_in[12],a_in[13],a_in[14],a_in[15],
+                                          a_in[16],a_in[17],a_in[18],a_in[19],a_in[20],a_in[21],a_in[22],a_in[23],
+                                          a_in[24],a_in[25],a_in[26],a_in[27],a_in[28],a_in[29],a_in[30],a_in[31]};
+
+   assign bitmanip_rev8_result[31:0]   = {32{ap_rev8}}  & {a_in[7:0],a_in[15:8],a_in[23:16],a_in[31:24]};
+
+
+// uint32_t gorc32(uint32_t rs1, uint32_t rs2)
+// {
+//      uint32_t x = rs1;
+//      int shamt = rs2 & 31;                                                        ORC.B  ORC16
+//      if (shamt &  1) x |= ((x & 0x55555555) <<  1) | ((x & 0xAAAAAAAA) >>  1);      1      0
+//      if (shamt &  2) x |= ((x & 0x33333333) <<  2) | ((x & 0xCCCCCCCC) >>  2);      1      0
+//      if (shamt &  4) x |= ((x & 0x0F0F0F0F) <<  4) | ((x & 0xF0F0F0F0) >>  4);      1      0
+//      if (shamt &  8) x |= ((x & 0x00FF00FF) <<  8) | ((x & 0xFF00FF00) >>  8);      0      0
+//      if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);      0      1
+//      return x;
+// }
+
+
+// BEFORE              31  ,   30  ,   29  ,   28  ,    27  ,   26,     25,     24
+// shamt[0]  b =    a31|a30,a31|a30,a29|a28,a29|a28, a27|a26,a27|a26,a25|a24,a25|a24
+// shamt[1]  c =    b31|b29,b30|b28,b31|b29,b30|b28, b27|b25,b26|b24,b27|b25,b26|b24
+// shamt[2]  d =    c31|c27,c30|c26,c29|c25,c28|c24, c31|c27,c30|c26,c29|c25,c28|c24
+//
+// Expand d31 =        c31         |         c27;
+//            =   b31   |   b29    |    b27   |   b25;
+//            = a31|a30 | a29|a28  |  a27|a26 | a25|a24
+
+   assign bitmanip_orc_b_result[31:0]  = {32{ap_orc_b}} & { {8{| a_in[31:24]}}, {8{| a_in[23:16]}}, {8{| a_in[15:8]}}, {8{| a_in[7:0]}} };
+
+   assign bitmanip_orc16_result[31:0]  = {32{ap_orc16}} & {     {a_in[31:16] | a_in[15:0]},             {a_in[31:16] | a_in[15:0]}      };
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  ZBSET, ZBCLR, ZBINV  * * * * * * * * * * * * * *
+
+   logic        [31:0]    bitmanip_sb_1hot;
+   logic        [31:0]    bitmanip_sb_data;
+   // one-hot mask for the bit position given by b_in[4:0]
+   assign bitmanip_sb_1hot[31:0]       = ( 32'h00000001 << b_in[4:0] );
+
+   assign bitmanip_sb_data[31:0]       = ( {32{ap_sbset}} & ( a_in[31:0] |  bitmanip_sb_1hot[31:0]) ) |
+                                         ( {32{ap_sbclr}} & ( a_in[31:0] & ~bitmanip_sb_1hot[31:0]) ) |
+                                         ( {32{ap_sbinv}} & ( a_in[31:0] ^  bitmanip_sb_1hot[31:0]) );
+
+
+
+
+
+
+   assign sel_shift           =  ap.sll  | ap.srl | ap.sra | ap_slo | ap_sro | ap_rol | ap_ror;
+   assign sel_adder           = (ap.add  | ap.sub | ap_zba) & ~ap.slt & ~ap_min & ~ap_max;
+   assign sel_pc              =  ap.jal  | pp_in.pcall | pp_in.pja | pp_in.pret;   // same condition as any_jal below
+   assign csr_write_data[31:0]= (ap.csr_imm)  ?  b_in[31:0]  :  a_in[31:0];
+
+   assign slt_one             =  ap.slt & lt;
+
+
+   // Final result: OR of every unit's output; each term is gated by its own select (NOTE(review): relies on decode asserting non-overlapping selects)
+   assign result[31:0]        =                        lout[31:0]             |
+                                ({32{sel_shift}}    &  sout[31:0]           ) |
+                                ({32{sel_adder}}    &  aout[31:0]           ) |
+                                ({32{sel_pc}}       & {pcout[31:1],1'b0}    ) |
+                                ({32{ap.csr_write}} &  csr_write_data[31:0] ) |
+                                                      {31'b0, slt_one}        |
+                                ({32{ap_sbext}}     & {31'b0, sout[0]}      ) |
+                                                      {26'b0, bitmanip_clz_ctz_result[5:0]} |
+                                                      {26'b0, bitmanip_pcnt_result[5:0]}    |
+                                                       bitmanip_sext_result[31:0]    |
+                                                       bitmanip_minmax_result[31:0]  |
+                                                       bitmanip_pack_result[31:0]    |
+                                                       bitmanip_packu_result[31:0]   |
+                                                       bitmanip_packh_result[31:0]   |
+                                                       bitmanip_rev_result[31:0]     |
+                                                       bitmanip_rev8_result[31:0]    |
+                                                       bitmanip_orc_b_result[31:0]   |
+                                                       bitmanip_orc16_result[31:0]   |
+                                                       bitmanip_sb_data[31:0];
+
+
+
+   // *** branch handling ***
+
+   assign any_jal             =  ap.jal      |
+                                 pp_in.pcall |
+                                 pp_in.pja   |
+                                 pp_in.pret;
+
+   assign actual_taken        = (ap.beq & eq) |
+                                (ap.bne & ne) |
+                                (ap.blt & lt) |
+                                (ap.bge & ge) |
+                                 any_jal;
+
+   // for a conditional br pcout[] will be the opposite of the branch prediction
+   // for jal or pcall, it will be the link address pc+2 or pc+4
+
+   rvbradder ibradder (
+                     .pc     ( pc_in[31:1]    ),
+                     .offset ( brimm_in[12:1] ),
+                     .dout   ( pcout[31:1]    ));
+
+
+   // pred_correct is for the npc logic
+   // pred_correct indicates not to use the flush_path
+   // for any_jal pred_correct==0
+
+   assign pred_correct_out    = (valid_in & ap.predict_nt & ~actual_taken & ~any_jal) |
+                                (valid_in & ap.predict_t  &  actual_taken & ~any_jal);
+
+
+   // for any_jal adder output is the flush path
+   assign flush_path_out[31:1]= (any_jal) ? aout[31:1] : pcout[31:1];
+
+
+   // pcall and pret are included here
+   assign cond_mispredict     = (ap.predict_t  & ~actual_taken) |
+                                (ap.predict_nt &  actual_taken);
+
+
+   // target mispredicts on ret's
+   // (predicted return target pp_in.prett differs from the adder output)
+   assign target_mispredict   =  pp_in.pret & (pp_in.prett[31:1] != aout[31:1]);
+   // flush_upper_out is suppressed by either flush input; flush_final_out is additionally forced by flush_lower_r
+   assign flush_upper_out     =   (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x   & ~flush_lower_r;
+   assign flush_final_out     = ( (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x ) |  flush_lower_r;
+
+
+   // .i 3
+   // .o 2
+   // .ilb hist[1] hist[0] taken
+   // .ob newhist[1] newhist[0]
+   // .type fd
+   //
+   // 00 0 01
+   // 01 0 01
+   // 10 0 00
+   // 11 0 10
+   // 00 1 10
+   // 01 1 00
+   // 10 1 11
+   // 11 1 11
+   // the two equations below implement the truth table above (inputs: hist[1] hist[0] taken)
+   assign newhist[1]          = ( pp_in.hist[1] &  pp_in.hist[0]) | (~pp_in.hist[0] & actual_taken);
+   assign newhist[0]          = (~pp_in.hist[1] & ~actual_taken)  | ( pp_in.hist[1] & actual_taken);
+
+   always_comb begin
+      predict_p_out           =  pp_in;
+      // pass pp_in through, overriding misp, ataken and hist with the resolved values
+      predict_p_out.misp      = ~flush_upper_x & ~flush_lower_r & (cond_mispredict | target_mispredict);
+      predict_p_out.ataken    =  actual_taken;
+      predict_p_out.hist[1]   =  newhist[1];
+      predict_p_out.hist[0]   =  newhist[0];
+
+   end
+
+
+
+endmodule // eb1_exu_alu_ctl
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_exu_div_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_exu_div_ctl.sv
new file mode 100644
index 0000000..a3b438e
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_exu_div_ctl.sv
@@ -0,0 +1,1801 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+module eb1_exu_div_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+)
+  (
+   input logic           clk,                       // Top level clock
+   input logic           rst_l,                     // Reset
+   input logic           scan_mode,                 // Scan mode
+
+   input eb1_div_pkt_t  dp,                        // valid, sign, rem
+   input logic  [31:0]   dividend,                  // Numerator
+   input logic  [31:0]   divisor,                   // Denominator
+
+   input logic           cancel,                    // Cancel divide
+
+
+   output logic          finish_dly,                // Finish to match data
+   output logic [31:0]   out                        // Result
+  );
+   // Divider wrapper: instantiates one divider implementation, chosen at elaboration by pt.DIV_NEW / pt.DIV_BIT,
+   // and forces the result bus to zero whenever finish_dly is not asserted.
+   logic [31:0]          out_raw;
+   // out_raw and finish_dly are driven by whichever divider instance below gets elaborated.
+   assign out[31:0] = {32{finish_dly}} & out_raw[31:0];     // Qualification added to quiet result bus while divide is iterating
+
+   // Every variant below is wired identically; signed_in is the inverse of dp.unsign.
+   // pt is presumably the parameter struct brought in through eb1_param.vh - TODO confirm.
+   if (pt.DIV_NEW == 0)
+      begin
+        eb1_exu_div_existing_1bit_cheapshortq   i_existing_1bit_div_cheapshortq (
+            .clk              ( clk                      ),   // I
+            .rst_l            ( rst_l                    ),   // I
+            .scan_mode        ( scan_mode                ),   // I
+            .cancel           ( cancel                   ),   // I
+            .valid_in         ( dp.valid                 ),   // I
+            .signed_in        (~dp.unsign                ),   // I
+            .rem_in           ( dp.rem                   ),   // I
+            .dividend_in      ( dividend[31:0]           ),   // I
+            .divisor_in       ( divisor[31:0]            ),   // I
+            .valid_out        ( finish_dly               ),   // O
+            .data_out         ( out_raw[31:0]            ));  // O
+      end
+
+   // The remaining variants all require pt.DIV_NEW == 1 and are distinguished by pt.DIV_BIT (1, 2, 3 or 4).
+   if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 1) )
+      begin
+        eb1_exu_div_new_1bit_fullshortq         i_new_1bit_div_fullshortq       (
+            .clk              ( clk                      ),   // I
+            .rst_l            ( rst_l                    ),   // I
+            .scan_mode        ( scan_mode                ),   // I
+            .cancel           ( cancel                   ),   // I
+            .valid_in         ( dp.valid                 ),   // I
+            .signed_in        (~dp.unsign                ),   // I
+            .rem_in           ( dp.rem                   ),   // I
+            .dividend_in      ( dividend[31:0]           ),   // I
+            .divisor_in       ( divisor[31:0]            ),   // I
+            .valid_out        ( finish_dly               ),   // O
+            .data_out         ( out_raw[31:0]            ));  // O
+      end
+
+
+   if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 2) )
+      begin
+        eb1_exu_div_new_2bit_fullshortq         i_new_2bit_div_fullshortq       (
+            .clk              ( clk                      ),   // I
+            .rst_l            ( rst_l                    ),   // I
+            .scan_mode        ( scan_mode                ),   // I
+            .cancel           ( cancel                   ),   // I
+            .valid_in         ( dp.valid                 ),   // I
+            .signed_in        (~dp.unsign                ),   // I
+            .rem_in           ( dp.rem                   ),   // I
+            .dividend_in      ( dividend[31:0]           ),   // I
+            .divisor_in       ( divisor[31:0]            ),   // I
+            .valid_out        ( finish_dly               ),   // O
+            .data_out         ( out_raw[31:0]            ));  // O
+      end
+
+
+   if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 3) )
+      begin
+        eb1_exu_div_new_3bit_fullshortq         i_new_3bit_div_fullshortq       (
+            .clk              ( clk                      ),   // I
+            .rst_l            ( rst_l                    ),   // I
+            .scan_mode        ( scan_mode                ),   // I
+            .cancel           ( cancel                   ),   // I
+            .valid_in         ( dp.valid                 ),   // I
+            .signed_in        (~dp.unsign                ),   // I
+            .rem_in           ( dp.rem                   ),   // I
+            .dividend_in      ( dividend[31:0]           ),   // I
+            .divisor_in       ( divisor[31:0]            ),   // I
+            .valid_out        ( finish_dly               ),   // O
+            .data_out         ( out_raw[31:0]            ));  // O
+      end
+
+
+   if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 4) )
+      begin
+        eb1_exu_div_new_4bit_fullshortq         i_new_4bit_div_fullshortq       (
+            .clk              ( clk                      ),   // I
+            .rst_l            ( rst_l                    ),   // I
+            .scan_mode        ( scan_mode                ),   // I
+            .cancel           ( cancel                   ),   // I
+            .valid_in         ( dp.valid                 ),   // I
+            .signed_in        (~dp.unsign                ),   // I
+            .rem_in           ( dp.rem                   ),   // I
+            .dividend_in      ( dividend[31:0]           ),   // I
+            .divisor_in       ( divisor[31:0]            ),   // I
+            .valid_out        ( finish_dly               ),   // O
+            .data_out         ( out_raw[31:0]            ));  // O
+      end
+
+   // NOTE(review): none of the conditions above match when DIV_NEW == 1 with DIV_BIT outside 1..4, or when
+   // DIV_NEW is neither 0 nor 1; no divider is then instantiated here and out_raw / finish_dly have no driver.
+endmodule // eb1_exu_div_ctl
+
+
+
+
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+module eb1_exu_div_existing_1bit_cheapshortq
+  (
+   input  logic            clk,                       // Top level clock
+   input  logic            rst_l,                     // Reset
+   input  logic            scan_mode,                 // Scan mode
+
+   input  logic            cancel,                    // Flush pipeline
+   input  logic            valid_in,
+   input  logic            signed_in,
+   input  logic            rem_in,
+   input  logic [31:0]     dividend_in,
+   input  logic [31:0]     divisor_in,
+
+   output logic            valid_out,
+   output logic [31:0]     data_out
+  );
+   // Iterative 32-bit divider: q_ff shifts in one quotient bit per run cycle while a_ff accumulates +/- m_ff.
+   // rem_in selects remainder instead of quotient; two shortcuts (small-number and short-quotient) cut the cycle count.
+   logic         div_clken;
+   logic         run_in, run_state;
+   logic  [5:0]  count_in, count;
+   logic [32:0]  m_ff;
+   logic         qff_enable;
+   logic         aff_enable;
+   logic [32:0]  q_in, q_ff;
+   logic [32:0]  a_in, a_ff;
+   logic [32:0]  m_eff;
+   logic [32:0]  a_shift;
+   logic         dividend_neg_ff, divisor_neg_ff;
+   logic [31:0]  dividend_comp;
+   logic [31:0]  dividend_eff;
+   logic [31:0]  q_ff_comp;
+   logic [31:0]  q_ff_eff;
+   logic [31:0]  a_ff_comp;
+   logic [31:0]  a_ff_eff;
+   logic         sign_ff, sign_eff;
+   logic         rem_ff;
+   logic         add;
+   logic [32:0]  a_eff;
+   logic [64:0]  a_eff_shift;
+   logic         rem_correct;
+   logic         valid_ff_x;
+   logic         valid_x;
+   logic         finish;
+   logic         finish_ff;
+
+   logic         smallnum_case, smallnum_case_ff;
+   logic  [3:0]  smallnum, smallnum_ff;
+   logic         m_already_comp;
+
+   logic [4:0]   a_cls;
+   logic [4:0]   b_cls;
+   logic [5:0]   shortq_shift;
+   logic [5:0]   shortq_shift_ff;
+   logic [5:0]   shortq;
+   logic         shortq_enable;
+   logic         shortq_enable_ff;
+   logic [32:0]  short_dividend;
+   logic [3:0]   shortq_raw;
+   logic [3:0]   shortq_shift_xx;
+
+   // Control/state register: 23 bits = 1+1+1+6+1+1+1+1+1+4+1+4, enabled by div_clken.
+   // The sign/rem fields load on valid_in and otherwise recirculate their own flopped value.
+   rvdffe #(23) i_misc_ff        (.*, .clk(clk), .en(div_clken),   .din ({valid_in & ~cancel,
+                                                                          finish   & ~cancel,
+                                                                          run_in,
+                                                                          count_in[5:0],
+                                                                          (valid_in & dividend_in[31]) | (~valid_in & dividend_neg_ff),
+                                                                          (valid_in & divisor_in[31] ) | (~valid_in & divisor_neg_ff ),
+                                                                          (valid_in & sign_eff       ) | (~valid_in & sign_ff        ),
+                                                                          (valid_in & rem_in         ) | (~valid_in & rem_ff         ),
+                                                                          smallnum_case,
+                                                                          smallnum[3:0],
+                                                                          shortq_enable,
+                                                                          shortq_shift[3:0]}),
+
+                                                                   .dout({valid_ff_x,
+                                                                          finish_ff,
+                                                                          run_state,
+                                                                          count[5:0],
+                                                                          dividend_neg_ff,
+                                                                          divisor_neg_ff,
+                                                                          sign_ff,
+                                                                          rem_ff,
+                                                                          smallnum_case_ff,
+                                                                          smallnum_ff[3:0],
+                                                                          shortq_enable_ff,
+                                                                          shortq_shift_xx[3:0]}));
+
+   // m_ff: divisor captured on valid_in, with bit 32 = signed_in & divisor_in[31]. q_ff / a_ff: 33-bit working registers.
+   rvdffe #(33) mff              (.*, .clk(clk), .en(valid_in),    .din({signed_in & divisor_in[31], divisor_in[31:0]}),   .dout(m_ff[32:0]));
+   rvdffe #(33) qff              (.*, .clk(clk), .en(qff_enable),  .din(q_in[32:0]),                                       .dout(q_ff[32:0]));
+   rvdffe #(33) aff              (.*, .clk(clk), .en(aff_enable),  .din(a_in[32:0]),                                       .dout(a_ff[32:0]));
+   // i_dividend_comp and i_q_ff_comp both negate q_ff[31:0]; the outputs feed dividend_eff and q_ff_eff respectively.
+   rvtwoscomp #(32) i_dividend_comp (.din(q_ff[31:0]),    .dout(dividend_comp[31:0]));
+   rvtwoscomp #(32) i_q_ff_comp     (.din(q_ff[31:0]),    .dout(q_ff_comp[31:0]));
+   rvtwoscomp #(32) i_a_ff_comp     (.din(a_ff[31:0]),    .dout(a_ff_comp[31:0]));
+
+   // valid_x: the flopped valid, killed by a cancel arriving in the same cycle.
+   assign valid_x                 = valid_ff_x & ~cancel;
+
+
+   // START - short circuit logic for small numbers {{
+
+   // small number divides - any 4b / 4b is done in 1 cycle (divisor != 0)
+   // to generate espresso equations:
+   // 1.  smalldiv > smalldiv.e
+   // 2.  espresso -Dso -oeqntott smalldiv.e | addassign > smalldiv
+
+   // smallnum case does not cover divide by 0
+   assign smallnum_case           = ((q_ff[31:4] == 28'b0) & (m_ff[31:4] == 28'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x) |
+                                    ((q_ff[31:0] == 32'b0) &                         (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x);
+
+   // smallnum[3:0]: sum-of-products over q_ff[3:0] (dividend) and m_ff[3:0] (divisor); do not hand-edit the terms.
+   assign smallnum[3]             = ( q_ff[3] &                                  ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           );
+
+
+   assign smallnum[2]             = ( q_ff[3] &                                  ~m_ff[3] & ~m_ff[2] &            ~m_ff[0]) |
+                                    ( q_ff[2] &                                  ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &  q_ff[2] &                       ~m_ff[3] & ~m_ff[2]                      );
+
+
+   assign smallnum[1]             = ( q_ff[2] &                                  ~m_ff[3] & ~m_ff[2] &            ~m_ff[0]) |
+                                    (                       q_ff[1] &            ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &                                  ~m_ff[3] &            ~m_ff[1] & ~m_ff[0]) |
+                                    ( q_ff[3] & ~q_ff[2] &                       ~m_ff[3] & ~m_ff[2] &  m_ff[1] &  m_ff[0]) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] & ~m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &                       ~m_ff[3] &                       ~m_ff[0]) |
+                                    ( q_ff[3] &  q_ff[2] &                       ~m_ff[3] &  m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &             q_ff[1] & ~m_ff[3] &                       ~m_ff[1]           ) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] &  m_ff[2]                      );
+
+
+   assign smallnum[0]             = (            q_ff[2] &  q_ff[1] &  q_ff[0] & ~m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] & ~q_ff[2] &  q_ff[0] &            ~m_ff[3] &             m_ff[1] &  m_ff[0]) |
+                                    (            q_ff[2] &                       ~m_ff[3] &            ~m_ff[1] & ~m_ff[0]) |
+                                    (                       q_ff[1] &            ~m_ff[3] & ~m_ff[2] &            ~m_ff[0]) |
+                                    (                                  q_ff[0] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    (~q_ff[3] &  q_ff[2] & ~q_ff[1] &            ~m_ff[3] & ~m_ff[2] &  m_ff[1] &  m_ff[0]) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] &                       ~m_ff[0]) |
+                                    ( q_ff[3] &                                             ~m_ff[2] & ~m_ff[1] & ~m_ff[0]) |
+                                    ( q_ff[3] & ~q_ff[2] &                       ~m_ff[3] &  m_ff[2] &  m_ff[1]           ) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] &  m_ff[2] & ~m_ff[1]           ) |
+                                    (~q_ff[3] &  q_ff[2] &             q_ff[0] & ~m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] & ~q_ff[2] & ~q_ff[1] &            ~m_ff[3] &  m_ff[2] &             m_ff[0]) |
+                                    (           ~q_ff[2] &  q_ff[1] &  q_ff[0] & ~m_ff[3] & ~m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &                                             ~m_ff[1] & ~m_ff[0]) |
+                                    ( q_ff[3] &             q_ff[1] &                       ~m_ff[2] &            ~m_ff[0]) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &  q_ff[0] & ~m_ff[3] &  m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &                        m_ff[3] & ~m_ff[2]                      ) |
+                                    ( q_ff[3] &             q_ff[1] &             m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &                        q_ff[0] &            ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &            ~q_ff[1] &            ~m_ff[3] &  m_ff[2] &  m_ff[1] &  m_ff[0]) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &             m_ff[3] &                       ~m_ff[0]) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &             m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] &  q_ff[2] &             q_ff[0] &  m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] & ~q_ff[2] &  q_ff[1] &            ~m_ff[3] &             m_ff[1]           ) |
+                                    ( q_ff[3] &             q_ff[1] &  q_ff[0] &            ~m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &  q_ff[0] &  m_ff[3]                                 );
+
+
+   // END   - short circuit logic for small numbers }}
+
+
+   // *** Start Short Q *** {{
+   // short_dividend: q_ff[31:0] extended with bit 32 = sign_ff & q_ff[31].
+   assign short_dividend[31:0]    =  q_ff[31:0];
+   assign short_dividend[32]      =  sign_ff & q_ff[31];
+
+   // Table: A = a_cls[2:0] (dividend), B = b_cls[2:0] (divisor), SH = number of iterations skipped.
+   //    A       B
+   //   210     210    SH
+   //   ---     ---    --
+   //   1xx     000     0
+   //   1xx     001     8
+   //   1xx     01x    16
+   //   1xx     1xx    24
+   //   01x     000     8
+   //   01x     001    16
+   //   01x     01x    24
+   //   01x     1xx    32
+   //   001     000    16
+   //   001     001    24
+   //   001     01x    32
+   //   001     1xx    32
+   //   000     000    24
+   //   000     001    32
+   //   000     01x    32
+   //   000     1xx    32
+   // a_cls[n]: byte n+1 of the dividend differs from its sign fill (negative case uses windows offset down by one bit).
+   assign a_cls[4:3]              =  2'b0;
+   assign a_cls[2]                =  (~short_dividend[32] & (short_dividend[31:24] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[31:23] != {9{1'b1}}));
+   assign a_cls[1]                =  (~short_dividend[32] & (short_dividend[23:16] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[22:15] != {8{1'b1}}));
+   assign a_cls[0]                =  (~short_dividend[32] & (short_dividend[15:08] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[14:07] != {8{1'b1}}));
+   // b_cls[n]: same per-byte test on the divisor m_ff, using m_ff[32] as the sign and byte-aligned windows.
+   assign b_cls[4:3]              =  2'b0;
+   assign b_cls[2]                =  (~m_ff[32]           & (          m_ff[31:24] != {8{1'b0}})) | ( m_ff[32]           & (          m_ff[31:24] != {8{1'b1}}));
+   assign b_cls[1]                =  (~m_ff[32]           & (          m_ff[23:16] != {8{1'b0}})) | ( m_ff[32]           & (          m_ff[23:16] != {8{1'b1}}));
+   assign b_cls[0]                =  (~m_ff[32]           & (          m_ff[15:08] != {8{1'b0}})) | ( m_ff[32]           & (          m_ff[15:08] != {8{1'b1}}));
+   // shortq_raw[3:0]: one bit per skip amount from the table above; all zero means no skip (SH = 0).
+   assign shortq_raw[3]           = ( (a_cls[2:1] == 2'b01 ) & (b_cls[2]   == 1'b1  ) ) |   // Shift by 32
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2]   == 1'b1  ) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2]   == 1'b1  ) ) |
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2:1] == 2'b01 ) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2:1] == 2'b01 ) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b001) );
+
+   assign shortq_raw[2]           = ( (a_cls[2]   == 1'b1  ) & (b_cls[2]   == 1'b1  ) ) |   // Shift by 24
+                                    ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:1] == 2'b01 ) ) |
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b001) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b000) );
+
+   assign shortq_raw[1]           = ( (a_cls[2]   == 1'b1  ) & (b_cls[2:1] == 2'b01 ) ) |   // Shift by 16
+                                    ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b001) ) |
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b000) );
+
+   assign shortq_raw[0]           = ( (a_cls[2]   == 1'b1  ) & (b_cls[2:0] == 3'b001) ) |   // Shift by  8
+                                    ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b000) );
+
+   // Skip only in the cycle after valid_in (valid_ff_x), for a non-zero divisor, when some skip amount applies.
+   assign shortq_enable           =  valid_ff_x & (m_ff[31:0] != 32'b0) & (shortq_raw[3:0] != 4'b0);
+
+   assign shortq_shift[3:0]       = ({4{shortq_enable}} & shortq_raw[3:0]);
+   // shortq is tied to zero and not referenced elsewhere in this module; upper shift bits are constant zero.
+   assign shortq[5:0]             =  6'b0;
+   assign shortq_shift[5:4]       =  2'b0;
+   assign shortq_shift_ff[5]      =  1'b0;
+   // Decode the flopped select into a shift amount. NOTE: bit 3 decodes to 31, although the table above lists 32.
+   assign shortq_shift_ff[4:0]    = ({5{shortq_shift_xx[3]}} & 5'b1_1111) |   // 31
+                                    ({5{shortq_shift_xx[2]}} & 5'b1_1000) |   // 24
+                                    ({5{shortq_shift_xx[1]}} & 5'b1_0000) |   // 16
+                                    ({5{shortq_shift_xx[0]}} & 5'b0_1000);    //  8
+
+   // *** End   Short *** }}
+
+
+
+
+   // Sequencing: div_clken clocks the control flops from valid_in until the cycle in which finish_ff is set.
+   assign div_clken               =  valid_in | run_state | finish | finish_ff;
+   // run_in: start on valid_in, keep running until finish or cancel.
+   assign run_in                  = (valid_in | run_state) & ~finish & ~cancel;
+   // count advances by 1 plus the decoded short-quotient skip; it is forced to 0 when idle, finished, cancelled or in the shortq_enable cycle.
+   assign count_in[5:0]           = {6{run_state & ~finish & ~cancel & ~shortq_enable}} & (count[5:0] + {1'b0,shortq_shift_ff[4:0]} + 6'd1);
+
+   // finish: small-number shortcut, or count reaches 32 (quotient) / 33 (remainder).
+   assign finish                  = (smallnum_case | ((~rem_ff) ? (count[5:0] == 6'd32) : (count[5:0] == 6'd33)));
+
+   assign valid_out               =  finish_ff & ~cancel;
+   // sign_eff: signed handling is dropped when the divisor is zero.
+   assign sign_eff                =  signed_in & (divisor_in[31:0] != 32'b0);
+
+   // q_in: idle -> load dividend; first run cycle or short-q cycle -> shifted dividend magnitude; else shift in ~a_in[32].
+   assign q_in[32:0]              = ({33{~run_state                                   }} &  {1'b0,dividend_in[31:0]}) |
+                                    ({33{ run_state &  (valid_ff_x | shortq_enable_ff)}} &  ({dividend_eff[31:0], ~a_in[32]} << shortq_shift_ff[4:0])) |
+                                    ({33{ run_state & ~(valid_ff_x | shortq_enable_ff)}} &  {q_ff[31:0], ~a_in[32]});
+   // q_ff holds its value during the shortq_enable cycle.
+   assign qff_enable              =  valid_in | (run_state & ~shortq_enable);
+
+
+
+   // dividend_eff: two's complement of q_ff when the operation is signed and the dividend was negative, else q_ff.
+   assign dividend_eff[31:0]      = (sign_ff & dividend_neg_ff) ? dividend_comp[31:0] : q_ff[31:0];
+
+   // m_eff: m_ff for an add, ~m_ff for a subtract (the +1 is supplied as ~add in a_in below).
+   assign m_eff[32:0]             = ( add ) ? m_ff[32:0] : ~m_ff[32:0];
+   // a_eff_shift[64:32] holds the dividend bits shifted out past bit 31 by the short-quotient skip.
+   assign a_eff_shift[64:0]       = {33'b0, dividend_eff[31:0]} << shortq_shift_ff[4:0];
+   // a_eff: hold a_ff for remainder correction, else shift in q_ff[32], else take the skipped dividend bits.
+   assign a_eff[32:0]             = ({33{ rem_correct                    }} &  a_ff[32:0]            ) |
+                                    ({33{~rem_correct & ~shortq_enable_ff}} & {a_ff[31:0], q_ff[32]} ) |
+                                    ({33{~rem_correct &  shortq_enable_ff}} &  a_eff_shift[64:32]    );
+   // a_shift and a_in are forced to zero whenever run_state is low.
+   assign a_shift[32:0]           = {33{run_state}} & a_eff[32:0];
+   // a_in: accumulator plus m_ff (add) or plus ~m_ff + 1 (subtract).
+   assign a_in[32:0]              = {33{run_state}} & (a_shift[32:0] + m_eff[32:0] + {32'b0,~add});
+   // a_ff stops updating at count 33 unless the remainder correction step is required.
+   assign aff_enable              =  valid_in | (run_state & ~shortq_enable & (count[5:0]!=6'd33)) | rem_correct;
+
+   // m_already_comp: signed operation with a negative divisor captured in m_ff.
+   assign m_already_comp          = (divisor_neg_ff & sign_ff);
+
+   // if m already complemented, then invert operation add->sub, sub->add
+   assign add                     = (a_ff[32] | rem_correct) ^ m_already_comp;
+   // rem_correct: final step for remainder operations when the accumulator ended up negative (a_ff[32] set).
+   assign rem_correct             = (count[5:0] == 6'd33) & rem_ff & a_ff[32];
+
+
+   // q_ff_eff: quotient is negated when the operation is signed and the operand signs differ.
+   assign q_ff_eff[31:0]          = (sign_ff & (dividend_neg_ff ^ divisor_neg_ff)) ? q_ff_comp[31:0] : q_ff[31:0];
+   // a_ff_eff: remainder is negated when the operation is signed and the dividend was negative.
+   assign a_ff_eff[31:0]          = (sign_ff &  dividend_neg_ff) ? a_ff_comp[31:0] : a_ff[31:0];
+   // data_out: AND-OR select of the small-number result, the remainder, or the quotient.
+   assign data_out[31:0]          = ({32{ smallnum_case_ff          }} & {28'b0, smallnum_ff[3:0]}) |
+                                    ({32{                     rem_ff}} &  a_ff_eff[31:0]          ) |
+                                    ({32{~smallnum_case_ff & ~rem_ff}} &  q_ff_eff[31:0]          );
+
+
+
+
+endmodule // eb1_exu_div_existing_1bit_cheapshortq
+
+
+
+
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+module eb1_exu_div_new_1bit_fullshortq
+  (
+   input  logic            clk,                       // Top level clock
+   input  logic            rst_l,                     // Reset
+   input  logic            scan_mode,                 // Scan mode
+
+   input  logic            cancel,                    // Flush pipeline
+   input  logic            valid_in,
+   input  logic            signed_in,
+   input  logic            rem_in,
+   input  logic [31:0]     dividend_in,
+   input  logic [31:0]     divisor_in,
+
+   output logic            valid_out,
+   output logic [31:0]     data_out
+  );
+
+
+   logic                   valid_ff_in, valid_ff;
+   logic                   finish_raw, finish, finish_ff;
+   logic                   running_state;
+   logic                   misc_enable;
+   logic         [2:0]     control_in, control_ff;
+   logic                   dividend_sign_ff, divisor_sign_ff, rem_ff;
+   logic                   count_enable;
+   logic         [6:0]     count_in, count_ff;
+
+   logic                   smallnum_case;
+   logic         [3:0]     smallnum;
+
+   logic                   a_enable, a_shift;
+   logic        [31:0]     a_in, a_ff;
+
+   logic                   b_enable, b_twos_comp;
+   logic        [32:0]     b_in, b_ff;
+
+   logic        [31:0]     q_in, q_ff;
+
+   logic                   rq_enable, r_sign_sel, r_restore_sel, r_adder_sel;
+   logic        [31:0]     r_in, r_ff;
+
+   logic                   twos_comp_q_sel, twos_comp_b_sel;
+   logic        [31:0]     twos_comp_in, twos_comp_out;
+
+   logic                   quotient_set;
+   logic        [32:0]     adder_out;
+
+   logic        [63:0]     ar_shifted;
+   logic         [5:0]     shortq;
+   logic         [4:0]     shortq_shift;
+   logic         [4:0]     shortq_shift_ff;
+   logic                   shortq_enable;
+   logic                   shortq_enable_ff;
+   logic        [32:0]     shortq_dividend;
+
+   logic                   by_zero_case;
+   logic                   by_zero_case_ff;
+
+
+
+   rvdffe #(19) i_misc_ff        (.*, .clk(clk), .en(misc_enable),    .din ({valid_ff_in, control_in[2:0], by_zero_case,    shortq_enable,    shortq_shift[4:0],    finish,    count_in[6:0]}),
+                                                                      .dout({valid_ff,    control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:0], finish_ff, count_ff[6:0]}));
+
+   rvdffe #(32) i_a_ff           (.*, .clk(clk), .en(a_enable),       .din(a_in[31:0]),           .dout(a_ff[31:0]));
+   rvdffe #(33) i_b_ff           (.*, .clk(clk), .en(b_enable),       .din(b_in[32:0]),           .dout(b_ff[32:0]));
+   rvdffe #(32) i_r_ff           (.*, .clk(clk), .en(rq_enable),      .din(r_in[31:0]),           .dout(r_ff[31:0]));
+   rvdffe #(32) i_q_ff           (.*, .clk(clk), .en(rq_enable),      .din(q_in[31:0]),           .dout(q_ff[31:0]));
+
+
+
+
+   assign valid_ff_in            =  valid_in  & ~cancel;
+
+   assign control_in[2]          = (~valid_in & control_ff[2]) | (valid_in & signed_in  & dividend_in[31]);
+   assign control_in[1]          = (~valid_in & control_ff[1]) | (valid_in & signed_in  &  divisor_in[31]);
+   assign control_in[0]          = (~valid_in & control_ff[0]) | (valid_in & rem_in);
+
+   assign dividend_sign_ff       =  control_ff[2];
+   assign divisor_sign_ff        =  control_ff[1];
+   assign rem_ff                 =  control_ff[0];
+
+
+   assign by_zero_case           =  valid_ff & (b_ff[31:0] == 32'b0);
+
+   assign misc_enable            =  valid_in | valid_ff | cancel | running_state | finish_ff;
+   assign running_state          = (| count_ff[6:0]) | shortq_enable_ff;
+   assign finish_raw             =   smallnum_case      |
+                                     by_zero_case       |
+                                    (count_ff[6:0] == 7'd32);
+
+
+   assign finish                 =  finish_raw & ~cancel;
+   assign count_enable           = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable;
+   assign count_in[6:0]          = {7{count_enable}} & (count_ff[6:0] + {6'b0,1'b1} + {2'b0,shortq_shift_ff[4:0]});
+
+
+   assign a_enable               =  valid_in | running_state;
+   assign a_shift                =  running_state & ~shortq_enable_ff;
+
+   assign ar_shifted[63:0]       = { {32{dividend_sign_ff}} , a_ff[31:0]} << shortq_shift_ff[4:0];
+
+   assign a_in[31:0]             = ( {32{~a_shift & ~shortq_enable_ff}} &  dividend_in[31:0] ) |
+                                   ( {32{ a_shift                    }} & {a_ff[30:0],1'b0}  ) |
+                                   ( {32{            shortq_enable_ff}} &  ar_shifted[31:0]  );
+
+
+
+   assign b_enable               =    valid_in | b_twos_comp;
+   assign b_twos_comp            =    valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff);
+
+   assign b_in[32:0]             = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) |
+                                   ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } );
+
+
+   assign rq_enable              =  valid_in | valid_ff | running_state;
+   assign r_sign_sel             =  valid_ff      &  dividend_sign_ff & ~by_zero_case;
+   assign r_restore_sel          =  running_state & ~quotient_set & ~shortq_enable_ff;
+   assign r_adder_sel            =  running_state &  quotient_set & ~shortq_enable_ff;
+
+
+   assign r_in[31:0]             = ( {32{r_sign_sel      }} &  32'hffffffff          ) |
+                                   ( {32{r_restore_sel   }} & {r_ff[30:0] ,a_ff[31]} ) |
+                                   ( {32{r_adder_sel     }} &  adder_out[31:0]       ) |
+                                   ( {32{shortq_enable_ff}} &  ar_shifted[63:32]     ) |
+                                   ( {32{by_zero_case    }} &  a_ff[31:0]            );
+
+
+   assign q_in[31:0]             = ( {32{~valid_ff       }} & {q_ff[30:0], quotient_set}  ) |
+                                   ( {32{ smallnum_case  }} & {28'b0     , smallnum[3:0]} ) |
+                                   ( {32{ by_zero_case   }} & {32{1'b1}}                  );
+
+
+
+   assign adder_out[32:0]        = {r_ff[31:0],a_ff[31]} + {b_ff[32:0] };
+
+
+   assign quotient_set           = (~adder_out[32] ^ dividend_sign_ff) | ( (a_ff[30:0] == 31'b0) & (adder_out[32:0] == 33'b0) );
+
+
+
+   assign twos_comp_b_sel        =  valid_ff           & ~(dividend_sign_ff ^ divisor_sign_ff);
+   assign twos_comp_q_sel        = ~valid_ff & ~rem_ff &  (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff;
+
+   assign twos_comp_in[31:0]     = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) |
+                                   ( {32{twos_comp_b_sel}} & b_ff[31:0] );
+
+   rvtwoscomp #(32) i_twos_comp  (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0]));
+
+
+
+   assign valid_out              =  finish_ff & ~cancel;
+
+   assign data_out[31:0]         = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0]          ) |
+                                   ( {32{ rem_ff                   }} & r_ff[31:0]          ) |
+                                   ( {32{           twos_comp_q_sel}} & twos_comp_out[31:0] );
+
+
+
+
+   // *** *** *** START : SMALLNUM {{
+
+   assign smallnum_case          = ( (a_ff[31:4]  == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) |
+                                   ( (a_ff[31:0]  == 32'b0) &                         ~by_zero_case & ~rem_ff & valid_ff & ~cancel);
+
+   assign smallnum[3]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           );
+
+   assign smallnum[2]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] & ~b_ff[2]                      );
+
+   assign smallnum[1]            = (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                                  ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &            ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2]                      );
+
+   assign smallnum[0]            = (            a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &             a_ff[0] & ~b_ff[3] &             b_ff[1] &  b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                                  a_ff[0] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] & ~a_ff[1] &            ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &                                             ~b_ff[2] & ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] &  b_ff[2] &  b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &             a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] & ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &             b_ff[0]) |
+                                   (           ~a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                                             ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] &             a_ff[1] &                       ~b_ff[2] &            ~b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &  b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                        b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &             a_ff[1] &             b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                        a_ff[0] &            ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &            ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &             a_ff[0] &  b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &  a_ff[1] &            ~b_ff[3] &             b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &  a_ff[0] &            ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] &  b_ff[3]                                 );
+
+   // *** *** *** END   : SMALLNUM }}
+
+
+
+
+   // *** *** *** Start : Short Q {{
+
+   assign shortq_dividend[32:0]   = {dividend_sign_ff,a_ff[31:0]};
+
+
+   logic [5:0]  dw_a_enc;
+   logic [5:0]  dw_b_enc;
+   logic [6:0]  dw_shortq_raw;
+
+
+
+   eb1_exu_div_cls i_a_cls  (
+       .operand  ( shortq_dividend[32:0]  ),
+       .cls      ( dw_a_enc[4:0]          ));
+
+   eb1_exu_div_cls i_b_cls  (
+       .operand  ( b_ff[32:0]             ),
+       .cls      ( dw_b_enc[4:0]          ));
+
+   assign dw_a_enc[5]             =  1'b0;
+   assign dw_b_enc[5]             =  1'b0;
+
+
+
+   assign dw_shortq_raw[6:0]      =  {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1;
+   assign shortq[5:0]             =  dw_shortq_raw[6]  ?  6'd0  :  dw_shortq_raw[5:0];
+
+   assign shortq_enable           =  valid_ff & ~shortq[5] & ~(shortq[4:1] ==  4'b1111) & ~cancel;
+
+   assign shortq_shift[4:0]       = ~shortq_enable     ?  5'd0  :  (5'b11111 - shortq[4:0]);
+
+
+   // *** *** *** End   : Short Q }}
+
+
+
+
+
+endmodule // eb1_exu_div_new_1bit_fullshortq
+
+
+
+
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+module eb1_exu_div_new_2bit_fullshortq
+  (
+   input  logic            clk,                       // Top level clock
+   input  logic            rst_l,                     // Reset
+   input  logic            scan_mode,                 // Scan mode
+
+   input  logic            cancel,                    // Flush pipeline
+   input  logic            valid_in,                  // New request: enables capture of operands and control bits
+   input  logic            signed_in,                 // Signed operation: operand bit 31 is treated as a sign bit
+   input  logic            rem_in,                    // Result select: 1 = r_ff (remainder path), 0 = quotient
+   input  logic [31:0]     dividend_in,               // Dividend operand, loaded into a_ff
+   input  logic [31:0]     divisor_in,                // Divisor operand, loaded into b_ff
+
+   output logic            valid_out,                 // finish_ff masked by cancel
+   output logic [31:0]     data_out                   // q_ff, negated q_ff, or r_ff depending on rem_ff / operand signs
+  );
+   // Sequential divide/remainder unit that retires two quotient bits per iteration (see quotient_new[1:0]).
+   // Also contains a small-operand lookup (SMALLNUM), a divide-by-zero path and a "Short Q" pre-shift of the dividend.
+   logic                   valid_ff_in, valid_ff;
+   logic                   finish_raw, finish, finish_ff;
+   logic                   running_state;
+   logic                   misc_enable;
+   logic         [2:0]     control_in, control_ff;
+   logic                   dividend_sign_ff, divisor_sign_ff, rem_ff;
+   logic                   count_enable;
+   logic         [6:0]     count_in, count_ff;
+   // SMALLNUM: quotient lookup used for non-remainder operations with small operands
+   logic                   smallnum_case;
+   logic         [3:0]     smallnum;
+   // a_ff: dividend shift register
+   logic                   a_enable, a_shift;
+   logic        [31:0]     a_in, a_ff;
+   // b_ff: divisor register; [32:0] is registered, [34:33] are wire copies of [32] (assigned further down)
+   logic                   b_enable, b_twos_comp;
+   logic        [32:0]     b_in;
+   logic        [34:0]     b_ff;
+   // q_ff: quotient accumulator
+   logic        [31:0]     q_in, q_ff;
+   // r_ff: partial remainder register and the select lines for its next value
+   logic                   rq_enable, r_sign_sel, r_restore_sel, r_adder1_sel, r_adder2_sel, r_adder3_sel;
+   logic        [31:0]     r_in, r_ff;
+   // Single rvtwoscomp instance shared between b_ff (operation start) and q_ff (result formation)
+   logic                   twos_comp_q_sel, twos_comp_b_sel;
+   logic        [31:0]     twos_comp_in, twos_comp_out;
+   // Per-iteration trial sums (1x, 2x, 3x b_ff) and the 2-bit quotient digit decoded from them
+   logic         [3:1]     quotient_raw;
+   logic         [1:0]     quotient_new;
+   logic        [32:0]     adder1_out;
+   logic        [33:0]     adder2_out;
+   logic        [34:0]     adder3_out;
+   // Short Q: pre-shift amount, its registered copy (bit 0 is not registered) and enables
+   logic        [63:0]     ar_shifted;
+   logic         [5:0]     shortq;
+   logic         [4:0]     shortq_shift;
+   logic         [4:1]     shortq_shift_ff;
+   logic                   shortq_enable;
+   logic                   shortq_enable_ff;
+   logic        [32:0]     shortq_dividend;
+   // Divide-by-zero flag and its registered copy
+   logic                   by_zero_case;
+   logic                   by_zero_case_ff;
+
+
+   // i_misc_ff packs valid, control[2:0], by-zero, shortq enable, shortq shift[4:1], finish and count[6:0]: 1+3+1+1+4+1+7 = 18 bits
+   rvdffe #(18) i_misc_ff        (.*, .clk(clk), .en(misc_enable),    .din ({valid_ff_in, control_in[2:0], by_zero_case,    shortq_enable,    shortq_shift[4:1],    finish,    count_in[6:0]}),
+                                                                      .dout({valid_ff,    control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:1], finish_ff, count_ff[6:0]}));
+   // Datapath registers (rvdffe is defined outside this module; remaining ports are bound by name through .*)
+   rvdffe #(32) i_a_ff           (.*, .clk(clk), .en(a_enable),       .din(a_in[31:0]),           .dout(a_ff[31:0]));
+   rvdffe #(33) i_b_ff           (.*, .clk(clk), .en(b_enable),       .din(b_in[32:0]),           .dout(b_ff[32:0]));
+   rvdffe #(32) i_r_ff           (.*, .clk(clk), .en(rq_enable),      .din(r_in[31:0]),           .dout(r_ff[31:0]));
+   rvdffe #(32) i_q_ff           (.*, .clk(clk), .en(rq_enable),      .din(q_in[31:0]),           .dout(q_ff[31:0]));
+
+
+
+   // A request that arrives together with cancel is dropped
+   assign valid_ff_in            =  valid_in  & ~cancel;
+   // control[2:0] = {dividend sign, divisor sign, rem}; reloaded when valid_in is high, held otherwise
+   assign control_in[2]          = (~valid_in & control_ff[2]) | (valid_in & signed_in  & dividend_in[31]);
+   assign control_in[1]          = (~valid_in & control_ff[1]) | (valid_in & signed_in  &  divisor_in[31]);
+   assign control_in[0]          = (~valid_in & control_ff[0]) | (valid_in & rem_in);
+
+   assign dividend_sign_ff       =  control_ff[2];
+   assign divisor_sign_ff        =  control_ff[1];
+   assign rem_ff                 =  control_ff[0];
+
+   // Zero divisor, detected while valid_ff is set
+   assign by_zero_case           =  valid_ff & (b_ff[31:0] == 32'b0);
+   // Keep i_misc_ff enabled while anything is pending; finish on smallnum, divide-by-zero, or count == 32
+   assign misc_enable            =  valid_in | valid_ff | cancel | running_state | finish_ff;
+   assign running_state          = (| count_ff[6:0]) | shortq_enable_ff;
+   assign finish_raw             =   smallnum_case      |
+                                     by_zero_case       |
+                                    (count_ff[6:0] == 7'd32);
+
+   // Count advances by 2 per step plus the registered (even) short-Q shift; it is forced to 0 when count_enable is low
+   assign finish                 =  finish_raw & ~cancel;
+   assign count_enable           = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable;
+   assign count_in[6:0]          = {7{count_enable}} & (count_ff[6:0] + {5'b0,2'b10} + {2'b0,shortq_shift_ff[4:1],1'b0});
+
+   // a_ff next value: dividend_in, a_ff shifted left by 2, or the lower half of the short-Q pre-shifted value
+   assign a_enable               =  valid_in | running_state;
+   assign a_shift                =  running_state & ~shortq_enable_ff;
+   // {32 copies of dividend_sign_ff, a_ff} shifted left by an even amount; upper half feeds r_in, lower half feeds a_in
+   assign ar_shifted[63:0]       = { {32{dividend_sign_ff}} , a_ff[31:0]} << {shortq_shift_ff[4:1],1'b0};
+
+   assign a_in[31:0]             = ( {32{~a_shift & ~shortq_enable_ff}} &  dividend_in[31:0] ) |
+                                   ( {32{ a_shift                    }} & {a_ff[29:0],2'b0}  ) |
+                                   ( {32{            shortq_enable_ff}} &  ar_shifted[31:0]  );
+
+
+   // b_ff: loaded with the sign-extended divisor; while valid_ff is set and operand signs match it is replaced by twos_comp_out
+   assign b_enable               =    valid_in | b_twos_comp;
+   assign b_twos_comp            =    valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff);
+
+   assign b_in[32:0]             = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) |
+                                   ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } );
+
+   // The restore/adder selects below are mutually exclusive: one per quotient_new value, all gated off during the short-Q step
+   assign rq_enable              =  valid_in | valid_ff | running_state;
+   assign r_sign_sel             =  valid_ff      &  dividend_sign_ff & ~by_zero_case;
+   assign r_restore_sel          =  running_state & (quotient_new[1:0] == 2'b00) & ~shortq_enable_ff;
+   assign r_adder1_sel           =  running_state & (quotient_new[1:0] == 2'b01) & ~shortq_enable_ff;
+   assign r_adder2_sel           =  running_state & (quotient_new[1:0] == 2'b10) & ~shortq_enable_ff;
+   assign r_adder3_sel           =  running_state & (quotient_new[1:0] == 2'b11) & ~shortq_enable_ff;
+
+   // r_ff next value: all ones (r_sign_sel), shifted-in dividend bits (restore), a trial sum, short-Q upper half, or a_ff on divide-by-zero
+   assign r_in[31:0]             = ( {32{r_sign_sel      }} &  32'hffffffff             ) |
+                                   ( {32{r_restore_sel   }} & {r_ff[29:0] ,a_ff[31:30]} ) |
+                                   ( {32{r_adder1_sel    }} &  adder1_out[31:0]         ) |
+                                   ( {32{r_adder2_sel    }} &  adder2_out[31:0]         ) |
+                                   ( {32{r_adder3_sel    }} &  adder3_out[31:0]         ) |
+                                   ( {32{shortq_enable_ff}} &  ar_shifted[63:32]        ) |
+                                   ( {32{by_zero_case    }} &  a_ff[31:0]               );
+
+   // q_ff next value: shift in two quotient bits when valid_ff is low; smallnum / all-ones (divide-by-zero) apply while valid_ff is set
+   assign q_in[31:0]             = ( {32{~valid_ff       }} & {q_ff[29:0], quotient_new[1:0]} ) |
+                                   ( {32{ smallnum_case  }} & {28'b0     , smallnum[3:0]}     ) |
+                                   ( {32{ by_zero_case   }} & {32{1'b1}}                      );
+
+   // Extend b_ff with two copies of its top registered bit for the wider adders
+   assign b_ff[34:33]            = {b_ff[32],b_ff[32]};
+
+   // Trial sums: {r_ff, top two a_ff bits} plus 1x, 2x and 3x b_ff
+   assign adder1_out[32:0]       = {         r_ff[30:0],a_ff[31:30]}  +                       b_ff[32:0];
+   assign adder2_out[33:0]       = {         r_ff[31:0],a_ff[31:30]}  + {b_ff[32:0],1'b0};
+   assign adder3_out[34:0]       = {r_ff[31],r_ff[31:0],a_ff[31:30]}  + {b_ff[33:0],1'b0}  +  b_ff[34:0];
+
+   // quotient_raw[k] is set when adder k's MSB equals dividend_sign_ff, or when that sum is zero and a_ff[29:0] is zero
+   assign quotient_raw[1]        = (~adder1_out[32] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder1_out[32:0] == 33'b0) );
+   assign quotient_raw[2]        = (~adder2_out[33] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder2_out[33:0] == 34'b0) );
+   assign quotient_raw[3]        = (~adder3_out[34] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder3_out[34:0] == 35'b0) );
+   // Priority encode: the highest set quotient_raw index becomes the 2-bit quotient digit (00 when none is set)
+   assign quotient_new[1]        = quotient_raw[3] |  quotient_raw[2];
+   assign quotient_new[0]        = quotient_raw[3] |(~quotient_raw[2] & quotient_raw[1]);
+
+   // rvtwoscomp input select: b_ff while valid_ff is set (signs match); q_ff afterwards for a quotient whose operand signs differ
+   assign twos_comp_b_sel        =  valid_ff           & ~(dividend_sign_ff ^ divisor_sign_ff);
+   assign twos_comp_q_sel        = ~valid_ff & ~rem_ff &  (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff;
+
+   assign twos_comp_in[31:0]     = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) |
+                                   ( {32{twos_comp_b_sel}} & b_ff[31:0] );
+
+   rvtwoscomp #(32) i_twos_comp  (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0]));
+
+
+   // Outputs: valid when finish_ff is set and not cancelled; data is q_ff, twos_comp_out (of q_ff), or r_ff when rem_ff is set
+   assign valid_out              =  finish_ff & ~cancel;
+
+   assign data_out[31:0]         = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0]          ) |
+                                   ( {32{ rem_ff                   }} & r_ff[31:0]          ) |
+                                   ( {32{           twos_comp_q_sel}} & twos_comp_out[31:0] );
+
+
+
+
+   // *** *** *** START : SMALLNUM {{
+   // Taken for non-remainder operations when a_ff and b_ff are both below 16, or when a_ff is zero (never on divide-by-zero)
+   assign smallnum_case          = ( (a_ff[31:4]  == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) |
+                                   ( (a_ff[31:0]  == 32'b0) &                         ~by_zero_case & ~rem_ff & valid_ff & ~cancel);
+   // smallnum[3:0]: sum-of-products over a_ff[3:0] and b_ff[3:0]; appears to encode the quotient a/b - TODO confirm against a truth table
+   assign smallnum[3]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           );
+
+   assign smallnum[2]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] & ~b_ff[2]                      );
+
+   assign smallnum[1]            = (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                                  ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &            ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2]                      );
+
+   assign smallnum[0]            = (            a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &             a_ff[0] & ~b_ff[3] &             b_ff[1] &  b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                                  a_ff[0] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] & ~a_ff[1] &            ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &                                             ~b_ff[2] & ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] &  b_ff[2] &  b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &             a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] & ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &             b_ff[0]) |
+                                   (           ~a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                                             ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] &             a_ff[1] &                       ~b_ff[2] &            ~b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &  b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                        b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &             a_ff[1] &             b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                        a_ff[0] &            ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &            ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &             a_ff[0] &  b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &  a_ff[1] &            ~b_ff[3] &             b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &  a_ff[0] &            ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] &  b_ff[3]                                 );
+
+   // *** *** *** END   : SMALLNUM }}
+
+
+
+
+   // *** *** *** Start : Short Q {{
+   // Derives a dividend pre-shift from the cls encodings of the sign-extended dividend and of b_ff
+   assign shortq_dividend[32:0]   = {dividend_sign_ff,a_ff[31:0]};
+
+   // 6-bit views of the 5-bit cls outputs (bit 5 is tied low below) and their 7-bit difference
+   logic [5:0]  dw_a_enc;
+   logic [5:0]  dw_b_enc;
+   logic [6:0]  dw_shortq_raw;
+
+
+   // eb1_exu_div_cls is defined outside this module; presumably a count-leading-sign encoder - confirm
+   eb1_exu_div_cls i_a_cls  (
+       .operand  ( shortq_dividend[32:0]  ),
+       .cls      ( dw_a_enc[4:0]          ));
+
+   eb1_exu_div_cls i_b_cls  (
+       .operand  ( b_ff[32:0]             ),
+       .cls      ( dw_b_enc[4:0]          ));
+
+   assign dw_a_enc[5]             =  1'b0;
+   assign dw_b_enc[5]             =  1'b0;
+
+
+   // shortq = dw_b_enc - dw_a_enc + 1, replaced by 0 when bit 6 of the 7-bit result is set
+   assign dw_shortq_raw[6:0]      =  {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1;
+   assign shortq[5:0]             =  dw_shortq_raw[6]  ?  6'd0  :  dw_shortq_raw[5:0];
+   // Pre-shift is used only while valid_ff is set, shortq < 30 (bit 5 clear, shortq[4:1] != 15) and cancel is low
+   assign shortq_enable           =  valid_ff & ~shortq[5] & ~(shortq[4:1] ==  4'b1111) & ~cancel;
+   // Shift amount = 31 - shortq[4:0]; only bits [4:1] are registered, so the applied shift is even
+   assign shortq_shift[4:0]       = ~shortq_enable     ?  5'd0  :  (5'b11111 - shortq[4:0]);   // [0] is unused
+
+
+   // *** *** *** End   : Short Q }}
+
+
+
+
+
+endmodule // eb1_exu_div_new_2bit_fullshortq
+
+
+
+
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+module eb1_exu_div_new_3bit_fullshortq
+  (
+   input  logic            clk,                       // Top level clock
+   input  logic            rst_l,                     // Reset
+   input  logic            scan_mode,                 // Scan mode
+
+   input  logic            cancel,                    // Flush pipeline
+   input  logic            valid_in,
+   input  logic            signed_in,
+   input  logic            rem_in,
+   input  logic [31:0]     dividend_in,
+   input  logic [31:0]     divisor_in,
+
+   output logic            valid_out,
+   output logic [31:0]     data_out
+  );
+
+
+   logic                   valid_ff_in, valid_ff;
+   logic                   finish_raw, finish, finish_ff;
+   logic                   running_state;
+   logic                   misc_enable;
+   logic         [2:0]     control_in, control_ff;
+   logic                   dividend_sign_ff, divisor_sign_ff, rem_ff;
+   logic                   count_enable;
+   logic         [6:0]     count_in, count_ff;
+
+   logic                   smallnum_case;
+   logic         [3:0]     smallnum;
+
+   logic                   a_enable, a_shift;
+   logic        [32:0]     a_in, a_ff;
+
+   logic                   b_enable, b_twos_comp;
+   logic        [32:0]     b_in;
+   logic        [36:0]     b_ff;
+
+   logic        [31:0]     q_in, q_ff;
+
+   logic                   rq_enable;
+   logic                   r_sign_sel;
+   logic                   r_restore_sel;
+   logic                   r_adder1_sel, r_adder2_sel, r_adder3_sel, r_adder4_sel, r_adder5_sel, r_adder6_sel, r_adder7_sel;
+   logic        [32:0]     r_in, r_ff;
+
+   logic                   twos_comp_q_sel, twos_comp_b_sel;
+   logic        [31:0]     twos_comp_in, twos_comp_out;
+
+   logic         [7:1]     quotient_raw;
+   logic         [2:0]     quotient_new;
+   logic        [33:0]     adder1_out;
+   logic        [34:0]     adder2_out;
+   logic        [35:0]     adder3_out;
+   logic        [36:0]     adder4_out;
+   logic        [36:0]     adder5_out;
+   logic        [36:0]     adder6_out;
+   logic        [36:0]     adder7_out;
+
+   logic        [65:0]     ar_shifted;
+   logic         [5:0]     shortq;
+   logic         [4:0]     shortq_shift;
+   logic         [4:0]     shortq_decode;
+   logic         [4:0]     shortq_shift_ff;
+   logic                   shortq_enable;
+   logic                   shortq_enable_ff;
+   logic        [32:0]     shortq_dividend;
+
+   logic                   by_zero_case;
+   logic                   by_zero_case_ff;
+
+
+
+   rvdffe #(19) i_misc_ff        (.*, .clk(clk), .en(misc_enable),    .din ({valid_ff_in, control_in[2:0], by_zero_case,    shortq_enable,    shortq_shift[4:0],    finish,    count_in[6:0]}),
+                                                                      .dout({valid_ff,    control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:0], finish_ff, count_ff[6:0]}));
+
+   rvdffe #(33) i_a_ff           (.*, .clk(clk), .en(a_enable),       .din(a_in[32:0]),           .dout(a_ff[32:0]));
+   rvdffe #(33) i_b_ff           (.*, .clk(clk), .en(b_enable),       .din(b_in[32:0]),           .dout(b_ff[32:0]));
+   rvdffe #(33) i_r_ff           (.*, .clk(clk), .en(rq_enable),      .din(r_in[32:0]),           .dout(r_ff[32:0]));
+   rvdffe #(32) i_q_ff           (.*, .clk(clk), .en(rq_enable),      .din(q_in[31:0]),           .dout(q_ff[31:0]));
+
+
+
+
+   assign valid_ff_in            =  valid_in  & ~cancel;
+
+   assign control_in[2]          = (~valid_in & control_ff[2]) | (valid_in & signed_in  & dividend_in[31]);
+   assign control_in[1]          = (~valid_in & control_ff[1]) | (valid_in & signed_in  &  divisor_in[31]);
+   assign control_in[0]          = (~valid_in & control_ff[0]) | (valid_in & rem_in);
+
+   assign dividend_sign_ff       =  control_ff[2];
+   assign divisor_sign_ff        =  control_ff[1];
+   assign rem_ff                 =  control_ff[0];
+
+
+   assign by_zero_case           =  valid_ff & (b_ff[31:0] == 32'b0);
+
+   assign misc_enable            =  valid_in | valid_ff | cancel | running_state | finish_ff;
+   assign running_state          = (| count_ff[6:0]) | shortq_enable_ff;
+   assign finish_raw             =   smallnum_case      |
+                                     by_zero_case       |
+                                    (count_ff[6:0] == 7'd33);
+
+
+   assign finish                 =  finish_raw & ~cancel;
+   assign count_enable           = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable;
+   assign count_in[6:0]          = {7{count_enable}} & (count_ff[6:0] + {5'b0,2'b11} + {2'b0,shortq_shift_ff[4:0]});
+
+
+   assign a_enable               =  valid_in | running_state;
+   assign a_shift                =  running_state & ~shortq_enable_ff;
+
+   assign ar_shifted[65:0]       = { {33{dividend_sign_ff}} , a_ff[32:0]} << {shortq_shift_ff[4:0]};
+
+   assign a_in[32:0]             = ( {33{~a_shift & ~shortq_enable_ff}} & {signed_in & dividend_in[31],dividend_in[31:0]} ) |
+                                   ( {33{ a_shift                    }} & {a_ff[29:0],3'b0}  ) |
+                                   ( {33{            shortq_enable_ff}} &  ar_shifted[32:0]  );
+
+
+
+   assign b_enable               =    valid_in | b_twos_comp;
+   assign b_twos_comp            =    valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff);
+
+   assign b_in[32:0]             = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) |
+                                   ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } );
+
+
+   assign rq_enable              =  valid_in | valid_ff | running_state;
+   assign r_sign_sel             =  valid_ff      &  dividend_sign_ff & ~by_zero_case;
+   assign r_restore_sel          =  running_state & (quotient_new[2:0] == 3'b000) & ~shortq_enable_ff;
+   assign r_adder1_sel           =  running_state & (quotient_new[2:0] == 3'b001) & ~shortq_enable_ff;
+   assign r_adder2_sel           =  running_state & (quotient_new[2:0] == 3'b010) & ~shortq_enable_ff;
+   assign r_adder3_sel           =  running_state & (quotient_new[2:0] == 3'b011) & ~shortq_enable_ff;
+   assign r_adder4_sel           =  running_state & (quotient_new[2:0] == 3'b100) & ~shortq_enable_ff;
+   assign r_adder5_sel           =  running_state & (quotient_new[2:0] == 3'b101) & ~shortq_enable_ff;
+   assign r_adder6_sel           =  running_state & (quotient_new[2:0] == 3'b110) & ~shortq_enable_ff;
+   assign r_adder7_sel           =  running_state & (quotient_new[2:0] == 3'b111) & ~shortq_enable_ff;
+
+
+   assign r_in[32:0]             = ( {33{r_sign_sel      }} & {33{1'b1}}               ) |
+                                   ( {33{r_restore_sel   }} & {r_ff[29:0] ,a_ff[32:30]} ) |
+                                   ( {33{r_adder1_sel    }} &  adder1_out[32:0]         ) |
+                                   ( {33{r_adder2_sel    }} &  adder2_out[32:0]         ) |
+                                   ( {33{r_adder3_sel    }} &  adder3_out[32:0]         ) |
+                                   ( {33{r_adder4_sel    }} &  adder4_out[32:0]         ) |
+                                   ( {33{r_adder5_sel    }} &  adder5_out[32:0]         ) |
+                                   ( {33{r_adder6_sel    }} &  adder6_out[32:0]         ) |
+                                   ( {33{r_adder7_sel    }} &  adder7_out[32:0]         ) |
+                                   ( {33{shortq_enable_ff}} &  ar_shifted[65:33]        ) |
+                                   ( {33{by_zero_case    }} & {1'b0,a_ff[31:0]}         );
+
+
+   assign q_in[31:0]             = ( {32{~valid_ff     }} & {q_ff[28:0], quotient_new[2:0]} ) |
+                                   ( {32{ smallnum_case}} & {28'b0     , smallnum[3:0]}     ) |
+                                   ( {32{ by_zero_case }} & {32{1'b1}}                      );
+
+
+   assign b_ff[36:33]            = {b_ff[32],b_ff[32],b_ff[32],b_ff[32]};
+
+
+   assign adder1_out[33:0]       = {         r_ff[30:0],a_ff[32:30]}  +                                              b_ff[33:0];
+   assign adder2_out[34:0]       = {         r_ff[31:0],a_ff[32:30]}  +                        {b_ff[33:0],1'b0};
+   assign adder3_out[35:0]       = {         r_ff[32:0],a_ff[32:30]}  +                        {b_ff[34:0],1'b0}  +  b_ff[35:0];
+   assign adder4_out[36:0]       = {r_ff[32],r_ff[32:0],a_ff[32:30]}  +  {b_ff[34:0],2'b0};
+   assign adder5_out[36:0]       = {r_ff[32],r_ff[32:0],a_ff[32:30]}  +  {b_ff[34:0],2'b0}  +                        b_ff[36:0];
+   assign adder6_out[36:0]       = {r_ff[32],r_ff[32:0],a_ff[32:30]}  +  {b_ff[34:0],2'b0}  +  {b_ff[35:0],1'b0};
+   assign adder7_out[36:0]       = {r_ff[32],r_ff[32:0],a_ff[32:30]}  +  {b_ff[34:0],2'b0}  +  {b_ff[35:0],1'b0}  +  b_ff[36:0];
+
+   assign quotient_raw[1]        = (~adder1_out[33] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder1_out[33:0] == 34'b0) );
+   assign quotient_raw[2]        = (~adder2_out[34] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder2_out[34:0] == 35'b0) );
+   assign quotient_raw[3]        = (~adder3_out[35] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder3_out[35:0] == 36'b0) );
+   assign quotient_raw[4]        = (~adder4_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder4_out[36:0] == 37'b0) );
+   assign quotient_raw[5]        = (~adder5_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder5_out[36:0] == 37'b0) );
+   assign quotient_raw[6]        = (~adder6_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder6_out[36:0] == 37'b0) );
+   assign quotient_raw[7]        = (~adder7_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder7_out[36:0] == 37'b0) );
+
+   assign quotient_new[2]        = quotient_raw[7] |   quotient_raw[6] | quotient_raw[5]  |   quotient_raw[4];
+   assign quotient_new[1]        = quotient_raw[7] |   quotient_raw[6] |                    (~quotient_raw[4] & quotient_raw[3]) | (~quotient_raw[3] & quotient_raw[2]);
+   assign quotient_new[0]        = quotient_raw[7] | (~quotient_raw[6] & quotient_raw[5]) | (~quotient_raw[4] & quotient_raw[3]) | (~quotient_raw[2] & quotient_raw[1]);
+
+
+   assign twos_comp_b_sel        =  valid_ff           & ~(dividend_sign_ff ^ divisor_sign_ff);
+   assign twos_comp_q_sel        = ~valid_ff & ~rem_ff &  (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff;
+
+   assign twos_comp_in[31:0]     = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) |
+                                   ( {32{twos_comp_b_sel}} & b_ff[31:0] );
+
+   rvtwoscomp #(32) i_twos_comp  (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0]));
+
+
+
+   assign valid_out              =  finish_ff & ~cancel;
+
+   assign data_out[31:0]         = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0]          ) |
+                                   ( {32{ rem_ff                   }} & r_ff[31:0]          ) |
+                                   ( {32{           twos_comp_q_sel}} & twos_comp_out[31:0] );
+
+
+
+
+   // *** *** *** START : SMALLNUM {{
+   // smallnum_case fires during valid_ff (no cancel, not a remainder op, not divide-by-zero) when either both a_ff and b_ff have bits [31:4] clear, or a_ff is entirely zero.
+   assign smallnum_case          = ( (a_ff[31:4]  == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) |
+                                   ( (a_ff[31:0]  == 32'b0) &                         ~by_zero_case & ~rem_ff & valid_ff & ~cancel);
+   // smallnum[3:0] is a pure sum-of-products over a_ff[3:0] and b_ff[3:0]. NOTE(review): presumably the 4-bit quotient a/b - confirm where it is consumed.
+   assign smallnum[3]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           );
+   // Every product term below contains at least one un-negated a_ff bit, so a_ff[3:0] == 0 yields smallnum == 0.
+   assign smallnum[2]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] & ~b_ff[2]                      );
+
+   assign smallnum[1]            = (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                                  ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &            ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2]                      );
+
+   assign smallnum[0]            = (            a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &             a_ff[0] & ~b_ff[3] &             b_ff[1] &  b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                                  a_ff[0] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] & ~a_ff[1] &            ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &                                             ~b_ff[2] & ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] &  b_ff[2] &  b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &             a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] & ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &             b_ff[0]) |
+                                   (           ~a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                                             ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] &             a_ff[1] &                       ~b_ff[2] &            ~b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &  b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                        b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &             a_ff[1] &             b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                        a_ff[0] &            ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &            ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &             a_ff[0] &  b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &  a_ff[1] &            ~b_ff[3] &             b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &  a_ff[0] &            ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] &  b_ff[3]                                 );
+
+   // *** *** *** END   : SMALLNUM }}
+
+
+
+
+   // *** *** *** Start : Short Q {{
+   // Computes shortq_enable and shortq_shift[4:0] from the cls encodings of the sign-extended dividend and of b_ff.
+   assign shortq_dividend[32:0]   = {dividend_sign_ff,a_ff[31:0]};
+
+
+   logic [5:0]  dw_a_enc;
+   logic [5:0]  dw_b_enc;
+   logic [6:0]  dw_shortq_raw;
+
+
+   // NOTE(review): eb1_exu_div_cls is defined elsewhere; presumably it counts leading sign bits of the 33-bit operand - confirm.
+   eb1_exu_div_cls i_a_cls  (
+       .operand  ( shortq_dividend[32:0]  ),
+       .cls      ( dw_a_enc[4:0]          ));
+
+   eb1_exu_div_cls i_b_cls  (
+       .operand  ( b_ff[32:0]             ),
+       .cls      ( dw_b_enc[4:0]          ));
+   // The instances drive only bits [4:0]; bit 5 of each encoding is tied to zero.
+   assign dw_a_enc[5]             =  1'b0;
+   assign dw_b_enc[5]             =  1'b0;
+
+
+   // 7-bit difference b_enc - a_enc + 1; bit 6 set means the result went negative, in which case shortq is clamped to 0.
+   assign dw_shortq_raw[6:0]      =  {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1;
+   assign shortq[5:0]             =  dw_shortq_raw[6]  ?  6'd0  :  dw_shortq_raw[5:0];
+   // Enabled only for shortq in 0..27 (bit 5 clear and shortq[4:2] != 3'b111), during valid_ff and without cancel.
+   assign shortq_enable           =  valid_ff & ~shortq[5] & ~(shortq[4:2] ==  3'b111) & ~cancel;
+   // Lookup from shortq to a shift amount that is always a multiple of 3; rows 28..31 decode to 0 and are excluded by shortq_enable anyway.
+   assign shortq_decode[4:0]      = ( {5{shortq[4:0] == 5'd31}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd30}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd29}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd28}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd27}} & 5'd03) |
+                                    ( {5{shortq[4:0] == 5'd26}} & 5'd06) |
+                                    ( {5{shortq[4:0] == 5'd25}} & 5'd06) |
+                                    ( {5{shortq[4:0] == 5'd24}} & 5'd06) |
+                                    ( {5{shortq[4:0] == 5'd23}} & 5'd09) |
+                                    ( {5{shortq[4:0] == 5'd22}} & 5'd09) |
+                                    ( {5{shortq[4:0] == 5'd21}} & 5'd09) |
+                                    ( {5{shortq[4:0] == 5'd20}} & 5'd12) |
+                                    ( {5{shortq[4:0] == 5'd19}} & 5'd12) |
+                                    ( {5{shortq[4:0] == 5'd18}} & 5'd12) |
+                                    ( {5{shortq[4:0] == 5'd17}} & 5'd15) |
+                                    ( {5{shortq[4:0] == 5'd16}} & 5'd15) |
+                                    ( {5{shortq[4:0] == 5'd15}} & 5'd15) |
+                                    ( {5{shortq[4:0] == 5'd14}} & 5'd18) |
+                                    ( {5{shortq[4:0] == 5'd13}} & 5'd18) |
+                                    ( {5{shortq[4:0] == 5'd12}} & 5'd18) |
+                                    ( {5{shortq[4:0] == 5'd11}} & 5'd21) |
+                                    ( {5{shortq[4:0] == 5'd10}} & 5'd21) |
+                                    ( {5{shortq[4:0] == 5'd09}} & 5'd21) |
+                                    ( {5{shortq[4:0] == 5'd08}} & 5'd24) |
+                                    ( {5{shortq[4:0] == 5'd07}} & 5'd24) |
+                                    ( {5{shortq[4:0] == 5'd06}} & 5'd24) |
+                                    ( {5{shortq[4:0] == 5'd05}} & 5'd27) |
+                                    ( {5{shortq[4:0] == 5'd04}} & 5'd27) |
+                                    ( {5{shortq[4:0] == 5'd03}} & 5'd27) |
+                                    ( {5{shortq[4:0] == 5'd02}} & 5'd27) |
+                                    ( {5{shortq[4:0] == 5'd01}} & 5'd27) |
+                                    ( {5{shortq[4:0] == 5'd00}} & 5'd27);
+
+   // Force the shift amount to 0 whenever the short-quotient path is not enabled.
+   assign shortq_shift[4:0]       = ~shortq_enable     ?  5'd0  :  shortq_decode[4:0];
+
+
+   // *** *** *** End   : Short Q }}
+
+
+
+
+
+endmodule // eb1_exu_div_new_3bit_fullshortq
+
+
+
+
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+module eb1_exu_div_new_4bit_fullshortq
+  (
+   input  logic            clk,                       // Top level clock
+   input  logic            rst_l,                     // Reset
+   input  logic            scan_mode,                 // Scan mode
+
+   input  logic            cancel,                    // Flush pipeline
+   input  logic            valid_in,                  // Start: enables capture of the operands and control bits
+   input  logic            signed_in,                 // Signed operation: sign bits are taken from bit 31 of each operand
+   input  logic            rem_in,                    // Captured into rem_ff; selects r_ff rather than q_ff on data_out
+   input  logic [31:0]     dividend_in,               // Dividend, loaded into a_ff
+   input  logic [31:0]     divisor_in,                // Divisor, loaded into b_ff
+
+   output logic            valid_out,                 // finish_ff & ~cancel
+   output logic [31:0]     data_out                   // Quotient (possibly negated) or remainder
+  );
+
+
+   logic                   valid_ff_in, valid_ff;
+   logic                   finish_raw, finish, finish_ff;
+   logic                   running_state;
+   logic                   misc_enable;
+   logic         [2:0]     control_in, control_ff;
+   logic                   dividend_sign_ff, divisor_sign_ff, rem_ff;
+   logic                   count_enable;
+   logic         [6:0]     count_in, count_ff;
+
+   logic                   smallnum_case;
+   logic         [3:0]     smallnum;
+
+   logic                   a_enable, a_shift;
+   logic        [31:0]     a_in, a_ff;
+
+   logic                   b_enable, b_twos_comp;
+   logic        [32:0]     b_in;
+   logic        [37:0]     b_ff;
+
+   logic        [31:0]     q_in, q_ff;
+
+   logic                   rq_enable;
+   logic                   r_sign_sel;
+   logic                   r_restore_sel;
+   logic                   r_adder01_sel, r_adder02_sel, r_adder03_sel;
+   logic                   r_adder04_sel, r_adder05_sel, r_adder06_sel, r_adder07_sel;
+   logic                   r_adder08_sel, r_adder09_sel, r_adder10_sel, r_adder11_sel;
+   logic                   r_adder12_sel, r_adder13_sel, r_adder14_sel, r_adder15_sel;
+   logic        [32:0]     r_in, r_ff;
+
+   logic                   twos_comp_q_sel, twos_comp_b_sel;
+   logic        [31:0]     twos_comp_in, twos_comp_out;
+
+   logic        [15:1]     quotient_raw;
+   logic         [3:0]     quotient_new;
+   logic        [34:0]     adder01_out;
+   logic        [35:0]     adder02_out;
+   logic        [36:0]     adder03_out;
+   logic        [37:0]     adder04_out;
+   logic        [37:0]     adder05_out;
+   logic        [37:0]     adder06_out;
+   logic        [37:0]     adder07_out;
+   logic        [37:0]     adder08_out;
+   logic        [37:0]     adder09_out;
+   logic        [37:0]     adder10_out;
+   logic        [37:0]     adder11_out;
+   logic        [37:0]     adder12_out;
+   logic        [37:0]     adder13_out;
+   logic        [37:0]     adder14_out;
+   logic        [37:0]     adder15_out;
+
+   logic        [64:0]     ar_shifted;
+   logic         [5:0]     shortq;
+   logic         [4:0]     shortq_shift;
+   logic         [4:0]     shortq_decode;
+   logic         [4:0]     shortq_shift_ff;
+   logic                   shortq_enable;
+   logic                   shortq_enable_ff;
+   logic        [32:0]     shortq_dividend;
+
+   logic                   by_zero_case;
+   logic                   by_zero_case_ff;
+
+
+
+   rvdffe #(19) i_misc_ff        (.*, .clk(clk), .en(misc_enable),     .din ({valid_ff_in, control_in[2:0], by_zero_case,    shortq_enable,    shortq_shift[4:0],    finish,    count_in[6:0]}),
+                                                                       .dout({valid_ff,    control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:0], finish_ff, count_ff[6:0]}));
+   // i_misc_ff packs 1+3+1+1+5+1+7 = 19 control bits. NOTE(review): rvdffe is defined elsewhere; presumably an enable flop - confirm.
+   rvdffe #(32) i_a_ff           (.*, .clk(clk), .en(a_enable),        .din(a_in[31:0]),           .dout(a_ff[31:0]));   // dividend / shifting a register
+   rvdffe #(33) i_b_ff           (.*, .clk(clk), .en(b_enable),        .din(b_in[32:0]),           .dout(b_ff[32:0]));   // divisor with one extra sign bit
+   rvdffe #(33) i_r_ff           (.*, .clk(clk), .en(rq_enable),       .din(r_in[32:0]),           .dout(r_ff[32:0]));   // r register, shares rq_enable with q
+   rvdffe #(32) i_q_ff           (.*, .clk(clk), .en(rq_enable),       .din(q_in[31:0]),           .dout(q_ff[31:0]));   // q register
+
+
+
+
+   assign valid_ff_in            =  valid_in  & ~cancel;
+   // control_in holds its previous value while valid_in is low and captures {dividend sign, divisor sign, rem} when valid_in is high.
+   assign control_in[2]          = (~valid_in & control_ff[2]) | (valid_in & signed_in  & dividend_in[31]);
+   assign control_in[1]          = (~valid_in & control_ff[1]) | (valid_in & signed_in  &  divisor_in[31]);
+   assign control_in[0]          = (~valid_in & control_ff[0]) | (valid_in & rem_in);
+   // Named views of the registered control bits.
+   assign dividend_sign_ff       =  control_ff[2];
+   assign divisor_sign_ff        =  control_ff[1];
+   assign rem_ff                 =  control_ff[0];
+
+
+   assign by_zero_case           =  valid_ff & (b_ff[31:0] == 32'b0);
+   // misc_enable opens the control register whenever anything is in flight; running_state is set while count_ff is non-zero or in the shortq_enable_ff cycle.
+   assign misc_enable            =  valid_in | valid_ff | cancel | running_state | finish_ff;
+   assign running_state          = (| count_ff[6:0]) | shortq_enable_ff;
+   assign finish_raw             =   smallnum_case      |
+                                     by_zero_case       |
+                                    (count_ff[6:0] == 7'd32);
+
+   // finish is finish_raw gated by cancel. When count_enable is low, count_in is forced to 0; otherwise it advances by 4 plus shortq_shift_ff.
+   assign finish                 =  finish_raw & ~cancel;
+   assign count_enable           = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable;
+   assign count_in[6:0]          = {7{count_enable}} & (count_ff[6:0] + 7'd4 + {2'b0,shortq_shift_ff[4:0]});
+
+
+   assign a_enable               =  valid_in | running_state;
+   assign a_shift                =  running_state & ~shortq_enable_ff;
+   // ar_shifted is the 65-bit {33 copies of the dividend sign, a_ff} shifted left by shortq_shift_ff; its low half feeds a_in, its high half feeds r_in.
+   assign ar_shifted[64:0]       = { {33{dividend_sign_ff}} , a_ff[31:0]} << {shortq_shift_ff[4:0]};
+   // a_in: load dividend_in by default, shift a_ff left by 4 while iterating, or take the pre-shifted value in the shortq_enable_ff cycle.
+   assign a_in[31:0]             = ( {32{~a_shift & ~shortq_enable_ff}} &  dividend_in[31:0] ) |
+                                   ( {32{ a_shift                    }} & {a_ff[27:0],4'b0}  ) |
+                                   ( {32{            shortq_enable_ff}} &  ar_shifted[31:0]  );
+
+
+
+   assign b_enable               =    valid_in | b_twos_comp;
+   assign b_twos_comp            =    valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff);
+   // b_in: the sign-extended divisor_in by default; when b_twos_comp (same expression as twos_comp_b_sel) b_ff is rewritten with twos_comp_out and an inverted sign bit.
+   assign b_in[32:0]             = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) |
+                                   ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } );
+
+
+   assign rq_enable              =  valid_in | valid_ff | running_state;
+   assign r_sign_sel             =  valid_ff      &  dividend_sign_ff & ~by_zero_case;                   // valid_ff cycle, negative dividend, not divide-by-zero
+   assign r_restore_sel          =  running_state & (quotient_new[3:0] == 4'd00) & ~shortq_enable_ff;    // digit 0: shift only, no adder result taken
+   assign r_adder01_sel          =  running_state & (quotient_new[3:0] == 4'd01) & ~shortq_enable_ff;
+   assign r_adder02_sel          =  running_state & (quotient_new[3:0] == 4'd02) & ~shortq_enable_ff;
+   assign r_adder03_sel          =  running_state & (quotient_new[3:0] == 4'd03) & ~shortq_enable_ff;
+   assign r_adder04_sel          =  running_state & (quotient_new[3:0] == 4'd04) & ~shortq_enable_ff;
+   assign r_adder05_sel          =  running_state & (quotient_new[3:0] == 4'd05) & ~shortq_enable_ff;
+   assign r_adder06_sel          =  running_state & (quotient_new[3:0] == 4'd06) & ~shortq_enable_ff;
+   assign r_adder07_sel          =  running_state & (quotient_new[3:0] == 4'd07) & ~shortq_enable_ff;
+   assign r_adder08_sel          =  running_state & (quotient_new[3:0] == 4'd08) & ~shortq_enable_ff;
+   assign r_adder09_sel          =  running_state & (quotient_new[3:0] == 4'd09) & ~shortq_enable_ff;
+   assign r_adder10_sel          =  running_state & (quotient_new[3:0] == 4'd10) & ~shortq_enable_ff;
+   assign r_adder11_sel          =  running_state & (quotient_new[3:0] == 4'd11) & ~shortq_enable_ff;
+   assign r_adder12_sel          =  running_state & (quotient_new[3:0] == 4'd12) & ~shortq_enable_ff;
+   assign r_adder13_sel          =  running_state & (quotient_new[3:0] == 4'd13) & ~shortq_enable_ff;
+   assign r_adder14_sel          =  running_state & (quotient_new[3:0] == 4'd14) & ~shortq_enable_ff;
+   assign r_adder15_sel          =  running_state & (quotient_new[3:0] == 4'd15) & ~shortq_enable_ff;
+   // AND-OR mux for the next r value: exactly one quotient_new-based select can be active per iteration, picking the adder whose index equals the chosen digit.
+   assign r_in[32:0]             = ( {33{r_sign_sel      }} & {33{1'b1}}               ) |
+                                   ( {33{r_restore_sel   }} & {r_ff[28:0],a_ff[31:28]} ) |
+                                   ( {33{r_adder01_sel   }} &  adder01_out[32:0]       ) |
+                                   ( {33{r_adder02_sel   }} &  adder02_out[32:0]       ) |
+                                   ( {33{r_adder03_sel   }} &  adder03_out[32:0]       ) |
+                                   ( {33{r_adder04_sel   }} &  adder04_out[32:0]       ) |
+                                   ( {33{r_adder05_sel   }} &  adder05_out[32:0]       ) |
+                                   ( {33{r_adder06_sel   }} &  adder06_out[32:0]       ) |
+                                   ( {33{r_adder07_sel   }} &  adder07_out[32:0]       ) |
+                                   ( {33{r_adder08_sel   }} &  adder08_out[32:0]       ) |
+                                   ( {33{r_adder09_sel   }} &  adder09_out[32:0]       ) |
+                                   ( {33{r_adder10_sel   }} &  adder10_out[32:0]       ) |
+                                   ( {33{r_adder11_sel   }} &  adder11_out[32:0]       ) |
+                                   ( {33{r_adder12_sel   }} &  adder12_out[32:0]       ) |
+                                   ( {33{r_adder13_sel   }} &  adder13_out[32:0]       ) |
+                                   ( {33{r_adder14_sel   }} &  adder14_out[32:0]       ) |
+                                   ( {33{r_adder15_sel   }} &  adder15_out[32:0]       ) |
+                                   ( {33{shortq_enable_ff}} &  ar_shifted[64:32]       ) |
+                                   ( {33{by_zero_case    }} & {1'b0,a_ff[31:0]}        );
+
+
+   assign q_in[31:0]             = ( {32{~valid_ff     }} & {q_ff[27:0], quotient_new[3:0]} ) |   // outside the valid_ff cycle: shift in the new 4-bit digit
+                                   ( {32{ smallnum_case}} & {28'b0     , smallnum[3:0]}     ) |   // small-number fast path: 4-bit table result
+                                   ( {32{ by_zero_case }} & {32{1'b1}}                      );    // divide-by-zero: all ones
+
+
+   assign b_ff[37:33]            = {b_ff[32],b_ff[32],b_ff[32],b_ff[32],b_ff[32]};
+   // b_ff[32:0] comes from the i_b_ff register; the line above sign-extends it to 38 bits for the adders.
+   // adderNN_out = {r_ff, a_ff[31:28]} + NN * b_ff, with NN built from the shifted copies 8b, 4b, 2b and b. adder01..03 are 35/36/37 bits wide, the rest 38.
+   assign adder01_out[34:0]      = {         r_ff[30:0],a_ff[31:28]}  +                                                                   b_ff[34:0];
+   assign adder02_out[35:0]      = {         r_ff[31:0],a_ff[31:28]}  +                                             {b_ff[34:0],1'b0};
+   assign adder03_out[36:0]      = {         r_ff[32:0],a_ff[31:28]}  +                                             {b_ff[35:0],1'b0}  +  b_ff[36:0];
+   assign adder04_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +                       {b_ff[35:0],2'b0};
+   assign adder05_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +                       {b_ff[35:0],2'b0}  +                        b_ff[37:0];
+   assign adder06_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +                       {b_ff[35:0],2'b0}  +  {b_ff[36:0],1'b0};
+   assign adder07_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +                       {b_ff[35:0],2'b0}  +  {b_ff[36:0],1'b0}  +  b_ff[37:0];
+   assign adder08_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0};
+   assign adder09_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0} +                                              b_ff[37:0];
+   assign adder10_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0} +                        {b_ff[36:0],1'b0};
+   assign adder11_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0} +                        {b_ff[36:0],1'b0}  +  b_ff[37:0];
+   assign adder12_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0} +  {b_ff[35:0],2'b0};
+   assign adder13_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0} +  {b_ff[35:0],2'b0}  +                        b_ff[37:0];
+   assign adder14_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0} +  {b_ff[35:0],2'b0}  +  {b_ff[36:0],1'b0};
+   assign adder15_out[37:0]      = {r_ff[32],r_ff[32:0],a_ff[31:28]}  +  {b_ff[34:0],3'b0} +  {b_ff[35:0],2'b0}  +  {b_ff[36:0],1'b0}  +  b_ff[37:0];
+   // quotient_raw[N] is set when the top bit of adderN_out equals dividend_sign_ff (~msb ^ sign is an XNOR), or when the sum is exactly zero and a_ff[27:0] is also zero.
+   assign quotient_raw[01]       = (~adder01_out[34] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder01_out[34:0] == 35'b0) );
+   assign quotient_raw[02]       = (~adder02_out[35] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder02_out[35:0] == 36'b0) );
+   assign quotient_raw[03]       = (~adder03_out[36] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder03_out[36:0] == 37'b0) );
+   assign quotient_raw[04]       = (~adder04_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder04_out[37:0] == 38'b0) );
+   assign quotient_raw[05]       = (~adder05_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder05_out[37:0] == 38'b0) );
+   assign quotient_raw[06]       = (~adder06_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder06_out[37:0] == 38'b0) );
+   assign quotient_raw[07]       = (~adder07_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder07_out[37:0] == 38'b0) );
+   assign quotient_raw[08]       = (~adder08_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder08_out[37:0] == 38'b0) );
+   assign quotient_raw[09]       = (~adder09_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder09_out[37:0] == 38'b0) );
+   assign quotient_raw[10]       = (~adder10_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder10_out[37:0] == 38'b0) );
+   assign quotient_raw[11]       = (~adder11_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder11_out[37:0] == 38'b0) );
+   assign quotient_raw[12]       = (~adder12_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder12_out[37:0] == 38'b0) );
+   assign quotient_raw[13]       = (~adder13_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder13_out[37:0] == 38'b0) );
+   assign quotient_raw[14]       = (~adder14_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder14_out[37:0] == 38'b0) );
+   assign quotient_raw[15]       = (~adder15_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder15_out[37:0] == 38'b0) );
+
+   // Priority encoder: quotient_new[3:0] is the index of the highest set bit of quotient_raw[15:1] (0 when none is set). Each compare matches "all higher bits clear, this bit set".
+   assign quotient_new[0]        = ( quotient_raw[15:01] == 15'b000_0000_0000_0001 ) |  //  1
+                                   ( quotient_raw[15:03] == 13'b000_0000_0000_01   ) |  //  3
+                                   ( quotient_raw[15:05] == 11'b000_0000_0001      ) |  //  5
+                                   ( quotient_raw[15:07] ==  9'b000_0000_01        ) |  //  7
+                                   ( quotient_raw[15:09] ==  7'b000_0001           ) |  //  9
+                                   ( quotient_raw[15:11] ==  5'b000_01             ) |  // 11
+                                   ( quotient_raw[15:13] ==  3'b001                ) |  // 13
+                                   ( quotient_raw[   15] ==  1'b1                  );   // 15
+
+   assign quotient_new[1]        = ( quotient_raw[15:02] == 14'b000_0000_0000_001  ) |  //  2
+                                   ( quotient_raw[15:03] == 13'b000_0000_0000_01   ) |  //  3
+                                   ( quotient_raw[15:06] == 10'b000_0000_001       ) |  //  6
+                                   ( quotient_raw[15:07] ==  9'b000_0000_01        ) |  //  7
+                                   ( quotient_raw[15:10] ==  6'b000_001            ) |  // 10
+                                   ( quotient_raw[15:11] ==  5'b000_01             ) |  // 11
+                                   ( quotient_raw[15:14] ==  2'b01                 ) |  // 14
+                                   ( quotient_raw[   15] ==  1'b1                  );   // 15
+
+   assign quotient_new[2]        = ( quotient_raw[15:04] == 12'b000_0000_0000_1    ) |  //  4
+                                   ( quotient_raw[15:05] == 11'b000_0000_0001      ) |  //  5
+                                   ( quotient_raw[15:06] == 10'b000_0000_001       ) |  //  6
+                                   ( quotient_raw[15:07] ==  9'b000_0000_01        ) |  //  7
+                                   ( quotient_raw[15:12] ==  4'b000_1              ) |  // 12
+                                   ( quotient_raw[15:13] ==  3'b001                ) |  // 13
+                                   ( quotient_raw[15:14] ==  2'b01                 ) |  // 14
+                                   ( quotient_raw[   15] ==  1'b1                  );   // 15
+
+   assign quotient_new[3]        = ( quotient_raw[15:08] ==  8'b000_0000_1         ) |  //  8
+                                   ( quotient_raw[15:09] ==  7'b000_0001           ) |  //  9
+                                   ( quotient_raw[15:10] ==  6'b000_001            ) |  // 10
+                                   ( quotient_raw[15:11] ==  5'b000_01             ) |  // 11
+                                   ( quotient_raw[15:12] ==  4'b000_1              ) |  // 12
+                                   ( quotient_raw[15:13] ==  3'b001                ) |  // 13
+                                   ( quotient_raw[15:14] ==  2'b01                 ) |  // 14
+                                   ( quotient_raw[   15] ==  1'b1                  );   // 15
+
+
+   assign twos_comp_b_sel        =  valid_ff           & ~(dividend_sign_ff ^ divisor_sign_ff);                           // negate b_ff: valid_ff cycle, operand signs equal
+   assign twos_comp_q_sel        = ~valid_ff & ~rem_ff &  (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff;        // negate q_ff: non-remainder op, signs differ, not divide-by-zero
+   // The two selects are mutually exclusive (valid_ff vs ~valid_ff), so a single rvtwoscomp instance serves both.
+   assign twos_comp_in[31:0]     = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) |
+                                   ( {32{twos_comp_b_sel}} & b_ff[31:0] );
+   // NOTE(review): rvtwoscomp is defined outside this section; presumably dout is the two's complement of din - confirm.
+   rvtwoscomp #(32) i_twos_comp  (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0]));
+
+
+   // Result handshake: the registered finish flag, suppressed while cancel is asserted.
+   assign valid_out              =  finish_ff & ~cancel;
+   // Result mux: r_ff for remainder ops, twos_comp_out when twos_comp_q_sel is set, otherwise q_ff unchanged.
+   assign data_out[31:0]         = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0]          ) |
+                                   ( {32{ rem_ff                   }} & r_ff[31:0]          ) |
+                                   ( {32{           twos_comp_q_sel}} & twos_comp_out[31:0] );
+
+
+
+
+   // *** *** *** START : SMALLNUM {{
+   // smallnum_case fires during valid_ff (no cancel, not a remainder op, not divide-by-zero) when either both a_ff and b_ff have bits [31:4] clear, or a_ff is entirely zero.
+   assign smallnum_case          = ( (a_ff[31:4]  == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) |
+                                   ( (a_ff[31:0]  == 32'b0) &                         ~by_zero_case & ~rem_ff & valid_ff & ~cancel);
+   // smallnum[3:0] is a pure sum-of-products over a_ff[3:0] and b_ff[3:0]; q_in loads {28'b0, smallnum[3:0]} when smallnum_case is set and the case also raises finish_raw.
+   assign smallnum[3]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           );
+   // Every product term below contains at least one un-negated a_ff bit, so a_ff[3:0] == 0 yields smallnum == 0.
+   assign smallnum[2]            = ( a_ff[3] &                                  ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] & ~b_ff[2]                      );
+
+   assign smallnum[1]            = (            a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                                  ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &                       ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &            ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2]                      );
+
+   assign smallnum[0]            = (            a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &             a_ff[0] & ~b_ff[3] &             b_ff[1] &  b_ff[0]) |
+                                   (            a_ff[2] &                       ~b_ff[3] &            ~b_ff[1] & ~b_ff[0]) |
+                                   (                       a_ff[1] &            ~b_ff[3] & ~b_ff[2] &            ~b_ff[0]) |
+                                   (                                  a_ff[0] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] & ~a_ff[1] &            ~b_ff[3] & ~b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &                                             ~b_ff[2] & ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] & ~a_ff[2] &                       ~b_ff[3] &  b_ff[2] &  b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &            ~b_ff[3] &  b_ff[2] & ~b_ff[1]           ) |
+                                   (~a_ff[3] &  a_ff[2] &             a_ff[0] & ~b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] & ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &             b_ff[0]) |
+                                   (           ~a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                                             ~b_ff[1] & ~b_ff[0]) |
+                                   ( a_ff[3] &             a_ff[1] &                       ~b_ff[2] &            ~b_ff[0]) |
+                                   (~a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] & ~b_ff[3] &  b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &                        b_ff[3] & ~b_ff[2]                      ) |
+                                   ( a_ff[3] &             a_ff[1] &             b_ff[3] & ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &                        a_ff[0] &            ~b_ff[2] & ~b_ff[1]           ) |
+                                   ( a_ff[3] &            ~a_ff[1] &            ~b_ff[3] &  b_ff[2] &  b_ff[1] &  b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &                       ~b_ff[0]) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &             b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] &  a_ff[2] &             a_ff[0] &  b_ff[3] &            ~b_ff[1]           ) |
+                                   ( a_ff[3] & ~a_ff[2] &  a_ff[1] &            ~b_ff[3] &             b_ff[1]           ) |
+                                   ( a_ff[3] &             a_ff[1] &  a_ff[0] &            ~b_ff[2]                      ) |
+                                   ( a_ff[3] &  a_ff[2] &  a_ff[1] &  a_ff[0] &  b_ff[3]                                 );
+
+   // *** *** *** END   : SMALLNUM }}
+
+
+
+
+   // *** *** *** Start : Short Q {{
+   // Computes shortq and shortq_enable from the cls encodings of the sign-extended dividend and of b_ff.
+   assign shortq_dividend[32:0]   = {dividend_sign_ff,a_ff[31:0]};
+
+
+   logic [5:0]  dw_a_enc;
+   logic [5:0]  dw_b_enc;
+   logic [6:0]  dw_shortq_raw;
+
+
+   // NOTE(review): eb1_exu_div_cls is defined elsewhere; presumably it counts leading sign bits of the 33-bit operand - confirm.
+   eb1_exu_div_cls i_a_cls  (
+       .operand  ( shortq_dividend[32:0]  ),
+       .cls      ( dw_a_enc[4:0]          ));
+
+   eb1_exu_div_cls i_b_cls  (
+       .operand  ( b_ff[32:0]             ),
+       .cls      ( dw_b_enc[4:0]          ));
+   // The instances drive only bits [4:0]; bit 5 of each encoding is tied to zero.
+   assign dw_a_enc[5]             =  1'b0;
+   assign dw_b_enc[5]             =  1'b0;
+
+   // 7-bit difference b_enc - a_enc + 1; bit 6 set means the result went negative, in which case shortq is clamped to 0.
+   assign dw_shortq_raw[6:0]      =  {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1;
+   assign shortq[5:0]             =  dw_shortq_raw[6]  ?  6'd0  :  dw_shortq_raw[5:0];
+   // Enabled only for shortq in 0..27 (bit 5 clear and shortq[4:2] != 3'b111), during valid_ff and without cancel.
+   assign shortq_enable           =  valid_ff & ~shortq[5] & ~(shortq[4:2] ==  3'b111) & ~cancel;
+
+   assign shortq_decode[4:0]      = ( {5{shortq[4:0] == 5'd31}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd30}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd29}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd28}} & 5'd00) |
+                                    ( {5{shortq[4:0] == 5'd27}} & 5'd04) |
+                                    ( {5{shortq[4:0] == 5'd26}} & 5'd04) |
+                                    ( {5{shortq[4:0] == 5'd25}} & 5'd04) |
+                                    ( {5{shortq[4:0] == 5'd24}} & 5'd04) |
+                                    ( {5{shortq[4:0] == 5'd23}} & 5'd08) |
+                                    ( {5{shortq[4:0] == 5'd22}} & 5'd08) |
+                                    ( {5{shortq[4:0] == 5'd21}} & 5'd08) |
+                                    ( {5{shortq[4:0] == 5'd20}} & 5'd08) |
+                                    ( {5{shortq[4:0] == 5'd19}} & 5'd12) |
+                                    ( {5{shortq[4:0] == 5'd18}} & 5'd12) |
+                                    ( {5{shortq[4:0] == 5'd17}} & 5'd12) |
+                                    ( {5{shortq[4:0] == 5'd16}} & 5'd12) |
+                                    ( {5{shortq[4:0] == 5'd15}} & 5'd16) |
+                                    ( {5{shortq[4:0] == 5'd14}} & 5'd16) |
+                                    ( {5{shortq[4:0] == 5'd13}} & 5'd16) |
+                                    ( {5{shortq[4:0] == 5'd12}} & 5'd16) |
+                                    ( {5{shortq[4:0] == 5'd11}} & 5'd20) |
+                                    ( {5{shortq[4:0] == 5'd10}} & 5'd20) |
+                                    ( {5{shortq[4:0] == 5'd09}} & 5'd20) |
+                                    ( {5{shortq[4:0] == 5'd08}} & 5'd20) |
+                                    ( {5{shortq[4:0] == 5'd07}} & 5'd24) |
+                                    ( {5{shortq[4:0] == 5'd06}} & 5'd24) |
+                                    ( {5{shortq[4:0] == 5'd05}} & 5'd24) |
+                                    ( {5{shortq[4:0] == 5'd04}} & 5'd24) |
+                                    ( {5{shortq[4:0] == 5'd03}} & 5'd28) |
+                                    ( {5{shortq[4:0] == 5'd02}} & 5'd28) |
+                                    ( {5{shortq[4:0] == 5'd01}} & 5'd28) |
+                                    ( {5{shortq[4:0] == 5'd00}} & 5'd28);
+
+
+   assign shortq_shift[4:0]       = ~shortq_enable     ?  5'd0  :  shortq_decode[4:0];
+
+
+   // *** *** *** End   : Short Q }}
+
+
+
+
+
+endmodule // eb1_exu_div_new_4bit_fullshortq
+
+
+
+
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+module eb1_exu_div_cls
+  (
+   input  logic [32:0] operand,             // [32] selects which count drives cls; [31:0] is the value that is scanned
+
+   output logic [4:0]  cls                  // Count leading sign bits - "n" format ignoring [32]
+   );
+   // Purely combinational leading-bit counter over operand[31:0].
+   // operand[32] == 0 -> cls = cls_zeros,   operand[32] == 1 -> cls = cls_ones.
+   logic [4:0]   cls_zeros;
+   logic [4:0]   cls_ones;
+   // cls_zeros : number of 0 bits above the most-significant 1 of operand[31:0]  (0..31).
+   // Every term matches one exclusive bit pattern, so at most one term contributes to the OR.
+assign cls_zeros[4:0]             = ({5{operand[31]    ==  {           1'b1} }} & 5'd00) |
+                                    ({5{operand[31:30] ==  {{ 1{1'b0}},1'b1} }} & 5'd01) |
+                                    ({5{operand[31:29] ==  {{ 2{1'b0}},1'b1} }} & 5'd02) |
+                                    ({5{operand[31:28] ==  {{ 3{1'b0}},1'b1} }} & 5'd03) |
+                                    ({5{operand[31:27] ==  {{ 4{1'b0}},1'b1} }} & 5'd04) |
+                                    ({5{operand[31:26] ==  {{ 5{1'b0}},1'b1} }} & 5'd05) |
+                                    ({5{operand[31:25] ==  {{ 6{1'b0}},1'b1} }} & 5'd06) |
+                                    ({5{operand[31:24] ==  {{ 7{1'b0}},1'b1} }} & 5'd07) |
+                                    ({5{operand[31:23] ==  {{ 8{1'b0}},1'b1} }} & 5'd08) |
+                                    ({5{operand[31:22] ==  {{ 9{1'b0}},1'b1} }} & 5'd09) |
+                                    ({5{operand[31:21] ==  {{10{1'b0}},1'b1} }} & 5'd10) |
+                                    ({5{operand[31:20] ==  {{11{1'b0}},1'b1} }} & 5'd11) |
+                                    ({5{operand[31:19] ==  {{12{1'b0}},1'b1} }} & 5'd12) |
+                                    ({5{operand[31:18] ==  {{13{1'b0}},1'b1} }} & 5'd13) |
+                                    ({5{operand[31:17] ==  {{14{1'b0}},1'b1} }} & 5'd14) |
+                                    ({5{operand[31:16] ==  {{15{1'b0}},1'b1} }} & 5'd15) |
+                                    ({5{operand[31:15] ==  {{16{1'b0}},1'b1} }} & 5'd16) |
+                                    ({5{operand[31:14] ==  {{17{1'b0}},1'b1} }} & 5'd17) |
+                                    ({5{operand[31:13] ==  {{18{1'b0}},1'b1} }} & 5'd18) |
+                                    ({5{operand[31:12] ==  {{19{1'b0}},1'b1} }} & 5'd19) |
+                                    ({5{operand[31:11] ==  {{20{1'b0}},1'b1} }} & 5'd20) |
+                                    ({5{operand[31:10] ==  {{21{1'b0}},1'b1} }} & 5'd21) |
+                                    ({5{operand[31:09] ==  {{22{1'b0}},1'b1} }} & 5'd22) |
+                                    ({5{operand[31:08] ==  {{23{1'b0}},1'b1} }} & 5'd23) |
+                                    ({5{operand[31:07] ==  {{24{1'b0}},1'b1} }} & 5'd24) |
+                                    ({5{operand[31:06] ==  {{25{1'b0}},1'b1} }} & 5'd25) |
+                                    ({5{operand[31:05] ==  {{26{1'b0}},1'b1} }} & 5'd26) |
+                                    ({5{operand[31:04] ==  {{27{1'b0}},1'b1} }} & 5'd27) |
+                                    ({5{operand[31:03] ==  {{28{1'b0}},1'b1} }} & 5'd28) |
+                                    ({5{operand[31:02] ==  {{29{1'b0}},1'b1} }} & 5'd29) |
+                                    ({5{operand[31:01] ==  {{30{1'b0}},1'b1} }} & 5'd30) |
+                                    ({5{operand[31:00] ==  {{31{1'b0}},1'b1} }} & 5'd31) |
+                                    ({5{operand[31:00] ==  {{32{1'b0}}     } }} & 5'd00);    // Don't care case as it will be handled as special case
+   // cls_ones : (number of leading 1 bits of operand[31:0]) - 1, i.e. 0 for 10..., 31 for all ones.
+   // No term matches when operand[31] is 0, so cls_ones evaluates to 0 in that case.
+assign cls_ones[4:0]              = ({5{operand[31:30] ==  {{ 1{1'b1}},1'b0} }} & 5'd00) |
+                                    ({5{operand[31:29] ==  {{ 2{1'b1}},1'b0} }} & 5'd01) |
+                                    ({5{operand[31:28] ==  {{ 3{1'b1}},1'b0} }} & 5'd02) |
+                                    ({5{operand[31:27] ==  {{ 4{1'b1}},1'b0} }} & 5'd03) |
+                                    ({5{operand[31:26] ==  {{ 5{1'b1}},1'b0} }} & 5'd04) |
+                                    ({5{operand[31:25] ==  {{ 6{1'b1}},1'b0} }} & 5'd05) |
+                                    ({5{operand[31:24] ==  {{ 7{1'b1}},1'b0} }} & 5'd06) |
+                                    ({5{operand[31:23] ==  {{ 8{1'b1}},1'b0} }} & 5'd07) |
+                                    ({5{operand[31:22] ==  {{ 9{1'b1}},1'b0} }} & 5'd08) |
+                                    ({5{operand[31:21] ==  {{10{1'b1}},1'b0} }} & 5'd09) |
+                                    ({5{operand[31:20] ==  {{11{1'b1}},1'b0} }} & 5'd10) |
+                                    ({5{operand[31:19] ==  {{12{1'b1}},1'b0} }} & 5'd11) |
+                                    ({5{operand[31:18] ==  {{13{1'b1}},1'b0} }} & 5'd12) |
+                                    ({5{operand[31:17] ==  {{14{1'b1}},1'b0} }} & 5'd13) |
+                                    ({5{operand[31:16] ==  {{15{1'b1}},1'b0} }} & 5'd14) |
+                                    ({5{operand[31:15] ==  {{16{1'b1}},1'b0} }} & 5'd15) |
+                                    ({5{operand[31:14] ==  {{17{1'b1}},1'b0} }} & 5'd16) |
+                                    ({5{operand[31:13] ==  {{18{1'b1}},1'b0} }} & 5'd17) |
+                                    ({5{operand[31:12] ==  {{19{1'b1}},1'b0} }} & 5'd18) |
+                                    ({5{operand[31:11] ==  {{20{1'b1}},1'b0} }} & 5'd19) |
+                                    ({5{operand[31:10] ==  {{21{1'b1}},1'b0} }} & 5'd20) |
+                                    ({5{operand[31:09] ==  {{22{1'b1}},1'b0} }} & 5'd21) |
+                                    ({5{operand[31:08] ==  {{23{1'b1}},1'b0} }} & 5'd22) |
+                                    ({5{operand[31:07] ==  {{24{1'b1}},1'b0} }} & 5'd23) |
+                                    ({5{operand[31:06] ==  {{25{1'b1}},1'b0} }} & 5'd24) |
+                                    ({5{operand[31:05] ==  {{26{1'b1}},1'b0} }} & 5'd25) |
+                                    ({5{operand[31:04] ==  {{27{1'b1}},1'b0} }} & 5'd26) |
+                                    ({5{operand[31:03] ==  {{28{1'b1}},1'b0} }} & 5'd27) |
+                                    ({5{operand[31:02] ==  {{29{1'b1}},1'b0} }} & 5'd28) |
+                                    ({5{operand[31:01] ==  {{30{1'b1}},1'b0} }} & 5'd29) |
+                                    ({5{operand[31:00] ==  {{31{1'b1}},1'b0} }} & 5'd30) |
+                                    ({5{operand[31:00] ==  {{32{1'b1}}     } }} & 5'd31);
+
+   // Final select: operand[32] set -> use the leading-ones count, clear -> use the leading-zeros count.
+assign cls[4:0]                   =  operand[32]  ?  cls_ones[4:0]  :  cls_zeros[4:0];
+
+endmodule // eb1_exu_div_cls
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_exu_mul_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_exu_mul_ctl.sv
new file mode 100644
index 0000000..345d5f4
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_exu_mul_ctl.sv
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+module eb1_exu_mul_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic          clk,              // Top level clock
+   input logic          rst_l,            // Reset
+   input logic          scan_mode,        // Scan mode
+
+   input eb1_mul_pkt_t mul_p,            // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result}
+
+   input logic [31:0]   rs1_in,           // A operand
+   input logic [31:0]   rs2_in,           // B operand
+
+
+   output logic [31:0]  result_x          // Result
+  );
+   // Multiplier and bit-manipulation unit. The multiply operands and the selected bit-manip result are
+   // captured by rvdffe instances (defined elsewhere) enabled by mul_p.valid; result_x is built from those outputs.
+   logic                mul_x_enable;
+   logic                bit_x_enable;
+   logic signed [32:0]  rs1_ext_in;
+   logic signed [32:0]  rs2_ext_in;
+   logic        [65:0]  prod_x;
+   logic                low_x;
+
+
+
+   // *** Start - BitManip ***
+
+   logic                bitmanip_sel_d;
+   logic                bitmanip_sel_x;
+   logic        [31:0]  bitmanip_d;
+   logic        [31:0]  bitmanip_x;
+
+
+   // ap_* : per-operation select strobes, derived below from mul_p and the pt.BITMANIP_* parameters
+   // ZBE
+   logic                ap_bext;
+   logic                ap_bdep;
+
+   // ZBC
+   logic                ap_clmul;
+   logic                ap_clmulh;
+   logic                ap_clmulr;
+
+   // ZBP
+   logic                ap_grev;
+   logic                ap_gorc;
+   logic                ap_shfl;
+   logic                ap_unshfl;
+
+   // ZBR
+   logic                ap_crc32_b;
+   logic                ap_crc32_h;
+   logic                ap_crc32_w;
+   logic                ap_crc32c_b;
+   logic                ap_crc32c_h;
+   logic                ap_crc32c_w;
+
+   // ZBF
+   logic                ap_bfp;
+
+   // Forward each mul_p strobe only when its extension parameter equals 1; otherwise tie the strobe low.
+   if (pt.BITMANIP_ZBE == 1)
+     begin
+       assign ap_bext         =  mul_p.bext;
+       assign ap_bdep         =  mul_p.bdep;
+     end
+   else
+     begin
+       assign ap_bext         =  1'b0;
+       assign ap_bdep         =  1'b0;
+     end
+
+   if (pt.BITMANIP_ZBC == 1)
+     begin
+       assign ap_clmul        =  mul_p.clmul;
+       assign ap_clmulh       =  mul_p.clmulh;
+       assign ap_clmulr       =  mul_p.clmulr;
+     end
+   else
+     begin
+       assign ap_clmul        =  1'b0;
+       assign ap_clmulh       =  1'b0;
+       assign ap_clmulr       =  1'b0;
+     end
+
+   if (pt.BITMANIP_ZBP == 1)
+     begin
+       assign ap_grev         =  mul_p.grev;
+       assign ap_gorc         =  mul_p.gorc;
+       assign ap_shfl         =  mul_p.shfl;
+       assign ap_unshfl       =  mul_p.unshfl;
+     end
+   else
+     begin
+       assign ap_grev         =  1'b0;
+       assign ap_gorc         =  1'b0;
+       assign ap_shfl         =  1'b0;
+       assign ap_unshfl       =  1'b0;
+     end
+
+   if (pt.BITMANIP_ZBR == 1)
+     begin
+       assign ap_crc32_b      =  mul_p.crc32_b;
+       assign ap_crc32_h      =  mul_p.crc32_h;
+       assign ap_crc32_w      =  mul_p.crc32_w;
+       assign ap_crc32c_b     =  mul_p.crc32c_b;
+       assign ap_crc32c_h     =  mul_p.crc32c_h;
+       assign ap_crc32c_w     =  mul_p.crc32c_w;
+     end
+   else
+     begin
+       assign ap_crc32_b      =  1'b0;
+       assign ap_crc32_h      =  1'b0;
+       assign ap_crc32_w      =  1'b0;
+       assign ap_crc32c_b     =  1'b0;
+       assign ap_crc32c_h     =  1'b0;
+       assign ap_crc32c_w     =  1'b0;
+     end
+
+   if (pt.BITMANIP_ZBF == 1)
+     begin
+       assign ap_bfp          =  mul_p.bfp;
+     end
+   else
+     begin
+       assign ap_bfp          =  1'b0;
+     end
+
+
+   // *** End   - BitManip ***
+
+
+
+   assign mul_x_enable           =  mul_p.valid;
+   assign bit_x_enable           =  mul_p.valid;
+   // Bit [32] is the operand MSB when the matching *_sign flag is set, else 0 (33-bit signed operands).
+   assign rs1_ext_in[32]         =  mul_p.rs1_sign & rs1_in[31];
+   assign rs2_ext_in[32]         =  mul_p.rs2_sign & rs2_in[31];
+
+   assign rs1_ext_in[31:0]       =  rs1_in[31:0];
+   assign rs2_ext_in[31:0]       =  rs2_in[31:0];
+
+
+
+   // --------------------------- Multiply       ----------------------------------
+
+
+   logic signed [32:0]  rs1_x;
+   logic signed [32:0]  rs2_x;
+
+   rvdffe #(34) i_a_x_ff         (.*, .clk(clk),  .din({mul_p.low,rs1_ext_in[32:0]}),        .dout({low_x,rs1_x[32:0]}),                 .en(mul_x_enable));
+   rvdffe #(33) i_b_x_ff         (.*, .clk(clk),  .din(           rs2_ext_in[32:0] ),        .dout(       rs2_x[32:0] ),                 .en(mul_x_enable));
+
+   // Signed 33x33 multiply of the captured operands, assigned to the 66-bit prod_x.
+   assign prod_x[65:0]           =  rs1_x  *  rs2_x;
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  BEXT, BDEP   * * * * * * * * * * * * * * * * * *
+
+
+   // *** BEXT == "gather"  ***
+
+   logic        [31:0]    bext_d;
+   logic                  bext_test_bit_d;
+   integer                bext_i, bext_j;
+
+   // For each set bit of rs2_in (LSB first) copy that rs1_in bit to the next free low position of bext_d.
+   always_comb
+     begin
+
+       bext_j                    =      0;
+       bext_test_bit_d           =   1'b0;
+       bext_d[31:0]              =  32'b0;
+
+       for (bext_i=0; bext_i<32; bext_i++)
+         begin
+             bext_test_bit_d     =  rs2_in[bext_i];
+             if (bext_test_bit_d)
+               begin
+                  bext_d[bext_j] =  rs1_in[bext_i];
+                  bext_j         =  bext_j + 1;
+               end  // IF  bext_test_bit
+         end        // FOR bext_i
+     end            // ALWAYS_COMB
+
+
+
+   // *** BDEP == "scatter" ***
+
+   logic        [31:0]    bdep_d;
+   logic                  bdep_test_bit_d;
+   integer                bdep_i, bdep_j;
+
+   // For each set bit of rs2_in (LSB first) place the next low-order rs1_in bit at that position of bdep_d.
+   always_comb
+     begin
+
+       bdep_j                    =      0;
+       bdep_test_bit_d           =   1'b0;
+       bdep_d[31:0]              =  32'b0;
+
+       for (bdep_i=0; bdep_i<32; bdep_i++)
+         begin
+             bdep_test_bit_d     =  rs2_in[bdep_i];
+             if (bdep_test_bit_d)
+               begin
+                  bdep_d[bdep_i] =  rs1_in[bdep_j];
+                  bdep_j         =  bdep_j + 1;
+               end  // IF  bdep_test_bit
+         end        // FOR bdep_i
+     end            // ALWAYS_COMB
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  CLMUL, CLMULH, CLMULR  * * * * * * * * * * * * *
+
+   logic        [62:0]    clmul_raw_d;
+
+   // XOR together rs1_in shifted left by i for every set bit rs2_in[i]: the 63-bit carry-less product.
+   assign clmul_raw_d[62:0]      = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0]      } ) ^
+                                   ( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^
+                                   ( {63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^
+                                   ( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^
+                                   ( {63{rs2_in[04]}} & {27'b0,rs1_in[31:0], 4'b0} ) ^
+                                   ( {63{rs2_in[05]}} & {26'b0,rs1_in[31:0], 5'b0} ) ^
+                                   ( {63{rs2_in[06]}} & {25'b0,rs1_in[31:0], 6'b0} ) ^
+                                   ( {63{rs2_in[07]}} & {24'b0,rs1_in[31:0], 7'b0} ) ^
+                                   ( {63{rs2_in[08]}} & {23'b0,rs1_in[31:0], 8'b0} ) ^
+                                   ( {63{rs2_in[09]}} & {22'b0,rs1_in[31:0], 9'b0} ) ^
+                                   ( {63{rs2_in[10]}} & {21'b0,rs1_in[31:0],10'b0} ) ^
+                                   ( {63{rs2_in[11]}} & {20'b0,rs1_in[31:0],11'b0} ) ^
+                                   ( {63{rs2_in[12]}} & {19'b0,rs1_in[31:0],12'b0} ) ^
+                                   ( {63{rs2_in[13]}} & {18'b0,rs1_in[31:0],13'b0} ) ^
+                                   ( {63{rs2_in[14]}} & {17'b0,rs1_in[31:0],14'b0} ) ^
+                                   ( {63{rs2_in[15]}} & {16'b0,rs1_in[31:0],15'b0} ) ^
+                                   ( {63{rs2_in[16]}} & {15'b0,rs1_in[31:0],16'b0} ) ^
+                                   ( {63{rs2_in[17]}} & {14'b0,rs1_in[31:0],17'b0} ) ^
+                                   ( {63{rs2_in[18]}} & {13'b0,rs1_in[31:0],18'b0} ) ^
+                                   ( {63{rs2_in[19]}} & {12'b0,rs1_in[31:0],19'b0} ) ^
+                                   ( {63{rs2_in[20]}} & {11'b0,rs1_in[31:0],20'b0} ) ^
+                                   ( {63{rs2_in[21]}} & {10'b0,rs1_in[31:0],21'b0} ) ^
+                                   ( {63{rs2_in[22]}} & { 9'b0,rs1_in[31:0],22'b0} ) ^
+                                   ( {63{rs2_in[23]}} & { 8'b0,rs1_in[31:0],23'b0} ) ^
+                                   ( {63{rs2_in[24]}} & { 7'b0,rs1_in[31:0],24'b0} ) ^
+                                   ( {63{rs2_in[25]}} & { 6'b0,rs1_in[31:0],25'b0} ) ^
+                                   ( {63{rs2_in[26]}} & { 5'b0,rs1_in[31:0],26'b0} ) ^
+                                   ( {63{rs2_in[27]}} & { 4'b0,rs1_in[31:0],27'b0} ) ^
+                                   ( {63{rs2_in[28]}} & { 3'b0,rs1_in[31:0],28'b0} ) ^
+                                   ( {63{rs2_in[29]}} & { 2'b0,rs1_in[31:0],29'b0} ) ^
+                                   ( {63{rs2_in[30]}} & { 1'b0,rs1_in[31:0],30'b0} ) ^
+                                   ( {63{rs2_in[31]}} & {      rs1_in[31:0],31'b0} );
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  GREV         * * * * * * * * * * * * * * * * * *
+
+   // uint32_t grev32(uint32_t rs1, uint32_t rs2)
+   // {
+   //     uint32_t x = rs1;
+   //     int shamt = rs2 & 31;
+   //
+   //     if (shamt &  1)  x = ( (x & 0x55555555) <<  1) | ( (x & 0xAAAAAAAA) >>  1);
+   //     if (shamt &  2)  x = ( (x & 0x33333333) <<  2) | ( (x & 0xCCCCCCCC) >>  2);
+   //     if (shamt &  4)  x = ( (x & 0x0F0F0F0F) <<  4) | ( (x & 0xF0F0F0F0) >>  4);
+   //     if (shamt &  8)  x = ( (x & 0x00FF00FF) <<  8) | ( (x & 0xFF00FF00) >>  8);
+   //     if (shamt & 16)  x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
+   //
+   //     return x;
+   //  }
+
+
+   logic        [31:0]    grev1_d;
+   logic        [31:0]    grev2_d;
+   logic        [31:0]    grev4_d;
+   logic        [31:0]    grev8_d;
+   logic        [31:0]    grev_d;
+
+   // One swap stage per rs2_in[4:0] bit: adjacent bits, bit pairs, nibbles, bytes, then half-words.
+   assign grev1_d[31:0]       = (rs2_in[0])  ?  {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
+                                                 rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
+                                                 rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
+                                                 rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]}  :  rs1_in[31:0];
+
+   assign grev2_d[31:0]       = (rs2_in[1])  ?  {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26],
+                                                 grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18],
+                                                 grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10],
+                                                 grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]}  :  grev1_d[31:0];
+
+   assign grev4_d[31:0]       = (rs2_in[2])  ?  {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20],
+                                                 grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]}  :  grev2_d[31:0];
+
+   assign grev8_d[31:0]       = (rs2_in[3])  ?  {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]}  :  grev4_d[31:0];
+
+   assign grev_d[31:0]        = (rs2_in[4])  ?  {grev8_d[15:00],grev8_d[31:16]}  :  grev8_d[31:0];
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  GORC         * * * * * * * * * * * * * * * * * *
+
+   // uint32_t gorc32(uint32_t rs1, uint32_t rs2)
+   // {
+   //     uint32_t x = rs1;
+   //     int shamt = rs2 & 31;
+   //
+   //     if (shamt &  1)  x |= ( (x & 0x55555555) <<  1) | ( (x & 0xAAAAAAAA) >>  1);
+   //     if (shamt &  2)  x |= ( (x & 0x33333333) <<  2) | ( (x & 0xCCCCCCCC) >>  2);
+   //     if (shamt &  4)  x |= ( (x & 0x0F0F0F0F) <<  4) | ( (x & 0xF0F0F0F0) >>  4);
+   //     if (shamt &  8)  x |= ( (x & 0x00FF00FF) <<  8) | ( (x & 0xFF00FF00) >>  8);
+   //     if (shamt & 16)  x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
+   //
+   //     return x;
+   //  }
+
+
+   logic        [31:0]    gorc1_d;
+   logic        [31:0]    gorc2_d;
+   logic        [31:0]    gorc4_d;
+   logic        [31:0]    gorc8_d;
+   logic        [31:0]    gorc_d;
+
+   // Same swap patterns as the GREV stages, but each stage ORs the (gated) swapped value into its input.
+   assign gorc1_d[31:0]       = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
+                                                     rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
+                                                     rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
+                                                     rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0];
+
+   assign gorc2_d[31:0]       = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26],
+                                                     gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18],
+                                                     gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10],
+                                                     gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0];
+
+   assign gorc4_d[31:0]       = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20],
+                                                     gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0];
+
+   assign gorc8_d[31:0]       = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0];
+
+   assign gorc_d[31:0]        = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0];
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  SHFL, UNSHFL * * * * * * * * * * * * * * * * * *
+
+   // uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N)
+   // {
+   //     uint32_t x  = src & ~(maskL | maskR);
+   //     x          |= ((src << N) & maskL) | ((src >> N) & maskR);
+   //     return x;
+   // }
+   //
+   //
+   //
+   // uint32_t shfl32(uint32_t rs1, uint32_t rs2)
+   // {
+   //     uint32_t x = rs1;
+   //     int shamt = rs2 & 15;
+   //
+   //     if (shamt & 8)  x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
+   //     if (shamt & 4)  x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
+   //     if (shamt & 2)  x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
+   //     if (shamt & 1)  x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
+   //
+   //     return x;
+   // }
+
+
+   logic        [31:0]    shfl8_d;
+   logic        [31:0]    shfl4_d;
+   logic        [31:0]    shfl2_d;
+   logic        [31:0]    shfl_d;
+
+
+   // Stages run in the order rs2_in[3], [2], [1], [0]; each swaps the two middle quarters of every group.
+   assign shfl8_d[31:0]       = (rs2_in[3])  ?  {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]}      :  rs1_in[31:0];
+
+   assign shfl4_d[31:0]       = (rs2_in[2])  ?  {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16],
+                                                 shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]}  :  shfl8_d[31:0];
+
+   assign shfl2_d[31:0]       = (rs2_in[1])  ?  {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24],
+                                                 shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16],
+                                                 shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08],
+                                                 shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]}  :  shfl4_d[31:0];
+
+   assign shfl_d[31:0]        = (rs2_in[0])  ?  {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24],
+                                                 shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16],
+                                                 shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08],
+                                                 shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]}  :  shfl2_d[31:0];
+
+
+
+
+   // uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
+   // {
+   //     uint32_t x = rs1;
+   //     int shamt = rs2 & 15;
+   //
+   //     if (shamt & 1)  x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
+   //     if (shamt & 2)  x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
+   //     if (shamt & 4)  x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
+   //     if (shamt & 8)  x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
+   //
+   //     return x;
+   // }
+
+
+   logic        [31:0]    unshfl1_d;
+   logic        [31:0]    unshfl2_d;
+   logic        [31:0]    unshfl4_d;
+   logic        [31:0]    unshfl_d;
+
+   // Same four swaps as the SHFL stages, applied in the opposite order: rs2_in[0], [1], [2], [3].
+   assign unshfl1_d[31:0]     = (rs2_in[0])  ?  {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24],
+                                                 rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16],
+                                                 rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08],
+                                                 rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]}  :  rs1_in[31:0];
+
+   assign unshfl2_d[31:0]     = (rs2_in[1])  ?  {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24],
+                                                 unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16],
+                                                 unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08],
+                                                 unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]}  :  unshfl1_d[31:0];
+
+   assign unshfl4_d[31:0]     = (rs2_in[2])  ?  {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16],
+                                                 unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]}  :  unshfl2_d[31:0];
+
+   assign unshfl_d[31:0]      = (rs2_in[3])  ?  {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]}  :  unshfl4_d[31:0];
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  CRC32, CRC32c  * * * * * * * * * * * * * * * * *
+
+   // ***  computed from   https://crccalc.com  ***
+   //
+   // "a" is 8'h61 = 8'b0110_0001    (8'h61 ^ 8'hff = 8'h9e)
+   //
+   // Input must first be XORed with 32'hffff_ffff
+   //
+   //
+   // CRC32
+   //
+   // Input    Output        Input      Output
+   // -----   --------      --------   --------
+   // "a"     e8b7be43      ffffff9e   174841bc
+   // "aa"    078a19d7      ffff9e9e   f875e628
+   // "aaaa"  ad98e545      9e9e9e9e   5267a1ba
+   //
+   //
+   //
+   // CRC32c
+   //
+   // Input    Output        Input      Output
+   // -----   --------      --------   --------
+   // "a"     c1d04330      ffffff9e   3e2fbccf
+   // "aa"    f1f2dac2      ffff9e9e   0e0d253d
+   // "aaaa"  6a52eeb0      9e9e9e9e   95ad114f
+
+
+   logic                  crc32_all;
+   logic        [31:0]    crc32_poly_rev;
+   logic        [31:0]    crc32c_poly_rev;
+   integer                crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi;
+   logic        [31:0]    crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd;
+
+
+   assign crc32_all              =  ap_crc32_b  | ap_crc32_h  | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w;
+
+   assign crc32_poly_rev[31:0]   =  32'hEDB88320;    // bit reverse of 32'h04C11DB7
+   assign crc32c_poly_rev[31:0]  =  32'h82F63B78;    // bit reverse of 32'h1EDC6F41
+   // Each block below starts from rs1_in and runs N steps of: shift right by 1, then XOR in the reversed
+   // polynomial when the bit shifted out (old bit 0) was 1.   N = 8 for _b, 16 for _h, 32 for _w.
+   always_comb
+     begin
+       crc32_bd[31:0]            =  rs1_in[31:0];
+
+       for (crc32_bi=0; crc32_bi<8; crc32_bi++)
+         begin
+            crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}});
+         end      // FOR    crc32_bi
+     end          // ALWAYS_COMB
+
+
+   always_comb
+     begin
+       crc32_hd[31:0]            =  rs1_in[31:0];
+
+       for (crc32_hi=0; crc32_hi<16; crc32_hi++)
+         begin
+            crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}});
+         end      // FOR    crc32_hi
+     end          // ALWAYS_COMB
+
+
+   always_comb
+     begin
+       crc32_wd[31:0]            =  rs1_in[31:0];
+
+       for (crc32_wi=0; crc32_wi<32; crc32_wi++)
+         begin
+            crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}});
+         end      // FOR    crc32_wi
+     end          // ALWAYS_COMB
+
+
+
+
+   always_comb
+     begin
+       crc32c_bd[31:0]           =  rs1_in[31:0];
+
+       for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++)
+         begin
+            crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}});
+         end      // FOR    crc32c_bi
+     end          // ALWAYS_COMB
+
+
+   always_comb
+     begin
+       crc32c_hd[31:0]           =  rs1_in[31:0];
+
+       for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++)
+         begin
+            crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}});
+         end      // FOR    crc32c_hi
+     end          // ALWAYS_COMB
+
+
+   always_comb
+     begin
+       crc32c_wd[31:0]           =  rs1_in[31:0];
+
+       for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++)
+         begin
+            crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}});
+         end      // FOR    crc32c_wi
+     end          // ALWAYS_COMB
+
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  BFP          * * * * * * * * * * * * * * * * * *
+
+   logic        [4:0]     bfp_len;
+   logic        [4:0]     bfp_off;
+   logic        [31:0]    bfp_len_mask_;
+   logic        [15:0]    bfp_preshift_data;
+   logic        [63:0]    bfp_shift_data;
+   logic        [63:0]    bfp_shift_mask;
+   logic        [31:0]    bfp_result_d;
+
+   // rs2_in fields used: [27:24] length (0 means 16), [20:16] offset, [15:0] data placed into rs1_in.
+   assign bfp_len[3:0]           =  rs2_in[27:24];
+   assign bfp_len[4]             = (bfp_len[3:0] == 4'b0);   // If LEN field is zero, then LEN=16
+   assign bfp_off[4:0]           =  rs2_in[20:16];
+
+   assign bfp_len_mask_[31:0]    =  32'hffff_ffff  <<  bfp_len[4:0];
+   assign bfp_preshift_data[15:0]=  rs2_in[15:0] & ~bfp_len_mask_[15:0];
+   // Shifting the doubled 64-bit value by bfp_off and taking [63:32] acts as a 32-bit rotate-left by bfp_off.
+   assign bfp_shift_data[63:0]   = {16'b0,bfp_preshift_data[15:0], 16'b0,bfp_preshift_data[15:0]}  <<  bfp_off[4:0];
+   assign bfp_shift_mask[63:0]   = {bfp_len_mask_[31:0],           bfp_len_mask_[31:0]}            <<  bfp_off[4:0];
+
+   assign bfp_result_d[31:0]     = bfp_shift_data[63:32] | (rs1_in[31:0] & bfp_shift_mask[63:32]);
+
+
+
+
+
+   // * * * * * * * * * * * * * * * * * *  BitManip  :  Common logic * * * * * * * * * * * * * * * * * *
+
+   // AND-OR select of the per-operation results: bitmanip_d is all-zero unless an ap_* strobe is set.
+   assign bitmanip_sel_d         =  ap_bext | ap_bdep | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp;
+
+   assign bitmanip_d[31:0]       = ( {32{ap_bext}}     &       bext_d[31:0]        ) |
+                                   ( {32{ap_bdep}}     &       bdep_d[31:0]        ) |
+                                   ( {32{ap_clmul}}    &       clmul_raw_d[31:0]   ) |
+                                   ( {32{ap_clmulh}}   & {1'b0,clmul_raw_d[62:32]} ) |
+                                   ( {32{ap_clmulr}}   &       clmul_raw_d[62:31]  ) |
+                                   ( {32{ap_grev}}     &       grev_d[31:0]        ) |
+                                   ( {32{ap_gorc}}     &       gorc_d[31:0]        ) |
+                                   ( {32{ap_shfl}}     &       shfl_d[31:0]        ) |
+                                   ( {32{ap_unshfl}}   &       unshfl_d[31:0]      ) |
+                                   ( {32{ap_crc32_b}}  &       crc32_bd[31:0]      ) |
+                                   ( {32{ap_crc32_h}}  &       crc32_hd[31:0]      ) |
+                                   ( {32{ap_crc32_w}}  &       crc32_wd[31:0]      ) |
+                                   ( {32{ap_crc32c_b}} &       crc32c_bd[31:0]     ) |
+                                   ( {32{ap_crc32c_h}} &       crc32c_hd[31:0]     ) |
+                                   ( {32{ap_crc32c_w}} &       crc32c_wd[31:0]     ) |
+                                   ( {32{ap_bfp}}      &       bfp_result_d[31:0]  );
+
+
+   // The select flag and the selected bit-manip result are captured together, under the same enable.
+   rvdffe #(33) i_bitmanip_ff    (.*, .clk(clk),  .din({bitmanip_sel_d,bitmanip_d[31:0]}),   .dout({bitmanip_sel_x,bitmanip_x[31:0]}),   .en(bit_x_enable));
+
+
+
+   // bitmanip_x is OR'ed in ungated: its source bitmanip_d is 0 whenever bitmanip_sel_d is 0 (no ap_* strobe set).
+   assign result_x[31:0]         =  ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32]    ) |
+                                    ( {32{~bitmanip_sel_x &  low_x}} & prod_x[31:0]     ) |
+                                                                       bitmanip_x[31:0];
+
+
+
+endmodule  // eb1_exu_mul_ctl
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu.sv
new file mode 100644
index 0000000..6208094
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu.sv
@@ -0,0 +1,371 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+//********************************************************************************
+// Function: Top level file for Icache, Fetch, Branch prediction & Aligner
+// BFF -> F1 -> F2 -> A
+//********************************************************************************
+
+module eb1_ifu   // IFU top level: instantiates fetch control (ifc), branch predictor (bp), aligner (aln) and memory control (mem_ctl)
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic free_l2clk,                   // Clock always.                  Through one clock header.  For flops with    second header built in.
+   input logic active_clk,                   // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic clk,                          // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic rst_l,                        // reset, active low
+   // Decode and flush controls
+   input logic dec_i0_decode_d,              // Valid instruction at D and not blocked
+
+   input logic exu_flush_final, // flush, includes upper and lower
+   input logic dec_tlu_i0_commit_cmt , // committed i0
+   input logic dec_tlu_flush_err_wb , // flush due to parity error.
+   input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final
+   input logic [31:1] exu_flush_path_final, // flush fetch address
+
+   input logic [31:0]  dec_tlu_mrac_ff ,// Side_effect , cacheable for each region
+   input logic         dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final
+   input logic         dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches
+
+   input logic                       dec_tlu_bpred_disable,     // disable all branch prediction
+   input logic                       dec_tlu_core_ecc_disable,  // disable ecc checking and flagging
+   input logic                       dec_tlu_force_halt,        // force halt
+
+  //-------------------------- IFU AXI signals--------------------------
+   // AXI Write Channels
+   output logic                            ifu_axi_awvalid,
+   output logic [pt.IFU_BUS_TAG-1:0]       ifu_axi_awid,
+   output logic [31:0]                     ifu_axi_awaddr,
+   output logic [3:0]                      ifu_axi_awregion,
+   output logic [7:0]                      ifu_axi_awlen,
+   output logic [2:0]                      ifu_axi_awsize,
+   output logic [1:0]                      ifu_axi_awburst,
+   output logic                            ifu_axi_awlock,
+   output logic [3:0]                      ifu_axi_awcache,
+   output logic [2:0]                      ifu_axi_awprot,
+   output logic [3:0]                      ifu_axi_awqos,
+
+   output logic                            ifu_axi_wvalid,
+   output logic [63:0]                     ifu_axi_wdata,
+   output logic [7:0]                      ifu_axi_wstrb,
+   output logic                            ifu_axi_wlast,
+
+   output logic                            ifu_axi_bready,
+
+   // AXI Read Channels
+   output logic                            ifu_axi_arvalid,
+   input  logic                            ifu_axi_arready,
+   output logic [pt.IFU_BUS_TAG-1:0]       ifu_axi_arid,
+   output logic [31:0]                     ifu_axi_araddr,
+   output logic [3:0]                      ifu_axi_arregion,
+   output logic [7:0]                      ifu_axi_arlen,
+   output logic [2:0]                      ifu_axi_arsize,
+   output logic [1:0]                      ifu_axi_arburst,
+   output logic                            ifu_axi_arlock,
+   output logic [3:0]                      ifu_axi_arcache,
+   output logic [2:0]                      ifu_axi_arprot,
+   output logic [3:0]                      ifu_axi_arqos,
+
+   input  logic                            ifu_axi_rvalid,
+   output logic                            ifu_axi_rready,
+   input  logic [pt.IFU_BUS_TAG-1:0]       ifu_axi_rid,
+   input  logic [63:0]                     ifu_axi_rdata,
+   input  logic [1:0]                      ifu_axi_rresp,
+
+   input  logic                      ifu_bus_clk_en,
+   // DMA <-> ICCM interface
+   input  logic                      dma_iccm_req,
+   input  logic [31:0]               dma_mem_addr,
+   input  logic [2:0]                dma_mem_sz,
+   input  logic                      dma_mem_write,
+   input  logic [63:0]               dma_mem_wdata,
+   input  logic [2:0]                dma_mem_tag,       //  DMA Buffer entry number
+
+
+   input  logic                      dma_iccm_stall_any,
+   output logic                      iccm_dma_ecc_error,
+   output logic                      iccm_dma_rvalid,
+   output logic [63:0]               iccm_dma_rdata,
+   output logic [2:0]                iccm_dma_rtag,     //   Tag of the DMA req
+   output logic                      iccm_ready,
+
+   output logic       ifu_pmu_instr_aligned,
+   output logic       ifu_pmu_fetch_stall,
+   output logic       ifu_ic_error_start,     // has all of the I$ ecc/parity for data/tag
+
+//   I$ & ITAG Ports
+   output logic [31:1]               ic_rw_addr,         // Read/Write address to the Icache.
+   output logic [pt.ICACHE_NUM_WAYS-1:0]                ic_wr_en,           // Icache write enable, when filling the Icache.
+   output logic                      ic_rd_en,           // Icache read  enable.
+
+   output logic [pt.ICACHE_BANKS_WAY-1:0][70:0]               ic_wr_data,         // Data to fill to the Icache. With ECC
+   input  logic [63:0]              ic_rd_data ,        // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+   input  logic [70:0]              ic_debug_rd_data ,        // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+   input  logic [25:0]                     ictag_debug_rd_data,// Debug icache tag.
+   output logic [70:0]               ic_debug_wr_data,   // Debug wr cache.
+
+   output logic [70:0]               ifu_ic_debug_rd_data,
+
+   input  logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr,    //
+   input  logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr,
+   output logic [63:0]               ic_premux_data,     // Premux data to be muxed with each way of the Icache.
+   output logic                      ic_sel_premux_data, // Select the premux data.
+
+   output logic [pt.ICACHE_INDEX_HI:3]               ic_debug_addr,      // Read/Write address to the Icache.
+   output logic                      ic_debug_rd_en,     // Icache debug rd
+   output logic                      ic_debug_wr_en,     // Icache debug wr
+   output logic                      ic_debug_tag_array, // Debug tag array
+   output logic [pt.ICACHE_NUM_WAYS-1:0]                ic_debug_way,       // Debug way. Rd or Wr.
+
+
+   output logic [pt.ICACHE_NUM_WAYS-1:0]                ic_tag_valid,       // Valid bits when accessing the Icache. One valid bit per way. F2 stage
+
+   input  logic [pt.ICACHE_NUM_WAYS-1:0]                ic_rd_hit,          // Compare hits from Icache tags. Per way.  F2 stage
+   input  logic                      ic_tag_perr,        // Icache Tag parity error
+
+
+   // ICCM ports
+   output logic [pt.ICCM_BITS-1:1]               iccm_rw_addr,       // ICCM read/write address.
+   output logic                      iccm_wren,          // ICCM write enable (through the DMA)
+   output logic                      iccm_rden,          // ICCM read enable.
+   output logic [77:0]               iccm_wr_data,       // ICCM write data.
+   output logic [2:0]                iccm_wr_size,       // ICCM write location within DW.
+
+   input  logic [63:0]               iccm_rd_data,       // Data read from ICCM.
+   input  logic [77:0]               iccm_rd_data_ecc,   // Data + ECC read from ICCM.
+
+   output logic                      ifu_iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc  error.
+
+// Perf counter sigs
+   output logic       ifu_pmu_ic_miss, // ic miss
+   output logic       ifu_pmu_ic_hit, // ic hit
+   output logic       ifu_pmu_bus_error, // iside bus error
+   output logic       ifu_pmu_bus_busy,  // iside bus busy
+   output logic       ifu_pmu_bus_trxn, // iside bus transactions
+
+
+   output logic       ifu_i0_icaf,         // Instruction 0 access fault. From Aligner to Decode
+   output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type
+
+   output logic  ifu_i0_valid,        // Instruction 0 valid. From Aligner to Decode
+   output logic  ifu_i0_icaf_second,  // Instruction 0 has access fault on second 2B of 4B inst
+   output logic  ifu_i0_dbecc,        // Instruction 0 has double bit ecc error
+   output logic  iccm_dma_sb_error,   // Single Bit ECC error from a DMA access
+   output logic[31:0] ifu_i0_instr,   // Instruction 0 . From Aligner to Decode
+   output logic[31:1] ifu_i0_pc,      // Instruction 0 pc. From Aligner to Decode
+   output logic ifu_i0_pc4,           // Instruction 0 is 4 byte. From Aligner to Decode
+
+   output logic ifu_miss_state_idle,   // There is no outstanding miss. Cache miss state is idle.
+
+   output eb1_br_pkt_t i0_brp,           // Instruction 0 branch packet. From Aligner to Decode
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
+   output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
+   output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
+   output logic [$clog2(pt.BTB_SIZE)-1:0]         ifu_i0_fa_index,          // Fully associative btb index
+
+   input eb1_predict_pkt_t  exu_mp_pkt, // mispredict packet
+   input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr
+   input logic [pt.BHT_GHR_SIZE-1:0]  exu_mp_fghr,                    // Mispredict fghr
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  exu_mp_index,         // Mispredict index
+   input logic [pt.BTB_BTAG_SIZE-1:0]  exu_mp_btag,                   // Mispredict btag
+
+   input eb1_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot0 update/error pkt
+   input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
+   input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative btb error index
+
+   input logic dec_tlu_flush_lower_wb, // NOTE(review): not referenced in this module body; presumably picked up by a .* submodule connection - confirm
+
+   output logic [15:0] ifu_i0_cinst,
+
+
+/// Icache debug
+   input  eb1_cache_debug_pkt_t        dec_tlu_ic_diag_pkt ,
+   output logic                    ifu_ic_debug_rd_data_valid,
+   output logic                                iccm_buf_correct_ecc,
+   output logic                                iccm_correction_state,
+
+   input logic scan_mode
+   );
+
+   localparam TAGWIDTH = 2 ;   // NOTE(review): not referenced anywhere in this module - confirm whether still needed
+   localparam IDWIDTH  = 2 ;   // NOTE(review): not referenced anywhere in this module - confirm whether still needed
+
+   logic                   ifu_fb_consume1, ifu_fb_consume2;
+   logic [31:1]            ifc_fetch_addr_f;
+   logic [31:1]            ifc_fetch_addr_bf;
+
+   logic [1:0]   ifu_fetch_val;  // valids on a 2B boundary, right justified
+   logic [31:1]  ifu_fetch_pc;   // starting pc of fetch
+
+   logic iccm_rd_ecc_single_err, ic_error_start;
+   assign ifu_iccm_rd_ecc_single_err = iccm_rd_ecc_single_err;
+   assign ifu_ic_error_start = ic_error_start;
+
+
+   logic        ic_write_stall;
+   logic        ic_dma_active;
+   logic        ifc_dma_access_ok;
+   logic [1:0]  ic_access_fault_f;
+   logic [1:0]  ic_access_fault_type_f;
+   logic        ifu_ic_mb_empty;
+
+   logic ic_hit_f;
+
+   logic [1:0] ifu_bp_way_f; // way indication; right justified
+   logic       ifu_bp_hit_taken_f; // kill next fetch; taken target found
+   logic [31:1] ifu_bp_btb_target_f; //  predicted target PC
+   logic        ifu_bp_inst_mask_f; // tell ic which valids to kill because of a taken branch; right justified
+   logic [1:0]  ifu_bp_hist1_f; // history counters for all 4 potential branches; right justified
+   logic [1:0]  ifu_bp_hist0_f; // history counters for all 4 potential branches; right justified
+   logic [11:0] ifu_bp_poffset_f; // predicted target
+   logic [1:0]  ifu_bp_ret_f; // predicted ret ; right justified
+   logic [1:0]  ifu_bp_pc4_f; // pc4 indication; right justified
+   logic [1:0]  ifu_bp_valid_f; // branch valid, right justified
+   logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f;
+   logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f;
+
+
+   // fetch control
+   eb1_ifu_ifc_ctl #(.pt(pt)) ifc (.*
+                    );
+
+   // branch predictor
+   if (pt.BTB_ENABLE==1) begin  : bpred
+      eb1_ifu_bp_ctl #(.pt(pt)) bp (.*);
+   end
+   else begin : bpred   // BTB disabled: no predictor instance; only hit_taken and inst_mask are tied off below
+      assign ifu_bp_hit_taken_f = '0;
+      // verif wires
+      logic btb_wr_en_way0, btb_wr_en_way1,dec_tlu_error_wb;
+      logic [16+pt.BTB_BTAG_SIZE:0] btb_wr_data;
+      assign btb_wr_en_way0 = '0;
+      assign btb_wr_en_way1 = '0;
+      assign btb_wr_data = '0;
+      assign dec_tlu_error_wb ='0;
+      assign ifu_bp_inst_mask_f = 1'b1;
+   end
+
+
+   logic [1:0]   ic_fetch_val_f;
+   logic [31:0] ic_data_f;
+   logic [31:0] ifu_fetch_data_f;
+   logic ifc_fetch_req_f;
+   logic ifc_fetch_req_f_raw;
+   logic [1:0] iccm_rd_ecc_double_err;  // This fetch has an iccm double error.
+
+   logic ifu_async_error_start;
+
+   // Rename the memory-control fetch outputs to the signal names used by the aligner
+   assign ifu_fetch_data_f[31:0] = ic_data_f[31:0];
+   assign ifu_fetch_val[1:0] = ic_fetch_val_f[1:0];
+   assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f[31:1];
+
+ logic                       ifc_fetch_uncacheable_bf;      // The fetch request is uncacheable space. BF stage
+ logic                       ifc_fetch_req_bf;              // Fetch request. Comes with the address.  BF stage
+ logic                       ifc_fetch_req_bf_raw;          // Fetch request without some qualifications. Used for clock-gating. BF stage
+ logic                       ifc_iccm_access_bf;            // This request is to the ICCM. Do not generate misses to the bus.
+ logic                       ifc_region_acc_fault_bf;       // Access fault. in ICCM region but offset is outside defined ICCM.
+
+   // aligner
+
+   eb1_ifu_aln_ctl #(.pt(pt)) aln (
+                                    .*
+                                    );
+
+
+   // icache
+   eb1_ifu_mem_ctl #(.pt(pt)) mem_ctl
+     (.*,
+      .ic_data_f(ic_data_f[31:0])
+      );
+
+
+
+   // Performance debug info
+   //   Simulation-only BTB trace, compiled in when DUMP_BTB_ON is defined.
+   //   Uses hierarchical references through `CPU_TOP and into bpred.bp.
+`ifdef DUMP_BTB_ON
+   logic              exu_mp_valid; // conditional branch mispredict
+   logic exu_mp_way; // way field of the mispredict packet
+   logic exu_mp_ataken; // direction is actual taken
+   logic exu_mp_boffset; // branch offset
+   logic exu_mp_pc4; // branch is a 4B inst
+   logic exu_mp_call; // branch is a call inst
+   logic exu_mp_ret; // branch is a ret inst
+   logic exu_mp_ja; // branch is a jump always
+   logic [1:0] exu_mp_hist; // new history
+   logic [11:0] exu_mp_tgt; // target offset
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
+
+   assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict
+   assign exu_mp_ataken = exu_mp_pkt.ataken;  // direction is actual taken
+   assign exu_mp_boffset = exu_mp_pkt.boffset;  // branch offset
+   assign exu_mp_pc4 = exu_mp_pkt.pc4;  // branch is a 4B inst
+   assign exu_mp_call = exu_mp_pkt.pcall;  // branch is a call inst
+   assign exu_mp_ret = exu_mp_pkt.pret;  // branch is a ret inst
+   assign exu_mp_ja = exu_mp_pkt.pja;  // branch is a jump always
+   assign exu_mp_way = exu_mp_pkt.way;  // way field of the mispredict packet
+   assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0];  // new history
+   assign exu_mp_tgt[11:0]  = exu_mp_pkt.toffset[11:0] ;  // target offset
+   assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ;  // BTB/BHT address
+
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_f;
+ `define DEC `CPU_TOP.dec
+ `define EXU `CPU_TOP.exu
+   eb1_btb_addr_hash f2hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
+   logic [31:0] mppc_ns, mppc;
+   logic        exu_flush_final_d1;
+   assign mppc_ns[31:1] = `EXU.i0_flush_upper_x ? `EXU.exu_i0_pc_x : `EXU.dec_i0_pc_d;
+   assign mppc_ns[0] = 1'b0;
+   rvdff #(33)  junk_ff (.*, .clk(active_clk), .din({mppc_ns[31:0], exu_flush_final}), .dout({mppc[31:0], exu_flush_final_d1}));
+   logic  tmp_bnk;
+   assign tmp_bnk = bpred.bp.btb_sel_f[1];
+
+   always @(negedge clk) begin
+      if(`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin
+         $display("BTB_CONFIG: %d",pt.BTB_SIZE);
+         `ifndef BP_NOGSHARE
+         $display("BHT_CONFIG: %d gshare: 1",pt.BHT_SIZE);
+         `else
+         $display("BHT_CONFIG: %d gshare: 0",pt.BHT_SIZE);
+         `endif
+         $display("RS_CONFIG: %d", pt.RET_STACK_SIZE);
+      end
+       if(exu_flush_final_d1 & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken))
+         $display("%7d BTB_MP  : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha, exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], 1'b0, exu_mp_call, exu_mp_ret, exu_mp_ataken, exu_mp_hist[1:0], exu_mp_valid, exu_mp_btag[pt.BTB_BTAG_SIZE-1:0], {exu_flush_path_final[31:1], 1'b0}, exu_mp_eghr[pt.BHT_GHR_SIZE-1:0], exu_mp_valid, bpred.bp.bht_wr_addr0, mppc[31:0], exu_mp_pkt.way);
+
+     for(int i = 0; i < $bits(ifu_bp_valid_f); i++) begin  // bound follows the vector width so every ifu_bp_valid_f[i] select is in range
+      if(ifu_bp_valid_f[i] & ifc_fetch_req_f)
+        $display("%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %0h ghr: %4b ghr_index: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],bpred.bp.btb_sel_f[1], bpred.bp.btb_rd_call_f, bpred.bp.btb_rd_ret_f, ifu_bp_hist1_f[tmp_bnk], ifu_bp_hist0_f[tmp_bnk], bpred.bp.fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0], {ifu_bp_btb_target_f[31:1], 1'b0}, bpred.bp.fghr[pt.BHT_GHR_SIZE-1:0], bpred.bp.bht_rd_addr_f, ifu_bp_way_f[tmp_bnk]);
+     end
+      if(dec_tlu_br0_r_pkt.valid & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error))
+        $display("%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h  way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,bpred.bp.br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO],{dec_tlu_br0_r_pkt.middle}, dec_tlu_br0_r_pkt.hist, dec_tlu_br0_r_pkt.way);
+
+      if(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error)
+        $display("%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],1'b0, dec_tlu_br0_r_pkt.br_start_error, {exu_flush_path_final[31:1], 1'b0}, dec_tlu_br0_r_pkt.way);
+   end // always @ (negedge clk)
+      function [1:0] encode4_2;   // 4-bit to 2-bit encoder: for a one-hot input, returns the index of the set bit
+      input [3:0] in;
+
+      encode4_2[1] = in[3] | in[2];
+      encode4_2[0] = in[3] | in[1];
+
+   endfunction
+`endif
+endmodule // eb1_ifu
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu_aln_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_aln_ctl.sv
new file mode 100644
index 0000000..2d4e822
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_aln_ctl.sv
@@ -0,0 +1,700 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+
+//********************************************************************************
+// Function: Instruction aligner
+//********************************************************************************
+module eb1_ifu_aln_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+
+   input logic                                    scan_mode,                // Flop scan mode control
+   input logic                                    rst_l,                    // reset, active low
+   input logic                                    clk,                      // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic                                    active_clk,               // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+
+   input logic                                    ifu_async_error_start,    // ecc/parity related errors with current fetch - not sent down the pipe
+
+   input logic [1:0]                              iccm_rd_ecc_double_err,   // This fetch has a double ICCM ecc  error.
+
+   input logic [1:0]                              ic_access_fault_f,        // Instruction access fault for the current fetch.
+   input logic [1:0]                              ic_access_fault_type_f,   // Instruction access fault types
+
+   input logic                                    exu_flush_final,          // Flush from the pipeline.
+
+   input logic                                    dec_i0_decode_d,          // Valid instruction at D-stage and not blocked
+
+   input logic [31:0]                             ifu_fetch_data_f,         // fetch data in memory format - not right justified
+
+   input logic [1:0]                              ifu_fetch_val,            // valids on a 2B boundary, right justified
+   input logic [31:1]                             ifu_fetch_pc,             // starting pc of fetch
+
+
+
+   output logic                                   ifu_i0_valid,             // Instruction 0 is valid
+   output logic                                   ifu_i0_icaf,              // Instruction 0 has access fault
+   output logic [1:0]                             ifu_i0_icaf_type,         // Instruction 0 access fault type
+   output logic                                   ifu_i0_icaf_second,       // Instruction 0 has access fault on second 2B of 4B inst
+
+   output logic                                   ifu_i0_dbecc,             // Instruction 0 has double bit ecc error
+   output logic [31:0]                            ifu_i0_instr,             // Instruction 0
+   output logic [31:1]                            ifu_i0_pc,                // Instruction 0 PC
+   output logic                                   ifu_i0_pc4,
+
+   output logic                                   ifu_fb_consume1,          // Consumed one buffer. To fetch control fetch for buffer mass balance
+   output logic                                   ifu_fb_consume2,          // Consumed two buffers.To fetch control fetch for buffer mass balance
+
+
+   input logic [pt.BHT_GHR_SIZE-1:0]              ifu_bp_fghr_f,            // fetch GHR
+   input logic [31:1]                             ifu_bp_btb_target_f,      // predicted RET target
+   input logic [11:0]                             ifu_bp_poffset_f,         // predicted target offset
+   input logic [1:0] [$clog2(pt.BTB_SIZE)-1:0]    ifu_bp_fa_index_f,        // predicted branch index (fully associative option)
+
+   input logic [1:0]                              ifu_bp_hist0_f,           // history counters for all 4 potential branches, bit 1, right justified
+   input logic [1:0]                              ifu_bp_hist1_f,           // history counters for all 4 potential branches, bit 1, right justified
+   input logic [1:0]                              ifu_bp_pc4_f,             // pc4 indication, right justified
+   input logic [1:0]                              ifu_bp_way_f,             // way indication, right justified
+   input logic [1:0]                              ifu_bp_valid_f,           // branch valid, right justified
+   input logic [1:0]                              ifu_bp_ret_f,             // predicted ret indication, right justified
+
+
+   output eb1_br_pkt_t                           i0_brp,                   // Branch packet for I0.
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]   ifu_i0_bp_index,          // BP index
+   output logic [pt.BHT_GHR_SIZE-1:0]             ifu_i0_bp_fghr,           // BP FGHR
+   output logic [pt.BTB_BTAG_SIZE-1:0]            ifu_i0_bp_btag,           // BP tag
+
+   output logic [$clog2(pt.BTB_SIZE)-1:0]         ifu_i0_fa_index,          // Fully associt btb index
+
+   output logic                                   ifu_pmu_instr_aligned,    // number of inst aligned this cycle
+
+   output logic [15:0]                            ifu_i0_cinst              // 16b compress inst for i0
+   );
+
+
+
+   logic                                          ifvalid;
+   logic                                          shift_f1_f0, shift_f2_f0, shift_f2_f1;
+   logic                                          fetch_to_f0, fetch_to_f1, fetch_to_f2;
+
+   logic [1:0]                                    f2val_in, f2val;
+   logic [1:0]                                    f1val_in, f1val;
+   logic [1:0]                                    f0val_in, f0val;
+   logic [1:0]                                    sf1val, sf0val;
+
+   logic [31:0]                                   aligndata;
+   logic                                          first4B, first2B;
+
+   logic [31:0]                                   uncompress0;
+   logic                                          i0_shift;
+   logic                                          shift_2B, shift_4B;
+   logic                                          f1_shift_2B;
+   logic                                          f2_valid, sf1_valid, sf0_valid;
+
+   logic [31:0]                                   ifirst;
+   logic [1:0]                                    alignval;
+   logic [31:1]                                   firstpc, secondpc;
+
+   logic [11:0]                                   f1poffset;
+   logic [11:0]                                   f0poffset;
+   logic [pt.BHT_GHR_SIZE-1:0]                    f1fghr;
+   logic [pt.BHT_GHR_SIZE-1:0]                    f0fghr;
+   logic [1:0]                                    f1hist1;
+   logic [1:0]                                    f0hist1;
+   logic [1:0]                                    f1hist0;
+   logic [1:0]                                    f0hist0;
+
+   logic [1:0][$clog2(pt.BTB_SIZE)-1:0]           f0index, f1index, alignindex;
+
+   logic [1:0]                                    f1ictype;
+   logic [1:0]                                    f0ictype;
+
+   logic [1:0]                                    f1pc4;
+   logic [1:0]                                    f0pc4;
+
+   logic [1:0]                                    f1ret;
+   logic [1:0]                                    f0ret;
+   logic [1:0]                                    f1way;
+   logic [1:0]                                    f0way;
+
+   logic [1:0]                                    f1brend;
+   logic [1:0]                                    f0brend;
+
+   logic [1:0]                                    alignbrend;
+   logic [1:0]                                    alignpc4;
+
+   logic [1:0]                                    alignret;
+   logic [1:0]                                    alignway;
+   logic [1:0]                                    alignhist1;
+   logic [1:0]                                    alignhist0;
+   logic [1:1]                                    alignfromf1;
+   logic                                          i0_ends_f1;
+   logic                                          i0_br_start_error;
+
+   logic [31:1]                                   f1prett;
+   logic [31:1]                                   f0prett;
+   logic [1:0]                                    f1dbecc;
+   logic [1:0]                                    f0dbecc;
+   logic [1:0]                                    f1icaf;
+   logic [1:0]                                    f0icaf;
+
+   logic [1:0]                                    aligndbecc;
+   logic [1:0]                                    alignicaf;
+   logic                                          i0_brp_pc4;
+
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]          firstpc_hash, secondpc_hash;
+
+   logic                                          first_legal;
+
+   logic [1:0]                                    wrptr, wrptr_in;
+   logic [1:0]                                    rdptr, rdptr_in;
+   logic [2:0]                                    qwen;
+   logic [31:0]                                   q2,q1,q0;
+   logic                                          q2off_in, q2off;
+   logic                                          q1off_in, q1off;
+   logic                                          q0off_in, q0off;
+   logic                                          f0_shift_2B;
+
+   logic [31:0]                                   q0eff;
+   logic [31:0]                                   q0final;
+   logic                                          q0ptr;
+   logic [1:0]                                    q0sel;
+
+   logic [31:0]                                   q1eff;
+   logic [15:0]                                   q1final;
+   logic                                          q1ptr;
+   logic [1:0]                                    q1sel;
+
+   logic [2:0]                                    qren;
+
+   logic                                          consume_fb1, consume_fb0;
+   logic [1:0]                                    icaf_eff;
+
+   localparam                                     BRDATA_SIZE  = pt.BTB_ENABLE ? 16+($clog2(pt.BTB_SIZE)*2*pt.BTB_FULLYA) : 2;
+   localparam                                     BRDATA_WIDTH = pt.BTB_ENABLE ? 8+($clog2(pt.BTB_SIZE)*pt.BTB_FULLYA) : 1;
+   logic [BRDATA_SIZE-1:0]                        brdata_in, brdata2, brdata1, brdata0;
+   logic [BRDATA_SIZE-1:0]                        brdata1eff, brdata0eff;
+   logic [BRDATA_SIZE-1:0]                        brdata1final, brdata0final;
+
+   localparam                                     MHI   = 1+(pt.BTB_ENABLE * (43+pt.BHT_GHR_SIZE));
+   localparam                                     MSIZE = 2+(pt.BTB_ENABLE * (43+pt.BHT_GHR_SIZE));
+
+   logic [MHI:0]                                  misc_data_in, misc2, misc1, misc0;
+   logic [MHI:0]                                  misc1eff, misc0eff;
+
+   logic [pt.BTB_BTAG_SIZE-1:0]                  firstbrtag_hash, secondbrtag_hash;
+
+   logic                                         error_stall_in, error_stall;
+   // error_stall: set by ifu_async_error_start and held until exu_flush_final clears it (state kept in bundle2ff)
+   assign error_stall_in = (error_stall | ifu_async_error_start) & ~exu_flush_final;
+   // queue write/read pointers and the three per-entry offset bits, clocked by active_clk
+   rvdff #(.WIDTH(7))  bundle1ff (.*,
+                                  .clk(active_clk),
+                                  .din ({wrptr_in[1:0],rdptr_in[1:0],q2off_in,q1off_in,q0off_in}),
+                                  .dout({wrptr[1:0],   rdptr[1:0],   q2off,   q1off,   q0off})
+                                  );
+   // error_stall plus the 2-bit valid vectors of the f2/f1/f0 stages
+   rvdffie #(.WIDTH(7),.OVERRIDE(1))  bundle2ff (.*,
+                                                 .din ({error_stall_in,f2val_in[1:0],f1val_in[1:0],f0val_in[1:0]}),
+                                                 .dout({error_stall,   f2val[1:0],   f1val[1:0],   f0val[1:0]   })
+                                                 );
+
+if(pt.BTB_ENABLE==1) begin // BTB enabled: one enabled flop bank per queue entry, written when that entry's qwen bit is set
+   rvdffe #(BRDATA_SIZE)  brdata2ff   (.*, .clk(clk), .en(qwen[2]),        .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata2[BRDATA_SIZE-1:0]));
+   rvdffe #(BRDATA_SIZE)  brdata1ff   (.*, .clk(clk), .en(qwen[1]),        .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata1[BRDATA_SIZE-1:0]));
+   rvdffe #(BRDATA_SIZE)  brdata0ff   (.*, .clk(clk), .en(qwen[0]),        .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata0[BRDATA_SIZE-1:0]));
+   rvdffe #(MSIZE)        misc2ff     (.*, .clk(clk), .en(qwen[2]),        .din(misc_data_in[MHI:0]),        .dout(misc2[MHI:0]));
+   rvdffe #(MSIZE)        misc1ff     (.*, .clk(clk), .en(qwen[1]),        .din(misc_data_in[MHI:0]),        .dout(misc1[MHI:0]));
+   rvdffe #(MSIZE)        misc0ff     (.*, .clk(clk), .en(qwen[0]),        .din(misc_data_in[MHI:0]),        .dout(misc0[MHI:0]));
+end
+else begin
+   // BTB disabled: all three entries share one flop bank; each entry has its own load/hold mux on din (load when qwen[n])
+   rvdffie #((MSIZE*3)+(BRDATA_SIZE*3))    miscff      (.*,
+                                                        .din({qwen[2] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc2[MHI:0], brdata2[BRDATA_SIZE-1:0]},
+                                                              qwen[1] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc1[MHI:0], brdata1[BRDATA_SIZE-1:0]},
+                                                              qwen[0] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc0[MHI:0], brdata0[BRDATA_SIZE-1:0]}}),
+                                                        // dout must use the same {miscN,brdataN} per-entry packing as din, otherwise fields swap between entries every clock
+                                                        .dout({misc2[MHI:0], brdata2[BRDATA_SIZE-1:0], misc1[MHI:0], brdata1[BRDATA_SIZE-1:0], misc0[MHI:0], brdata0[BRDATA_SIZE-1:0]})
+                                                        );
+end
+
+  logic [31:1] q2pc, q1pc, q0pc;
+   // per-entry fetch PC: entry n loads ifu_fetch_pc when qwen[n] is set
+   rvdffe #(31)           q2pcff        (.*, .clk(clk), .en(qwen[2]),        .din(ifu_fetch_pc[31:1]),     .dout(q2pc[31:1]));
+   rvdffe #(31)           q1pcff        (.*, .clk(clk), .en(qwen[1]),        .din(ifu_fetch_pc[31:1]),     .dout(q1pc[31:1]));
+   rvdffe #(31)           q0pcff        (.*, .clk(clk), .en(qwen[0]),        .din(ifu_fetch_pc[31:1]),     .dout(q0pc[31:1]));
+   // per-entry fetch data: entry n loads ifu_fetch_data_f under the same write enable
+   rvdffe #(32)           q2ff        (.*, .clk(clk), .en(qwen[2]),        .din(ifu_fetch_data_f[31:0]),     .dout(q2[31:0]));
+   rvdffe #(32)           q1ff        (.*, .clk(clk), .en(qwen[1]),        .din(ifu_fetch_data_f[31:0]),     .dout(q1[31:0]));
+   rvdffe #(32)           q0ff        (.*, .clk(clk), .en(qwen[0]),        .din(ifu_fetch_data_f[31:0]),     .dout(q0[31:0]));
+
+
+   // new queue control logic
+   // qren: one-hot decode of rdptr (selects which stored entry feeds the *0eff muxes)
+   assign qren[2:0]          = {  rdptr[1:0] == 2'b10,
+                                  rdptr[1:0] == 2'b01,
+                                  rdptr[1:0] == 2'b00 };
+   // qwen: one-hot decode of wrptr, asserted only when ifvalid
+   assign qwen[2:0]          = { (wrptr[1:0] == 2'b10) & ifvalid,
+                                 (wrptr[1:0] == 2'b01) & ifvalid,
+                                 (wrptr[1:0] == 2'b00) & ifvalid };
+   // rdptr_in: +1 (mod 3) on ifu_fb_consume1, +2 (mod 3) on ifu_fb_consume2, hold when neither is set;
+   // every term is gated by ~exu_flush_final, so a flush forces the pointer to 0
+   assign rdptr_in[1:0]      = ({2{ qren[0]         &  ifu_fb_consume1 & ~exu_flush_final}} & 2'b01     ) |
+                               ({2{ qren[1]         &  ifu_fb_consume1 & ~exu_flush_final}} & 2'b10     ) |
+                               ({2{ qren[2]         &  ifu_fb_consume1 & ~exu_flush_final}} & 2'b00     ) |
+                               ({2{ qren[0]         &  ifu_fb_consume2 & ~exu_flush_final}} & 2'b10     ) |
+                               ({2{ qren[1]         &  ifu_fb_consume2 & ~exu_flush_final}} & 2'b00     ) |
+                               ({2{ qren[2]         &  ifu_fb_consume2 & ~exu_flush_final}} & 2'b01     ) |
+                               ({2{~ifu_fb_consume1 & ~ifu_fb_consume2 & ~exu_flush_final}} & rdptr[1:0]);
+   // wrptr_in: +1 (mod 3) when an entry is written, hold when ~ifvalid, forced to 0 by exu_flush_final
+   assign wrptr_in[1:0]      = ({2{ qwen[0] & ~exu_flush_final}} & 2'b01     ) |
+                               ({2{ qwen[1] & ~exu_flush_final}} & 2'b10     ) |
+                               ({2{ qwen[2] & ~exu_flush_final}} & 2'b00     ) |
+                               ({2{~ifvalid & ~exu_flush_final}} & wrptr[1:0]);
+
+
+
+   assign q2off_in          = ( ~qwen[2] & (rdptr[1:0]==2'd2)  &  (q2off | f0_shift_2B) ) |
+                              ( ~qwen[2] & (rdptr[1:0]==2'd1)  &  (q2off | f1_shift_2B) ) |
+                              ( ~qwen[2] & (rdptr[1:0]==2'd0)  &   q2off                );
+   // qNoff is a sticky offset bit for entry N: it clears when the entry is written (qwen[N]),
+   assign q1off_in          = ( ~qwen[1] & (rdptr[1:0]==2'd1)  &  (q1off | f0_shift_2B) ) |
+                              ( ~qwen[1] & (rdptr[1:0]==2'd0)  &  (q1off | f1_shift_2B) ) |
+                              ( ~qwen[1] & (rdptr[1:0]==2'd2)  &   q1off                );
+   // sets on f0_shift_2B while N is the entry at rdptr, or on f1_shift_2B while N is the entry after rdptr
+   assign q0off_in          = ( ~qwen[0] & (rdptr[1:0]==2'd0)  &  (q0off | f0_shift_2B) ) |
+                              ( ~qwen[0] & (rdptr[1:0]==2'd2)  &  (q0off | f1_shift_2B) ) |
+                              ( ~qwen[0] & (rdptr[1:0]==2'd1)  &   q0off                );
+
+   // q0ptr: offset bit of the entry at rdptr
+   // q1ptr: offset bit of the entry after rdptr (mod 3)
+   assign q0ptr              = ( (rdptr[1:0]==2'b00) & q0off ) |
+                               ( (rdptr[1:0]==2'b01) & q1off ) |
+                               ( (rdptr[1:0]==2'b10) & q2off );
+
+   assign q1ptr              = ( (rdptr[1:0]==2'b00) & q1off ) |
+                               ( (rdptr[1:0]==2'b01) & q2off ) |
+                               ( (rdptr[1:0]==2'b10) & q0off );
+   // qNsel = {ptr,~ptr}: bit 0 selects the unshifted view, bit 1 the upper-halfword view (used by the q*final / brdata*final muxes)
+   assign q0sel[1:0]         = {q0ptr,~q0ptr};
+
+   assign q1sel[1:0]         = {q1ptr,~q1ptr};
+
+   // end new queue control logic
+
+
+   // misc data that is associated with each fetch buffer
+   // BTB enabled: {fault type, predicted target, predicted offset, fetch GHR}; BTB disabled: fault type only
+   if(pt.BTB_ENABLE==1)
+     assign misc_data_in[MHI:0] = {
+
+                                    ic_access_fault_type_f[1:0],
+                                    ifu_bp_btb_target_f[31:1],
+                                    ifu_bp_poffset_f[11:0],
+                                    ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0]
+                                    };
+   else
+     assign misc_data_in[MHI:0] = {
+                                    ic_access_fault_type_f[1:0]
+                                    };
+   // rotate the three stored entries by the read pointer:
+   // misc0eff is the entry at rdptr, misc1eff is the entry after it (mod 3)
+   assign {misc1eff[MHI:0],misc0eff[MHI:0]} = (({MSIZE*2{qren[0]}} & {misc1[MHI:0],misc0[MHI:0]}) |
+                                               ({MSIZE*2{qren[1]}} & {misc2[MHI:0],misc1[MHI:0]}) |
+                                               ({MSIZE*2{qren[2]}} & {misc0[MHI:0],misc2[MHI:0]}));
+
+   if(pt.BTB_ENABLE==1) begin // unpack the rotated misc words in the same field order used to build misc_data_in
+   assign {
+            f1ictype[1:0],
+            f1prett[31:1],
+            f1poffset[11:0],
+            f1fghr[pt.BHT_GHR_SIZE-1:0]
+            } = misc1eff[MHI:0];
+
+   assign {
+            f0ictype[1:0],
+            f0prett[31:1],
+            f0poffset[11:0],
+            f0fghr[pt.BHT_GHR_SIZE-1:0]
+            } = misc0eff[MHI:0];
+   // brdata packing: one group of per-halfword flags for halfword 1, then the same group for halfword 0
+      if(pt.BTB_FULLYA) begin
+         assign brdata_in[BRDATA_SIZE-1:0] = {
+                                               ifu_bp_fa_index_f[1], iccm_rd_ecc_double_err[1],ic_access_fault_f[1],ifu_bp_hist1_f[1],ifu_bp_hist0_f[1],ifu_bp_pc4_f[1],ifu_bp_way_f[1],ifu_bp_valid_f[1],ifu_bp_ret_f[1],
+                                               ifu_bp_fa_index_f[0], iccm_rd_ecc_double_err[0],ic_access_fault_f[0],ifu_bp_hist1_f[0],ifu_bp_hist0_f[0],ifu_bp_pc4_f[0],ifu_bp_way_f[0],ifu_bp_valid_f[0],ifu_bp_ret_f[0]
+                                               };
+         assign {f0index[1],f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1],
+                 f0index[0],f0dbecc[0],f0icaf[0],f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0]} = brdata0final[BRDATA_SIZE-1:0];
+         // f1* fields use the same layout, taken from the entry after rdptr
+         assign {f1index[1],f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1],
+                 f1index[0],f1dbecc[0],f1icaf[0],f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0]} = brdata1final[BRDATA_SIZE-1:0];
+
+      end
+      else begin
+         assign brdata_in[BRDATA_SIZE-1:0] = {
+                                               iccm_rd_ecc_double_err[1],ic_access_fault_f[1],ifu_bp_hist1_f[1],ifu_bp_hist0_f[1],ifu_bp_pc4_f[1],ifu_bp_way_f[1],ifu_bp_valid_f[1],ifu_bp_ret_f[1],
+                                               iccm_rd_ecc_double_err[0],ic_access_fault_f[0],ifu_bp_hist1_f[0],ifu_bp_hist0_f[0],ifu_bp_pc4_f[0],ifu_bp_way_f[0],ifu_bp_valid_f[0],ifu_bp_ret_f[0]
+                                               };
+         assign {f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1],
+                 f0dbecc[0],f0icaf[0],f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0]} = brdata0final[BRDATA_SIZE-1:0];
+
+         assign {f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1],
+                 f1dbecc[0],f1icaf[0],f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0]} = brdata1final[BRDATA_SIZE-1:0];
+
+      end
+
+   // brdata1eff/brdata0eff: stored entries rotated by rdptr (brdata0eff = entry at rdptr, brdata1eff = the one after)
+   // brdata*final: q*sel[0] passes both halfword slices through unchanged; q*sel[1] moves the upper
+   // BRDATA_WIDTH-bit slice down to the lower position and zero-fills the upper slice
+   assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) |
+                                                                       ({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) |
+                                                                       ({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]}));
+
+   assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & {                     brdata0eff[2*BRDATA_WIDTH-1:0]}) |
+                                           ({BRDATA_SIZE{q0sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
+
+   assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & {                     brdata1eff[2*BRDATA_WIDTH-1:0]}) |
+                                           ({BRDATA_SIZE{q1sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
+
+   end // if (pt.BTB_ENABLE==1)
+   else begin
+      assign {
+               f1ictype[1:0]
+               } = misc1eff[MHI:0];
+
+      assign {
+               f0ictype[1:0]
+               } = misc0eff[MHI:0];
+      // BTB disabled: only iccm_rd_ecc_double_err and ic_access_fault_f are carried, one pair per halfword
+      assign brdata_in[BRDATA_SIZE-1:0] = {
+                                            iccm_rd_ecc_double_err[1],ic_access_fault_f[1],
+                                            iccm_rd_ecc_double_err[0],ic_access_fault_f[0]
+                                            };
+      assign {f0dbecc[1],f0icaf[1],
+              f0dbecc[0],f0icaf[0]} = brdata0final[BRDATA_SIZE-1:0];
+
+      assign {f1dbecc[1],f1icaf[1],
+              f1dbecc[0],f1icaf[0]} = brdata1final[BRDATA_SIZE-1:0];
+      // same rotate-by-rdptr and halfword-offset select as in the BTB-enabled branch
+      assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) |
+                                                                          ({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) |
+                                                                       ({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]}));
+
+      assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & {                     brdata0eff[2*BRDATA_WIDTH-1:0]}) |
+                                              ({BRDATA_SIZE{q0sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
+
+      assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & {                     brdata1eff[2*BRDATA_WIDTH-1:0]}) |
+                                              ({BRDATA_SIZE{q1sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
+
+   end // else: !if(pt.BTB_ENABLE==1)
+
+
+   // possible states of { sf0_valid, sf1_valid, f2_valid }
+   //
+   // 000    if->f0
+   // 100    if->f1
+   // 101    illegal
+   // 010    if->f1, f1->f0
+   // 110    if->f2
+   // 001    if->f1, f2->f0
+   // 011    if->f2, f2->f1, f1->f0
+   // 111   !if,     no shift
+
+   assign f2_valid           =  f2val[0];
+   assign sf1_valid          =  sf1val[0];
+   assign sf0_valid          =  sf0val[0];
+   // the "s" prefix marks a valid vector after this cycle's shift has been applied (sf1val/sf0val are computed below)
+   // interface to fetch
+   // a buffer is consumed when it is valid now but has no valid halfword left after the shift
+   assign consume_fb0        = ~sf0val[0] & f0val[0];
+
+   assign consume_fb1        = ~sf1val[0] & f1val[0];
+   // consume1: only f0 drains; consume2: f0 and f1 both drain; neither is signalled during exu_flush_final
+   assign ifu_fb_consume1    =  consume_fb0 & ~consume_fb1 & ~exu_flush_final;
+   assign ifu_fb_consume2    =  consume_fb0 &  consume_fb1 & ~exu_flush_final;
+   // only bit 0 of ifu_fetch_val is used as the incoming-fetch valid
+   assign ifvalid            =  ifu_fetch_val[0];
+   // shift_fX_fY: buffer X moves down into Y; fetch_to_fX: where the incoming fetch lands
+   assign shift_f1_f0        =  ~sf0_valid &  sf1_valid;
+   assign shift_f2_f0        =  ~sf0_valid & ~sf1_valid &  f2_valid;
+   assign shift_f2_f1        =  ~sf0_valid &  sf1_valid &  f2_valid;
+
+   assign fetch_to_f0        =  ~sf0_valid & ~sf1_valid & ~f2_valid & ifvalid;
+
+   assign fetch_to_f1        = (~sf0_valid & ~sf1_valid &  f2_valid & ifvalid)  |
+                               (~sf0_valid &  sf1_valid & ~f2_valid & ifvalid)  |
+                               ( sf0_valid & ~sf1_valid & ~f2_valid & ifvalid);
+
+   assign fetch_to_f2        = (~sf0_valid &  sf1_valid &  f2_valid & ifvalid)  |
+                               ( sf0_valid &  sf1_valid & ~f2_valid & ifvalid);
+
+   // f2val: load from the fetch, hold if f2 neither loads nor shifts down, otherwise (shift or flush) clear
+   assign f2val_in[1:0]      = ({2{ fetch_to_f2 &                               ~exu_flush_final}} & ifu_fetch_val[1:0]) |
+                               ({2{~fetch_to_f2 & ~shift_f2_f1 & ~shift_f2_f0 & ~exu_flush_final}} & f2val[1:0]        );
+
+   // sf1val: f1 valids after an f1_shift_2B (bit 1 moves down to bit 0)
+   assign sf1val[1:0]        = ({2{ f1_shift_2B}} & {1'b0,f1val[1]}) |
+                               ({2{~f1_shift_2B}} & f1val[1:0]     );
+   // f1val: load from the fetch, take f2, or hold the shifted value; clears when f1 moves to f0 or on flush
+   assign f1val_in[1:0]      = ({2{ fetch_to_f1                               & ~exu_flush_final}} & ifu_fetch_val[1:0]) |
+                               ({2{                shift_f2_f1                & ~exu_flush_final}} & f2val[1:0]        ) |
+                               ({2{~fetch_to_f1 & ~shift_f2_f1 & ~shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0]       );
+
+
+   // sf0val: f0 valids after the shift: shift_2B moves bit 1 down to bit 0, shift_4B clears both bits
+   assign sf0val[1:0]        = ({2{ shift_2B            }} & {1'b0,f0val[1]}) |
+                               ({2{~shift_2B & ~shift_4B}} & f0val[1:0]);
+   // f0val: load from the fetch, take f2 or the shifted f1, or hold the shifted f0; flush clears
+   assign f0val_in[1:0]      = ({2{fetch_to_f0                                & ~exu_flush_final}} & ifu_fetch_val[1:0]) |
+                               ({2{                shift_f2_f0                & ~exu_flush_final}} & f2val[1:0]        ) |
+                               ({2{                               shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0]       ) |
+                               ({2{~fetch_to_f0 & ~shift_f2_f0 & ~shift_f1_f0 & ~exu_flush_final}} & sf0val[1:0]       );
+
+   assign {q1eff[31:0],q0eff[31:0]} = (({64{qren[0]}} & {q1[31:0],q0[31:0]}) |
+                                       ({64{qren[1]}} & {q2[31:0],q1[31:0]}) |
+                                       ({64{qren[2]}} & {q0[31:0],q2[31:0]}));
+   // q0final: entry at rdptr as stored, or its upper halfword moved down (zero-extended) when q0sel[1]
+   assign q0final[31:0]      = ({32{q0sel[0]}} & {      q0eff[31:0]}) |
+                               ({32{q0sel[1]}} & {16'b0,q0eff[31:16]});
+   // q1final: the lower or upper halfword of the entry after rdptr, chosen by q1sel
+   assign q1final[15:0]      = ({16{q1sel[0]}} & q1eff[15:0] ) |
+                               ({16{q1sel[1]}} & q1eff[31:16]);
+   logic [31:1] q0pceff, q0pcfinal;
+   logic [31:1] q1pceff;
+   // fetch PCs rotated by rdptr, same scheme as the data above
+   assign {q1pceff[31:1],q0pceff[31:1]} = (({62{qren[0]}} & {q1pc[31:1],q0pc[31:1]}) |
+                                           ({62{qren[1]}} & {q2pc[31:1],q1pc[31:1]}) |
+                                           ({62{qren[2]}} & {q0pc[31:1],q2pc[31:1]}));
+
+   // q0pcfinal: entry PC, plus one halfword (+1 on bits [31:1]) when the entry's offset bit is set
+   assign q0pcfinal[31:1]      = ({31{q0sel[0]}} & ( q0pceff[31:1])) |
+                                 ({31{q0sel[1]}} & ( q0pceff[31:1] + 31'd1));
+   // align*: with two valid halfwords in f0 use f0 for both; with only one, pair f0's halfword 0 with f1's halfword 0
+   assign aligndata[31:0]    = ({32{ f0val[1]           }} & {q0final[31:0]}) |
+                               ({32{~f0val[1] & f0val[0]}} & {q1final[15:0],q0final[15:0]});
+
+   assign alignval[1:0]      = ({ 2{ f0val[1]           }} & {2'b11}) |
+                               ({ 2{~f0val[1] & f0val[0]}} & {f1val[0],1'b1});
+
+   assign alignicaf[1:0]    = ({ 2{ f0val[1]           }} &  f0icaf[1:0]          ) |
+                              ({ 2{~f0val[1] & f0val[0]}} & {f1icaf[0],f0icaf[0]});
+
+   assign aligndbecc[1:0]    = ({ 2{ f0val[1]           }} &  f0dbecc[1:0]          ) |
+                              ({ 2{~f0val[1] & f0val[0]}} & {f1dbecc[0],f0dbecc[0]});
+
+   if (pt.BTB_ENABLE==1) begin
+   // each align* vector below uses the same f0/f1 halfword selection as alignicaf/aligndbecc
+   // for branch prediction
+
+   assign alignbrend[1:0]    = ({ 2{ f0val[1]           }} &  f0brend[1:0]          ) |
+                               ({ 2{~f0val[1] & f0val[0]}} & {f1brend[0],f0brend[0]});
+
+   assign alignpc4[1:0]      = ({ 2{ f0val[1]           }} &  f0pc4[1:0]        ) |
+                               ({ 2{~f0val[1] & f0val[0]}} & {f1pc4[0],f0pc4[0]});
+
+      if(pt.BTB_FULLYA) begin
+         assign alignindex[0]      = f0index[0];
+         assign alignindex[1]      = f0val[1] ? f0index[1] : f1index[0];
+      end
+
+   assign alignret[1:0]      = ({ 2{ f0val[1]           }} &  f0ret[1:0]        ) |
+                               ({ 2{~f0val[1] & f0val[0]}} & {f1ret[0],f0ret[0]});
+
+   assign alignway[1:0]      = ({ 2{ f0val[1]           }} &  f0way[1:0]        ) |
+                               ({ 2{~f0val[1] & f0val[0]}} & {f1way[0],f0way[0]});
+
+   assign alignhist1[1:0]    = ({ 2{ f0val[1]           }} &  f0hist1[1:0]          ) |
+                               ({ 2{~f0val[1] & f0val[0]}} & {f1hist1[0],f0hist1[0]});
+
+   assign alignhist0[1:0]    = ({ 2{ f0val[1]           }} &  f0hist0[1:0]          ) |
+                               ({ 2{~f0val[1] & f0val[0]}} & {f1hist0[0],f0hist0[0]});
+   // secondpc: PC of the second aligned halfword (q0pceff + one halfword when both come from f0, else the f1 entry's PC)
+   assign secondpc[31:1]     = ({31{ f0val[1]           }} &  (q0pceff[31:1] + 31'd1)) |
+                               // you need the base pc for 2nd one only (4B max, 2B for the 1st and 2B for the 2nd)
+                               ({31{~f0val[1] & f0val[0]}} &   q1pceff[31:1]      );
+
+   // firstpc: PC of the first aligned halfword
+   assign firstpc[31:1]      =  q0pcfinal[31:1];
+      end // if (pt.BTB_ENABLE==1)
+
+   assign alignfromf1[1]     =      ~f0val[1] & f0val[0];
+   // alignfromf1[1]: the second aligned halfword was taken from f1 (f0 had only one valid halfword)
+
+   assign ifu_i0_pc[31:1]    =  q0pcfinal[31:1];
+
+
+   assign ifu_i0_pc4         =  first4B;
+
+
+   assign ifu_i0_cinst[15:0] = aligndata[15:0];
+   // first4B: low two bits of the first halfword are 2'b11 (RISC-V 32-bit encoding); otherwise treated as a 16-bit instruction
+   assign first4B            = (aligndata[1:0] == 2'b11);
+   assign first2B            = ~first4B;
+   // i0 is valid once the last halfword it needs is valid
+   assign ifu_i0_valid       = (first4B & alignval[1]) |
+                               (first2B & alignval[0]);
+
+   // inst access fault on any byte of inst results in access fault for the inst
+   assign ifu_i0_icaf        = (first4B & (|alignicaf[1:0])) |
+                               (first2B &   alignicaf[0]   );
+   // fault type comes from f1 only when a 4B instruction spans f0/f1 and its first halfword has neither icaf nor dbecc
+   assign ifu_i0_icaf_type[1:0] = (first4B & ~f0val[1] & f0val[0] & ~alignicaf[0] & ~aligndbecc[0]) ? f1ictype[1:0] : f0ictype[1:0];
+
+
+   assign icaf_eff[1:0] = alignicaf[1:0] | aligndbecc[1:0];
+   // icaf_second: for a 4B instruction, the fault (icaf or dbecc) is in the second halfword only
+   assign ifu_i0_icaf_second = first4B & ~icaf_eff[0] & icaf_eff[1];
+
+   assign ifu_i0_dbecc       = (first4B & (|aligndbecc[1:0])) |
+                               (first2B &   aligndbecc[0]   );
+
+
+   assign ifirst[31:0]       =  aligndata[31:0];
+
+   // 4B instructions pass through unchanged; 2B instructions use the decompressor output; zero when not valid
+   assign ifu_i0_instr[31:0] = ({32{first4B & alignval[1]}} & ifirst[31:0]) |
+                               ({32{first2B & alignval[0]}} & uncompress0[31:0]);
+
+if(pt.BTB_ENABLE==1) begin
+   // BTB index hashes for the PCs of the first and second aligned halfwords
+   // if you detect br does not start on instruction boundary
+
+   eb1_btb_addr_hash #(.pt(pt)) firsthash (.pc(firstpc [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]),
+                                            .hash(firstpc_hash [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
+   eb1_btb_addr_hash #(.pt(pt)) secondhash(.pc(secondpc[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]),
+                                            .hash(secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
+   // tag: with BTB_FULLYA the PC is assigned directly (no hash module); otherwise a folded or plain tag hash is used
+   if(pt.BTB_FULLYA) begin
+      assign firstbrtag_hash = firstpc;
+      assign secondbrtag_hash = secondpc;
+   end
+   else begin
+      if(pt.BTB_BTAG_FOLD) begin : btbfold
+         eb1_btb_tag_hash_fold #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
+                                                         .hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0]));
+         eb1_btb_tag_hash_fold #(.pt(pt)) second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
+                                                         .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]));
+      end
+      else begin
+         eb1_btb_tag_hash #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
+                                                    .hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0]));
+         eb1_btb_tag_hash #(.pt(pt)) second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
+                                                    .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]));
+      end
+   end // else: !if(pt.BTB_FULLYA)
+
+
+   // start_indexing - the prediction is indexed by the PC of the halfword where the branch ends, not where it starts,
+   // so for a 4B instruction whose branch ends in the second halfword the index/tag/bank come from secondpc (first PC + 2)
+
+   always_comb begin
+
+      i0_brp                 = '0;
+      // start error: the first halfword of a valid 4B instruction is flagged as a branch end
+      i0_br_start_error      = (first4B & alignval[1] & alignbrend[0]);
+
+      i0_brp.valid           = (first2B & alignbrend[0]) |
+                               (first4B & alignbrend[1]) |
+                                i0_br_start_error;
+      // per-halfword prediction flags: halfword 0 for a 2B instruction, halfword 1 for a 4B instruction
+      i0_brp_pc4             = (first2B & alignpc4[0]) |
+                               (first4B & alignpc4[1]);
+
+      i0_brp.ret             = (first2B & alignret[0]) |
+                               (first4B & alignret[1]);
+
+      i0_brp.way             = (first2B | alignbrend[0])  ?  alignway[0]  :  alignway[1];
+
+      i0_brp.hist[1]         = (first2B & alignhist1[0]) |
+                               (first4B & alignhist1[1]);
+
+      i0_brp.hist[0]         = (first2B & alignhist0[0]) |
+                               (first4B & alignhist0[1]);
+      // i0_ends_f1: a 4B instruction whose second halfword came from f1, so the f1 entry's misc data applies
+      i0_ends_f1             =  first4B & alignfromf1[1];
+
+      i0_brp.toffset[11:0]   = (i0_ends_f1)  ?  f1poffset[11:0]  :  f0poffset[11:0];
+
+      i0_brp.prett[31:1]     = (i0_ends_f1)  ?  f1prett[31:1]    :  f0prett[31:1];
+
+      i0_brp.br_start_error  = i0_br_start_error;
+
+      i0_brp.bank            = (first2B | alignbrend[0])  ?  firstpc[1]  :  secondpc[1];
+      // br_error: the predicted size (pc4) disagrees with the size decoded from the instruction bits
+      i0_brp.br_error        = (i0_brp.valid &  i0_brp_pc4 &  first2B) |
+                               (i0_brp.valid & ~i0_brp_pc4 &  first4B);
+
+      if(pt.BTB_FULLYA)
+        ifu_i0_fa_index = (first2B | alignbrend[0])  ?  alignindex[0]  :  alignindex[1];
+      else
+        ifu_i0_fa_index = '0;
+
+ end
+
+   // index and tag follow the halfword where the branch ends; fghr follows the buffer that holds the instruction's last halfword
+   assign ifu_i0_bp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = (first2B | alignbrend[0])  ?  firstpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  :
+                                                                                         secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
+
+   assign ifu_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0]            = (i0_ends_f1)               ?  f1fghr[pt.BHT_GHR_SIZE-1:0]  :
+                                                                                         f0fghr[pt.BHT_GHR_SIZE-1:0];
+
+   assign ifu_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0]           = (first2B | alignbrend[0])  ?  firstbrtag_hash[pt.BTB_BTAG_SIZE-1:0]  :
+                                                                                         secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0];
+end
+else begin // BTB disabled: tie off the branch-prediction outputs
+   assign i0_brp = '0;
+   assign ifu_i0_bp_index = '0;
+   assign ifu_i0_bp_fghr = '0;
+   assign ifu_i0_bp_btag = '0;
+end // else: !if(pt.BTB_ENABLE==1)
+
+   // decompress
+
+   // quiet inputs for 4B inst
+   eb1_ifu_compress_ctl compress0 (.din((first2B) ? aligndata[15:0] : '0), .dout(uncompress0[31:0]));
+
+
+   // the aligner advances when dec_i0_decode_d is set and no error_stall is pending
+   assign i0_shift           =  dec_i0_decode_d & ~error_stall;
+
+   assign ifu_pmu_instr_aligned = i0_shift;
+
+
+   // compute how many bytes are being shifted from f0
+
+   assign shift_2B           =  i0_shift & first2B;
+
+   assign shift_4B           =  i0_shift & first4B;
+
+   // exact equations for the queue logic
+   assign f0_shift_2B        = (shift_2B & f0val[0]            ) |
+                               (shift_4B & f0val[0] & ~f0val[1]);
+   // f0_shift_2B: f0 gives up exactly one halfword (a 2B shift, or a 4B shift when f0 held only one valid halfword)
+
+   // f0 valid states
+   //     11
+   //     10
+   //     00
+   // f1_shift_2B: a 4B shift with only one valid halfword in f0, so f1 supplies and gives up the other halfword
+   assign f1_shift_2B        =  f0val[0] & ~f0val[1] & shift_4B;
+
+
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu_bp_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_bp_ctl.sv
new file mode 100644
index 0000000..eae8a4e
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_bp_ctl.sv
@@ -0,0 +1,884 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+
+//********************************************************************************
+// Function: Branch predictor
+// Comments:
+//
+//
+//  Bank3 : Bank2 : Bank1 : Bank0
+//  FA  C       8       4       0
+//********************************************************************************
+
+module eb1_ifu_bp_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+
+   input logic clk,
+   input logic rst_l,
+
+   input logic ic_hit_f,      // Icache hit, enables F address capture
+
+   input logic [31:1] ifc_fetch_addr_f, // look up btb address
+   input logic ifc_fetch_req_f,  // F1 valid
+
+   input eb1_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP commit update packet, includes errors
+   input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
+
+   input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative btb error index
+
+   input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F
+   input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches
+
+   input logic dec_tlu_bpred_disable, // disable all branch prediction
+
+   input eb1_predict_pkt_t  exu_mp_pkt, // mispredict packet
+
+   input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr)
+   input logic [pt.BHT_GHR_SIZE-1:0]  exu_mp_fghr,                    // Mispredict fghr
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  exu_mp_index,         // Mispredict index
+   input logic [pt.BTB_BTAG_SIZE-1:0]  exu_mp_btag,                   // Mispredict btag
+
+   input logic exu_flush_final, // all flushes
+
+   output logic ifu_bp_hit_taken_f, // btb hit, select target
+   output logic [31:1] ifu_bp_btb_target_f, //  predicted target PC
+   output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified
+
+   output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr
+
+   output logic [1:0] ifu_bp_way_f, // way
+   output logic [1:0] ifu_bp_ret_f, // predicted ret
+   output logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified
+   output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified
+   output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified
+   output logic [1:0] ifu_bp_valid_f, // branch valid, right justified
+   output logic [11:0] ifu_bp_poffset_f, // predicted target
+
+   output logic [1:0] [$clog2(pt.BTB_SIZE)-1:0]    ifu_bp_fa_index_f, // predicted branch index (fully associative option)
+
+   input  logic       scan_mode
+   );
+
+
+   localparam BTB_DWIDTH =  pt.BTB_TOFFSET_SIZE+pt.BTB_BTAG_SIZE+5;
+   localparam BTB_DWIDTH_TOP =  int'(pt.BTB_TOFFSET_SIZE)+int'(pt.BTB_BTAG_SIZE)+4;
+   localparam BTB_FA_INDEX = $clog2(pt.BTB_SIZE)-1;
+   localparam FA_CMP_LOWER = $clog2(pt.ICACHE_LN_SZ);
+   localparam FA_TAG_END_UPPER= 5+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER)-1; // must cast to int or vcs build fails
+   localparam FA_TAG_START_LOWER = 3+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER);
+   localparam FA_TAG_END_LOWER = 5+int'(pt.BTB_TOFFSET_SIZE);
+
+   localparam TAG_START=BTB_DWIDTH-1;
+   localparam PC4=4;
+   localparam BOFF=3;
+   localparam CALL=2;
+   localparam RET=1;
+   localparam BV=0;
+
+   localparam LRU_SIZE=pt.BTB_ARRAY_DEPTH;
+   localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16 ) ? 16 : pt.BHT_ARRAY_DEPTH;
+   localparam NUM_BHT_LOOP_INNER_HI =  (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI;
+   localparam NUM_BHT_LOOP_OUTER_LO =  (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO;
+   localparam BHT_NO_ADDR_MATCH  = ( pt.BHT_ARRAY_DEPTH <= 16 );
+
+
+   logic exu_mp_valid_write;
+   logic exu_mp_ataken;
+   logic exu_mp_valid; // conditional branch mispredict
+   logic exu_mp_boffset; // branch offset
+   logic exu_mp_pc4; // branch is a 4B inst
+   logic exu_mp_call; // branch is a call inst
+   logic exu_mp_ret; // branch is a ret inst
+   logic exu_mp_ja; // branch is a jump always
+   logic [1:0] exu_mp_hist; // new history
+   logic [11:0] exu_mp_tgt; // target offset
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
+   logic                                   dec_tlu_br0_v_wb; // WB stage history update
+   logic [1:0]                             dec_tlu_br0_hist_wb; // new history
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr
+   logic                                   dec_tlu_br0_error_wb; // error; invalidate bank
+   logic                                   dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg
+   logic [pt.BHT_GHR_SIZE-1:0]             exu_i0_br_fghr_wb;
+
+   logic use_mp_way, use_mp_way_p1;
+   logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in;
+   logic [pt.RET_STACK_SIZE-1:0]        rsenable;
+
+
+   logic [11:0]       btb_rd_tgt_f;
+   logic              btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f;
+   logic [1:1]        bp_total_branch_offset_f;
+
+   logic [31:1]       bp_btb_target_adder_f;
+   logic [31:1]       bp_rs_call_target_f;
+   logic              rs_push, rs_pop, rs_hold;
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f;
+   logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f;
+   logic [BTB_DWIDTH-1:0]        btb_wr_data;
+   logic               btb_wr_en_way0, btb_wr_en_way1;
+
+
+   logic               dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb;
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]        btb_error_addr_wb;
+   logic branch_error_collision_f, fetch_mp_collision_f, branch_error_collision_p1_f, fetch_mp_collision_p1_f;
+
+   logic  branch_error_bank_conflict_f;
+   logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr;
+   logic [1:0] num_valids;
+   logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns,
+                        fetch_wrindex_dec, fetch_wrindex_p1_dec, fetch_wrlru_b0, fetch_wrlru_p1_b0,
+                        mp_wrindex_dec, mp_wrlru_b0;
+   logic                btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f;
+   logic  tag_match_way0_f, tag_match_way1_f;
+   logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f;
+   logic [1:0] bht_valid_f, bht_force_taken_f;
+
+   logic leak_one_f, leak_one_f_d1;
+
+   logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0]  btb_bank0_rd_data_way0_out ;
+
+   logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0]  btb_bank0_rd_data_way1_out ;
+
+   logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_f ;
+   logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_f ;
+
+   logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_p1_f ;
+   logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_p1_f ;
+
+   logic                [BTB_DWIDTH-1:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f;
+
+   logic                                         final_h;
+   logic                                         btb_fg_crossing_f;
+   logic                                         middle_of_bank;
+
+
+   logic [1:0]                                   bht_vbank0_rd_data_f, bht_vbank1_rd_data_f;
+   logic                                         branch_error_bank_conflict_p1_f;
+   logic                                         tag_match_way0_p1_f, tag_match_way1_p1_f;
+
+   logic [1:0]                                   btb_vlru_rd_f, fetch_start_f, tag_match_vway1_expanded_f, tag_match_way0_expanded_p1_f, tag_match_way1_expanded_p1_f;
+   logic [31:2] fetch_addr_p1_f;
+
+
+   logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb;
+   logic                [BTB_DWIDTH-1:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f;
+
+   logic                [BTB_DWIDTH-1:0] btb_bank0o_rd_data_f;
+
+   logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f;
+
+
+    logic [1:0]                                  bht_bank0_rd_data_f;
+    logic [1:0]                                  bht_bank1_rd_data_f;
+    logic [1:0]                                  bht_bank0_rd_data_p1_f;
+   genvar                                        j, i;
+
+   // Unpack the EXU mispredict packet; a mispredict is ignored while leak_one_f is set
+   assign exu_mp_valid   = ~leak_one_f & exu_mp_pkt.misp;  // conditional branch mispredict
+   assign exu_mp_ataken  = exu_mp_pkt.ataken;
+   assign exu_mp_way     = exu_mp_pkt.way;                 // replacement way
+   assign exu_mp_pc4     = exu_mp_pkt.pc4;                 // branch is a 4-byte instruction
+   assign exu_mp_boffset = exu_mp_pkt.boffset;             // branch offset
+   assign exu_mp_call    = exu_mp_pkt.pcall;               // branch is a call
+   assign exu_mp_ret     = exu_mp_pkt.pret;                // branch is a return
+   assign exu_mp_ja      = exu_mp_pkt.pja;                 // branch is a jump-always
+   assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0];         // new history
+   assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0];     // target offset
+   assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];  // BTB/BHT address
+   // Unpack dec_tlu_br0_r_pkt (plus the exu_i0_br_*_r index and GHR inputs) into the *_wb signals
+   assign dec_tlu_br0_v_wb           = dec_tlu_br0_r_pkt.valid;
+   assign dec_tlu_br0_way_wb         = dec_tlu_br0_r_pkt.way;
+   assign dec_tlu_br0_middle_wb      = dec_tlu_br0_r_pkt.middle;
+   assign dec_tlu_br0_error_wb       = dec_tlu_br0_r_pkt.br_error;
+   assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error;
+   assign dec_tlu_br0_hist_wb[1:0]   = dec_tlu_br0_r_pkt.hist[1:0];
+   assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
+   assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0];
+
+
+
+
+   // ----------------------------------------------------------------------
+   // READ
+   // ----------------------------------------------------------------------
+
+   // hash the incoming fetch PC into the BTB read index btb_rd_addr_f, first guess at hashing algorithm
+   eb1_btb_addr_hash #(.pt(pt)) f1hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
+
+   // "p1" lookup: the same hash applied to fetch address [31:2] + 1, giving btb_rd_addr_p1_f
+   assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1;
+   eb1_btb_addr_hash #(.pt(pt)) f1hash_p1(.pc(fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
+   // one-hot select between the two virtual banks: [0] when bht_dir_f[0] is set, [1] otherwise
+   assign btb_sel_f[1] = ~bht_dir_f[0];
+   assign btb_sel_f[0] =  bht_dir_f[0];
+   // one-hot fetch start position taken from fetch address bit 1: [1] when set, [0] when clear
+   assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]};
+
+   // Errors colliding with fetches must kill the btb/bht hit.
+   // A collision is a reported branch error whose BTB index equals the fetch (or p1) read index.
+   assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
+   assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
+   // NOTE(review): dec_tlu_error_wb is already part of the collision terms, so the AND below is redundant
+   assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb;
+   assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb;
+
+   // set on leak one, hold until next flush without leak one
+   assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb);
+
+logic exu_flush_final_d1;
+
+ if(!pt.BTB_FULLYA) begin // not fully associative: 2-way set-associative hit detection and LRU
+   assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
+                                    exu_mp_valid & ifc_fetch_req_f &
+                                    (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
+                                    ); // valid mispredict has the same tag and index as the current fetch
+   assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
+                                       exu_mp_valid & ifc_fetch_req_f &
+                                       (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
+                                       ); // same check against the p1 tag and index
+   // 2-way SA, figure out the way hit and mux accordingly
+   assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
+                              ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
+   // NOTE(review): way 0 and way 1 share the kill term (dec_tlu_way_wb & conflict) - confirm way 0 is not meant to use ~dec_tlu_way_wb
+   assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
+                              ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
+   // same valid/tag/kill checks against the p1 read data and p1 tag
+   assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
+                              ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
+
+   assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
+                              ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
+
+
+   // Both ways could hit, use the offset bit to reorder
+   // expanded[1] = match with (BOFF ^ PC4) set, expanded[0] = match with (BOFF ^ PC4) clear
+   assign tag_match_way0_expanded_f[1:0] = {tag_match_way0_f &  (btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]),
+                                             tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4])};
+
+   assign tag_match_way1_expanded_f[1:0] = {tag_match_way1_f &  (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]),
+                                             tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4])};
+
+   assign tag_match_way0_expanded_p1_f[1:0] = {tag_match_way0_p1_f &  (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]),
+                                                tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4])};
+
+   assign tag_match_way1_expanded_p1_f[1:0] = {tag_match_way1_p1_f &  (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]),
+                                                tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4])};
+   // a position hits when either way matches there
+   assign wayhit_f[1:0] = tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0];
+   assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0];
+   // per-position read data (AND-OR of the two ways): "o" is picked by expanded[1], "e" by expanded[0]
+   assign btb_bank0o_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
+                                                            ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[1]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
+   assign btb_bank0e_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
+                                                            ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[0]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
+   // NOTE(review): masks are 17+pt.BTB_BTAG_SIZE wide while the data is BTB_DWIDTH wide - presumably equal, confirm
+   assign btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0]) |
+                                                               ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_p1_f[0]}} & btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0]) );
+
+   // virtual bank order
+   // fetch_start_f[0]: vbank0 = e, vbank1 = o;  fetch_start_f[1]: vbank0 = o, vbank1 = e of the p1 lookup
+   assign btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} &  btb_bank0e_rd_data_f[BTB_DWIDTH-1:0]) |
+                                                            ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} &  btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) );
+   assign btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} &  btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) |
+                                                            ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} &  btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0]) );
+   // reported way per position: the way-1 match, or the LRU read bit where nothing hit
+   assign way_raw[1:0] =  tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]);
+
+   // --------------------------------------------------------------------------------
+   // --------------------------------------------------------------------------------
+   // update lru
+   // mp
+
+   // create a onehot lru write vector
+   assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
+
+   // fetch
+   assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
+   assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
+   // the mispredict LRU write vector is only active when exu_mp_valid
+   assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}};
+
+   // rows touched by neither the mispredict write nor the fetch write keep their LRU bit
+   assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0];
+
+   // Forward the mp lru information to the fetch, avoids multiple way hits later
+   assign use_mp_way = fetch_mp_collision_f;
+   assign use_mp_way_p1 = fetch_mp_collision_p1_f;
+   // the fetch updates the LRU only when some position hit on a fetch request outside leak-one
+   assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f;
+
+
+   assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] &
+                                         {LRU_SIZE{lru_update_valid_f}};
+   assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] &
+                                         {LRU_SIZE{lru_update_valid_f}};
+   // next LRU: held bits | mispredict row set when ~exu_mp_way | fetch rows set on a way-0 tag match
+   assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) |
+                                          (mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) |
+                                          (fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f}}) |
+                                          (fetch_wrlru_p1_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_p1_f}}) );
+   // NOTE(review): hold does not exclude the p1 row, so a p1 hit can set but never clear that row's bit - confirm intended
+
+   // LRU bit at the read index, overridden by the flopped mispredict way on a fetch/mispredict collision
+   assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
+
+   assign btb_lru_rd_p1_f = use_mp_way_p1 ? exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
+
+   // rotated
+   assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) |
+                                  ({2{fetch_start_f[1]}} & {btb_lru_rd_p1_f, btb_lru_rd_f}));
+
+   assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) |
+                                               ({2{fetch_start_f[1]}} & {tag_match_way1_expanded_p1_f[0], tag_match_way1_expanded_f[1]}) );
+
+   // LRU state register, enabled on a fetch request or a valid mispredict
+   rvdffe #(LRU_SIZE) btb_lru_ff (.*, .en(ifc_fetch_req_f | exu_mp_valid),
+                                    .din(btb_lru_b0_ns[(LRU_SIZE)-1:0]),
+                                   .dout(btb_lru_b0_f[(LRU_SIZE)-1:0]));
+
+ end // if (!pt.BTB_FULLYA)
+   // End-of-cache-line detect: eoc_mask is 0 only when every bit of
+   // ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:1] is set; it is the mask applied to vwayhit_f[1].
+   logic eoc_near, eoc_mask;
+   assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3];
+   assign eoc_mask = ~(eoc_near & (&ifc_fetch_addr_f[2:1]));
+
+
+
+   // --------------------------------------------------------------------------------
+   // --------------------------------------------------------------------------------
+
+   // mux out critical hit bank for pc computation
+   // This is only useful for the first taken branch in the fetch group
+   logic [16:1] btb_sel_data_f; // bits [16:1] of the selected virtual bank entry
+   // field extraction: [16:5] target offset, [4] pc4, [2] call, [1] ret
+   assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5];
+   assign btb_rd_pc4_f       = btb_sel_data_f[4];
+   assign btb_rd_call_f      = btb_sel_data_f[2];
+   assign btb_rd_ret_f       = btb_sel_data_f[1];
+   // AND-OR mux of the two virtual banks using the one-hot btb_sel_f
+   assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) |
+                                    ({16{btb_sel_f[0]}} & btb_vbank0_rd_data_f[16:1]) );
+
+
+   logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw;
+
+   // a valid taken target needs to kill the next fetch as we compute the target address
+   assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & ~dec_tlu_bpred_disable;
+
+   // CALL/RET entry bits force a taken prediction (jump-always entries are written with both bits set)
+   // Don't put calls/rets/ja in the predictor, force the bht taken instead
+   assign bht_force_taken_f[1:0] = {(btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]),
+                                     (btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET])};
+
+
+   // taken and valid, otherwise, branch errors must clear the bht
+   assign bht_valid_f[1:0] = vwayhit_f[1:0];
+   // rotate the raw BHT bank reads into virtual bank order using fetch_start_f
+   assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) |
+                                         ({2{fetch_start_f[1]}} & bht_bank1_rd_data_f[1:0]) );
+
+   assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) |
+                                         ({2{fetch_start_f[1]}} & bht_bank0_rd_data_p1_f[1:0]) );
+
+   // direction per position: (forced taken or BHT bit [1]) and a valid hit at that position
+   assign bht_dir_f[1:0] = {(bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1],
+                             (bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0]};
+   // 1 when there is no taken hit, or when the taken hit is selected from position 1 (btb_sel_f[1])
+   assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f;
+
+
+
+
+   // Branch prediction info is sent with the 2byte lane associated with the end of the branch.
+   // Cases
+   //       BANK1         BANK0
+   // -------------------------------
+   // |      :       |      :       |
+   // -------------------------------
+   //         <------------>                   : PC4 branch, offset, should be in B1 (indicated on [2])
+   //                <------------>            : PC4 branch, no offset, indicate PC4, VALID, HIST on [1]
+   //                       <------------>     : PC4 branch, offset, indicate PC4, VALID, HIST on [0]
+   //                <------>                  : PC2 branch, offset, indicate VALID, HIST on [1]
+   //                       <------>           : PC2 branch, no offset, indicate VALID, HIST on [0]
+   //
+
+
+
+   assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1], // BHT bit [1] per position, forced to 1 for call/ret entries
+                                                      bht_vbank0_rd_data_f[1]};
+   // BHT bit [0] per position, passed through unmodified
+   assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0],
+                            bht_vbank0_rd_data_f[0]};
+
+   // pc4 flag of the entry, reported only where the position hit
+   assign pc4_raw[1:0] = {vwayhit_f[1] & btb_vbank1_rd_data_f[PC4],
+                          vwayhit_f[0] & btb_vbank0_rd_data_f[PC4]};
+   // return indication: position hit with RET set and CALL clear
+   assign pret_raw[1:0] = {vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET],
+                           vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET]};
+
+   // GHR
+
+
+   // count the valids with masking based on first taken (NOTE(review): no first-taken masking is visible here; bht_valid_f is counted directly)
+   assign num_valids[1:0] = countones(bht_valid_f[1:0]);
+
+   // Note that the following property holds
+   // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0)
+   // Num valid branches   What new GHR must be
+   // 2                    0H
+   // 1                    PH
+   // 0                    PP
+   // H is the direction bit of the position selected by the one-hot btb_sel_f
+   assign final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]);
+   // AND-OR mux on num_valids: shift in {0,H} for two valids, shift in H for one, hold for none
+   assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = (
+                                            ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h2}} & {fghr[pt.BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // 0H
+                                            ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH
+                                            ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]}) ); // PP
+
+   logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr;
+   assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0];
+   // next GHR: flush value when exu_flush_final_d1, else merged value when (ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1), else hold
+   assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) |
+                                         ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1}} & merged_ghr[pt.BHT_GHR_SIZE-1:0]) |
+                                         ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ~(ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1)}} & fghr[pt.BHT_GHR_SIZE-1:0]));
+   // one register holds the GHR together with the delayed exu_flush_final, exu_mp_way and leak_one_f bits
+   rvdffie #(.WIDTH(pt.BHT_GHR_SIZE+3),.OVERRIDE(1)) fetchghr (.*,
+                                          .din ({exu_flush_final, exu_mp_way, leak_one_f, fghr_ns[pt.BHT_GHR_SIZE-1:0]}),
+                                          .dout({exu_flush_final_d1, exu_mp_way_f, leak_one_f_d1, fghr[pt.BHT_GHR_SIZE-1:0]}));
+
+   assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = fghr[pt.BHT_GHR_SIZE-1:0];
+
+
+   assign ifu_bp_way_f[1:0] = way_raw[1:0]; // per-position outputs below are direct copies of the *_raw vectors
+   assign ifu_bp_hist1_f[1:0]    = hist1_raw[1:0];
+   assign ifu_bp_hist0_f[1:0]    = hist0_raw[1:0];
+   assign ifu_bp_pc4_f[1:0]     = pc4_raw[1:0];
+   // the valid output is the hit vector, forced to 0 when dec_tlu_bpred_disable is set
+   assign ifu_bp_valid_f[1:0]   = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}};
+   assign ifu_bp_ret_f[1:0]     = pret_raw[1:0];
+
+
+   // compute target
+   // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk
+
+//  .i 5
+//  .o 3
+//  .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f
+//  .ob bloc_f[1] bloc_f[0] use_fa_plus
+//  .type fr
+//
+//
+//  ## rotdir[1:0]  fs   pc4  off fapl
+//    -1            01 -  01  0
+//    10            01 -  10  0
+//
+//    -1            10 -  10  0
+//    10            10 0  01  1
+//    10            10 1  01  0
+logic [1:0] bloc_f; // equations below implement the truth table in the comment above
+logic use_fa_plus;
+assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0]
+     & fetch_start_f[0]); // i.e. bht_dir_f[0] XOR fetch_start_f[0]
+assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | (~bht_dir_f[0]
+     & ~fetch_start_f[0]); // i.e. bht_dir_f[0] XNOR fetch_start_f[0]
+assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f); // selects fetch_addr_p1_f as the adder base
+
+
+
+   // position 0 selected, fetch starts at position 0 and the entry is pc4: the adder base is the prior fetch address
+    assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f;
+   // bit [1] of the branch PC handed to both branch adders
+   assign bp_total_branch_offset_f =  bloc_f[1] ^ btb_rd_pc4_f;
+   // prior fetch address: captured on a fetch request with ic_hit_f and no taken prediction
+   logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior;
+   rvdfflie #(.WIDTH(30), .LEFT(19)) faddrf_ff (.*, .en(ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), .din(ifc_fetch_addr_f[31:2]), .dout(ifc_fetch_adder_prior[31:2]));
+
+
+   assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0];
+   // adder base: fetch address + 1 (use_fa_plus), prior fetch address (fg crossing), else the current fetch address
+   assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) |
+                                   ({30{ btb_fg_crossing_f}} & ifc_fetch_adder_prior[31:2]) |
+                                   ({30{~btb_fg_crossing_f & ~use_fa_plus}} & ifc_fetch_addr_f[31:2]));
+   // branch adder (rvbradder, defined elsewhere) combines the base PC with the BTB target offset
+   rvbradder predtgt_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
+                         .offset(btb_rd_tgt_f[11:0]),
+                         .dout(bp_btb_target_adder_f[31:1])
+                         );
+   // mux in the return stack address here for a predicted return assuming the RS is valid, quiet (all zeros) if no prediction
+   assign ifu_bp_btb_target_f[31:1] = (({31{btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0] & ifu_bp_hit_taken_f}} & rets_out[0][31:1]) |
+                                       ({31{~(btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0]) & ifu_bp_hit_taken_f}} & bp_btb_target_adder_f[31:1]) );
+
+
+   // ----------------------------------------------------------------------
+   // Return Stack
+   // ----------------------------------------------------------------------
+
+   rvbradder rs_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}), // address that is pushed on a call
+                    .offset({11'b0,  ~btb_rd_pc4_f}),
+                    .dout(bp_rs_call_target_f[31:1])
+                         );
+   // push on a taken call (ret bit clear), pop on a taken ret (call bit clear), otherwise hold
+   assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f);
+   assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f);
+   assign rs_hold = ~rs_push & ~rs_pop;
+
+
+
+   // Fetch based (bit 0 is a valid)
+   assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid
+                               ({32{rs_pop}}  & rets_out[1][31:0]) );
+   // entry 0 is written on either a push or a pop
+   assign rsenable[0] = ~rs_hold;
+   // entries above 0: a push copies from entry i-1, a pop copies from entry i+1; entry 0 uses the assigns above
+   for (i=0; i<pt.RET_STACK_SIZE; i++) begin : retstack
+      // NOTE(review): assumes pt.RET_STACK_SIZE >= 2; with 1 the i==0 pass would take the last-entry branch and read rets_out[i-1] - confirm
+      // for the last entry in the stack, we don't have a pop position
+      if(i==pt.RET_STACK_SIZE-1) begin
+         assign rets_in[i][31:0] = rets_out[i-1][31:0];
+         assign rsenable[i] = rs_push;
+      end
+      else if(i>0) begin
+        assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) |
+                                    ({32{rs_pop}}  & rets_out[i+1][31:0]) );
+         assign rsenable[i] = rs_push | rs_pop;
+      end
+      rvdffe #(32) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:0]), .dout(rets_out[i][31:0]));
+
+   end : retstack
+
+   // ----------------------------------------------------------------------
+   // WRITE
+   // ----------------------------------------------------------------------
+
+
+   assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb; // either kind of reported branch error
+   // the error index is dec_tlu_br0_addr_wb
+   assign btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
+
+   assign dec_tlu_way_wb = dec_tlu_br0_way_wb;
+   // valid bit written into the BTB entry: a valid mispredict with no error being reported
+   assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb;
+
+   assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0];
+
+   if(!pt.BTB_FULLYA) begin
+      // read-tag hashes of the fetch and p1 addresses; the folded variant takes 2*BTB_BTAG_SIZE PC bits above BTB_ADDR_HI, the other 3*BTB_BTAG_SIZE
+      if(pt.BTB_BTAG_FOLD) begin : btbfold
+         eb1_btb_tag_hash_fold #(.pt(pt)) rdtagf  (.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
+                                                    .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
+         eb1_btb_tag_hash_fold #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
+                                                    .pc({fetch_addr_p1_f[ pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
+      end
+      else begin
+         eb1_btb_tag_hash #(.pt(pt)) rdtagf(.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
+                                             .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
+         eb1_btb_tag_hash #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
+                                               .pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
+      end
+      // way write enables: a mispredict write into exu_mp_way when no error is reported, or an error write into dec_tlu_way_wb
+      assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
+                                ({{~dec_tlu_way_wb & dec_tlu_error_wb}}));
+
+      assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
+                                ({{dec_tlu_way_wb & dec_tlu_error_wb}}));
+      assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; // an error takes priority for the write index
+
+      // rotate the hits into virtual order by fetch_start_f, then mask position 1 with eoc_mask
+      assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) |
+                                ({2{fetch_start_f[1]}} & {wayhit_p1_f[0], wayhit_f[1]})) & {eoc_mask, 1'b1};
+
+   end // if (!pt.BTB_FULLYA)
+
+   assign btb_wr_data[BTB_DWIDTH-1:0] = {btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], exu_mp_tgt[pt.BTB_TOFFSET_SIZE-1:0], exu_mp_pc4, exu_mp_boffset,
+                                                exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ; // jump-always sets both the call and ret bits
+   // NOTE(review): exu_mp_tgt is driven as [11:0] earlier in this module but sliced with pt.BTB_TOFFSET_SIZE here - presumably 12, confirm
+   assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid; // BTB write needs an actually-taken mispredict with the packet valid bit clear
+   logic [1:0] bht_wr_data0, bht_wr_data2;
+   logic [1:0] bht_wr_en0, bht_wr_en2;
+   // BHT write enables are one-hot on position: [1] when "middle" is set, [0] otherwise; calls/rets/ja never write from the mispredict port
+   assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset;
+   assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank};
+   assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb} ;
+
+   // Experiments show this is the best priority scheme for same bank/index writes at the same time.
+   assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority
+   assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority
+
+
+
+   logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2;
+   // each BHT address is a hash of a BTB index with a GHR value: mispredict write, *_wb write, fetch read and p1 read
+   logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f;
+   eb1_btb_ghr_hash #(.pt(pt)) mpghrhs  (.hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
+   eb1_btb_ghr_hash #(.pt(pt)) br0ghrhs (.hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
+   eb1_btb_ghr_hash #(.pt(pt)) fghrhs (.hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
+   eb1_btb_ghr_hash #(.pt(pt)) fghrhs_p1 (.hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
+   // plain renames of the hashed values onto the BHT read/write address signals
+   assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
+   assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
+   assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
+   assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
+
+
+   // ----------------------------------------------------------------------
+   // Structures. Using FLOPS
+   // ----------------------------------------------------------------------
+   // BTB
+   // Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[11:0], pc4, boffset, call, ret, valid
+
+   if(!pt.BTB_FULLYA) begin
+      // one register per way for each of the LRU_SIZE rows; a row is written when btb_wr_addr equals its index and the way enable is set
+      for (j=0 ; j<LRU_SIZE ; j++) begin : BTB_FLOPS
+         // Way 0
+         rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way0 (.*,
+                    .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way0)),
+                    .din        (btb_wr_data[BTB_DWIDTH-1:0]),
+                    .dout       (btb_bank0_rd_data_way0_out[j]));
+
+         // Way 1
+         rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way1 (.*,
+                    .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way1)),
+                    .din        (btb_wr_data[BTB_DWIDTH-1:0]),
+                    .dout       (btb_bank0_rd_data_way1_out[j]));
+
+      end
+
+    // combinational read mux: both ways are read at the fetch index and at the p1 index; outputs default to 0 when no row matches
+    always_comb begin : BTB_rd_mux
+        btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] = '0 ;
+        btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] = '0 ;
+        btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] = '0 ;
+        btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] = '0 ;
+        // row select for the fetch index (loop index is cast to the index width before the compare)
+        for (int j=0; j< LRU_SIZE; j++) begin
+          if (btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
+
+           btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way0_out[j];
+           btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way1_out[j];
+
+          end
+        end
+        for (int j=0; j< LRU_SIZE; j++) begin // row select for the p1 index
+          if (btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
+
+           btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way0_out[j];
+           btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way1_out[j];
+
+          end
+        end
+    end
+end // if (!pt.BTB_FULLYA)
+
+
+
+
+
+      if(pt.BTB_FULLYA) begin : fa
+
+         logic found1, hit0, hit1;
+         logic btb_used_reset, write_used;
+         logic [$clog2(pt.BTB_SIZE)-1:0] btb_fa_wr_addr0, hit0_index, hit1_index;
+
+         logic [pt.BTB_SIZE-1:0]         btb_tag_hit, btb_offset_0, btb_offset_1, btb_used_ns, btb_used,
+                                         wr0_en, btb_upper_hit;
+         logic [pt.BTB_SIZE-1:0][BTB_DWIDTH-1:0] btbdata;
+
+         // Fully Associative tag hash uses bits 31:3. Bits 2:1 are the offset bits used for the 4 tag comp banks
+         // Full tag used to speed up lookup. There is one 31:3 cmp per entry, and 4 2:1 cmps per entry.
+
+         logic [FA_CMP_LOWER-1:1]  ifc_fetch_addr_p1_f;
+
+
+         assign ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1] = ifc_fetch_addr_f[FA_CMP_LOWER-1:1] + 1'b1;
+
+         assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == ifc_fetch_addr_f[31:1]) &
+                                      exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
+         assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == {ifc_fetch_addr_f[31:FA_CMP_LOWER], ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]}) &
+                                      exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
+
+      always_comb begin   // fully-associative BTB lookup: compares every entry against both fetch addresses and picks a write slot
+         btb_vbank0_rd_data_f = '0;
+         btb_vbank1_rd_data_f = '0;
+         btb_tag_hit = '0;
+         btb_upper_hit = '0;
+         btb_offset_0 = '0;
+         btb_offset_1 = '0;
+         // search state: first-hit flags/indices and the free-slot search all start cleared
+         found1 = 1'b0;
+         hit0 = 1'b0;
+         hit1 = 1'b0;
+         hit0_index = '0;
+         hit1_index = '0;
+         btb_fa_wr_addr0 = '0;
+         // walk the entries from index 0 upward; the "first" decisions below therefore favour the lowest index
+         for(int i=0; i<pt.BTB_SIZE; i++) begin
+            // Break the cmp into chunks for lower area.
+            // Chunk1: FA 31:6 or 31:5 depending on icache line size
+            // Chunk2: FA 5:1 or 4:1 depending on icache line size
+            btb_upper_hit[i] = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];   // bit 0 gates the match (presumably the valid bit - confirm); an entry being written this cycle never hits
+            btb_offset_0[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
+            btb_offset_1[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
+            // record only the first entry that matches each of the two fetch addresses
+            if(~hit0) begin
+               if(btb_offset_0[i]) begin
+                  hit0_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
+                  // hit unless we are also writing this entry at the same time
+                  hit0 = 1'b1;
+               end
+            end
+            if(~hit1) begin
+               if(btb_offset_1[i]) begin
+                  hit1_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
+                  hit1 = 1'b1;
+               end
+            end
+
+            // the data mux below is not guarded by hit0/hit1, so the last matching entry would win
+            // Mux out the 2 potential branches
+            if(btb_offset_0[i] == 1'b1)
+              btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_f ? btb_wr_data : btbdata[i];
+            if(btb_offset_1[i] == 1'b1)
+              btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_p1_f ? btb_wr_data : btbdata[i];
+            // if no used bit is clear, btb_fa_wr_addr0 keeps its default of 0
+            // find the first zero from bit zero in the used vector, this is the write address
+            if(~found1) begin
+               if(~btb_used[i]) begin
+                  btb_fa_wr_addr0[BTB_FA_INDEX:0] = i[BTB_FA_INDEX:0];
+                  found1 = 1'b1;
+               end
+            end
+         end
+      end // always_comb begin
+
+`ifdef RV_ASSERT_ON
+   btbhitonehot0: assert #0 ($onehot0(btb_offset_0));
+   btbhitonehot1: assert #0 ($onehot0(btb_offset_1));
+`endif
+
+   assign vwayhit_f[1:0] = {hit1, hit0} & {eoc_mask, 1'b1};
+
+   // way bit is reused as the predicted bit
+   assign way_raw[1:0] =  vwayhit_f[1:0] | {fetch_mp_collision_p1_f, fetch_mp_collision_f};
+
+   for (j=0 ; j<pt.BTB_SIZE ; j++) begin : BTB_FAFLOPS
+      // entry j is written when the free-slot address equals j on exu_mp_valid_write with ~exu_mp_pkt.way, or when dec_tlu_error_wb names index j
+      assign wr0_en[j] = ((btb_fa_wr_addr0[BTB_FA_INDEX:0] == j) & (exu_mp_valid_write & ~exu_mp_pkt.way)) |
+                         ((dec_fa_error_index == j) & dec_tlu_error_wb);
+      // one enabled BTB_DWIDTH-wide register per entry; every entry sees the same btb_wr_data bus
+      rvdffe #(BTB_DWIDTH) btb_fa (.*, .clk(clk),
+                                   .en  (wr0_en[j]),
+                                   .din (btb_wr_data[BTB_DWIDTH-1:0]),
+                                   .dout(btbdata[j]));
+   end // block: BTB_FAFLOPS
+
+   assign ifu_bp_fa_index_f[1] = hit1 ? hit1_index : '0;
+   assign ifu_bp_fa_index_f[0] = hit0 ? hit0_index : '0;
+   // btb_used holds one bit per entry; once every bit is set the old contents are dropped and only bits set this cycle survive
+   assign btb_used_reset = &btb_used[pt.BTB_SIZE-1:0];
+   assign btb_used_ns[pt.BTB_SIZE-1:0] = ({pt.BTB_SIZE{vwayhit_f[1]}} & (32'b1 << hit1_index[BTB_FA_INDEX:0])) |
+                                         ({pt.BTB_SIZE{vwayhit_f[0]}} & (32'b1 << hit0_index[BTB_FA_INDEX:0])) |
+                                         ({pt.BTB_SIZE{exu_mp_valid_write & ~exu_mp_pkt.way & ~dec_tlu_error_wb}} & (32'b1 << btb_fa_wr_addr0[BTB_FA_INDEX:0])) |
+                                         ({pt.BTB_SIZE{btb_used_reset}} & {pt.BTB_SIZE{1'b0}}) |
+                                         ({pt.BTB_SIZE{~btb_used_reset & dec_tlu_error_wb}} & (btb_used[pt.BTB_SIZE-1:0] & ~(32'b1 << dec_fa_error_index[BTB_FA_INDEX:0]))) |
+                                         (~{pt.BTB_SIZE{btb_used_reset | dec_tlu_error_wb}} & btb_used[pt.BTB_SIZE-1:0]);
+   // the used vector is only clocked when one of these events can change it
+   assign write_used = btb_used_reset | ifu_bp_hit_taken_f | exu_mp_valid_write | dec_tlu_error_wb;
+
+
+   rvdffe #(pt.BTB_SIZE) btb_usedf (.*, .clk(clk),
+                    .en  (write_used),
+                    .din (btb_used_ns[pt.BTB_SIZE-1:0]),
+                    .dout(btb_used[pt.BTB_SIZE-1:0]));
+
+end // block: fa
+
+
+   //-----------------------------------------------------------------------------
+   // BHT
+   // 2 bit Entry -> direction, strength
+   //
+   //-----------------------------------------------------------------------------
+
+   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0]      bht_bank_wr_data ;   // [bank][group][entry in group] 2-bit write data
+   logic [1:0] [pt.BHT_ARRAY_DEPTH-1:0] [1:0]                bht_bank_rd_data_out ;   // [bank][entry] flop outputs
+   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0]                 bht_bank_clken ;   // [bank][group] clock-gate enable
+   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0]                 bht_bank_clk   ;   // [bank][group] gated clock
+   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0]           bht_bank_sel   ;   // [bank][group][entry in group] write select
+   // two banks (i); each bank is split into groups (k) of NUM_BHT_LOOP two-bit entries (j); write ports 0 and 2 can both target a bank
+   for ( i=0; i<2; i++) begin : BANKS
+     for (genvar k=0 ; k < (pt.BHT_ARRAY_DEPTH)/NUM_BHT_LOOP ; k++) begin : BHT_CLK_GROUP
+     assign bht_bank_clken[i][k]  = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) |  BHT_NO_ADDR_MATCH)) |
+                                    (bht_wr_en2[i] & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) |  BHT_NO_ADDR_MATCH));
+`ifndef RV_FPGA_OPTIMIZE
+     rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); // ifndef RV_FPGA_OPTIMIZE
+`endif
+     // NOTE(review): with RV_FPGA_OPTIMIZE defined nothing in this section drives bht_bank_clk; presumably rvdffs_fpga then uses rawclk/clken - confirm
+     for (j=0 ; j<NUM_BHT_LOOP ; j++) begin : BHT_FLOPS
+       assign   bht_bank_sel[i][k][j]    = (bht_wr_en0[i] & (bht_wr_addr0[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
+                                           (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ;
+       // write port 2 wins when it targets this entry; otherwise the port 0 data is presented
+       assign bht_bank_wr_data[i][k][j]  = (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI:pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ? bht_wr_data2[1:0] :
+                                                                                                                      bht_wr_data0[1:0]   ;
+       // NOTE(review): the flat output index below uses the literal 16 as the group stride; this presumably matches
+       // NUM_BHT_LOOP whenever more than one group exists - confirm against the NUM_BHT_LOOP definition
+          rvdffs_fpga #(2) bht_bank (.*,
+                    .clk        (bht_bank_clk[i][k]),
+                    .en         (bht_bank_sel[i][k][j]),
+                    .rawclk     (clk),
+                    .clken      (bht_bank_sel[i][k][j]),
+                    .din        (bht_bank_wr_data[i][k][j]),
+                    .dout       (bht_bank_rd_data_out[i][(16*k)+j]));
+
+      end // block: BHT_FLOPS
+   end // block: BHT_CLK_GROUP
+ end // block: BANKS
+
+    // BHT read mux: selects the entry addressed by bht_rd_addr_f (both banks) and by bht_rd_addr_p1_f (bank 0 only)
+    always_comb begin : BHT_rd_mux
+     bht_bank0_rd_data_p1_f[1:0] = 2'b00;   // outputs read as zero unless an entry index matches below
+     bht_bank1_rd_data_f[1:0]    = 2'b00;
+     bht_bank0_rd_data_f[1:0]    = 2'b00;
+     for (int entry = 0; entry < pt.BHT_ARRAY_DEPTH; entry++) begin
+       if ((pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(entry) == bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]) begin
+         bht_bank1_rd_data_f[1:0] = bht_bank_rd_data_out[1][entry];
+         bht_bank0_rd_data_f[1:0] = bht_bank_rd_data_out[0][entry];
+       end
+       if ((pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(entry) == bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])
+         bht_bank0_rd_data_p1_f[1:0] = bht_bank_rd_data_out[0][entry];
+     end
+    end // block: BHT_rd_mux
+
+
+   // countones: number of set bits in a 2-bit vector (result is 0, 1 or 2)
+   function [1:0] countones (input [1:0] valid);
+      logic [1:0] bit_lo, bit_hi;
+      begin
+         bit_lo    = {1'b0, valid[0]};   // zero-extend each bit to the result width
+         bit_hi    = {1'b0, valid[1]};
+         countones = bit_hi + bit_lo;    // the sum is at most 2, so two bits never overflow
+      end
+   endfunction
+endmodule // eb1_ifu_bp_ctl
+
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu_compress_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_compress_ctl.sv
new file mode 100644
index 0000000..a55e30f
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_compress_ctl.sv
@@ -0,0 +1,383 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+
+// Purpose: expand a 16-bit RISC-V compressed instruction (din) into its 32-bit equivalent (dout).
+// Purely combinational; dout is forced to all zeros when the 16-bit encoding does not decode as legal.
+module eb1_ifu_compress_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input  logic [15:0] din,        // 16-bit   compressed instruction
+   output logic [31:0] dout        // 32-bit uncompressed instruction
+   );
+
+
+   logic               legal;      // 1 when i[15:0] is a legal compressed encoding (espresso equation at the end of the module)
+
+   logic [15:0]  i;                // local alias of din
+
+   logic [31:0]  o,l1,l2,l3;       // o: espresso base pattern; l1, l2, l3: successive merge stages (registers, immediates, branch/store immediates)
+
+
+   assign i[15:0] = din[15:0];
+
+
+   logic [4:0]   rs2d,rdd,rdpd,rs2pd;   // candidate 5-bit register numbers extracted from i
+   // field-select controls; each one is driven by an espresso-generated equation further down
+   logic rdrd;
+   logic rdrs1;
+   logic rs2rs2;
+   logic rdprd;
+   logic rdprs1;
+   logic rs2prs2;
+   logic rs2prd;
+   logic uimm9_2;
+   logic ulwimm6_2;
+   logic ulwspimm7_2;
+   logic rdeq2;
+   logic rdeq1;
+   logic rs1eq2;
+   logic sbroffset8_1;
+   logic simm9_4;
+   logic simm5_0;
+   logic sjaloffset11_1;
+   logic sluimm17_12;
+   logic uimm5_0;
+   logic uswimm6_2;
+   logic uswspimm7_2;
+
+
+
+   // form the opcodes
+
+   // formats
+   //
+   // c.add rd 11:7 rs2  6:2
+   // c.and rdp 9:7 rs2p 4:2
+   //
+   // add rs2 24:20 rs1 19:15  rd 11:7
+
+   assign rs2d[4:0] = i[6:2];
+
+   assign rdd[4:0] = i[11:7];
+
+   assign rdpd[4:0] = {2'b01, i[9:7]};     // 3-bit register field mapped onto register numbers 8..15
+
+   assign rs2pd[4:0] = {2'b01, i[4:2]};    // 3-bit register field mapped onto register numbers 8..15
+
+
+
+   // merge in rd, rs1, rs2
+   // l1 is o with the selected register numbers ORed into the rd [11:7], rs1 [19:15] and rs2 [24:20] fields
+
+   // rd
+   assign l1[6:0] = o[6:0];
+
+   assign l1[11:7] = o[11:7] |
+                     ({5{rdrd}} & rdd[4:0]) |
+                     ({5{rdprd}} & rdpd[4:0]) |
+                     ({5{rs2prd}} & rs2pd[4:0]) |
+                     ({5{rdeq1}} & 5'd1) |
+                     ({5{rdeq2}} & 5'd2);
+
+
+   // rs1
+   assign l1[14:12] = o[14:12];
+   assign l1[19:15] = o[19:15] |
+                      ({5{rdrs1}} & rdd[4:0]) |
+                      ({5{rdprs1}} & rdpd[4:0]) |
+                      ({5{rs1eq2}} & 5'd2);
+
+
+   // rs2
+   assign l1[24:20] = o[24:20] |
+                      ({5{rs2rs2}} & rs2d[4:0]) |
+                      ({5{rs2prs2}} & rs2pd[4:0]);
+
+   assign l1[31:25] = o[31:25];
+
+   logic [5:0] simm5d;
+   logic [9:2] uimm9d;
+
+   logic [9:4] simm9d;
+   logic [6:2] ulwimm6d;
+   logic [7:2] ulwspimm7d;
+   logic [5:0] uimm5d;
+   logic [20:1] sjald;
+
+   logic [31:12] sluimmd;
+
+   // merge in immediates + jal offset
+   // each *d vector is declared with the immediate bit positions it holds (e.g. uimm9d[9:2]); the bits are gathered from i in that order
+   assign simm5d[5:0] = { i[12], i[6:2] };
+
+   assign uimm9d[9:2] = { i[10:7], i[12:11], i[5], i[6] };
+
+   assign simm9d[9:4] = { i[12], i[4:3], i[5], i[2], i[6] };
+
+   assign ulwimm6d[6:2] = { i[5], i[12:10], i[6] };
+
+   assign ulwspimm7d[7:2] = { i[3:2], i[12], i[6:4] };
+
+   assign uimm5d[5:0] = { i[12], i[6:2] };
+
+   assign sjald[11:1] = { i[12], i[8], i[10:9], i[6], i[7], i[2], i[11], i[5:4], i[3] };
+
+   assign sjald[20:12] =  {9{i[12]}};                 // sign extension from i[12]
+
+
+
+   assign sluimmd[31:12] = { {15{i[12]}}, i[6:2] };   // sign extension from i[12]
+
+   // l2[31:20]: the selected immediate is padded or sign-extended to 12 bits and ORed into bits 31:20
+   assign l2[31:20] = ( l1[31:20] ) |
+                      ( {12{simm5_0}}   &  {{7{simm5d[5]}},simm5d[4:0]} ) |
+                      ( {12{uimm9_2}}   &  {2'b0,uimm9d[9:2],2'b0} ) |
+                      ( {12{simm9_4}}   &   {{3{simm9d[9]}},simm9d[8:4],4'b0} ) |
+                      ( {12{ulwimm6_2}} &   {5'b0,ulwimm6d[6:2],2'b0} ) |
+                      ( {12{ulwspimm7_2}}  & {4'b0,ulwspimm7d[7:2],2'b0} ) |
+                      ( {12{uimm5_0}}      &    {6'b0,uimm5d[5:0]} ) |
+                      ( {12{sjaloffset11_1}} &  {sjald[20],sjald[10:1],sjald[11]} ) |
+                      ( {12{sluimm17_12}}    &  sluimmd[31:20] );
+
+
+   // l2[19:12]: only the jal offset and the lui-style immediate reach these bits
+   assign l2[19:12] = ( l1[19:12] ) |
+                      ( {8{sjaloffset11_1}} & sjald[19:12] ) |
+                      ( {8{sluimm17_12}} & sluimmd[19:12] );
+
+
+   assign l2[11:0] = l1[11:0];
+
+
+   // merge in branch offset and store immediates
+   // these immediates are split between bits 31:25 and bits 11:7 of the 32-bit result
+   logic [8:1]   sbr8d;
+   logic [6:2]   uswimm6d;
+   logic [7:2]   uswspimm7d;
+
+
+   assign sbr8d[8:1] =   { i[12], i[6], i[5], i[2], i[11], i[10], i[4], i[3] };
+
+   assign uswimm6d[6:2] = { i[5], i[12:10], i[6] };
+
+   assign uswspimm7d[7:2] = { i[8:7], i[12:9] };
+
+   assign l3[31:25] = ( l2[31:25] ) |
+                      ( {7{sbroffset8_1}} & { {4{sbr8d[8]}},sbr8d[7:5] } ) |
+                      ( {7{uswimm6_2}}    & { 5'b0, uswimm6d[6:5] } ) |
+                      ( {7{uswspimm7_2}} & { 4'b0, uswspimm7d[7:5] } );
+
+
+   assign l3[24:12] = l2[24:12];
+
+   assign l3[11:7] = ( l2[11:7] ) |
+                     ( {5{sbroffset8_1}} & { sbr8d[4:1], sbr8d[8] } ) |
+                     ( {5{uswimm6_2}} & { uswimm6d[4:2], 2'b0 } ) |
+                     ( {5{uswspimm7_2}} & { uswspimm7d[4:2], 2'b0 } );
+
+   assign l3[6:0] = l2[6:0];
+
+   // an encoding that is not legal produces an all-zero output
+   assign dout[31:0] = l3[31:0] & {32{legal}};
+
+
+// The file "cdecode" is a human-readable file that defines all of the compressed instruction decodes; it is part of the git repo.
+// Modify "cdecode" as needed.
+
+// to generate all the equations below from "cdecode" except legal equation:
+
+// 1) coredecode -in cdecode > cdecode.e
+
+// 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations
+
+// to generate the legal (16b compressed instruction is legal)  equation below:
+
+// 1) coredecode -in cdecode -legal > clegal.e
+
+// 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation
+
+
+
+
+// NOTE: everything from here to endmodule is tool-generated sum-of-products logic; regenerate it rather than editing by hand
+// espresso decodes
+assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | (
+    !i[15]&i[14]&i[10]&i[0]) | (!i[14]&i[4]&i[1]) | (!i[15]&i[14]&i[9]
+    &i[0]) | (!i[14]&i[3]&i[1]) | (!i[15]&i[14]&!i[8]&i[0]) | (!i[14]
+    &i[2]&i[1]) | (!i[15]&i[14]&i[7]&i[0]) | (!i[15]&i[1]) | (!i[15]
+    &!i[13]&i[0]);
+
+assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14]
+    &i[12]&i[9]&i[1]) | (!i[14]&i[12]&i[8]&i[1]) | (!i[14]&i[12]&i[7]
+    &i[1]) | (!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
+    &i[12]&i[6]&i[1]) | (!i[14]&i[12]&i[5]&i[1]) | (!i[14]&i[12]&i[4]
+    &i[1]) | (!i[14]&i[12]&i[3]&i[1]) | (!i[14]&i[12]&i[2]&i[1]) | (
+    !i[15]&!i[14]&!i[13]&i[0]) | (!i[15]&!i[14]&i[1]);
+
+assign rs2rs2 = (i[15]&i[6]&i[1]) | (i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | (
+    i[15]&i[3]&i[1]) | (i[15]&i[2]&i[1]) | (i[15]&i[14]&i[1]);
+
+assign rdprd = (i[15]&!i[14]&!i[13]&i[0]);
+
+assign rdprs1 = (i[15]&!i[13]&i[0]) | (i[15]&i[14]&i[0]) | (i[14]&!i[1]&!i[0]);
+
+assign rs2prs2 = (i[15]&!i[14]&!i[13]&i[11]&i[10]&i[0]) | (i[15]&!i[1]&!i[0]);
+
+assign rs2prd = (!i[15]&!i[1]&!i[0]);
+
+assign uimm9_2 = (!i[14]&!i[1]&!i[0]);
+
+assign ulwimm6_2 = (!i[15]&i[14]&!i[1]&!i[0]);
+
+assign ulwspimm7_2 = (!i[15]&i[14]&i[1]);
+
+assign rdeq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]);
+
+assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
+    &i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
+    &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
+    &!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
+    &!i[2]&i[1]) | (!i[15]&!i[14]&i[13]);
+
+assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14]
+    &i[1]) | (!i[14]&!i[1]&!i[0]);
+
+assign sbroffset8_1 = (i[15]&i[14]&i[0]);
+
+assign simm9_4 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]);
+
+assign simm5_0 = (!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (!i[15]&!i[13]&i[0]);
+
+assign sjaloffset11_1 = (!i[14]&i[13]);
+
+assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | (
+    !i[15]&i[14]&i[13]&i[9]) | (!i[15]&i[14]&i[13]&i[10]) | (!i[15]&i[14]
+    &i[13]&i[11]);
+
+assign uimm5_0 = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (!i[15]&!i[14]&i[1]);
+
+assign uswimm6_2 = (i[15]&!i[1]&!i[0]);
+
+assign uswspimm7_2 = (i[15]&i[14]&i[1]);
+
+assign o[31]  = 1'b0;
+
+assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14]
+    &!i[13]&!i[11]&i[10]&i[0]);
+
+assign o[29]  = 1'b0;
+
+assign o[28]  = 1'b0;
+
+assign o[27]  = 1'b0;
+
+assign o[26]  = 1'b0;
+
+assign o[25]  = 1'b0;
+
+assign o[24]  = 1'b0;
+
+assign o[23]  = 1'b0;
+
+assign o[22]  = 1'b0;
+
+assign o[21]  = 1'b0;
+
+assign o[20] = (!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]
+    &!i[3]&!i[2]&i[1]);
+
+assign o[19]  = 1'b0;
+
+assign o[18]  = 1'b0;
+
+assign o[17]  = 1'b0;
+
+assign o[16]  = 1'b0;
+
+assign o[15]  = 1'b0;
+
+assign o[14] = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10]
+    &i[0]) | (i[15]&!i[14]&!i[13]&i[6]&i[0]) | (i[15]&!i[14]&!i[13]&i[5]
+    &i[0]);
+
+assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13]
+    &i[11]&i[6]&i[0]) | (i[14]&!i[0]);
+
+assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11]
+    &i[0]) | (i[15]&!i[14]&!i[13]&!i[10]&i[0]) | (!i[15]&!i[14]&i[1]) | (
+    i[15]&i[14]&i[13]);
+
+assign o[11]  = 1'b0;
+
+assign o[10]  = 1'b0;
+
+assign o[9]  = 1'b0;
+
+assign o[8]  = 1'b0;
+
+assign o[7]  = 1'b0;
+
+assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | (
+    i[15]&i[14]&i[0]);
+
+assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | (
+    i[13]&i[9]) | (i[13]&i[10]) | (i[13]&i[11]) | (!i[14]&i[13]) | (
+    i[15]&i[14]);
+
+assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14]
+    &!i[0]) | (!i[14]&i[6]&!i[0]) | (!i[15]&i[14]&i[0]) | (!i[14]&i[5]
+    &!i[0]) | (!i[14]&i[4]&!i[0]) | (!i[14]&!i[13]&i[0]) | (!i[14]&i[3]
+    &!i[0]) | (!i[14]&i[2]&!i[0]);
+
+assign o[3] = (!i[14]&i[13]);
+
+assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
+    &i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
+    &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
+    &!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
+    &!i[2]&i[1]) | (i[15]&!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]
+    &!i[0]) | (!i[15]&i[13]&!i[8]) | (!i[15]&i[13]&i[7]) | (!i[15]&i[13]
+    &i[9]) | (!i[15]&i[13]&i[10]) | (!i[15]&i[13]&i[11]) | (!i[14]&i[13]);
+
+// 32b instruction has lower two bits 2'b11
+
+assign o[1]  = 1'b1;
+
+assign o[0]  = 1'b1;
+
+assign legal = (!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | (
+    !i[15]&!i[13]&i[11]&!i[1]) | (!i[13]&!i[12]&i[5]&i[1]&!i[0]) | (
+    !i[13]&!i[12]&i[10]&i[1]&!i[0]) | (!i[15]&!i[13]&i[6]&!i[1]) | (
+    i[15]&!i[12]&!i[1]&i[0]) | (!i[13]&!i[12]&i[9]&i[1]&!i[0]) | (!i[12]
+    &i[6]&!i[1]&i[0]) | (!i[15]&!i[13]&i[5]&!i[1]) | (!i[13]&!i[12]&i[8]
+    &i[1]&!i[0]) | (!i[12]&i[5]&!i[1]&i[0]) | (!i[15]&!i[13]&i[10]&!i[1]) | (
+    !i[13]&!i[12]&i[7]&i[1]&!i[0]) | (i[12]&i[11]&!i[10]&!i[1]&i[0]) | (
+    !i[15]&!i[13]&i[9]&!i[1]) | (!i[13]&!i[12]&i[4]&i[1]&!i[0]) | (i[13]
+    &i[12]&!i[1]&i[0]) | (!i[15]&!i[13]&i[8]&!i[1]) | (!i[13]&!i[12]&i[3]
+    &i[1]&!i[0]) | (i[13]&i[4]&!i[1]&i[0]) | (!i[13]&!i[12]&i[2]&i[1]
+    &!i[0]) | (!i[15]&!i[13]&i[7]&!i[1]) | (i[13]&i[3]&!i[1]&i[0]) | (
+    i[13]&i[2]&!i[1]&i[0]) | (i[14]&!i[13]&!i[1]) | (!i[14]&!i[12]&!i[1]
+    &i[0]) | (i[15]&!i[13]&i[12]&i[1]&!i[0]) | (!i[15]&!i[13]&!i[12]&i[1]
+    &!i[0]) | (!i[15]&!i[13]&i[12]&!i[1]) | (i[14]&!i[13]&!i[0]);
+
+
+
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu_ic_mem.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_ic_mem.sv
new file mode 100644
index 0000000..f849bc0
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_ic_mem.sv
@@ -0,0 +1,1458 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+////////////////////////////////////////////////////
+//   ICACHE DATA & TAG MODULE WRAPPER              //
+/////////////////////////////////////////////////////
+module eb1_ifu_ic_mem   // wrapper that instantiates the I-cache tag (eb1_IC_TAG) and data (eb1_IC_DATA) modules
+import eb1_pkg::*;
+ #(
+`include "eb1_param.vh"
+ )
+  (
+      input logic                                   clk,                // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
+      input logic                                   active_clk,         // Clock only while core active. Through two clock headers. For flops without second clock header built in.
+      input logic                                   rst_l,              // reset, active low
+      input logic                                   clk_override,       // Override non-functional clock gating
+      input logic                                   dec_tlu_core_ecc_disable,  // Disable ECC checking
+
+      input logic [31:1]                            ic_rw_addr,         // Read/write address; the tag instance receives [31:3], the data instance [31:1]
+      input logic [pt.ICACHE_NUM_WAYS-1:0]          ic_wr_en  ,         // Which way to write
+      input logic                                   ic_rd_en  ,         // Read enable
+      input logic [pt.ICACHE_INDEX_HI:3]            ic_debug_addr,      // Read/Write address to the Icache.
+      input logic                                   ic_debug_rd_en,     // Icache debug rd
+      input logic                                   ic_debug_wr_en,     // Icache debug wr
+      input logic                                   ic_debug_tag_array, // Debug tag array
+      input logic [pt.ICACHE_NUM_WAYS-1:0]          ic_debug_way,       // Debug way. Rd or Wr.
+      input logic [63:0]                            ic_premux_data,     // Premux data to be muxed with each way of the Icache.
+      input logic                                   ic_sel_premux_data, // Select the pre_muxed data
+
+      input  logic [pt.ICACHE_BANKS_WAY-1:0][70:0]  ic_wr_data,         // Data to fill to the Icache. With ECC
+      output logic [63:0]                           ic_rd_data ,        // Data read from Icache (64 bits wide at this port). F2 stage.
+      output logic [70:0]                           ic_debug_rd_data ,  // Debug read data from Icache (71 bits wide at this port). F2 stage.
+      output logic [25:0]                           ictag_debug_rd_data,// Debug icache tag.
+      input logic  [70:0]                           ic_debug_wr_data,   // Debug wr cache.
+
+      output logic [pt.ICACHE_BANKS_WAY-1:0]        ic_eccerr,          // ecc error per bank
+      output logic [pt.ICACHE_BANKS_WAY-1:0]        ic_parerr,          // parity error per bank (NOTE(review): inferred from the signal name - confirm)
+      input logic [pt.ICACHE_NUM_WAYS-1:0]          ic_tag_valid,       // Valid from the I$ tag valid outside (in flops).
+      input eb1_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt,   // this is being driven by the top level for soc testing/etc
+      input eb1_ic_tag_ext_in_pkt_t  [pt.ICACHE_NUM_WAYS-1:0]                          ic_tag_ext_in_pkt,    // this is being driven by the top level for soc testing/etc
+
+      output logic [pt.ICACHE_NUM_WAYS-1:0]         ic_rd_hit,          // read hit, one bit per way
+      output logic                                  ic_tag_perr,        // Tag Parity error
+      input  logic                                  scan_mode           // Flop scan mode control
+      ) ;
+
+   // Both instances below use .* so every port not listed explicitly is connected to the same-named wrapper signal.
+   // ic_rd_hit is an output of this wrapper and an input of eb1_IC_DATA; presumably eb1_IC_TAG drives it - confirm.
+
+   eb1_IC_TAG #(.pt(pt)) ic_tag_inst
+          (
+           .*,
+           .ic_wr_en     (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]),
+           .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]),
+           .ic_rw_addr   (ic_rw_addr[31:3])
+           ) ;
+
+   eb1_IC_DATA #(.pt(pt)) ic_data_inst
+          (
+           .*,
+           .ic_wr_en     (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]),
+           .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]),
+           .ic_rw_addr   (ic_rw_addr[31:1])
+           ) ;
+
+ endmodule
+
+
+/////////////////////////////////////////////////
+////// ICACHE DATA MODULE    ////////////////////
+/////////////////////////////////////////////////
+module eb1_IC_DATA
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+     (
+      input logic clk,
+      input logic active_clk,
+      input logic rst_l,
+      input logic clk_override,
+
+      input logic [31:1]                  ic_rw_addr,
+      input logic [pt.ICACHE_NUM_WAYS-1:0]ic_wr_en,
+      input logic                          ic_rd_en,           // Read enable
+
+      input  logic [pt.ICACHE_BANKS_WAY-1:0][70:0]    ic_wr_data,         // Data to fill to the Icache. With ECC
+      output logic [63:0]                             ic_rd_data ,                                 // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+      input  logic [70:0]                             ic_debug_wr_data,   // Debug wr cache.
+      output logic [70:0]                             ic_debug_rd_data ,  // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+      output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr,
+      output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr,    // ecc error per bank
+      input logic [pt.ICACHE_INDEX_HI:3]     ic_debug_addr,     // Read/Write addresss to the Icache.
+      input logic                            ic_debug_rd_en,      // Icache debug rd
+      input logic                            ic_debug_wr_en,      // Icache debug wr
+      input logic                            ic_debug_tag_array,  // Debug tag array
+      input logic [pt.ICACHE_NUM_WAYS-1:0]   ic_debug_way,        // Debug way. Rd or Wr.
+      input logic [63:0]                     ic_premux_data,      // Premux data to be muxed with each way of the Icache.
+      input logic                            ic_sel_premux_data,  // Select the pre_muxed data
+
+      input logic [pt.ICACHE_NUM_WAYS-1:0]ic_rd_hit,
+      input eb1_ic_data_ext_in_pkt_t  [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt,   // this is being driven by the top level for soc testing/etc
+      input  logic                         scan_mode
+
+      ) ;
+
+   logic [pt.ICACHE_TAG_INDEX_LO-1:1]                                             ic_rw_addr_ff;
+   logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0]                        ic_b_sb_wren;    //bank x ways
+   logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0]                        ic_b_sb_rden;    //bank x ways
+
+
+   logic [pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_rden;       //bank
+   logic [pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_rden_ff;    //bank
+   logic [pt.ICACHE_BANKS_WAY-1:0]                                                ic_debug_sel_sb;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][70:0]                  wb_dout ;       //  ways x bank
+   logic [pt.ICACHE_BANKS_WAY-1:0][70:0]                                          ic_sb_wr_data, ic_bank_wr_data, wb_dout_ecc_bank;
+   logic [pt.ICACHE_NUM_WAYS-1:0] [141:0]                                         wb_dout_way_pre;
+   logic [pt.ICACHE_NUM_WAYS-1:0] [63:0]                                          wb_dout_way, wb_dout_way_with_premux;
+   logic [141:0]                                                                  wb_dout_ecc;
+
+   logic [pt.ICACHE_BANKS_WAY-1:0]                                                bank_check_en;
+
+   logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0]                        ic_bank_way_clken;
+   logic [pt.ICACHE_BANKS_WAY-1:0]                                                ic_bank_way_clken_final;
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                        ic_bank_way_clken_final_up;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                                 ic_debug_rd_way_en;    // debug wr_way
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                                 ic_debug_rd_way_en_ff; // debug wr_way
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                                 ic_debug_wr_way_en;    // debug wr_way
+   logic [pt.ICACHE_INDEX_HI:1]                                                   ic_rw_addr_q;
+
+   logic [pt.ICACHE_BANKS_WAY-1:0]       [pt.ICACHE_INDEX_HI : pt.ICACHE_DATA_INDEX_LO] ic_rw_addr_bank_q;
+
+   logic [pt.ICACHE_TAG_LO-1 : pt.ICACHE_DATA_INDEX_LO]                           ic_rw_addr_q_inc;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                                 ic_rd_hit_q;
+
+
+
+      logic [pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_sram_en;
+      logic [pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_read_en;
+      logic [pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_write_en;
+      logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] [31 : pt.ICACHE_DATA_INDEX_LO]  wb_index_hold;
+      logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 write_bypass_en;     //bank
+      logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 write_bypass_en_ff;  //bank
+      logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 index_valid;  //bank
+      logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 ic_b_clear_en;
+      logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 ic_b_addr_match;
+      logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 ic_b_addr_match_index_only;
+
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_sram_en_up;
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_read_en_up;
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                                                ic_b_write_en_up;
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] [31 : pt.ICACHE_DATA_INDEX_LO]  wb_index_hold_up;
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 write_bypass_en_up;     //bank
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 write_bypass_en_ff_up;  //bank
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 index_valid_up;  //bank
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 ic_b_clear_en_up;
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 ic_b_addr_match_up;
+      logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                                 ic_b_addr_match_index_only_up;
+
+
+   logic [pt.ICACHE_BANKS_WAY-1:0]                 [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr;
+   logic [pt.ICACHE_BANKS_WAY-1:0]                 [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_index_only;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                 [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_up;
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                 [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_index_only_up;
+
+
+
+   logic                                                                          ic_rd_en_with_debug;
+   logic                                                                          ic_rw_addr_wrap, ic_cacheline_wrap_ff;
+   logic                                                                          ic_debug_rd_en_ff;
+
+
+//-----------------------------------------------------------
+// ----------- Logic section starts here --------------------
+//-----------------------------------------------------------
+   assign  ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0] =  {pt.ICACHE_NUM_WAYS{ic_debug_rd_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ;
+   assign  ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0] =  {pt.ICACHE_NUM_WAYS{ic_debug_wr_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ;
+
+   logic end_of_cache_line;
+   assign end_of_cache_line = (pt.ICACHE_LN_SZ==7'h40) ? (&ic_rw_addr_q[5:4]) : ic_rw_addr_q[4];
+   always_comb begin : clkens
+      ic_bank_way_clken  = '0;
+      // Each pass handles one bank: write steering first, then read and clock enables.
+      for ( int bank=0; bank<pt.ICACHE_BANKS_WAY; bank++) begin: wr_ens
+       // a debug access addresses exactly one bank, chosen by the bank field of ic_debug_addr
+       ic_debug_sel_sb[bank]  = (ic_debug_addr[pt.ICACHE_BANK_HI : pt.ICACHE_BANK_LO] == bank );
+       ic_b_sb_wren[bank]     =  ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]  |
+                                 ({pt.ICACHE_NUM_WAYS{ic_debug_addr[pt.ICACHE_BANK_HI : pt.ICACHE_BANK_LO] == bank}} & ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0]) ;
+       // debug write data replaces the per-bank write data on the selected bank
+       ic_sb_wr_data[bank]    = (ic_debug_sel_sb[bank] & ic_debug_wr_en) ? ic_debug_wr_data : ic_bank_wr_data[bank] ;
+       // bank 0: BANK_HI bit clear, or BANK_HI set with addr[2:1]==2'b11 and not end_of_cache_line; bank 1: BANK_HI set, or BANK_HI clear with addr[2:1]==2'b11
+       ic_b_rden[bank]        =  ic_rd_en_with_debug &
+                                 ( ((bank==0) & ( ~ic_rw_addr_q[pt.ICACHE_BANK_HI] | ( ic_rw_addr_q[pt.ICACHE_BANK_HI] & (ic_rw_addr_q[2:1] == 2'b11) & ~end_of_cache_line) )) |
+                                   ((bank==1) & (  ic_rw_addr_q[pt.ICACHE_BANK_HI] | (~ic_rw_addr_q[pt.ICACHE_BANK_HI] & (ic_rw_addr_q[2:1] == 2'b11)) )) ) ;
+       // every way of the bank sees the same read enable
+       ic_b_sb_rden[bank]     =  {pt.ICACHE_NUM_WAYS{ic_b_rden[bank]}}   ;
+       // a way's clock enable is set when it is read or written, or when clk_override is set
+       for ( int way=0; way<pt.ICACHE_NUM_WAYS; way++) begin: way_clkens
+         ic_bank_way_clken[bank][way] = clk_override | ic_b_sb_rden[bank][way] | ic_b_sb_wren[bank][way];
+       end
+     end // block: wr_ens
+   end // block: clkens
+
+// Read enable qualification and read/debug address selection
+  assign ic_rd_en_with_debug                          = (ic_rd_en   | ic_debug_rd_en ) & ~(|ic_wr_en);
+  assign ic_rw_addr_q[pt.ICACHE_INDEX_HI:1] = (ic_debug_rd_en | ic_debug_wr_en) ?
+                                              {ic_debug_addr[pt.ICACHE_INDEX_HI:3],2'b0} :   // debug access: address bits [2:1] forced to 0
+                                              ic_rw_addr[pt.ICACHE_INDEX_HI:1] ;
+   // Index plus one; substituted into the bank 0 address when ic_rw_addr_wrap is set
+   assign ic_rw_addr_q_inc[pt.ICACHE_TAG_LO-1:pt.ICACHE_DATA_INDEX_LO] = ic_rw_addr_q[pt.ICACHE_TAG_LO-1 : pt.ICACHE_DATA_INDEX_LO] + 1 ;
+   assign ic_rw_addr_wrap                                        = ic_rw_addr_q[pt.ICACHE_BANK_HI] & (ic_rw_addr_q[2:1] == 2'b11) & ic_rd_en_with_debug & ~(|ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]);   // NOTE(review): ic_rd_en_with_debug already includes a ~(|ic_wr_en) term
+   assign ic_cacheline_wrap_ff                                   = ic_rw_addr_ff[pt.ICACHE_TAG_INDEX_LO-1:pt.ICACHE_BANK_LO] == {(pt.ICACHE_TAG_INDEX_LO - pt.ICACHE_BANK_LO){1'b1}};
+   // Per-bank SRAM address: on a wrap, bank 0 keeps the upper index bits and takes the incremented
+   // low bits [ICACHE_TAG_INDEX_LO-1:ICACHE_DATA_INDEX_LO]; bank 1 always uses the unmodified address.
+   assign ic_rw_addr_bank_q[0] = ~ic_rw_addr_wrap ? ic_rw_addr_q[pt.ICACHE_INDEX_HI:pt.ICACHE_DATA_INDEX_LO] : {ic_rw_addr_q[pt.ICACHE_INDEX_HI: pt.ICACHE_TAG_INDEX_LO] , ic_rw_addr_q_inc[pt.ICACHE_TAG_INDEX_LO-1: pt.ICACHE_DATA_INDEX_LO] } ;
+   assign ic_rw_addr_bank_q[1] = ic_rw_addr_q[pt.ICACHE_INDEX_HI:pt.ICACHE_DATA_INDEX_LO];
+   // miscff carries the bank read enables, low read-address bits and debug read controls to their *_ff copies.
+   // NOTE(review): rvdffie is defined outside this view; presumably an enable flop bank - confirm.
+   rvdffie #(.WIDTH(int'(pt.ICACHE_TAG_INDEX_LO+pt.ICACHE_BANKS_WAY+pt.ICACHE_NUM_WAYS)),.OVERRIDE(1)) miscff
+            (.*,
+             .din({ ic_b_rden[pt.ICACHE_BANKS_WAY-1:0],   ic_rw_addr_q[pt.ICACHE_TAG_INDEX_LO-1:1], ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0],   ic_debug_rd_en}),
+             .dout({ic_b_rden_ff[pt.ICACHE_BANKS_WAY-1:0],ic_rw_addr_ff[pt.ICACHE_TAG_INDEX_LO-1:1],ic_debug_rd_way_en_ff[pt.ICACHE_NUM_WAYS-1:0],ic_debug_rd_en_ff})
+             );
+
+ if (pt.ICACHE_WAYPACK == 0 ) begin : PACKED_0
+
+
+    logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS_WIDTH-1:0] wrptr_up;            // bypass entry to allocate next (per way/bank)
+    logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS_WIDTH-1:0] wrptr_in_up;         // wrptr_up + 1, wrapping to 0 after ICACHE_NUM_BYPASS-1
+    logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]       sel_bypass_up;       // read hits this bypass entry
+    logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]       sel_bypass_ff_up;    // sel_bypass_up through sel_hold_ff
+    logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][(71*pt.ICACHE_NUM_WAYS)-1:0]    sel_bypass_data_up;  // NOTE(review): sized 71*NUM_WAYS although the per-way read data below is 71 or 68 bits wide - confirm intent
+    logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                                 any_bypass_up;       // OR of sel_bypass_ff_up over the entries
+    logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0]                                 any_addr_match_up;   // OR of ic_b_addr_match_up over the entries
+// eb1_IC_DATA_SRAM(depth,width): instantiates ram_<depth>x<width> for way i / bank k and, when pt.ICACHE_BYPASS_ENABLE == 1, the read-bypass entries for that way/bank.
+`define eb1_IC_DATA_SRAM(depth,width)                                                                               \
+           ram_``depth``x``width ic_bank_sb_way_data (                                                               \
+                                     .ME(ic_bank_way_clken_final_up[i][k]),                                          \
+                                     .WE (ic_b_sb_wren[k][i]),                                                       \
+                                     .D  (ic_sb_wr_data[k][``width-1:0]),                                            \
+                                     .ADR(ic_rw_addr_bank_q[k][pt.ICACHE_INDEX_HI:pt.ICACHE_DATA_INDEX_LO]),         \
+                                     .Q  (wb_dout_pre_up[i][k]),                                                     \
+                                     .CLK (clk),                                                                     \
+                                     .ROP ( ),                                                                       \
+                                     .TEST1(ic_data_ext_in_pkt[i][k].TEST1),                                         \
+                                     .RME(ic_data_ext_in_pkt[i][k].RME),                                             \
+                                     .RM(ic_data_ext_in_pkt[i][k].RM),                                               \
+                                                                                                                     \
+                                     .LS(ic_data_ext_in_pkt[i][k].LS),                                               \
+                                     .DS(ic_data_ext_in_pkt[i][k].DS),                                               \
+                                     .SD(ic_data_ext_in_pkt[i][k].SD),                                               \
+                                                                                                                     \
+                                     .TEST_RNM(ic_data_ext_in_pkt[i][k].TEST_RNM),                                   \
+                                     .BC1(ic_data_ext_in_pkt[i][k].BC1),                                             \
+                                     .BC2(ic_data_ext_in_pkt[i][k].BC2)                                              \
+                                    );  \
+if (pt.ICACHE_BYPASS_ENABLE == 1) begin \
+                 assign wrptr_in_up[i][k] = (wrptr_up[i][k] == (pt.ICACHE_NUM_BYPASS-1)) ? '0 : (wrptr_up[i][k] + 1'd1);                                    \
+                 rvdffs  #(pt.ICACHE_NUM_BYPASS_WIDTH)  wrptr_ff(.*, .clk(active_clk),  .en(|write_bypass_en_up[i][k]), .din (wrptr_in_up[i][k]), .dout(wrptr_up[i][k])) ;     \
+                 assign ic_b_sram_en_up[i][k]              = ic_bank_way_clken[k][i];                             \
+                 assign ic_b_read_en_up[i][k]              =  ic_b_sram_en_up[i][k] &   ic_b_sb_rden[k][i];       \
+                 assign ic_b_write_en_up[i][k]             =  ic_b_sram_en_up[i][k] &   ic_b_sb_wren[k][i];       \
+                 assign ic_bank_way_clken_final_up[i][k]   =  ic_b_sram_en_up[i][k] &    ~(|sel_bypass_up[i][k]); \
+                 assign ic_b_rw_addr_up[i][k] = {ic_rw_addr[31:pt.ICACHE_INDEX_HI+1],ic_rw_addr_bank_q[k]};       \
+                 assign ic_b_rw_addr_index_only_up[i][k] = ic_rw_addr_bank_q[k];                                  \
+                 always_comb begin                                                                                \
+                    any_addr_match_up[i][k] = '0;                                                                 \
+                    for (int l=0; l<pt.ICACHE_NUM_BYPASS; l++) begin                                              \
+                       any_addr_match_up[i][k] |= ic_b_addr_match_up[i][k][l];                                    \
+                    end                                                                                           \
+                 end                                                                                              \
+                // it is an error to ever have 2 entries with the same index and both valid                       \
+                for (genvar l=0; l<pt.ICACHE_NUM_BYPASS; l++) begin: BYPASS                                       \
+                   // full match up to bit 31                                                                     \
+                   assign ic_b_addr_match_up[i][k][l] = (wb_index_hold_up[i][k][l] ==  ic_b_rw_addr_up[i][k]) & index_valid_up[i][k][l];            \
+                   assign ic_b_addr_match_index_only_up[i][k][l] = (wb_index_hold_up[i][k][l][pt.ICACHE_INDEX_HI:pt.ICACHE_DATA_INDEX_LO] ==  ic_b_rw_addr_index_only_up[i][k]) & index_valid_up[i][k][l];            \
+                   // a write whose index matches a valid entry clears that entry's valid bit                                                      \
+                   assign ic_b_clear_en_up[i][k][l]   = ic_b_write_en_up[i][k] &   ic_b_addr_match_index_only_up[i][k][l];                                     \
+                   // a read with a full-address hit selects the entry and gates off the SRAM enable                                               \
+                   assign sel_bypass_up[i][k][l]      = ic_b_read_en_up[i][k]  &   ic_b_addr_match_up[i][k][l] ;                                    \
+                   // a read that hits no entry allocates the entry named by the write pointer                                                     \
+                   assign write_bypass_en_up[i][k][l] = ic_b_read_en_up[i][k]  &  ~any_addr_match_up[i][k] & (wrptr_up[i][k] == l);                 \
+                                                                                                                                                    \
+                   rvdff  #(1)  write_bypass_ff (.*, .clk(active_clk),                                                                 .din(write_bypass_en_up[i][k][l]), .dout(write_bypass_en_ff_up[i][k][l])) ; \
+                   rvdffs #(1)  index_val_ff    (.*, .clk(active_clk), .en(write_bypass_en_up[i][k][l] | ic_b_clear_en_up[i][k][l]),   .din(~ic_b_clear_en_up[i][k][l]),  .dout(index_valid_up[i][k][l])) ;       \
+                   rvdff  #(1)  sel_hold_ff     (.*, .clk(active_clk),                                                                 .din(sel_bypass_up[i][k][l]),      .dout(sel_bypass_ff_up[i][k][l])) ;     \
+                   rvdffe #((31-pt.ICACHE_DATA_INDEX_LO+1)) ic_addr_index    (.*, .en(write_bypass_en_up[i][k][l]),    .din (ic_b_rw_addr_up[i][k]), .dout(wb_index_hold_up[i][k][l]));         \
+                   rvdffe #(``width)                             rd_data_hold_ff  (.*, .en(write_bypass_en_ff_up[i][k][l]), .din (wb_dout_pre_up[i][k]),  .dout(wb_dout_hold_up[i][k][l]));     \
+                end                                                                                                                       \
+                always_comb begin                                                                                                         \
+                 any_bypass_up[i][k] = '0;                                                                                                \
+                 sel_bypass_data_up[i][k] = '0;                                                                                           \
+                 for (int l=0; l<pt.ICACHE_NUM_BYPASS; l++) begin                                                                         \
+                    any_bypass_up[i][k]      |=  sel_bypass_ff_up[i][k][l];                                                               \
+                    sel_bypass_data_up[i][k] |= (sel_bypass_ff_up[i][k][l]) ? wb_dout_hold_up[i][k][l] : '0;                              \
+                 end                                                                                                                      \
+                 wb_dout[i][k]   =   any_bypass_up[i][k] ?  sel_bypass_data_up[i][k] :  wb_dout_pre_up[i][k] ;                            \
+                 end                                                                                                                      \
+             end                                                                                                                          \
+             else begin   // bypass disabled: SRAM read data and clock enable are used directly                                           \
+                 assign wb_dout[i][k]                      =   wb_dout_pre_up[i][k] ;                                                     \
+                 assign ic_bank_way_clken_final_up[i][k]   =  ic_bank_way_clken[k][i];   // ic_bank_way_clken is indexed [bank][way]      \
+             end
+
+
+   for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: WAYS   // i = way; the SRAM macro body refers to i and k by name
+      for (genvar k=0; k<pt.ICACHE_BANKS_WAY; k++) begin: BANKS_WAY   // k = bank (16B subbank)
+      if (pt.ICACHE_ECC) begin : ECC1
+        logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [71-1:0]        wb_dout_pre_up;           // raw SRAM read data (.Q of the macro instance); only element [i][k] is driven in this scope
+        logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [pt.ICACHE_NUM_BYPASS-1:0] [71-1:0]  wb_dout_hold_up;   // read data captured per bypass entry
+        // Pick the 71-bit RAM whose depth matches $clog2(pt.ICACHE_DATA_DEPTH); the 64-entry RAM is the fallback.
+        if ($clog2(pt.ICACHE_DATA_DEPTH) == 13 )   begin : size_8192
+           `eb1_IC_DATA_SRAM(8192,71)
+        end
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 12 )   begin : size_4096
+           `eb1_IC_DATA_SRAM(4096,71)
+        end
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 11 ) begin : size_2048
+           `eb1_IC_DATA_SRAM(2048,71)
+        end
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 10 ) begin : size_1024
+           `eb1_IC_DATA_SRAM(1024,71)
+        end
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 9 ) begin : size_512
+           `eb1_IC_DATA_SRAM(512,71)
+        end
+         else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 8 ) begin : size_256
+           `eb1_IC_DATA_SRAM(256,71)
+         end
+         else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 7 ) begin : size_128
+           `eb1_IC_DATA_SRAM(128,71)
+         end
+         else  begin : size_64
+           `eb1_IC_DATA_SRAM(64,71)
+         end
+      end // if (pt.ICACHE_ECC)
+      // pt.ICACHE_ECC == 0: same depth selection, using the 68-bit wide RAMs
+     else  begin  : ECC0
+        logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [68-1:0]        wb_dout_pre_up;           // raw SRAM read data (.Q of the macro instance); only element [i][k] is driven in this scope
+        logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [pt.ICACHE_NUM_BYPASS-1:0] [68-1:0]  wb_dout_hold_up;   // read data captured per bypass entry
+        if ($clog2(pt.ICACHE_DATA_DEPTH) == 13 )   begin : size_8192
+           `eb1_IC_DATA_SRAM(8192,68)
+        end
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 12 )   begin : size_4096
+           `eb1_IC_DATA_SRAM(4096,68)
+        end
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 11 ) begin : size_2048
+           `eb1_IC_DATA_SRAM(2048,68)
+        end
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 10 ) begin : size_1024
+           `eb1_IC_DATA_SRAM(1024,68)
+        end
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 9 ) begin : size_512
+           `eb1_IC_DATA_SRAM(512,68)
+        end
+         else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 8 ) begin : size_256
+           `eb1_IC_DATA_SRAM(256,68)
+         end
+         else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 7 ) begin : size_128
+           `eb1_IC_DATA_SRAM(128,68)
+         end
+         else  begin : size_64
+           `eb1_IC_DATA_SRAM(64,68)
+         end
+      end // else: !if(pt.ICACHE_ECC)
+      end // block: BANKS_WAY
+   end // block: WAYS
+
+ end // block: PACKED_0
+
+ // WAY PACKED
+ else begin : PACKED_1
+
+    logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS_WIDTH-1:0] wrptr;      // bypass entry to allocate next (per bank)
+    logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS_WIDTH-1:0] wrptr_in;   // wrptr + 1, wrapping to 0 after ICACHE_NUM_BYPASS-1
+    logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                       sel_bypass;      // read hits this bypass entry
+    logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0]                       sel_bypass_ff;   // sel_bypass through sel_hold_ff
+
+
+    logic [pt.ICACHE_BANKS_WAY-1:0][(71*pt.ICACHE_NUM_WAYS)-1:0]  sel_bypass_data;   // held read data of the selected entry (all ways packed)
+    logic [pt.ICACHE_BANKS_WAY-1:0]                               any_bypass;        // OR of sel_bypass_ff over the entries
+    logic [pt.ICACHE_BANKS_WAY-1:0]                               any_addr_match;    // OR of ic_b_addr_match over the entries
+
+
+// eb1_PACKED_IC_DATA_SRAM(depth,width,waywidth): one write-masked RAM (ram_be_<depth>x<width>) per bank k holding all ways
+// side by side, plus the read-bypass entries for that bank when pt.ICACHE_BYPASS_ENABLE == 1. Refers to genvar k by name.
+`define eb1_PACKED_IC_DATA_SRAM(depth,width,waywidth)                                                                                                 \
+            ram_be_``depth``x``width  ic_bank_sb_way_data (                                                                                           \
+                            .CLK   (clk),                                                                                                             \
+                            .WE    (|ic_b_sb_wren[k]),                                                    // OR of all the ways in the bank           \
+                            .WEM   (ic_b_sb_bit_en_vec[k]),                                               // per-bit write mask, one group per way    \
+                            .D     ({pt.ICACHE_NUM_WAYS{ic_sb_wr_data[k][``waywidth-1:0]}}),                                                          \
+                            .ADR   (ic_rw_addr_bank_q[k][pt.ICACHE_INDEX_HI:pt.ICACHE_DATA_INDEX_LO]),                                                \
+                            .Q     (wb_packeddout_pre[k]),                                                                                            \
+                            .ME    (|ic_bank_way_clken_final[k]),                                                                                     \
+                            .ROP   ( ),                                                                                                               \
+                            .TEST1  (ic_data_ext_in_pkt[0][k].TEST1),                                                                                 \
+                            .RME   (ic_data_ext_in_pkt[0][k].RME),                                                                                    \
+                            .RM    (ic_data_ext_in_pkt[0][k].RM),                                                                                     \
+                                                                                                                                                      \
+                            .LS    (ic_data_ext_in_pkt[0][k].LS),                                                                                     \
+                            .DS    (ic_data_ext_in_pkt[0][k].DS),                                                                                     \
+                            .SD    (ic_data_ext_in_pkt[0][k].SD),                                                                                     \
+                                                                                                                                                      \
+                            .TEST_RNM (ic_data_ext_in_pkt[0][k].TEST_RNM),                                                                            \
+                            .BC1      (ic_data_ext_in_pkt[0][k].BC1),                                                                                 \
+                            .BC2      (ic_data_ext_in_pkt[0][k].BC2)                                                                                  \
+                           );                                                                                                                         \
+                                                                                                                                                      \
+              if (pt.ICACHE_BYPASS_ENABLE == 1) begin                                                                                                                                                 \
+                                                                                                                                                                                                      \
+                 assign wrptr_in[k] = (wrptr[k] == (pt.ICACHE_NUM_BYPASS-1)) ? '0 : (wrptr[k] + 1'd1);                                                                                                \
+                                                                                                                                                                                                      \
+                 rvdffs  #(pt.ICACHE_NUM_BYPASS_WIDTH)  wrptr_ff(.*, .clk(active_clk), .en(|write_bypass_en[k]), .din (wrptr_in[k]), .dout(wrptr[k])) ;                                                                       \
+                                                                                                                                                                                                      \
+                 assign ic_b_sram_en[k]              = |ic_bank_way_clken[k];                                                                                                                         \
+                                                                                                                                                                                                      \
+                                                                                                                                                                                                      \
+                 assign ic_b_read_en[k]              =  ic_b_sram_en[k]  &  (|ic_b_sb_rden[k]) ;                                                                                                              \
+                 assign ic_b_write_en[k]             =  ic_b_sram_en[k] &   (|ic_b_sb_wren[k]);                                                                                                       \
+                 assign ic_bank_way_clken_final[k]   =  ic_b_sram_en[k] &    ~(|sel_bypass[k]);                                                                                                       \
+                                                                                                                                                                                                      \
+                 assign ic_b_rw_addr[k] = {ic_rw_addr[31:pt.ICACHE_INDEX_HI+1],ic_rw_addr_bank_q[k]};                                                                                                 \
+                 assign ic_b_rw_addr_index_only[k] = ic_rw_addr_bank_q[k];                                                                                                    \
+                                                                                                                                                                                                      \
+                 always_comb begin                                                                                                                                                                    \
+                    any_addr_match[k] = '0;                                                                                                                                                           \
+                                                                                                                                                                                                      \
+                    for (int l=0; l<pt.ICACHE_NUM_BYPASS; l++) begin                                                                                                                                  \
+                       any_addr_match[k] |= ic_b_addr_match[k][l];                                                                                                                                    \
+                    end                                                                                                                                                                               \
+                 end                                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                // it is an error to ever have 2 entries with the same index and both valid                                                                                                           \
+                for (genvar l=0; l<pt.ICACHE_NUM_BYPASS; l++) begin: BYPASS                                                                                                                           \
+                                                                                                                                                                                                      \
+                   // full match up to bit 31                                                                                                                                                         \
+                   assign ic_b_addr_match[k][l] = (wb_index_hold[k][l] ==  ic_b_rw_addr[k]) & index_valid[k][l];                                                                                      \
+                   assign ic_b_addr_match_index_only[k][l] = (wb_index_hold[k][l][pt.ICACHE_INDEX_HI:pt.ICACHE_DATA_INDEX_LO] ==  ic_b_rw_addr_index_only[k]) & index_valid[k][l];                    \
+                   // a write whose index matches a valid entry clears that entry's valid bit                                                                                                        \
+                   assign ic_b_clear_en[k][l]   = ic_b_write_en[k] &   ic_b_addr_match_index_only[k][l];                                                                                                              \
+                   // a read with a full-address hit selects the entry and gates off the SRAM enable                                                                                                 \
+                   assign sel_bypass[k][l]      = ic_b_read_en[k]  &   ic_b_addr_match[k][l] ;                                                                                                        \
+                   // a read that hits no entry allocates the entry named by the write pointer                                                                                                       \
+                   assign write_bypass_en[k][l] = ic_b_read_en[k]  &  ~any_addr_match[k] & (wrptr[k] == l);                                                                                           \
+                                                                                                                                                                                                      \
+                   rvdff  #(1)  write_bypass_ff (.*, .clk(active_clk),                                                     .din(write_bypass_en[k][l]), .dout(write_bypass_en_ff[k][l])) ;                            \
+                   rvdffs #(1)  index_val_ff    (.*, .clk(active_clk), .en(write_bypass_en[k][l] | ic_b_clear_en[k][l]),   .din(~ic_b_clear_en[k][l]),  .dout(index_valid[k][l])) ;                                   \
+                   rvdff  #(1)  sel_hold_ff     (.*, .clk(active_clk),                                                     .din(sel_bypass[k][l]),      .dout(sel_bypass_ff[k][l])) ;                                 \
+                                                                                                                                                                                                      \
+                   rvdffe #((31-pt.ICACHE_DATA_INDEX_LO+1)) ic_addr_index    (.*, .en(write_bypass_en[k][l]),    .din (ic_b_rw_addr[k]),      .dout(wb_index_hold[k][l]));                            \
+                   rvdffe #((``waywidth*pt.ICACHE_NUM_WAYS))        rd_data_hold_ff  (.*, .en(write_bypass_en_ff[k][l]), .din (wb_packeddout_pre[k]), .dout(wb_packeddout_hold[k][l]));                       \
+                                                                                                                                                                                                      \
+                end // block: BYPASS                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                always_comb begin                                                                                                                                                                     \
+                 any_bypass[k] = '0;                                                                                                                                                                  \
+                 sel_bypass_data[k] = '0;                                                                                                                                                             \
+                                                                                                                                                                                                      \
+                 for (int l=0; l<pt.ICACHE_NUM_BYPASS; l++) begin                                                                                                                                     \
+                    any_bypass[k]      |=  sel_bypass_ff[k][l];                                                                                                                                       \
+                      sel_bypass_data[k] |= (sel_bypass_ff[k][l]) ? wb_packeddout_hold[k][l] : '0;                                                                                                    \
+                 end                                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                   wb_packeddout[k]   =   any_bypass[k] ?  sel_bypass_data[k] :  wb_packeddout_pre[k] ;                                                                                               \
+                end // always_comb begin                                                                                                                                                              \
+                                                                                                                                                                                                      \
+             end // if (pt.ICACHE_BYPASS_ENABLE == 1)                                                                                                                                                 \
+             else begin                                                                                                                                                                               \
+                 assign wb_packeddout[k]   =   wb_packeddout_pre[k] ;                                                                                                                                 \
+                 assign ic_bank_way_clken_final[k]   =  |ic_bank_way_clken[k] ;                                                                                                                       \
+             end
+
+ // generate IC DATA PACKED SRAMS for 2/4 ways
+  for (genvar k=0; k<pt.ICACHE_BANKS_WAY; k++) begin: BANKS_WAY   // 16B subbank
+     if (pt.ICACHE_ECC) begin : ECC1
+        logic [pt.ICACHE_BANKS_WAY-1:0] [(71*pt.ICACHE_NUM_WAYS)-1:0]        wb_packeddout, ic_b_sb_bit_en_vec, wb_packeddout_pre;           // per-bank packed data (71b per way, ways side by side) and its bit enables
+
+        logic [pt.ICACHE_BANKS_WAY-1:0] [pt.ICACHE_NUM_BYPASS-1:0] [(71*pt.ICACHE_NUM_WAYS)-1:0]  wb_packeddout_hold;   // per-bank, per-bypass-entry data selected by sel_bypass_ff inside the SRAM macro
+        // expand each way's write enable into a 71-bit-wide slice of the packed bit-enable vector
+        for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: BITEN
+           assign ic_b_sb_bit_en_vec[k][(71*i)+70:71*i] = {71{ic_b_sb_wren[k][i]}};
+        end
+
+        // SRAMs with ECC (single/double detect; no correct). Macro arguments are presumably (depth, packed width, per-way width) - confirm against the macro definition.
+        if ($clog2(pt.ICACHE_DATA_DEPTH) == 13 )   begin : size_8192
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(8192,284,71)    // 284b = 4 ways x 71b (64b data + 7b ecc)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(8192,142,71)    // 142b = 2 x 71b (presumably the 2-way configuration)
+           end // block: WAYS
+        end // block: size_8192
+
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 12 )   begin : size_4096
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(4096,284,71)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(4096,142,71)
+           end // block: WAYS
+        end // block: size_4096
+
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 11 ) begin : size_2048
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(2048,284,71)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(2048,142,71)
+           end // block: WAYS
+        end // block: size_2048
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 10 ) begin : size_1024
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(1024,284,71)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(1024,142,71)
+           end // block: WAYS
+        end // block: size_1024
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 9 ) begin : size_512
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(512,284,71)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(512,142,71)
+           end // block: WAYS
+        end // block: size_512
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 8 ) begin : size_256
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(256,284,71)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(256,142,71)
+           end // block: WAYS
+        end // block: size_256
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 7 ) begin : size_128
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(128,284,71)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(128,142,71)
+           end // block: WAYS
+        end // block: size_128
+
+        else  begin : size_64   // fallback: taken for every depth not matched above
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(64,284,71)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(64,142,71)
+           end // block: WAYS
+        end // block: size_64
+
+       // unpack: way i of bank k is the i-th 71b slice of that bank's packed read data
+       for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: WAYS
+          assign wb_dout[i][k][70:0]  = wb_packeddout[k][(71*i)+70:71*i];
+       end : WAYS
+
+       end // if (pt.ICACHE_ECC)
+
+
+     else  begin  : ECC0
+        logic [pt.ICACHE_BANKS_WAY-1:0] [(68*pt.ICACHE_NUM_WAYS)-1:0]        wb_packeddout, ic_b_sb_bit_en_vec, wb_packeddout_pre;           // per-bank packed data (68b per way, ways side by side) and its bit enables
+
+        logic [pt.ICACHE_BANKS_WAY-1:0] [pt.ICACHE_NUM_BYPASS-1:0] [(68*pt.ICACHE_NUM_WAYS)-1:0]  wb_packeddout_hold;   // per-bank, per-bypass-entry data selected by sel_bypass_ff inside the SRAM macro
+        // expand each way's write enable into a 68-bit-wide slice of the packed bit-enable vector
+        for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: BITEN
+           assign ic_b_sb_bit_en_vec[k][(68*i)+67:68*i] = {68{ic_b_sb_wren[k][i]}};
+        end
+
+        // SRAMs with parity. Macro arguments are presumably (depth, packed width, per-way width) - confirm against the macro definition.
+        if ($clog2(pt.ICACHE_DATA_DEPTH) == 13 )   begin : size_8192
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(8192,272,68)    // 272b = 4 ways x 68b (64b data + 4b parity)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(8192,136,68)    // 136b = 2 x 68b (presumably the 2-way configuration)
+           end // block: WAYS
+        end // block: size_8192
+
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 12 )   begin : size_4096
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(4096,272,68)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(4096,136,68)
+           end // block: WAYS
+        end // block: size_4096
+
+        else if ($clog2(pt.ICACHE_DATA_DEPTH) == 11 ) begin : size_2048
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(2048,272,68)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(2048,136,68)
+           end // block: WAYS
+        end // block: size_2048
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 10 ) begin : size_1024
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(1024,272,68)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(1024,136,68)
+           end // block: WAYS
+        end // block: size_1024
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 9 ) begin : size_512
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(512,272,68)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(512,136,68)
+           end // block: WAYS
+        end // block: size_512
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 8 ) begin : size_256
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(256,272,68)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(256,136,68)
+           end // block: WAYS
+        end // block: size_256
+
+        else if ( $clog2(pt.ICACHE_DATA_DEPTH) == 7 ) begin : size_128
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(128,272,68)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(128,136,68)
+           end // block: WAYS
+        end // block: size_128
+
+        else  begin : size_64   // fallback: taken for every depth not matched above
+           if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(64,272,68)
+           end // block: WAYS
+           else   begin : WAYS
+              `eb1_PACKED_IC_DATA_SRAM(64,136,68)
+           end // block: WAYS
+        end // block: size_64
+        // unpack: way i of bank k is the i-th 68b slice of that bank's packed read data
+       for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: WAYS
+          assign wb_dout[i][k][67:0]  = wb_packeddout[k][(68*i)+67:68*i];
+       end
+     end // block: ECC0
+     end // block: BANKS_WAY
+ end // block: PACKED_1
+
+
+   assign ic_rd_hit_q[pt.ICACHE_NUM_WAYS-1:0] = ic_debug_rd_en_ff ? ic_debug_rd_way_en_ff[pt.ICACHE_NUM_WAYS-1:0] : ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] ;  // way select for the read-data muxes: the debug-read way when ic_debug_rd_en_ff is set, otherwise the tag hit vector
+
+
+ if ( pt.ICACHE_ECC ) begin : ECC1_MUX
+   // ECC configuration: each bank supplies 71b = 64b data [63:0] + 7b ecc [70:64]
+   assign ic_bank_wr_data[1] = ic_wr_data[1][70:0];
+   assign ic_bank_wr_data[0] = ic_wr_data[0][70:0];
+    // rd_mux: per way, low 71b <- bank j where addr bank == j, high 71b <- bank j where addr bank == j-1 (the cast wraps j-1 for j==0)
+    always_comb begin : rd_mux
+      wb_dout_way_pre[pt.ICACHE_NUM_WAYS-1:0] = '0;
+
+      for ( int i=0; i<pt.ICACHE_NUM_WAYS; i++) begin : num_ways
+        for ( int j=0; j<pt.ICACHE_BANKS_WAY; j++) begin : banks
+         wb_dout_way_pre[i][70:0]      |=  ({71{(ic_rw_addr_ff[pt.ICACHE_BANK_HI : pt.ICACHE_BANK_LO] == (pt.ICACHE_BANK_BITS)'(j))}}   &  wb_dout[i][j]);
+         wb_dout_way_pre[i][141 : 71]  |=  ({71{(ic_rw_addr_ff[pt.ICACHE_BANK_HI : pt.ICACHE_BANK_LO] == (pt.ICACHE_BANK_BITS)'(j-1))}} &  wb_dout[i][j]);
+        end
+      end
+    end
+    // num_ways_mux1: shift the 64b window by ic_rw_addr_ff[2:1] halfwords; the upper halfwords come from the high bank's data, which starts at bit 71
+    for ( genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin : num_ways_mux1
+      assign wb_dout_way[i][63:0] = (ic_rw_addr_ff[2:1] == 2'b00) ? wb_dout_way_pre[i][63:0]   :
+                                    (ic_rw_addr_ff[2:1] == 2'b01) ?{wb_dout_way_pre[i][86:71], wb_dout_way_pre[i][63:16]} :
+                                    (ic_rw_addr_ff[2:1] == 2'b10) ?{wb_dout_way_pre[i][102:71],wb_dout_way_pre[i][63:32]} :
+                                                                   {wb_dout_way_pre[i][118:71],wb_dout_way_pre[i][63:48]};  // 48b from the high bank + 16b from the low bank = 64b
+
+      assign wb_dout_way_with_premux[i][63:0]  =  ic_sel_premux_data ? ic_premux_data[63:0] : wb_dout_way[i][63:0] ;
+   end
+   // rd_out: OR together the selected way(s): aligned fetch data, debug read data (low bank, 71b) and the unaligned two-bank data+ecc used for checking
+   always_comb begin : rd_out
+      ic_debug_rd_data[70:0]     = '0;
+      ic_rd_data[63:0]           = '0;
+      wb_dout_ecc[141:0]         = '0;
+      for ( int i=0; i<pt.ICACHE_NUM_WAYS; i++) begin : num_ways_mux2
+         ic_rd_data[63:0]       |= ({64{ic_rd_hit_q[i] | ic_sel_premux_data}}) &  wb_dout_way_with_premux[i][63:0];
+         ic_debug_rd_data[70:0] |= ({71{ic_rd_hit_q[i]}}) & wb_dout_way_pre[i][70:0];
+         wb_dout_ecc[141:0]     |= {142{ic_rd_hit_q[i]}}  & wb_dout_way_pre[i];
+      end
+   end
+
+ // one ECC decoder per bank, fed with that bank's 71b slice of wb_dout_ecc
+ for (genvar i=0; i < pt.ICACHE_BANKS_WAY ; i++) begin : ic_ecc_error
+    assign bank_check_en[i]    = |ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] & ((i==0) | (~ic_cacheline_wrap_ff & (ic_b_rden_ff[pt.ICACHE_BANKS_WAY-1:0] == {pt.ICACHE_BANKS_WAY{1'b1}})));  // always check the lower address bank, and drop the upper address bank on a CL wrap
+    assign wb_dout_ecc_bank[i] = wb_dout_ecc[(71*i)+70:(71*i)];
+
+   rvecc_decode_64  ecc_decode_64 (
+                           .en               (bank_check_en[i]),
+                           .din              (wb_dout_ecc_bank[i][63 : 0]),                // [134:71],  [63:0]
+                           .ecc_in           (wb_dout_ecc_bank[i][70 : 64]),               // [141:135] [70:64]
+                           .ecc_error        (ic_eccerr[i]));
+
+   // parity is not used in the ECC configuration: tie the per-bank parity error to zero
+  assign  ic_parerr[i]  = '0 ;
+  end // block: ic_ecc_error
+
+end // if ( pt.ICACHE_ECC )
+
+else  begin : ECC0_MUX
+   assign ic_bank_wr_data[1] = ic_wr_data[1][70:0];  // NOTE(review): all 71b are forwarded although this branch reads back 68b per bank - confirm the upper bits are unused
+   assign ic_bank_wr_data[0] = ic_wr_data[0][70:0];
+   // rd_mux: per way, low 68b <- bank j where addr bank == j, high 68b <- bank j where addr bank == j-1 (the cast wraps j-1 for j==0)
+   always_comb begin : rd_mux
+      wb_dout_way_pre[pt.ICACHE_NUM_WAYS-1:0] = '0;
+
+   for ( int i=0; i<pt.ICACHE_NUM_WAYS; i++) begin : num_ways
+     for ( int j=0; j<pt.ICACHE_BANKS_WAY; j++) begin : banks
+         wb_dout_way_pre[i][67:0]         |=  ({68{(ic_rw_addr_ff[pt.ICACHE_BANK_HI : pt.ICACHE_BANK_LO] == (pt.ICACHE_BANK_BITS)'(j))}}   &  wb_dout[i][j][67:0]);
+         wb_dout_way_pre[i][135 : 68]     |=  ({68{(ic_rw_addr_ff[pt.ICACHE_BANK_HI : pt.ICACHE_BANK_LO] == (pt.ICACHE_BANK_BITS)'(j-1))}} &  wb_dout[i][j][67:0]);
+      end
+     end
+   end
+   // NOTE(review): open question from the original author - when the 64b window straddles the two banks, are the check bits captured here still correct? Unresolved.
+   for ( genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin : num_ways_mux1
+      assign wb_dout_way[i][63:0] = (ic_rw_addr_ff[2:1] == 2'b00) ? wb_dout_way_pre[i][63:0]   :
+                                    (ic_rw_addr_ff[2:1] == 2'b01) ?{wb_dout_way_pre[i][83:68],  wb_dout_way_pre[i][63:16]} :
+                                    (ic_rw_addr_ff[2:1] == 2'b10) ?{wb_dout_way_pre[i][99:68],  wb_dout_way_pre[i][63:32]} :
+                                                                   {wb_dout_way_pre[i][115:68], wb_dout_way_pre[i][63:48]};  // 48b from the high bank + 16b from the low bank = 64b
+
+      assign wb_dout_way_with_premux[i][63:0]      =  ic_sel_premux_data ? ic_premux_data[63:0]  : wb_dout_way[i][63:0] ;
+   end
+   // rd_out: OR together the selected way(s): aligned fetch data, debug read data (low bank, zero-extended to 71b) and the unaligned two-bank data+parity
+   always_comb begin : rd_out
+      ic_rd_data[63:0]   = '0;
+      ic_debug_rd_data[70:0]   = '0;
+      wb_dout_ecc[135:0] = '0;
+
+      for ( int i=0; i<pt.ICACHE_NUM_WAYS; i++) begin : num_ways_mux2
+         ic_rd_data[63:0]   |= ({64{ic_rd_hit_q[i] | ic_sel_premux_data}} &  wb_dout_way_with_premux[i][63:0]);
+         ic_debug_rd_data[70:0] |= ({71{ic_rd_hit_q[i]}}) & {3'b0,wb_dout_way_pre[i][67:0]};
+         wb_dout_ecc[135:0] |= {136{ic_rd_hit_q[i]}}  & wb_dout_way_pre[i][135:0];
+      end
+   end
+   // split the 136b check vector into its two 68b banks
+   assign wb_dout_ecc_bank[0] =  wb_dout_ecc[67:0];
+   assign wb_dout_ecc_bank[1] =  wb_dout_ecc[135:68];
+
+   logic [pt.ICACHE_BANKS_WAY-1:0][3:0] ic_parerr_bank;  // raw parity-check result per bank, one bit per 16b data slice
+
+  for (genvar i=0; i < pt.ICACHE_BANKS_WAY ; i++) begin : ic_par_error
+    assign bank_check_en[i]    = |ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] & ((i==0) | (~ic_cacheline_wrap_ff & (ic_b_rden_ff[pt.ICACHE_BANKS_WAY-1:0] == {pt.ICACHE_BANKS_WAY{1'b1}})));  // always check the lower address bank, and drop the upper address bank on a CL wrap
+     for (genvar j=0; j<4; j++)  begin : parity   // data bits [16j+15:16j] are checked against parity bit [64+j]
+      rveven_paritycheck pchk (
+                           .data_in   (wb_dout_ecc_bank[i][16*(j+1)-1: 16*j]),
+                           .parity_in (wb_dout_ecc_bank[i][64+j]),
+                           .parity_err(ic_parerr_bank[i][j] )
+                           );
+        end
+     assign ic_eccerr [i] = '0 ;  // ECC is not used in the parity configuration
+  end
+     // a bank reports a parity error only if any of its four slices failed and the bank is being checked
+     assign ic_parerr[1] = (|ic_parerr_bank[1][3:0]) & bank_check_en[1];
+     assign ic_parerr[0] = (|ic_parerr_bank[0][3:0]) & bank_check_en[0];
+
+end // else: !if( pt.ICACHE_ECC )
+
+
+endmodule // eb1_IC_DATA
+
+//=============================================================================================================================================================
+///\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ END OF IC DATA MODULE \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
+//\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
+//=============================================================================================================================================================
+
+/////////////////////////////////////////////////
+////// ICACHE TAG MODULE     ////////////////////
+/////////////////////////////////////////////////
+module eb1_IC_TAG
+import eb1_pkg::*;
+ #(
+`include "eb1_param.vh"
+ )
+     (
+      input logic                                                   clk,
+      input logic                                                   active_clk,
+      input logic                                                   rst_l,
+      input logic                                                   clk_override,
+      input logic                                                   dec_tlu_core_ecc_disable,
+
+      input logic [31:3]                                            ic_rw_addr,
+
+      input logic [pt.ICACHE_NUM_WAYS-1:0]                         ic_wr_en,             // way
+      input logic [pt.ICACHE_NUM_WAYS-1:0]                         ic_tag_valid,
+      input logic                                                  ic_rd_en,
+
+      input logic [pt.ICACHE_INDEX_HI:3]                           ic_debug_addr,        // Read/Write address to the Icache.
+      input logic                                                  ic_debug_rd_en,       // Icache debug rd
+      input logic                                                  ic_debug_wr_en,       // Icache debug wr
+      input logic                                                  ic_debug_tag_array,   // Debug tag array
+      input logic [pt.ICACHE_NUM_WAYS-1:0]                         ic_debug_way,         // Debug way. Rd or Wr.
+      input eb1_ic_tag_ext_in_pkt_t   [pt.ICACHE_NUM_WAYS-1:0]    ic_tag_ext_in_pkt,
+
+      output logic [25:0]                                          ictag_debug_rd_data,
+      input  logic [70:0]                                          ic_debug_wr_data,     // Debug wr cache.
+
+      output logic [pt.ICACHE_NUM_WAYS-1:0]                        ic_rd_hit,
+      output logic                                                 ic_tag_perr,
+      input  logic                                                 scan_mode
+   ) ;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0] [25:0]                           ic_tag_data_raw;
+   logic [pt.ICACHE_NUM_WAYS-1:0] [25:0]                           ic_tag_data_raw_pre;
+   logic [pt.ICACHE_NUM_WAYS-1:0] [36:pt.ICACHE_TAG_LO]            w_tout;
+   logic [25:0]                                                    ic_tag_wr_data ;
+   logic [pt.ICACHE_NUM_WAYS-1:0] [31:0]                           ic_tag_corrected_data_unc;
+   logic [pt.ICACHE_NUM_WAYS-1:0] [06:0]                           ic_tag_corrected_ecc_unc;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_tag_single_ecc_error;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_tag_double_ecc_error;
+   logic [6:0]                                                     ic_tag_ecc;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_tag_way_perr ;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_debug_rd_way_en ;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_debug_rd_way_en_ff ;
+
+   logic [pt.ICACHE_INDEX_HI: pt.ICACHE_TAG_INDEX_LO]              ic_rw_addr_q;
+   logic [31:pt.ICACHE_TAG_LO]                                     ic_rw_addr_ff;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_tag_rden_q;          // way
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_tag_wren;          // way
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_tag_wren_q;        // way
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_tag_clken;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                  ic_debug_wr_way_en;   // debug wr_way
+   logic                                                           ic_rd_en_ff;
+   logic                                                           ic_tag_parity;
+
+
+   assign  ic_tag_wren [pt.ICACHE_NUM_WAYS-1:0]  = ic_wr_en[pt.ICACHE_NUM_WAYS-1:0] & {pt.ICACHE_NUM_WAYS{(ic_rw_addr[pt.ICACHE_BEAT_ADDR_HI:4] == {pt.ICACHE_BEAT_BITS-1{1'b1}})}} ;  // tag is written only when address bits [BEAT_ADDR_HI:4] are all ones (presumably the last beat of a line fill - confirm)
+   assign  ic_tag_clken[pt.ICACHE_NUM_WAYS-1:0]  = {pt.ICACHE_NUM_WAYS{ic_rd_en | clk_override}} | ic_wr_en[pt.ICACHE_NUM_WAYS-1:0] | ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0] | ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0];  // per-way enable: any read, clock override, write, or debug access to that way
+   // registered copy of ic_rd_en, clocked by active_clk
+   rvdff #(1) rd_en_ff (.*, .clk(active_clk),
+                    .din (ic_rd_en),
+                    .dout(ic_rd_en_ff)) ;
+
+   // registered copy of the upper address bits [31:ICACHE_TAG_LO]
+   rvdffie #(32-pt.ICACHE_TAG_LO) adr_ff (.*,
+                                          .din ({ic_rw_addr[31:pt.ICACHE_TAG_LO]}),
+                                          .dout({ic_rw_addr_ff[31:pt.ICACHE_TAG_LO]})
+                                          );
+
+   localparam PAD_BITS = 21 - (32 - pt.ICACHE_TAG_LO);  // zero padding that widens the stored tag to 21 bits (the maximum tag width)
+
+   // debug tag-array accesses: qualify the requested way with the debug read / write enable
+   assign  ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0] =  {pt.ICACHE_NUM_WAYS{ic_debug_rd_en & ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ;
+   assign  ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0] =  {pt.ICACHE_NUM_WAYS{ic_debug_wr_en & ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ;
+   // a way is written on either a normal tag write or a debug tag write
+   assign  ic_tag_wren_q[pt.ICACHE_NUM_WAYS-1:0]  =  ic_tag_wren[pt.ICACHE_NUM_WAYS-1:0]          |
+                                  ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0]   ;
+   // reads (normal or debug) are suppressed while any ic_wr_en bit or ic_debug_wr_en is set
+   assign  ic_tag_rden_q[pt.ICACHE_NUM_WAYS-1:0]  =  ({pt.ICACHE_NUM_WAYS{ic_rd_en }}  | ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0] ) &  {pt.ICACHE_NUM_WAYS{~(|ic_wr_en)  & ~ic_debug_wr_en}};
+
+if (pt.ICACHE_TAG_LO == 11) begin: SMALLEST   // TAG_LO == 11: the tag is already 21 bits wide (PAD_BITS evaluates to 0), so no padding is inserted
+ if (pt.ICACHE_ECC) begin : ECC1_W
+           rvecc_encode  tag_ecc_encode (
+                                  .din    ({{pt.ICACHE_TAG_LO{1'b0}}, ic_rw_addr[31:pt.ICACHE_TAG_LO]}),
+                                  .ecc_out({ ic_tag_ecc[6:0]}));
+   // debug tag writes take {ecc, tag} from ic_debug_wr_data; normal writes store the generated ecc bits [4:0] with the address tag
+   assign  ic_tag_wr_data[25:0] = (ic_debug_wr_en & ic_debug_tag_array) ?
+                                  {ic_debug_wr_data[68:64], ic_debug_wr_data[31:11]} :
+                                  {ic_tag_ecc[4:0], ic_rw_addr[31:pt.ICACHE_TAG_LO]} ;
+ end
+
+ else begin : ECC0_W
+           rveven_paritygen #(32-pt.ICACHE_TAG_LO) pargen  (.data_in   (ic_rw_addr[31:pt.ICACHE_TAG_LO]),
+                                                 .parity_out(ic_tag_parity));
+   // parity configuration: only ic_tag_wr_data[21:0] is driven here ({parity, 21b tag})
+   assign  ic_tag_wr_data[21:0] = (ic_debug_wr_en & ic_debug_tag_array) ?
+                                  {ic_debug_wr_data[64], ic_debug_wr_data[31:11]} :
+                                  {ic_tag_parity, ic_rw_addr[31:pt.ICACHE_TAG_LO]} ;
+ end // else: !if(pt.ICACHE_ECC)
+
+end // block: SMALLEST
+
+
+else begin: OTHERS   // any other TAG_LO: the address tag is zero-padded by PAD_BITS to the 21-bit stored width
+  if(pt.ICACHE_ECC) begin :ECC1_W
+           rvecc_encode  tag_ecc_encode (
+                                  .din    ({{pt.ICACHE_TAG_LO{1'b0}}, ic_rw_addr[31:pt.ICACHE_TAG_LO]}),
+                                  .ecc_out({ ic_tag_ecc[6:0]}));
+   // debug tag writes take {ecc, tag} from ic_debug_wr_data; normal writes store {ecc[4:0], zero pad, address tag}
+   assign  ic_tag_wr_data[25:0] = (ic_debug_wr_en & ic_debug_tag_array) ?
+                                  {ic_debug_wr_data[68:64],ic_debug_wr_data[31:11]} :
+                                  {ic_tag_ecc[4:0], {PAD_BITS{1'b0}},ic_rw_addr[31:pt.ICACHE_TAG_LO]} ;
+
+  end
+  else  begin :ECC0_W
+   logic   ic_tag_parity ;  // local declaration; shadows the module-level ic_tag_parity inside this block
+           rveven_paritygen #(32-pt.ICACHE_TAG_LO) pargen  (.data_in   (ic_rw_addr[31:pt.ICACHE_TAG_LO]),
+                                                 .parity_out(ic_tag_parity));
+   assign  ic_tag_wr_data[21:0] = (ic_debug_wr_en & ic_debug_tag_array) ?
+                                  {ic_debug_wr_data[64], ic_debug_wr_data[31:11]} :
+                                  {ic_tag_parity, {PAD_BITS{1'b0}},ic_rw_addr[31:pt.ICACHE_TAG_LO]} ;  // only bits [21:0] are driven: {parity, zero pad, address tag}
+  end // else: !if(pt.ICACHE_ECC)
+
+end // block: OTHERS
+
+
+    assign ic_rw_addr_q[pt.ICACHE_INDEX_HI: pt.ICACHE_TAG_INDEX_LO] = (ic_debug_rd_en | ic_debug_wr_en) ?   // tag array index: the debug address during any debug access, otherwise the fetch/fill address
+                                                ic_debug_addr[pt.ICACHE_INDEX_HI: pt.ICACHE_TAG_INDEX_LO] :
+                                                ic_rw_addr[pt.ICACHE_INDEX_HI: pt.ICACHE_TAG_INDEX_LO] ;
+   // registered copy of the debug-read way select, clocked by active_clk
+   rvdff #(pt.ICACHE_NUM_WAYS) tag_rd_wy_ff (.*, .clk(active_clk),
+                    .din ({ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0]}),
+                    .dout({ic_debug_rd_way_en_ff[pt.ICACHE_NUM_WAYS-1:0]}));
+
+ if (pt.ICACHE_WAYPACK == 0 ) begin : PACKED_0
+
+   logic [pt.ICACHE_NUM_WAYS-1:0] ic_b_sram_en;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                                                               ic_b_read_en;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                                                                               ic_b_write_en;
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_TAG_NUM_BYPASS-1:0] [pt.ICACHE_INDEX_HI : pt.ICACHE_TAG_INDEX_LO]   wb_index_hold;
+   logic [pt.ICACHE_NUM_WAYS-1:0]                               [pt.ICACHE_INDEX_HI : pt.ICACHE_TAG_INDEX_LO]   ic_b_rw_addr;
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 write_bypass_en;     //bank
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 write_bypass_en_ff;  //bank
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 index_valid;  //bank
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 ic_b_clear_en;
+   logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 ic_b_addr_match;
+
+
+
+
+    logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_NUM_BYPASS_WIDTH-1:0] wrptr;
+    logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_NUM_BYPASS_WIDTH-1:0] wrptr_in;
+    logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_NUM_BYPASS-1:0]       sel_bypass;
+    logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_NUM_BYPASS-1:0]       sel_bypass_ff;
+
+
+
+    logic [pt.ICACHE_NUM_WAYS-1:0][25:0]  sel_bypass_data;
+    logic [pt.ICACHE_NUM_WAYS-1:0]        any_bypass;
+    logic [pt.ICACHE_NUM_WAYS-1:0]        any_addr_match;
+    logic [pt.ICACHE_NUM_WAYS-1:0]        ic_tag_clken_final;
+
+      `define eb1_IC_TAG_SRAM(depth,width)                                                                                                      \
+                                  ram_``depth``x``width  ic_way_tag (                                                                           \
+                                .ME(ic_tag_clken_final[i]),                                                                                     \
+                                .WE (ic_tag_wren_q[i]),                                                                                         \
+                                .D  (ic_tag_wr_data[``width-1:0]),                                                                              \
+                                .ADR(ic_rw_addr_q[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]),                                                  \
+                                .Q  (ic_tag_data_raw_pre[i][``width-1:0]),                                                                      \
+                                .CLK (clk),                                                                                                     \
+                                .ROP ( ),                                                                                                       \
+                                                                                                                                                \
+                                .TEST1(ic_tag_ext_in_pkt[i].TEST1),                                                                             \
+                                .RME(ic_tag_ext_in_pkt[i].RME),                                                                                 \
+                                .RM(ic_tag_ext_in_pkt[i].RM),                                                                                   \
+                                                                                                                                                \
+                                .LS(ic_tag_ext_in_pkt[i].LS),                                                                                   \
+                                .DS(ic_tag_ext_in_pkt[i].DS),                                                                                   \
+                                .SD(ic_tag_ext_in_pkt[i].SD),                                                                                   \
+                                                                                                                                                \
+                                .TEST_RNM(ic_tag_ext_in_pkt[i].TEST_RNM),                                                                       \
+                                .BC1(ic_tag_ext_in_pkt[i].BC1),                                                                                 \
+                                .BC2(ic_tag_ext_in_pkt[i].BC2)                                                                                  \
+                                                                                                                                                \
+                               );                                                                                                               \
+                                                                                                                                                \
+                                                                                                                                                \
+                                                                                                                                                \
+                                                                                                                                                \
+              if (pt.ICACHE_TAG_BYPASS_ENABLE == 1) begin                                                                                                                                             \
+                                                                                                                                                                                                      \
+                 assign wrptr_in[i] = (wrptr[i] == (pt.ICACHE_TAG_NUM_BYPASS-1)) ? '0 : (wrptr[i] + 1'd1);                                                                                            \
+                                                                                                                                                                                                      \
+                 rvdffs  #(pt.ICACHE_TAG_NUM_BYPASS_WIDTH)  wrptr_ff(.*, .clk(active_clk), .en(|write_bypass_en[i]), .din (wrptr_in[i]), .dout(wrptr[i])) ;                                           \
+                                                                                                                                                                                                      \
+                 assign ic_b_sram_en[i]              = ic_tag_clken[i];                                                                                                                               \
+                                                                                                                                                                                                      \
+                 assign ic_b_read_en[i]              =  ic_b_sram_en[i] &   (ic_tag_rden_q[i]);                                                                                                       \
+                 assign ic_b_write_en[i]             =  ic_b_sram_en[i] &   (ic_tag_wren_q[i]);                                                                                                       \
+                 assign ic_tag_clken_final[i]        =  ic_b_sram_en[i] &    ~(|sel_bypass[i]);                                                                                                       \
+                                                                                                                                                                                                      \
+                 // LSB is pt.ICACHE_TAG_INDEX_LO]                                                                                                                                                    \
+                 assign ic_b_rw_addr[i] = {ic_rw_addr_q};                                                                                                                                             \
+                                                                                                                                                                                                      \
+                 always_comb begin                                                                                                                                                                    \
+                    any_addr_match[i] = '0;                                                                                                                                                           \
+                                                                                                                                                                                                      \
+                    for (int l=0; l<pt.ICACHE_TAG_NUM_BYPASS; l++) begin                                                                                                                              \
+                       any_addr_match[i] |= (ic_b_addr_match[i][l] & index_valid[i][l]);                                                                                                              \
+                    end                                                                                                                                                                               \
+                 end                                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                // it is an error to ever have 2 entries with the same index and both valid                                                                                                           \
+                for (genvar l=0; l<pt.ICACHE_TAG_NUM_BYPASS; l++) begin: BYPASS                                                                                                                       \
+                                                                                                                                                                                                      \
+                   assign ic_b_addr_match[i][l] = (wb_index_hold[i][l] ==  ic_b_rw_addr[i]) & index_valid[i][l];                                                                                      \
+                                                                                                                                                                                                      \
+                   assign ic_b_clear_en[i][l]   = ic_b_write_en[i] &   ic_b_addr_match[i][l];                                                                                                         \
+                                                                                                                                                                                                      \
+                   assign sel_bypass[i][l]      = ic_b_read_en[i]  &   ic_b_addr_match[i][l] ;                                                                                                        \
+                                                                                                                                                                                                      \
+                   assign write_bypass_en[i][l] = ic_b_read_en[i]  &  ~any_addr_match[i] & (wrptr[i] == l);                                                                                           \
+                                                                                                                                                                                                      \
+                   rvdff  #(1)  write_bypass_ff (.*, .clk(active_clk),                                                     .din(write_bypass_en[i][l]), .dout(write_bypass_en_ff[i][l])) ;                            \
+                   rvdffs #(1)  index_val_ff    (.*, .clk(active_clk), .en(write_bypass_en[i][l] | ic_b_clear_en[i][l]),         .din(~ic_b_clear_en[i][l]),  .dout(index_valid[i][l])) ;                             \
+                   rvdff  #(1)  sel_hold_ff     (.*, .clk(active_clk),                                                     .din(sel_bypass[i][l]),      .dout(sel_bypass_ff[i][l])) ;                                 \
+                                                                                                                                                                                                      \
+                   rvdffe #(.WIDTH(pt.ICACHE_INDEX_HI-pt.ICACHE_TAG_INDEX_LO+1),.OVERRIDE(1))  ic_addr_index   (.*, .en(write_bypass_en[i][l]),    .din (ic_b_rw_addr[i]),        .dout(wb_index_hold[i][l]));   \
+                   rvdffe #(``width)                                                           rd_data_hold_ff (.*, .en(write_bypass_en_ff[i][l]), .din (ic_tag_data_raw_pre[i][``width-1:0]), .dout(wb_dout_hold[i][l]));            \
+                                                                                                                                                                                                      \
+                end // block: BYPASS                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                always_comb begin                                                                                                                                                                     \
+                 any_bypass[i] = '0;                                                                                                                                                                  \
+                 sel_bypass_data[i] = '0;                                                                                                                                                             \
+                                                                                                                                                                                                      \
+                 for (int l=0; l<pt.ICACHE_TAG_NUM_BYPASS; l++) begin                                                                                                                                 \
+                    any_bypass[i]      |=  sel_bypass_ff[i][l];                                                                                                                                       \
+                    sel_bypass_data[i] |= (sel_bypass_ff[i][l]) ? wb_dout_hold[i][l] : '0;                                                                                                            \
+                 end                                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                   ic_tag_data_raw[i]   =   any_bypass[i] ?  sel_bypass_data[i] :  ic_tag_data_raw_pre[i] ;                                                                                           \
+                end // always_comb begin                                                                                                                                                              \
+                                                                                                                                                                                                      \
+             end // if (pt.ICACHE_BYPASS_ENABLE == 1)                                                                                                                                                 \
+             else begin                                                                                                                                                                               \
+                 assign ic_tag_data_raw[i]   =   ic_tag_data_raw_pre[i] ;                                                                                                                             \
+                 assign ic_tag_clken_final[i]       =   ic_tag_clken[i];                                                                                                                              \
+             end
+   for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: WAYS   // one iteration per way
+   // Each way expands the eb1_IC_TAG_SRAM macro once (first argument matches pt.ICACHE_TAG_DEPTH, second is the entry width) and then checks its own tag.
+   if (pt.ICACHE_ECC) begin  : ECC1   // ECC build: 26-bit entry, bits [20:0] are ECC-protected, bits [25:21] are the check bits
+      logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_NUM_BYPASS-1:0][25 :0] wb_dout_hold;   // bypass hold data; the macro's rd_data_hold_ff drives wb_dout_hold[i][l]
+      // At most one of the size_* branches below elaborates, selected by pt.ICACHE_TAG_DEPTH.
+      if (pt.ICACHE_TAG_DEPTH == 32)   begin : size_32
+                 `eb1_IC_TAG_SRAM(32,26)
+      end // if (pt.ICACHE_TAG_DEPTH == 32)
+      if (pt.ICACHE_TAG_DEPTH == 64)   begin : size_64
+                 `eb1_IC_TAG_SRAM(64,26)
+      end // if (pt.ICACHE_TAG_DEPTH == 64)
+      if (pt.ICACHE_TAG_DEPTH == 128)   begin : size_128
+                 `eb1_IC_TAG_SRAM(128,26)
+      end // if (pt.ICACHE_TAG_DEPTH == 128)
+       if (pt.ICACHE_TAG_DEPTH == 256)   begin : size_256
+                 `eb1_IC_TAG_SRAM(256,26)
+       end // if (pt.ICACHE_TAG_DEPTH == 256)
+       if (pt.ICACHE_TAG_DEPTH == 512)   begin : size_512
+                 `eb1_IC_TAG_SRAM(512,26)
+       end // if (pt.ICACHE_TAG_DEPTH == 512)
+       if (pt.ICACHE_TAG_DEPTH == 1024)   begin : size_1024
+                 `eb1_IC_TAG_SRAM(1024,26)
+       end // if (pt.ICACHE_TAG_DEPTH == 1024)
+       if (pt.ICACHE_TAG_DEPTH == 2048)   begin : size_2048
+                 `eb1_IC_TAG_SRAM(2048,26)
+       end // if (pt.ICACHE_TAG_DEPTH == 2048)
+       if (pt.ICACHE_TAG_DEPTH == 4096)   begin  : size_4096
+                 `eb1_IC_TAG_SRAM(4096,26)
+       end // if (pt.ICACHE_TAG_DEPTH == 4096)
+         // w_tout[i]: stored tag goes to [31:pt.ICACHE_TAG_LO], stored check bits go to [36:32]
+         assign w_tout[i][31:pt.ICACHE_TAG_LO] = ic_tag_data_raw[i][31-pt.ICACHE_TAG_LO:0] ;
+         assign w_tout[i][36:32]              = ic_tag_data_raw[i][25:21] ;
+
+         rvecc_decode  ecc_decode (
+                           .en(~dec_tlu_core_ecc_disable & ic_rd_en_ff),   // gated by ic_rd_en_ff and the core ECC-disable input
+                           .sed_ded ( 1'b1 ),    // 1 : means only detection
+                           .din({11'b0,ic_tag_data_raw[i][20:0]}),         // 21 stored bits zero-extended to 32
+                           .ecc_in({2'b0, ic_tag_data_raw[i][25:21]}),     // 5 stored check bits zero-extended to 7
+                           .dout(ic_tag_corrected_data_unc[i][31:0]),
+                           .ecc_out(ic_tag_corrected_ecc_unc[i][6:0]),
+                           .single_ecc_error(ic_tag_single_ecc_error[i]),
+                           .double_ecc_error(ic_tag_double_ecc_error[i]));
+
+          assign ic_tag_way_perr[i]= ic_tag_single_ecc_error[i] | ic_tag_double_ecc_error[i]  ;   // either error class flags the way
+      end // block: ECC1
+      else  begin : ECC0   // parity build: 22-bit entry, bit [21] is the parity bit
+      logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_NUM_BYPASS-1:0][21 :0] wb_dout_hold;   // bypass hold data; the macro's rd_data_hold_ff drives wb_dout_hold[i][l]
+      assign ic_tag_data_raw_pre[i][25:22] = '0 ;   // bits above the 22-bit entry are tied to zero
+
+      if (pt.ICACHE_TAG_DEPTH == 32)   begin : size_32
+                 `eb1_IC_TAG_SRAM(32,22)
+      end // if (pt.ICACHE_TAG_DEPTH == 32)
+      if (pt.ICACHE_TAG_DEPTH == 64)   begin : size_64
+                 `eb1_IC_TAG_SRAM(64,22)
+      end // if (pt.ICACHE_TAG_DEPTH == 64)
+      if (pt.ICACHE_TAG_DEPTH == 128)   begin : size_128
+                 `eb1_IC_TAG_SRAM(128,22)
+      end // if (pt.ICACHE_TAG_DEPTH == 128)
+       if (pt.ICACHE_TAG_DEPTH == 256)   begin : size_256
+                 `eb1_IC_TAG_SRAM(256,22)
+       end // if (pt.ICACHE_TAG_DEPTH == 256)
+       if (pt.ICACHE_TAG_DEPTH == 512)   begin : size_512
+                 `eb1_IC_TAG_SRAM(512,22)
+       end // if (pt.ICACHE_TAG_DEPTH == 512)
+       if (pt.ICACHE_TAG_DEPTH == 1024)   begin : size_1024
+                 `eb1_IC_TAG_SRAM(1024,22)
+       end // if (pt.ICACHE_TAG_DEPTH == 1024)
+       if (pt.ICACHE_TAG_DEPTH == 2048)   begin : size_2048
+                 `eb1_IC_TAG_SRAM(2048,22)
+       end // if (pt.ICACHE_TAG_DEPTH == 2048)
+       if (pt.ICACHE_TAG_DEPTH == 4096)   begin  : size_4096
+                 `eb1_IC_TAG_SRAM(4096,22)
+       end // if (pt.ICACHE_TAG_DEPTH == 4096)
+         // w_tout[i]: stored tag goes to [31:pt.ICACHE_TAG_LO], stored parity bit goes to [32]
+         assign w_tout[i][31:pt.ICACHE_TAG_LO] = ic_tag_data_raw[i][31-pt.ICACHE_TAG_LO:0] ;
+         assign w_tout[i][32]                 = ic_tag_data_raw[i][21] ;
+
+         rveven_paritycheck #(32-pt.ICACHE_TAG_LO) parcheck(.data_in   (w_tout[i][31:pt.ICACHE_TAG_LO]),
+                                                   .parity_in (w_tout[i][32]),
+                                                   .parity_err(ic_tag_way_perr[i]));
+      end // else: !if(pt.ICACHE_ECC)
+
+   end // block: WAYS
+ end // block: PACKED_0
+
+
+ else begin : PACKED_1
+
+
+   logic                                                                                ic_b_sram_en;        // OR of ic_tag_clken (driven only in the bypass-enabled macro branch)
+   logic                                                                                ic_b_read_en;        // ic_b_sram_en qualified by any ic_tag_rden_q bit
+   logic                                                                                ic_b_write_en;       // ic_b_sram_en qualified by any ic_tag_wren_q bit
+   logic [pt.ICACHE_TAG_NUM_BYPASS-1:0] [pt.ICACHE_INDEX_HI : pt.ICACHE_TAG_INDEX_LO]   wb_index_hold;       // index captured for each bypass entry
+   logic                                [pt.ICACHE_INDEX_HI : pt.ICACHE_TAG_INDEX_LO]   ic_b_rw_addr;        // current access index (copy of ic_rw_addr_q)
+   logic [pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 write_bypass_en;     // allocate strobe; only the entry selected by wrptr can be set
+   logic [pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 write_bypass_en_ff;  // registered write_bypass_en; enables the data hold register
+   logic [pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 index_valid;         // entry holds a live index
+   logic [pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 ic_b_clear_en;       // tag write to an index held in this entry
+   logic [pt.ICACHE_TAG_NUM_BYPASS-1:0]                                                 ic_b_addr_match;     // entry is valid and its index equals ic_b_rw_addr
+
+
+
+
+    logic [pt.ICACHE_TAG_NUM_BYPASS_WIDTH-1:0]  wrptr;           // next bypass entry to allocate
+    logic [pt.ICACHE_TAG_NUM_BYPASS_WIDTH-1:0]  wrptr_in;        // wrptr + 1, wrapping to 0 after pt.ICACHE_TAG_NUM_BYPASS-1
+    logic [pt.ICACHE_TAG_NUM_BYPASS-1:0]        sel_bypass;      // read of an index held in this entry
+    logic [pt.ICACHE_TAG_NUM_BYPASS-1:0]        sel_bypass_ff;   // registered sel_bypass; selects the hold data on the output mux
+
+
+
+    logic [(26*pt.ICACHE_NUM_WAYS)-1:0]  sel_bypass_data;        // NOTE(review): always 26 bits per way; the ECC0 packed row is 22 bits per way, so the upper bits look unused there - confirm
+    logic                                any_bypass;             // OR of sel_bypass_ff
+    logic                                any_addr_match;         // OR of ic_b_addr_match
+    logic                                ic_tag_clken_final;     // enable actually applied to the packed tag SRAM
+// eb1_IC_TAG_PACKED_SRAM(depth,width): instantiates ram_be_<depth>x<width> as one array shared by all ways, plus the optional read-bypass logic selected by pt.ICACHE_TAG_BYPASS_ENABLE.
+`define eb1_IC_TAG_PACKED_SRAM(depth,width)                                                               \
+                  ram_be_``depth``x``width  ic_way_tag (                                                   \
+                                .ME  ( ic_tag_clken_final),                                                \
+                                .WE  (|ic_tag_wren_q[pt.ICACHE_NUM_WAYS-1:0]),                             \
+                                .WEM (ic_tag_wren_biten_vec[``width-1:0]),                                 \
+                                // D repeats one ic_tag_wr_data slice per way; WEM presumably picks which copy is stored - confirm against the RAM model \
+                                .D   ({pt.ICACHE_NUM_WAYS{ic_tag_wr_data[``width/pt.ICACHE_NUM_WAYS-1:0]}}), \
+                                .ADR (ic_rw_addr_q[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]),            \
+                                .Q   (ic_tag_data_raw_packed_pre[``width-1:0]),                            \
+                                .CLK (clk),                                                                \
+                                .ROP ( ),                                                                  \
+                                                                                                           \
+                                .TEST1     (ic_tag_ext_in_pkt[0].TEST1),                                   \
+                                .RME      (ic_tag_ext_in_pkt[0].RME),                                      \
+                                .RM       (ic_tag_ext_in_pkt[0].RM),                                       \
+                                                                                                           \
+                                .LS       (ic_tag_ext_in_pkt[0].LS),                                       \
+                                .DS       (ic_tag_ext_in_pkt[0].DS),                                       \
+                                .SD       (ic_tag_ext_in_pkt[0].SD),                                       \
+                                                                                                           \
+                                .TEST_RNM (ic_tag_ext_in_pkt[0].TEST_RNM),                                 \
+                                .BC1      (ic_tag_ext_in_pkt[0].BC1),                                      \
+                                .BC2      (ic_tag_ext_in_pkt[0].BC2)                                       \
+                                                                                                           \
+                               );                                                                          \
+              // Optional read bypass: indexes that were read are remembered together with the SRAM output they produced.                                                                             \
+              if (pt.ICACHE_TAG_BYPASS_ENABLE == 1) begin                                                                                                                                             \
+                                                                                                                                                                                                      \
+                 assign wrptr_in = (wrptr == (pt.ICACHE_TAG_NUM_BYPASS-1)) ? '0 : (wrptr + 1'd1);                                                                                                     \
+                                                                                                                                                                                                      \
+                 rvdffs  #(pt.ICACHE_TAG_NUM_BYPASS_WIDTH)  wrptr_ff(.*, .clk(active_clk), .en(|write_bypass_en), .din (wrptr_in), .dout(wrptr)) ;                                                    \
+                                                                                                                                                                                                      \
+                 assign ic_b_sram_en              = |ic_tag_clken;                                                                                                                                    \
+                                                                                                                                                                                                      \
+                 assign ic_b_read_en              =  ic_b_sram_en &   (|ic_tag_rden_q);                                                                                                               \
+                 assign ic_b_write_en             =  ic_b_sram_en &   (|ic_tag_wren_q);                                                                                                               \
+                 assign ic_tag_clken_final        =  ic_b_sram_en &    ~(|sel_bypass);                                                                                                                \
+                                                                                                                                                                                                      \
+                 // LSB is pt.ICACHE_TAG_INDEX_LO                                                                                                                                                     \
+                 assign ic_b_rw_addr = {ic_rw_addr_q};                                                                                                                                                \
+                                                                                                                                                                                                      \
+                 always_comb begin                                                                                                                                                                    \
+                    any_addr_match = '0;                                                                                                                                                              \
+                                                                                                                                                                                                      \
+                    for (int l=0; l<pt.ICACHE_TAG_NUM_BYPASS; l++) begin                                                                                                                              \
+                       any_addr_match |= ic_b_addr_match[l];                                                                                                                                          \
+                    end                                                                                                                                                                               \
+                 end                                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                // it is an error to ever have 2 entries with the same index and both valid                                                                                                           \
+                for (genvar l=0; l<pt.ICACHE_TAG_NUM_BYPASS; l++) begin: BYPASS                                                                                                                       \
+                   // entry l matches when it is valid and holds the current index                                                                                                                    \
+                   assign ic_b_addr_match[l] = (wb_index_hold[l] ==  ic_b_rw_addr) & index_valid[l];                                                                                                  \
+                   // a tag write to a held index clears that entry's valid bit (index_val_ff din is ~ic_b_clear_en)                                                                                  \
+                   assign ic_b_clear_en[l]   = ic_b_write_en &   ic_b_addr_match[l];                                                                                                                  \
+                   // a read of a held index selects the hold register; ic_tag_clken_final is forced low for that access                                                                              \
+                   assign sel_bypass[l]      = ic_b_read_en  &   ic_b_addr_match[l] ;                                                                                                                 \
+                   // a read that matches no entry allocates the entry at wrptr                                                                                                                       \
+                   assign write_bypass_en[l] = ic_b_read_en  &  ~any_addr_match & (wrptr == l);                                                                                                       \
+                                                                                                                                                                                                      \
+                   rvdff  #(1)  write_bypass_ff (.*, .clk(active_clk),                                                     .din(write_bypass_en[l]), .dout(write_bypass_en_ff[l])) ;                                  \
+                   rvdffs #(1)  index_val_ff    (.*, .clk(active_clk), .en(write_bypass_en[l] | ic_b_clear_en[l]),         .din(~ic_b_clear_en[l]),  .dout(index_valid[l])) ;                                         \
+                   rvdff  #(1)  sel_hold_ff     (.*, .clk(active_clk),                                                     .din(sel_bypass[l]),      .dout(sel_bypass_ff[l])) ;                                               \
+                   // the index is captured on write_bypass_en; the SRAM output is captured on write_bypass_en_ff                                                                                     \
+                   rvdffe #(.WIDTH(pt.ICACHE_INDEX_HI-pt.ICACHE_TAG_INDEX_LO+1),.OVERRIDE(1)) ic_addr_index    (.*, .en(write_bypass_en[l]),    .din (ic_b_rw_addr),               .dout(wb_index_hold[l]));          \
+                   rvdffe #(``width)                                                          rd_data_hold_ff  (.*, .en(write_bypass_en_ff[l]), .din (ic_tag_data_raw_packed_pre[``width-1:0]), .dout(wb_packeddout_hold[l]));        \
+                                                                                                                                                                                                      \
+                end // block: BYPASS                                                                                                                                                                  \
+                                                                                                                                                                                                      \
+                always_comb begin                                                                                                                                                                     \
+                 any_bypass = '0;                                                                                                                                                                     \
+                 sel_bypass_data = '0;                                                                                                                                                                \
+                                                                                                                                                                                                      \
+                 for (int l=0; l<pt.ICACHE_TAG_NUM_BYPASS; l++) begin                                                                                                                                 \
+                    any_bypass      |=  sel_bypass_ff[l];                                                                                                                                             \
+                    sel_bypass_data |= (sel_bypass_ff[l]) ? wb_packeddout_hold[l] : '0;                                                                                                               \
+                 end                                                                                                                                                                                  \
+                   // held data replaces the SRAM output whenever any sel_bypass_ff bit is set                                                                                                        \
+                   ic_tag_data_raw_packed   =   any_bypass ?  sel_bypass_data :  ic_tag_data_raw_packed_pre ;                                                                                         \
+                end // always_comb begin                                                                                                                                                              \
+                                                                                                                                                                                                      \
+             end // if (pt.ICACHE_TAG_BYPASS_ENABLE == 1)                                                                                                                                             \
+             else begin                                                                                                                                                                               \
+                 assign ic_tag_data_raw_packed   =   ic_tag_data_raw_packed_pre ;                                                                                                                     \
+                 assign ic_tag_clken_final       =   |ic_tag_clken;                                                                                                                                   \
+             end
+
+   if (pt.ICACHE_ECC) begin  : ECC1   // ECC build of the packed array: 26 bits per way, all ways in one row
+    logic [(26*pt.ICACHE_NUM_WAYS)-1 :0]  ic_tag_data_raw_packed, ic_tag_wren_biten_vec, ic_tag_data_raw_packed_pre;           // data and its bit enables
+    logic [pt.ICACHE_TAG_NUM_BYPASS-1:0][(26*pt.ICACHE_NUM_WAYS)-1 :0] wb_packeddout_hold;   // bypass hold data, one packed row per bypass entry
+    for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: BITEN   // replicate way i's write enable across that way's 26-bit slice
+        assign ic_tag_wren_biten_vec[(26*i)+25:26*i] = {26{ic_tag_wren_q[i]}};
+     end
+      if (pt.ICACHE_TAG_DEPTH == 32)   begin : size_32
+        if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS   // 104 = 4 x 26
+                 `eb1_IC_TAG_PACKED_SRAM(32,104)
+        end // block: WAYS
+      else begin : WAYS   // any other way count gets the 52-bit (2 x 26) array
+                 `eb1_IC_TAG_PACKED_SRAM(32,52)
+        end // block: WAYS
+      end // block: size_32
+
+      if (pt.ICACHE_TAG_DEPTH == 64)   begin : size_64
+        if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(64,104)
+        end // block: WAYS
+      else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(64,52)
+        end // block: WAYS
+      end // block: size_64
+
+      if (pt.ICACHE_TAG_DEPTH == 128)   begin : size_128
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(128,104)
+      end // block: WAYS
+      else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(128,52)
+      end // block: WAYS
+
+      end // block: size_128
+
+      if (pt.ICACHE_TAG_DEPTH == 256)   begin : size_256
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(256,104)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(256,52)
+        end // block: WAYS
+      end // block: size_256
+
+      if (pt.ICACHE_TAG_DEPTH == 512)   begin : size_512
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(512,104)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(512,52)
+        end // block: WAYS
+      end // block: size_512
+
+      if (pt.ICACHE_TAG_DEPTH == 1024)   begin : size_1024
+         if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(1024,104)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(1024,52)
+        end // block: WAYS
+      end // block: size_1024
+
+      if (pt.ICACHE_TAG_DEPTH == 2048)   begin : size_2048
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(2048,104)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(2048,52)
+        end // block: WAYS
+      end // block: size_2048
+
+      if (pt.ICACHE_TAG_DEPTH == 4096)   begin  : size_4096
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(4096,104)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(4096,52)
+        end // block: WAYS
+      end // block: size_4096
+
+        for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin   // unpack: way i owns bits [26*i+25:26*i] of the packed row
+          assign ic_tag_data_raw[i]  = ic_tag_data_raw_packed[(26*i)+25:26*i];
+          assign w_tout[i][31:pt.ICACHE_TAG_LO] = ic_tag_data_raw[i][31-pt.ICACHE_TAG_LO:0] ;
+          assign w_tout[i][36:32]              = ic_tag_data_raw[i][25:21] ;
+          rvecc_decode  ecc_decode (
+                           .en(~dec_tlu_core_ecc_disable & ic_rd_en_ff),   // gated by ic_rd_en_ff and the core ECC-disable input
+                           .sed_ded ( 1'b1 ),    // 1 : means only detection
+                           .din({11'b0,ic_tag_data_raw[i][20:0]}),         // 21 stored bits zero-extended to 32
+                           .ecc_in({2'b0, ic_tag_data_raw[i][25:21]}),     // 5 stored check bits zero-extended to 7
+                           .dout(ic_tag_corrected_data_unc[i][31:0]),
+                           .ecc_out(ic_tag_corrected_ecc_unc[i][6:0]),
+                           .single_ecc_error(ic_tag_single_ecc_error[i]),
+                           .double_ecc_error(ic_tag_double_ecc_error[i]));
+
+          assign ic_tag_way_perr[i]= ic_tag_single_ecc_error[i] | ic_tag_double_ecc_error[i]  ;   // either error class flags the way
+     end // for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++)
+
+   end // block: ECC1
+
+
+   else  begin : ECC0
+    logic [(22*pt.ICACHE_NUM_WAYS)-1 :0]  ic_tag_data_raw_packed, ic_tag_wren_biten_vec, ic_tag_data_raw_packed_pre;           // data and its bit enables
+    logic [pt.ICACHE_TAG_NUM_BYPASS-1:0][(22*pt.ICACHE_NUM_WAYS)-1 :0] wb_packeddout_hold;
+    for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin: BITEN
+        assign ic_tag_wren_biten_vec[(22*i)+21:22*i] = {22{ic_tag_wren_q[i]}};
+     end
+      if (pt.ICACHE_TAG_DEPTH == 32)   begin : size_32
+        if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(32,88)
+        end // block: WAYS
+      else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(32,44)
+        end // block: WAYS
+      end // if (pt.ICACHE_TAG_DEPTH == 32
+
+      if (pt.ICACHE_TAG_DEPTH == 64)   begin : size_64
+        if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(64,88)
+        end // block: WAYS
+      else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(64,44)
+        end // block: WAYS
+      end // block: size_64
+
+      if (pt.ICACHE_TAG_DEPTH == 128)   begin : size_128
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(128,88)
+      end // block: WAYS
+      else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(128,44)
+      end // block: WAYS
+
+      end // block: size_128
+
+      if (pt.ICACHE_TAG_DEPTH == 256)   begin : size_256
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(256,88)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(256,44)
+        end // block: WAYS
+      end // block: size_256
+
+      if (pt.ICACHE_TAG_DEPTH == 512)   begin : size_512
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(512,88)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(512,44)
+        end // block: WAYS
+      end // block: size_512
+
+      if (pt.ICACHE_TAG_DEPTH == 1024)   begin : size_1024
+         if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(1024,88)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(1024,44)
+        end // block: WAYS
+      end // block: size_1024
+
+      if (pt.ICACHE_TAG_DEPTH == 2048)   begin : size_2048
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(2048,88)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(2048,44)
+        end // block: WAYS
+      end // block: size_2048
+
+      if (pt.ICACHE_TAG_DEPTH == 4096)   begin  : size_4096
+       if (pt.ICACHE_NUM_WAYS == 4) begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(4096,88)
+        end // block: WAYS
+       else begin : WAYS
+                 `eb1_IC_TAG_PACKED_SRAM(4096,44)
+        end // block: WAYS
+      end // block: size_4096
+
+      for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin
+          assign ic_tag_data_raw[i]  = ic_tag_data_raw_packed[(22*i)+21:22*i];
+          assign w_tout[i][31:pt.ICACHE_TAG_LO] = ic_tag_data_raw[i][31-pt.ICACHE_TAG_LO:0] ;
+          assign w_tout[i][32]                 = ic_tag_data_raw[i][21] ;
+          assign w_tout[i][36:33]              = '0 ;
+
+
+          rveven_paritycheck #(32-pt.ICACHE_TAG_LO) parcheck(.data_in   (w_tout[i][31:pt.ICACHE_TAG_LO]),
+                                                   .parity_in (w_tout[i][32]),
+                                                   .parity_err(ic_tag_way_perr[i]));
+      end
+
+
+   end // block: ECC0
+ end // block: PACKED_1
+
+
+   always_comb begin : tag_rd_out   // debug read: OR together the raw tag bits of every way selected by ic_debug_rd_way_en_ff
+      ictag_debug_rd_data[25:0] = '0;
+      for (int way_idx = 0; way_idx < pt.ICACHE_NUM_WAYS; way_idx++) begin : debug_rd_out   // pt.ICACHE_ECC set: 26 raw bits; otherwise 22 raw bits zero-extended to 26
+         ictag_debug_rd_data[25:0] |= pt.ICACHE_ECC ? (ic_tag_data_raw[way_idx] & {26{ic_debug_rd_way_en_ff[way_idx]}}) : {4'b0, (ic_tag_data_raw[way_idx][21:0] & {22{ic_debug_rd_way_en_ff[way_idx]}})};
+      end
+   end
+
+
+   for (genvar way_idx = 0; way_idx < pt.ICACHE_NUM_WAYS; way_idx++) begin : ic_rd_hit_loop   // per-way hit: way is valid and its tag bits equal those of ic_rw_addr_ff
+      assign ic_rd_hit[way_idx] = ic_tag_valid[way_idx] & (ic_rw_addr_ff[31:pt.ICACHE_TAG_LO] == w_tout[way_idx][31:pt.ICACHE_TAG_LO]);
+   end
+   // Tag error: reduction-OR of the per-way error flags, each qualified by that way's valid bit.
+   assign ic_tag_perr = |(ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0] & ic_tag_way_perr[pt.ICACHE_NUM_WAYS-1:0]);
+endmodule // eb1_IC_TAG
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu_iccm_mem.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_iccm_mem.sv
new file mode 100644
index 0000000..38e24c5
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_iccm_mem.sv
@@ -0,0 +1,506 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+
+//********************************************************************************
+// Icache closely coupled memory --- ICCM
+//********************************************************************************
+
+module eb1_ifu_iccm_mem   // ICCM storage: pt.ICCM_NUM_BANKS banks of 39-bit RAM plus two redundant 39-bit rows substituted on address match
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+   input logic 					vccd1,   // Power pin; forwarded to the sky130 SRAM macro only when USE_POWER_PINS is defined
+   input logic						vssd1,   // Power pin; forwarded to the sky130 SRAM macro only when USE_POWER_PINS is defined
+   input logic                                        clk,                                 // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic                                        active_clk,                          // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic                                        rst_l,                               // reset, active low
+   input logic                                        clk_override,                        // Override non-functional clock gating
+
+   input logic                                        iccm_wren,                           // ICCM write enable
+   input logic                                        iccm_rden,                           // ICCM read enable
+   input logic [pt.ICCM_BITS-1:1]                     iccm_rw_addr,                        // ICCM read/write address
+   input logic                                        iccm_buf_correct_ecc,                // ICCM is doing a single bit error correct cycle
+   input logic                                        iccm_correction_state,               // ICCM under a correction - This is needed to guard replacements when hit
+   input logic [2:0]                                  iccm_wr_size,                        // ICCM write size
+   input logic [77:0]                                 iccm_wr_data,                        // ICCM write data: [38:0] goes to even banks, [77:39] to odd banks
+
+   input eb1_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt,                    // External packet, one per bank (TEST1/RME/RM/LS/DS/SD/TEST_RNM/BC1/BC2 RAM pins)
+
+   output logic [63:0]                                iccm_rd_data,                        // ICCM read data
+   output logic [77:0]                                iccm_rd_data_ecc,                    // ICCM read ecc: the two full 39-bit bank words that were read
+   input  logic                                       scan_mode                            // Scan mode control
+
+);
+
+
+   logic [pt.ICCM_NUM_BANKS-1:0]                                                wren_bank;    // per-bank write enable
+   logic [pt.ICCM_NUM_BANKS-1:0]                                                rden_bank;    // per-bank read enable
+   logic [pt.ICCM_NUM_BANKS-1:0]                                                iccm_clken;   // per-bank RAM enable: write, read or clk_override
+   logic [pt.ICCM_NUM_BANKS-1:0] [pt.ICCM_BITS-1:pt.ICCM_BANK_INDEX_LO] addr_bank;           // per-bank row address
+
+   logic [pt.ICCM_NUM_BANKS-1:0] [38:0]  iccm_bank_dout, iccm_bank_dout_fn;                  // raw RAM output / output after redundant-row substitution
+   logic [pt.ICCM_NUM_BANKS-1:0] [38:0]  iccm_bank_wr_data;
+   logic [pt.ICCM_BITS-1:1]              addr_bank_inc;                                      // iccm_rw_addr plus addr_incr
+   logic [pt.ICCM_BANK_HI : 2]           iccm_rd_addr_hi_q;                                  // flopped bank-select bits of addr_bank_inc
+   logic [pt.ICCM_BANK_HI : 1]           iccm_rd_addr_lo_q;                                  // flopped low bits of iccm_rw_addr
+   logic             [63:0]              iccm_rd_data_pre;
+   logic             [63:0]              iccm_data;
+   logic [1:0]                           addr_incr;
+   logic [pt.ICCM_NUM_BANKS-1:0] [38:0]  iccm_bank_wr_data_vec;
+
+   // logic to handle hard persistent faults: two redundant rows, each with an address, 39-bit data and a valid bit
+   logic [1:0] [pt.ICCM_BITS-1:2]        redundant_address;
+   logic [1:0] [38:0]                    redundant_data;
+   logic [1:0]                           redundant_valid;
+   logic [pt.ICCM_NUM_BANKS-1:0]         sel_red1, sel_red0, sel_red1_q, sel_red0_q;         // per-bank redundant-row match, combinational and flopped
+
+
+   logic [38:0]                          redundant_data0_in, redundant_data1_in;
+   logic                                 redundant_lru, redundant_lru_in, redundant_lru_en;  // selects which redundant row is replaced next
+   logic                                 redundant_data0_en;
+   logic                                 redundant_data1_en;
+   logic                                 r0_addr_en, r1_addr_en;
+
+   // Testing persistent flip
+   //   logic [3:0]                              not_iccm_bank_dout;
+   //   logic [15:3]                     ecc_insert_flip_in, ecc_insert_flip;
+   //   logic                                 flip_en, flip_match, flip_match_q;
+   //
+   //   assign      flip_in = (iccm_rw_addr[3:2] != 2'b00);    // dont flip when bank0 - this is to make some progress in DMA streaming cases
+   //   assign      flip_en = iccm_rden;
+   //
+   //   rvdffs #(1) flipmatch  (.*,
+   //                   .clk(clk),
+   //                   .din(flip_in),
+   //                   .en(flip_en),
+   //                   .dout(flip_match_q));
+   //
+   // end of testing flip
+
+
+   assign addr_incr[1:0]                    = (iccm_wr_size[1:0] == 2'b11) ?  2'b10: 2'b01;      // step is 2 when iccm_wr_size[1:0] is 2'b11, otherwise 1 (added at address bit 1)
+   assign addr_bank_inc[pt.ICCM_BITS-1 : 1] = iccm_rw_addr[pt.ICCM_BITS-1 : 1] + addr_incr[1:0];   // second address touched by the access; its bits [pt.ICCM_BANK_HI:2] pick the second bank
+
+   for (genvar i=0; i<pt.ICCM_NUM_BANKS/2; i++) begin: mem_bank_data   // write-data fan-out, one even/odd bank pair per iteration
+      assign iccm_bank_wr_data_vec[(2*i)]   = iccm_wr_data[38:0];      // even banks take the lower 39 bits
+      assign iccm_bank_wr_data_vec[(2*i)+1] = iccm_wr_data[77:39];     // odd banks take the upper 39 bits
+   end
+
+   for (genvar i=0; i<pt.ICCM_NUM_BANKS; i++) begin: mem_bank   // per bank: enables, row address, RAM instance and redundant-row substitution
+      assign wren_bank[i]         = iccm_wren & ((iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
+      assign iccm_bank_wr_data[i] = iccm_bank_wr_data_vec[i];
+      assign rden_bank[i]         = iccm_rden & ( (iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
+      assign iccm_clken[i]        =  wren_bank[i] | rden_bank[i] | clk_override;
+      assign addr_bank[i][pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] = wren_bank[i] ? iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
+                                                                                      ((addr_bank_inc[pt.ICCM_BANK_HI:2] == i) ?
+                                                                                                    addr_bank_inc[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
+                                                                                                    iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO]);
+ `ifdef VERILATOR
+    // Verilator build: the generic eb1_ram model is commented out below; the sky130 SRAM macro is instantiated for every pt.ICCM_INDEX_BITS value.
+    /*eb1_ram #(.depth(1<<pt.ICCM_INDEX_BITS), .width(39)) iccm_bank (
+                                     // Primary ports
+                                     .ME(iccm_clken[i]),
+                                     .CLK(clk),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );*/
+               sky130_sram_1kbyte_1rw1r_32x256_8 sram(
+                                     `ifdef USE_POWER_PINS
+                                     .vccd1(vccd1),
+                                     .vssd1(vssd1),
+                                     `endif
+                                     .clk0(clk),
+                                     .csb0(~iccm_clken[i]),          // inverted bank enable
+                                     .web0(~wren_bank[i]),           // inverted bank write enable
+                                     .wmask0(4'hf),                  // all write-mask bits set
+                                     .addr0(addr_bank[i]),
+                                     .din0(iccm_bank_wr_data[i]),
+                                     .dout0(iccm_bank_dout[i]),
+                                     .clk1(clk),                     // port 1: csb1 held at 1, address tied to 0, output left open
+                                     .csb1(1'b1),
+                                     .addr1(10'h000),
+                                     .dout1()
+                                     );
+    // NOTE(review): the macro name suggests 256 words x 32 bits, but 39-bit data and a pt.ICCM_INDEX_BITS-wide address are connected here - confirm port widths and where bits [38:32] are stored.
+ `else
+     // One RAM per bank chosen by pt.ICCM_INDEX_BITS: 6..14 select depths 64..16384 (10 uses the sky130 macro), any other value selects ram_32768x39.
+     if (pt.ICCM_INDEX_BITS == 6 ) begin : iccm
+               ram_64x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+
+   else if (pt.ICCM_INDEX_BITS == 7 ) begin : iccm
+               ram_128x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+
+     else if (pt.ICCM_INDEX_BITS == 8 ) begin : iccm
+               ram_256x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+     else if (pt.ICCM_INDEX_BITS == 9 ) begin : iccm
+               ram_512x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+     else if (pt.ICCM_INDEX_BITS == 10 ) begin : iccm
+              /* ram_1024x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+                                     );*/
+               // ram_1024x39 is commented out above and replaced by the sky130 macro. NOTE(review): macro name suggests 32-bit data / 256 words versus 39-bit data / 10 index bits here - confirm.
+               sky130_sram_1kbyte_1rw1r_32x256_8 sram(
+                                     `ifdef USE_POWER_PINS
+                                     .vccd1(vccd1),
+                                     .vssd1(vssd1),
+                                     `endif
+                                     .clk0(clk),
+                                     .csb0(~iccm_clken[i]),          // inverted bank enable
+                                     .web0(~wren_bank[i]),           // inverted bank write enable
+                                     .wmask0(4'hf),                  // all write-mask bits set
+                                     .addr0(addr_bank[i]),
+                                     .din0(iccm_bank_wr_data[i]),
+                                     .dout0(iccm_bank_dout[i]),
+                                     .clk1(clk),                     // port 1: csb1 held at 1, address tied to 0, output left open
+                                     .csb1(1'b1),
+                                     .addr1(10'h000),
+                                     .dout1()
+                                     );
+     end // block: iccm
+     else if (pt.ICCM_INDEX_BITS == 11 ) begin : iccm
+               ram_2048x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+     else if (pt.ICCM_INDEX_BITS == 12 ) begin : iccm
+               ram_4096x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+     else if (pt.ICCM_INDEX_BITS == 13 ) begin : iccm
+               ram_8192x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+     else if (pt.ICCM_INDEX_BITS == 14 ) begin : iccm
+               ram_16384x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+     else begin : iccm
+               ram_32768x39 iccm_bank (
+                                     // Primary ports
+                                     .CLK(clk),
+                                     .ME(iccm_clken[i]),
+                                     .WE(wren_bank[i]),
+                                     .ADR(addr_bank[i]),
+                                     .D(iccm_bank_wr_data[i][38:0]),
+                                     .Q(iccm_bank_dout[i][38:0]),
+                                     .ROP ( ),
+                                     // These are used by SoC
+                                     .TEST1(iccm_ext_in_pkt[i].TEST1),
+                                     .RME(iccm_ext_in_pkt[i].RME),
+                                     .RM(iccm_ext_in_pkt[i].RM),
+                                     .LS(iccm_ext_in_pkt[i].LS),
+                                     .DS(iccm_ext_in_pkt[i].DS),
+                                     .SD(iccm_ext_in_pkt[i].SD) ,
+                                     .TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
+                                     .BC1(iccm_ext_in_pkt[i].BC1),
+                                     .BC2(iccm_ext_in_pkt[i].BC2)
+
+                                      );
+     end // block: iccm
+`endif
+   // The bank is selected with address bits [pt.ICCM_BANK_HI:2], the same field wren_bank/rden_bank use, so the match holds for any pt.ICCM_NUM_BANKS.
+   // match the redundant rows: sel_redN[i] is set when row N is valid and either access address equals its stored address and falls in bank i
+   assign sel_red1[i]  = (redundant_valid[1]  & (((iccm_rw_addr[pt.ICCM_BITS-1:2] == redundant_address[1][pt.ICCM_BITS-1:2]) & (iccm_rw_addr[pt.ICCM_BANK_HI:2] == i)) |
+                                                 ((addr_bank_inc[pt.ICCM_BITS-1:2]== redundant_address[1][pt.ICCM_BITS-1:2]) & (addr_bank_inc[pt.ICCM_BANK_HI:2] == i))));
+
+   assign sel_red0[i]  = (redundant_valid[0]  & (((iccm_rw_addr[pt.ICCM_BITS-1:2] == redundant_address[0][pt.ICCM_BITS-1:2]) & (iccm_rw_addr[pt.ICCM_BANK_HI:2] == i)) |
+                                                 ((addr_bank_inc[pt.ICCM_BITS-1:2]== redundant_address[0][pt.ICCM_BITS-1:2]) & (addr_bank_inc[pt.ICCM_BANK_HI:2] == i))));
+
+   rvdff #(1) selred0  (.*,
+                   .clk(active_clk),
+                   .din(sel_red0[i]),
+                   .dout(sel_red0_q[i]));
+
+   rvdff #(1) selred1  (.*,
+                   .clk(active_clk),
+                   .din(sel_red1[i]),
+                   .dout(sel_red1_q[i]));
+
+  // The match flags are flopped so that they select the output mux one cycle after the address was presented.
+  // muxing out the memory data with the redundant data if the address matches
+   assign iccm_bank_dout_fn[i][38:0] = ({39{sel_red1_q[i]}}                         & redundant_data[1][38:0]) |
+                                       ({39{sel_red0_q[i]}}                         & redundant_data[0][38:0]) |
+                                       ({39{~sel_red0_q[i] & ~sel_red1_q[i]}}       & iccm_bank_dout[i][38:0]);
+
+  end : mem_bank
+// This section does the redundancy for tolerating single bit errors
+// 2x 39 bit data values with address[hi:2] and a valid bit is needed to CAM and sub out the reads/writes to the particular locations
+// Also an LRU flop is kept to decide which of the redundant elements to replace.
+   assign r0_addr_en              = ~redundant_lru & iccm_buf_correct_ecc;   // row 0 is loaded on a correction cycle while the LRU flop is 0
+   assign r1_addr_en              = redundant_lru  & iccm_buf_correct_ecc;   // row 1 is loaded on a correction cycle while the LRU flop is 1
+   assign redundant_lru_en         = iccm_buf_correct_ecc | (((|sel_red0[pt.ICCM_NUM_BANKS-1:0]) | (|sel_red1[pt.ICCM_NUM_BANKS-1:0])) & iccm_rden & iccm_correction_state);
+   assign redundant_lru_in        = iccm_buf_correct_ecc ? ~redundant_lru : (|sel_red0[pt.ICCM_NUM_BANKS-1:0]) ? 1'b1 : 1'b0;   // correction cycle toggles; otherwise 1 on a row-0 match, else 0
+
+   rvdffs #() red_lru  (.*,                               // LRU flop for the redundant replacements
+                   .clk(active_clk),
+                   .en(redundant_lru_en),
+                   .din(redundant_lru_in),
+                   .dout(redundant_lru));
+
+    rvdffs #(pt.ICCM_BITS-2) r0_address  (.*,                 // Redundant Row 0 address
+                   .clk(active_clk),
+                   .en(r0_addr_en),
+                   .din(iccm_rw_addr[pt.ICCM_BITS-1:2]),
+                   .dout(redundant_address[0][pt.ICCM_BITS-1:2]));
+
+   rvdffs #(pt.ICCM_BITS-2) r1_address  (.*,                   // Redundant Row 1 address
+                   .clk(active_clk),
+                   .en(r1_addr_en),
+                   .din(iccm_rw_addr[pt.ICCM_BITS-1:2]),
+                   .dout(redundant_address[1][pt.ICCM_BITS-1:2]));
+
+    rvdffs #(1) r0_valid  (.*,
+                   .clk(active_clk),                                  // Redundant Row 0 Valid
+                   .en(r0_addr_en),
+                   .din(1'b1),
+                   .dout(redundant_valid[0]));
+
+   rvdffs #(1) r1_valid  (.*,                                   // Redundant Row 1 Valid
+                   .clk(active_clk),
+                   .en(r1_addr_en),
+                   .din(1'b1),
+                   .dout(redundant_valid[1]));
+
+
+
+   // We will have to update the Redundant copies in addition to the memory on subsequent writes to this memory location.
+   // The data gets updated on : 1) correction cycle, 2) Future writes - this could be W writes from DMA ( match up till addr[2]) or DW writes ( match till address[3])
+   // The data to pick also depends on the current address[2], size and the addr[2] stored in the address field of the redundant flop. Correction cycle is always W write and the data is splat on both legs, so choosing lower Word
+
+    assign redundant_data0_en      = ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[0][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[0][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[0] & iccm_wren) |
+                                      (~redundant_lru & iccm_buf_correct_ecc);
+
+    assign redundant_data0_in[38:0] = (((iccm_rw_addr[2] == redundant_address[0][2]) & iccm_rw_addr[2]) | (redundant_address[0][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39]  : iccm_wr_data[38:0];
+
+    rvdffs #(39) r0_data  (.*,                                 // Redundant Row 0 data
+                   .clk(active_clk),
+                   .en(redundant_data0_en),
+                   .din(redundant_data0_in[38:0]),
+                   .dout(redundant_data[0][38:0]));
+
+   assign redundant_data1_en      =  ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[1][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[1][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[1] & iccm_wren) |
+                                     (redundant_lru & iccm_buf_correct_ecc);
+
+   assign redundant_data1_in[38:0] = (((iccm_rw_addr[2] == redundant_address[1][2]) & iccm_rw_addr[2]) | (redundant_address[1][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39]  : iccm_wr_data[38:0];
+
+    rvdffs #(39) r1_data  (.*,                                  // Redundant Row 1 data
+                   .clk(active_clk),
+                   .en(redundant_data1_en),
+                   .din(redundant_data1_in[38:0]),
+                   .dout(redundant_data[1][38:0]));
+
+
+   rvdffs  #(pt.ICCM_BANK_HI)   rd_addr_lo_ff (.*, .clk(active_clk), .din(iccm_rw_addr [pt.ICCM_BANK_HI:1]), .dout(iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:1]), .en(1'b1));   // bit 0 of address is always 0
+   rvdffs  #(pt.ICCM_BANK_BITS) rd_addr_hi_ff (.*, .clk(active_clk), .din(addr_bank_inc[pt.ICCM_BANK_HI:2]), .dout(iccm_rd_addr_hi_q[pt.ICCM_BANK_HI:2]), .en(1'b1));   // bank-select bits of the incremented address
+   // Read data: upper word from the bank picked by iccm_rd_addr_hi_q, lower word from the bank picked by iccm_rd_addr_lo_q; shifted right 16 bits when flopped address bit 1 is set.
+   assign iccm_rd_data_pre[63:0] = {iccm_bank_dout_fn[iccm_rd_addr_hi_q][31:0], iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][31:0]};
+   assign iccm_data[63:0]        = 64'({16'b0, (iccm_rd_data_pre[63:0] >> (16*iccm_rd_addr_lo_q[1]))});
+   assign iccm_rd_data[63:0]     = {iccm_data[63:0]};
+   assign iccm_rd_data_ecc[77:0] = {iccm_bank_dout_fn[iccm_rd_addr_hi_q][38:0], iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][38:0]};   // unshifted full 39-bit words of both banks
+
+endmodule // eb1_ifu_iccm_mem
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu_ifc_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_ifc_ctl.sv
new file mode 100644
index 0000000..d5f59a8
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_ifc_ctl.sv
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// eb1_ifu_ifc_ctl.sv
+// Function: Fetch pipe control
+//
+// Comments:
+//********************************************************************************
+
+module eb1_ifu_ifc_ctl   // Fetch control: picks the next fetch address (BF stage), runs the fetch FSM and models fetch-buffer occupancy
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic clk,                         // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic free_l2clk,                  // Clock always.                  Through one clock header.  For flops with    second header built in.
+
+   input logic rst_l, // reset enable, from core pin
+   input logic scan_mode, // scan
+
+   input logic ic_hit_f,      // Icache hit
+   input logic ifu_ic_mb_empty, // Miss buffer empty
+
+   input logic ifu_fb_consume1,  // Aligner consumed 1 fetch buffer
+   input logic ifu_fb_consume2,  // Aligner consumed 2 fetch buffers
+
+   input logic dec_tlu_flush_noredir_wb, // Don't fetch on flush
+   input logic exu_flush_final, // Flush
+   input logic [31:1] exu_flush_path_final, // Flush path
+
+   input logic ifu_bp_hit_taken_f, // btb hit, select the target path
+   input logic [31:1] ifu_bp_btb_target_f, //  predicted target PC
+
+   input logic ic_dma_active, // IC DMA active, stop fetching
+   input logic ic_write_stall, // IC is writing, stop fetching
+   input logic dma_iccm_stall_any, // force a stall in the fetch pipe for DMA ICCM access
+
+   input logic [31:0]  dec_tlu_mrac_ff ,   // side_effect and cacheable for each region
+
+   output logic [31:1] ifc_fetch_addr_f, // fetch addr F
+   output logic [31:1] ifc_fetch_addr_bf, // fetch addr BF
+
+   output logic  ifc_fetch_req_f,  // fetch request valid F
+
+   output logic  ifu_pmu_fetch_stall, // pmu event measuring fetch stall
+
+   output logic                       ifc_fetch_uncacheable_bf,      // The fetch request is uncacheable space. BF stage
+   output logic                      ifc_fetch_req_bf,              // Fetch request. Comes with the address.  BF stage
+   output logic                       ifc_fetch_req_bf_raw,          // Fetch request without some qualifications. Used for clock-gating. BF stage
+   output logic                       ifc_iccm_access_bf,            // This request is to the ICCM. Do not generate misses to the bus.
+   output logic                       ifc_region_acc_fault_bf,       // Access fault. in ICCM region but offset is outside defined ICCM.
+
+   output logic  ifc_dma_access_ok // fetch is not accessing the ICCM, DMA can proceed
+
+   
+
+   );
+   // ---- Internal signals ----
+   logic [31:1]  fetch_addr_bf;             // output of the fetch address mux (drives ifc_fetch_addr_bf)
+   logic [31:1]  fetch_addr_next;           // sequential successor of ifc_fetch_addr_f
+   logic [3:0]   fb_write_f, fb_write_ns;   // fetch-buffer occupancy marker: shifted left on a fetch, right on a consume (flopped / next)
+
+   logic     fb_full_f_ns, fb_full_f;       // fb_write bit 3 set: fetch buffers full (next / flopped)
+   logic     fb_right, fb_right2, fb_left, wfm, idle;
+   logic     sel_last_addr_bf, sel_next_addr_bf;
+   logic     miss_f, miss_a;                // icache miss in F, and the same signal delayed one flop
+   logic     flush_fb, dma_iccm_stall_any_f;
+   logic     mb_empty_mod, goto_idle, leave_idle;
+   logic     fetch_bf_en;                   // load enable of the F-stage fetch address register
+   logic         line_wrap;
+   logic         fetch_addr_next_1;         // bit 1 of fetch_addr_next
+
+   // FSM assignment
+    typedef enum logic [1:0] { IDLE  = 2'b00 ,
+                               FETCH = 2'b01 ,
+                               STALL = 2'b10 ,
+                               WFM   = 2'b11   } state_t ;
+   state_t state      ;
+   state_t next_state ;
+
+   logic     dma_stall;
+   assign dma_stall = ic_dma_active | dma_iccm_stall_any_f;   // fetch is held off while IC DMA is active or the registered ICCM DMA stall is set
+
+
+
+   // Fetch address mux
+   // - flush
+   // - Miss *or* flush during WFM (icache miss buffer is blocking)
+   // - Sequential
+   // When pt.BTB_ENABLE==1 a fourth source, the predicted BTB target, is added to the mux.
+if(pt.BTB_ENABLE==1) begin
+   logic sel_btb_addr_bf;
+   // Together with exu_flush_final these selects are mutually exclusive and exactly one is set, so the AND-OR below is a one-hot mux.
+   assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
+   assign sel_btb_addr_bf  = ~exu_flush_final & ifc_fetch_req_f & ifu_bp_hit_taken_f & ic_hit_f;
+   assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f;
+
+
+   assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} &  exu_flush_path_final[31:1]) | // FLUSH path
+                  ({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
+                  ({31{sel_btb_addr_bf}} & {ifu_bp_btb_target_f[31:1]})| // BTB target
+                  ({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
+
+
+end // if (pt.BTB_ENABLE==1)
+   else begin
+   assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
+   assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ic_hit_f;
+
+   // No BTB: every hit in F advances to the sequential address.
+   assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} &  exu_flush_path_final[31:1]) | // FLUSH path
+                  ({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
+                  ({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
+
+end
+   assign fetch_addr_next[31:1] = {({ifc_fetch_addr_f[31:2]} + 31'b1), fetch_addr_next_1 };   // NOTE(review): 30-bit operand + 31'b1 yields a 31-bit sum, so the concatenation is 32 bits and its MSB is dropped by this 31-bit assignment; 30'b1 would be width-exact
+   assign line_wrap = (fetch_addr_next[pt.ICACHE_TAG_INDEX_LO] ^ ifc_fetch_addr_f[pt.ICACHE_TAG_INDEX_LO]);   // set when the bit at ICACHE_TAG_INDEX_LO differs between the incremented and the current fetch address
+
+   assign fetch_addr_next_1 = line_wrap ? 1'b0 : ifc_fetch_addr_f[1];   // bit 1 is carried over unless line_wrap is set, in which case it is cleared
+
+   assign ifc_fetch_req_bf_raw = ~idle;
+   assign ifc_fetch_req_bf =  ifc_fetch_req_bf_raw &
+   // qualifiers: fetch buffers not full-without-consume, no DMA stall, no icache write stall, no non-redirecting flush
+                 ~(fb_full_f_ns & ~(ifu_fb_consume2 | ifu_fb_consume1)) &
+                 ~dma_stall &
+                 ~ic_write_stall &
+                 ~dec_tlu_flush_noredir_wb ;
+
+
+   assign fetch_bf_en = exu_flush_final | ifc_fetch_req_f;   // the F address register loads on a flush or whenever a request is in F
+
+   assign miss_f = ifc_fetch_req_f & ~ic_hit_f & ~exu_flush_final;   // request in F that did not hit and is not being flushed
+
+   assign mb_empty_mod = (ifu_ic_mb_empty | exu_flush_final) & ~dma_stall & ~miss_f & ~miss_a;
+
+   // Halt flushes and takes us to IDLE
+   assign goto_idle = exu_flush_final & dec_tlu_flush_noredir_wb;
+   // If we're in IDLE, and we get a flush, goto FETCH
+   assign leave_idle = exu_flush_final & ~dec_tlu_flush_noredir_wb & idle;
+
+//.i 7
+//.o 2
+//.ilb state[1] state[0] reset_delayed miss_f mb_empty_mod  goto_idle leave_idle
+//.ob next_state[1] next_state[0]
+//.type fr
+//
+//# fetch 01, stall 10, wfm 11, idle 00
+//-- 1---- 01
+//-- 0--1- 00
+//00 0--00 00
+//00 0--01 01
+//
+//01 01-0- 11
+//01 00-0- 01
+//
+//11 0-10- 01
+//11 0-00- 11
+   // NOTE(review): the table above lists a reset_delayed input, but no such signal exists in this module and the equations below do not use it.
+   assign next_state[1] = (~state[1] & state[0] & miss_f & ~goto_idle) |
+              (state[1] & ~mb_empty_mod & ~goto_idle);
+   // NOTE(review): with these equations the STALL encoding (2'b10) does not appear reachable from IDLE, FETCH or WFM -- confirm.
+   assign next_state[0] = (~goto_idle & leave_idle) | (state[0] & ~goto_idle);
+
+   assign flush_fb = exu_flush_final;
+
+   // model fb write logic to mass balance the fetch buffers
+   assign fb_right = ( ifu_fb_consume1 & ~ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)) | // Consumed and no new fetch
+              (ifu_fb_consume2 &  ifc_fetch_req_f); // Consumed 2 and new fetch
+
+
+   assign fb_right2 = (ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)); // Consumed 2 and no new fetch
+
+   assign fb_left = ifc_fetch_req_f & ~(ifu_fb_consume1 | ifu_fb_consume2) & ~miss_f;   // new fetch (not a miss) and nothing consumed
+
+// CBH
+   assign fb_write_ns[3:0] = ( ({4{(flush_fb)}} & 4'b0001) |
+                   ({4{~flush_fb & fb_right }} & {1'b0, fb_write_f[3:1]}) |
+                   ({4{~flush_fb & fb_right2}} & {2'b0, fb_write_f[3:2]}) |
+                   ({4{~flush_fb & fb_left  }} & {fb_write_f[2:0], 1'b0}) |
+                   ({4{~flush_fb & ~fb_right & ~fb_right2 & ~fb_left}}  & fb_write_f[3:0]));
+   // flush resets the marker to 4'b0001; otherwise shift right by 1 or 2, shift left by 1, or hold
+
+   assign fb_full_f_ns = fb_write_ns[3];
+
+   assign idle     = state      == IDLE  ;
+   assign wfm      = state      == WFM   ;
+   // 10-bit state bundle registered on the always-running clock; din and dout fields line up position by position
+   rvdffie #(10) fbwrite_ff (.*, .clk(free_l2clk),
+                          .din( {dma_iccm_stall_any, miss_f, ifc_fetch_req_bf, next_state[1:0], fb_full_f_ns, fb_write_ns[3:0]}),
+                          .dout({dma_iccm_stall_any_f, miss_a, ifc_fetch_req_f, state[1:0], fb_full_f, fb_write_f[3:0]}));
+   // stall event: in WFM, or not idle while blocked by full fetch buffers (no consume, no flush) or by a DMA stall
+   assign ifu_pmu_fetch_stall = wfm | 
+                (ifc_fetch_req_bf_raw & ( (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1 | exu_flush_final)) |
+                  dma_stall));
+
+
+
+   assign ifc_fetch_addr_bf[31:1] = fetch_addr_bf[31:1];
+   // F-stage fetch address register, loaded only when fetch_bf_en is set
+   rvdffpcie #(31) faddrf1_ff  (.*, .en(fetch_bf_en), .din(fetch_addr_bf[31:1]), .dout(ifc_fetch_addr_f[31:1]));
+
+   // ICCM present: classify the BF address against the ICCM start address and size
+ if (pt.ICCM_ENABLE)  begin
+   logic iccm_acc_in_region_bf;
+   logic iccm_acc_in_range_bf;
+   rvrangecheck #( .CCM_SADR    (pt.ICCM_SADR),
+                   .CCM_SIZE    (pt.ICCM_SIZE) ) iccm_rangecheck (
+                                     .addr     ({ifc_fetch_addr_bf[31:1],1'b0}) ,
+                                     .in_range (iccm_acc_in_range_bf) ,
+                                     .in_region(iccm_acc_in_region_bf)
+                                     );
+
+   assign ifc_iccm_access_bf = iccm_acc_in_range_bf ;
+   // DMA may use the ICCM when no flush is in progress and fetch is not targeting it, is blocked on full buffers, is in WFM without a request, or is idle; also whenever the registered DMA stall is set
+  assign ifc_dma_access_ok = ( (~ifc_iccm_access_bf |
+                 (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1)) |
+                 (wfm  & ~ifc_fetch_req_bf) |
+                 idle ) & ~exu_flush_final) |
+                  dma_iccm_stall_any_f;
+
+  assign ifc_region_acc_fault_bf = ~iccm_acc_in_range_bf & iccm_acc_in_region_bf ;   // in the ICCM region but outside the implemented range
+ end
+ else  begin
+   assign ifc_iccm_access_bf = 1'b0 ;
+   assign ifc_dma_access_ok  = 1'b0 ;
+   assign ifc_region_acc_fault_bf  = 1'b0 ;
+ end
+   // The region number is address bits [31:28]; {region, 1'b0} selects the even-numbered bit of that region's entry in dec_tlu_mrac_ff
+   assign ifc_fetch_uncacheable_bf =  ~dec_tlu_mrac_ff[{ifc_fetch_addr_bf[31:28] , 1'b0 }]  ; // bit 0 of each region description is the cacheable bit
+
+endmodule // eb1_ifu_ifc_ctl
+
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_ifu_mem_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_mem_ctl.sv
new file mode 100644
index 0000000..77c9ebd
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_ifu_mem_ctl.sv
@@ -0,0 +1,1672 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+
+
+//********************************************************************************
+// Function: Icache and ICCM control
+// BFF -> F1 -> F2 -> A
+//********************************************************************************
+
+module eb1_ifu_mem_ctl
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic clk,                                                 // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic active_clk,                                          // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic free_l2clk,                                          // Clock always.                  Through one clock header.  For flops with    second header built in.
+   input logic rst_l,                                               // reset, active low
+
+   input logic                       exu_flush_final,               // Flush from the pipeline., includes flush lower
+   input logic                       dec_tlu_flush_lower_wb,        // Flush lower from the pipeline.
+   input logic                       dec_tlu_flush_err_wb,          // Flush from the pipeline due to perr.
+   input logic                       dec_tlu_i0_commit_cmt,         // committed i0 instruction
+   input logic                       dec_tlu_force_halt,            // force halt.
+
+   input logic [31:1]                ifc_fetch_addr_bf,             // Fetch Address byte aligned always.      F1 stage.
+   input logic                       ifc_fetch_uncacheable_bf,      // The fetch request is uncacheable space. F1 stage
+   input logic                       ifc_fetch_req_bf,              // Fetch request. Comes with the address.  F1 stage
+   input logic                       ifc_fetch_req_bf_raw,          // Fetch request without some qualifications. Used for clock-gating. F1 stage
+   input logic                       ifc_iccm_access_bf,            // This request is to the ICCM. Do not generate misses to the bus.
+   input logic                       ifc_region_acc_fault_bf,       // Access fault. in ICCM region but offset is outside defined ICCM.
+   input logic                       ifc_dma_access_ok,             // It is OK to give dma access to the ICCM. (ICCM is not busy this cycle).
+   input logic                       dec_tlu_fence_i_wb,            // Fence.i instruction is committing. Clear all Icache valids.
+   input logic                       ifu_bp_hit_taken_f,            // Branch is predicted taken. Kill the fetch next cycle.
+
+   input logic                       ifu_bp_inst_mask_f,            // tell ic which valids to kill because of a taken branch, right justified
+
+   output logic                      ifu_miss_state_idle,           // No icache misses are outstanding.
+   output logic                      ifu_ic_mb_empty,               // Continue with normal fetching. This does not mean that miss is finished.
+   output logic                      ic_dma_active  ,               // In the middle of servicing dma request to ICCM. Do not make any new requests.
+   output logic                      ic_write_stall,                // Stall fetch the cycle we are writing the cache.
+
+/// PMU signals
+   output logic                      ifu_pmu_ic_miss,               // IC miss event
+   output logic                      ifu_pmu_ic_hit,                // IC hit event
+   output logic                      ifu_pmu_bus_error,             // Bus error event
+   output logic                      ifu_pmu_bus_busy,              // Bus busy event
+   output logic                      ifu_pmu_bus_trxn,              // Bus transaction
+
+  //-------------------------- IFU AXI signals--------------------------
+   // AXI Write Channels
+   output logic                            ifu_axi_awvalid,
+   output logic [pt.IFU_BUS_TAG-1:0]       ifu_axi_awid,
+   output logic [31:0]                     ifu_axi_awaddr,
+   output logic [3:0]                      ifu_axi_awregion,
+   output logic [7:0]                      ifu_axi_awlen,
+   output logic [2:0]                      ifu_axi_awsize,
+   output logic [1:0]                      ifu_axi_awburst,
+   output logic                            ifu_axi_awlock,
+   output logic [3:0]                      ifu_axi_awcache,
+   output logic [2:0]                      ifu_axi_awprot,
+   output logic [3:0]                      ifu_axi_awqos,
+
+   output logic                            ifu_axi_wvalid,
+   output logic [63:0]                     ifu_axi_wdata,
+   output logic [7:0]                      ifu_axi_wstrb,
+   output logic                            ifu_axi_wlast,
+
+   output logic                            ifu_axi_bready,
+
+   // AXI Read Channels
+   output logic                            ifu_axi_arvalid,
+   input  logic                            ifu_axi_arready,
+   output logic [pt.IFU_BUS_TAG-1:0]       ifu_axi_arid,
+   output logic [31:0]                     ifu_axi_araddr,
+   output logic [3:0]                      ifu_axi_arregion,
+   output logic [7:0]                      ifu_axi_arlen,
+   output logic [2:0]                      ifu_axi_arsize,
+   output logic [1:0]                      ifu_axi_arburst,
+   output logic                            ifu_axi_arlock,
+   output logic [3:0]                      ifu_axi_arcache,
+   output logic [2:0]                      ifu_axi_arprot,
+   output logic [3:0]                      ifu_axi_arqos,
+
+   input  logic                            ifu_axi_rvalid,
+   output logic                            ifu_axi_rready,
+   input  logic [pt.IFU_BUS_TAG-1:0]       ifu_axi_rid,
+   input  logic [63:0]                     ifu_axi_rdata,
+   input  logic [1:0]                      ifu_axi_rresp,
+
+    input  logic                     ifu_bus_clk_en,
+
+
+   input  logic                      dma_iccm_req,      //  dma iccm command (read or write)
+   input  logic [31:0]               dma_mem_addr,      //  dma address
+   input  logic [2:0]                dma_mem_sz,        //  size
+   input  logic                      dma_mem_write,     //  write
+   input  logic [63:0]               dma_mem_wdata,     //  write data
+   input  logic [2:0]                dma_mem_tag,       //  DMA Buffer entry number
+
+   output logic                      iccm_dma_ecc_error,//   Data read from iccm has an ecc error
+   output logic                      iccm_dma_rvalid,   //   Data read from iccm is valid
+   output logic [63:0]               iccm_dma_rdata,    //   dma data read from iccm
+   output logic [2:0]                iccm_dma_rtag,     //   Tag of the DMA req
+   output logic                      iccm_ready,        //   iccm ready to accept new command.
+
+
+//   I$ & ITAG Ports
+   output logic [31:1]               ic_rw_addr,         // Read/Write addresss to the Icache.
+   output logic [pt.ICACHE_NUM_WAYS-1:0]                ic_wr_en,           // Icache write enable, when filling the Icache.
+   output logic                      ic_rd_en,           // Icache read  enable.
+
+   output logic [pt.ICACHE_BANKS_WAY-1:0] [70:0]               ic_wr_data,           // Data to fill to the Icache. With ECC
+   input  logic [63:0]               ic_rd_data ,          // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+   input  logic [70:0]               ic_debug_rd_data ,          // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+   input  logic [25:0]               ictag_debug_rd_data,  // Debug icache tag.
+   output logic [70:0]               ic_debug_wr_data,     // Debug wr cache.
+   output logic [70:0]               ifu_ic_debug_rd_data, // debug data read
+
+
+   input  logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr,    //
+   input  logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr,
+
+   output logic [pt.ICACHE_INDEX_HI:3]               ic_debug_addr,      // Read/Write addresss to the Icache.
+   output logic                      ic_debug_rd_en,     // Icache debug rd
+   output logic                      ic_debug_wr_en,     // Icache debug wr
+   output logic                      ic_debug_tag_array, // Debug tag array
+   output logic [pt.ICACHE_NUM_WAYS-1:0]                ic_debug_way,       // Debug way. Rd or Wr.
+
+
+   output logic [pt.ICACHE_NUM_WAYS-1:0]                ic_tag_valid,       // Valid bits when accessing the Icache. One valid bit per way. F2 stage
+
+   input  logic [pt.ICACHE_NUM_WAYS-1:0]                ic_rd_hit,          // Compare hits from Icache tags. Per way.  F2 stage
+   input  logic                      ic_tag_perr,        // Icache Tag parity error
+
+   // ICCM ports
+   output logic [pt.ICCM_BITS-1:1]  iccm_rw_addr,       // ICCM read/write address.
+   output logic                      iccm_wren,          // ICCM write enable (through the DMA)
+   output logic                      iccm_rden,          // ICCM read enable.
+   output logic [77:0]               iccm_wr_data,       // ICCM write data.
+   output logic [2:0]                iccm_wr_size,       // ICCM write location within DW.
+
+   input  logic [63:0]               iccm_rd_data,       // Data read from ICCM.
+   input  logic [77:0]               iccm_rd_data_ecc,   // Data + ECC read from ICCM.
+   input  logic [1:0]                ifu_fetch_val,
+   // IFU control signals
+   output logic                      ic_hit_f,               // Hit in Icache(if Icache access) or ICCM access( ICCM always has ic_hit_f)
+   output logic [1:0]                ic_access_fault_f,      // Access fault (bus error or ICCM access in region but out of offset range).
+   output logic [1:0]                ic_access_fault_type_f, // Access fault types
+   output logic                      iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc  error.
+   output logic [1:0]                iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc  error.
+   output logic                      ic_error_start,         // This has any I$ errors ( data/tag/ecc/parity )
+
+   output logic                      ifu_async_error_start,  // Or of the sb iccm, and all the icache errors sent to aligner to stop
+   output logic                      iccm_dma_sb_error,      // Single Bit ECC error from a DMA access
+   output logic [1:0]                ic_fetch_val_f,         // valid bytes for fetch. To the Aligner.
+   output logic [31:0]               ic_data_f,              // Data read from Icache or ICCM. To the Aligner.
+   output logic [63:0]               ic_premux_data,         // Premuxed data to be muxed with Icache data
+   output logic                      ic_sel_premux_data,     // Select premux data.
+
+/////  Debug
+   input  eb1_cache_debug_pkt_t     dec_tlu_ic_diag_pkt ,       // Icache/tag debug read/write packet
+   input  logic                      dec_tlu_core_ecc_disable,   // disable the ecc checking and flagging
+   output logic                      ifu_ic_debug_rd_data_valid, // debug data valid.
+   output logic                      iccm_buf_correct_ecc,
+   output logic                      iccm_correction_state,
+
+
+   input  logic         scan_mode
+   );
+
+//  Create different defines for ICACHE and ICCM enable combinations
+
+ localparam   NUM_OF_BEATS = 8 ;
+
+
+
+   logic [31:3]    ifu_ic_req_addr_f;
+   logic           uncacheable_miss_in ;
+   logic           uncacheable_miss_ff;
+
+
+
+   logic           bus_ifu_wr_en     ;
+   logic           bus_ifu_wr_en_ff  ;
+   logic           bus_ifu_wr_en_ff_q  ;
+   logic           bus_ifu_wr_en_ff_wo_err  ;
+   logic [pt.ICACHE_NUM_WAYS-1:0]     bus_ic_wr_en ;
+
+   logic           reset_tag_valid_for_miss  ;
+
+
+   logic [pt.ICACHE_STATUS_BITS-1:0]     way_status;
+   logic [pt.ICACHE_STATUS_BITS-1:0]     way_status_mb_in;
+   logic [pt.ICACHE_STATUS_BITS-1:0]     way_status_rep_new;
+   logic [pt.ICACHE_STATUS_BITS-1:0]     way_status_mb_ff;
+   logic [pt.ICACHE_STATUS_BITS-1:0]     way_status_new;
+   logic [pt.ICACHE_STATUS_BITS-1:0]     way_status_hit_new;
+   logic [pt.ICACHE_STATUS_BITS-1:0]     way_status_new_w_debug;
+   logic [pt.ICACHE_NUM_WAYS-1:0]     tagv_mb_in;
+   logic [pt.ICACHE_NUM_WAYS-1:0]     tagv_mb_ff;
+
+
+   logic           ifu_wr_data_comb_err ;
+   logic           ifu_byp_data_err_new;
+   logic  [1:0]    ifu_byp_data_err_f;
+   logic           ifu_wr_cumulative_err_data;
+   logic           ifu_wr_cumulative_err;
+   logic           ifu_wr_data_comb_err_ff;
+   logic           scnd_miss_index_match ;
+
+
+   logic           ifc_dma_access_q_ok;
+   logic           ifc_iccm_access_f ;
+   logic           ifc_region_acc_fault_f;
+   logic           ifc_region_acc_fault_final_f;
+   logic  [1:0]    ifc_bus_acc_fault_f;
+   logic           ic_act_miss_f;
+   logic           ic_miss_under_miss_f;
+   logic           ic_ignore_2nd_miss_f;
+   logic           ic_act_hit_f;
+   logic           miss_pending;
+   logic [31:1]    imb_in , imb_ff  ;
+   logic [31:pt.ICACHE_BEAT_ADDR_HI+1]    miss_addr_in , miss_addr  ;
+   logic           miss_wrap_f ;
+   logic           flush_final_f;
+   logic           ifc_fetch_req_f;
+   logic           ifc_fetch_req_f_raw;
+   logic           fetch_req_f_qual   ;
+   logic           ifc_fetch_req_qual_bf ;
+   logic [pt.ICACHE_NUM_WAYS-1:0]     replace_way_mb_any;
+   logic           last_beat;
+   logic           reset_beat_cnt  ;
+   logic [pt.ICACHE_BEAT_ADDR_HI:3]     ic_req_addr_bits_hi_3 ;
+   logic [pt.ICACHE_BEAT_ADDR_HI:3]     ic_wr_addr_bits_hi_3 ;
+   logic [31:1]    ifu_fetch_addr_int_f ;
+   logic [31:1]    ifu_ic_rw_int_addr ;
+   logic           crit_wd_byp_ok_ff ;
+   logic           ic_crit_wd_rdy_new_ff;
+   logic   [79:0]  ic_byp_data_only_pre_new;
+   logic   [79:0]  ic_byp_data_only_new;
+   logic           ic_byp_hit_f ;
+   logic           ic_valid ;
+   logic           ic_valid_ff;
+   logic           reset_all_tags;
+   logic           ic_valid_w_debug;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0]     ifu_tag_wren,ifu_tag_wren_ff;
+   logic [pt.ICACHE_NUM_WAYS-1:0]     ic_debug_tag_wr_en;
+   logic [pt.ICACHE_NUM_WAYS-1:0]     ifu_tag_wren_w_debug;
+   logic [pt.ICACHE_NUM_WAYS-1:0]     ic_debug_way_ff;
+   logic           ic_debug_rd_en_ff   ;
+   logic           fetch_bf_f_c1_clken ;
+   logic           fetch_bf_f_c1_clk;
+   logic           debug_c1_clken;
+   logic           debug_c1_clk;
+
+   logic           reset_ic_in ;
+   logic           reset_ic_ff ;
+   logic [pt.ICACHE_BEAT_ADDR_HI:1]     vaddr_f ;
+   logic [31:1]    ifu_status_wr_addr;
+   logic           sel_mb_addr ;
+   logic           sel_mb_addr_ff ;
+   logic           sel_mb_status_addr ;
+   logic [63:0]    ic_final_data;
+
+   logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_ff ;
+   logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_ff ;
+   logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_w_debug ;
+   logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_w_debug ;
+
+   logic [pt.ICACHE_STATUS_BITS-1:0]                              way_status_new_ff ;
+   logic                                    way_status_wr_en_ff ;
+   logic [pt.ICACHE_TAG_DEPTH-1:0][pt.ICACHE_STATUS_BITS-1:0]        way_status_out ;
+   logic [1:0]                              ic_debug_way_enc;
+
+   logic [pt.IFU_BUS_TAG-1:0]             ifu_bus_rid_ff;
+
+   logic         fetch_req_icache_f;
+   logic         fetch_req_iccm_f;
+   logic         ic_iccm_hit_f;
+   logic         fetch_uncacheable_ff;
+   logic         way_status_wr_en;
+   logic         sel_byp_data;
+   logic         sel_ic_data;
+   logic         sel_iccm_data;
+   logic         ic_rd_parity_final_err;
+   logic         ic_act_miss_f_delayed;
+   logic         bus_ifu_wr_data_error;
+   logic         bus_ifu_wr_data_error_ff;
+   logic         way_status_wr_en_w_debug;
+   logic         ic_debug_tag_val_rd_out;
+   logic         ifu_pmu_ic_miss_in;
+   logic         ifu_pmu_ic_hit_in;
+   logic         ifu_pmu_bus_error_in;
+   logic         ifu_pmu_bus_trxn_in;
+   logic         ifu_pmu_bus_busy_in;
+   logic         ic_debug_ict_array_sel_in;
+   logic         ic_debug_ict_array_sel_ff;
+   logic         debug_data_clken;
+   logic         last_data_recieved_in ;
+   logic         last_data_recieved_ff ;
+
+   logic                          ifu_bus_rvalid           ;
+   logic                          ifu_bus_rvalid_ff        ;
+   logic                          ifu_bus_rvalid_unq_ff    ;
+   logic                          ifu_bus_arready_unq       ;
+   logic                          ifu_bus_arready_unq_ff    ;
+   logic                          ifu_bus_arvalid           ;
+   logic                          ifu_bus_arvalid_ff        ;
+   logic                          ifu_bus_arready           ;
+   logic                          ifu_bus_arready_ff        ;
+   logic [63:0]                   ifu_bus_rdata_ff        ;
+   logic [1:0]                    ifu_bus_rresp_ff          ;
+   logic                          ifu_bus_rsp_valid ;
+   logic                          ifu_bus_rsp_ready ;
+   logic [pt.IFU_BUS_TAG-1:0]     ifu_bus_rsp_tag;
+   logic [63:0]                   ifu_bus_rsp_rdata;
+   logic [1:0]                    ifu_bus_rsp_opc;
+
+   logic [pt.ICACHE_NUM_BEATS-1:0]    write_fill_data;
+   logic [pt.ICACHE_NUM_BEATS-1:0]    wr_data_c1_clk;
+   logic [pt.ICACHE_NUM_BEATS-1:0]    ic_miss_buff_data_valid_in;
+   logic [pt.ICACHE_NUM_BEATS-1:0]    ic_miss_buff_data_valid;
+   logic [pt.ICACHE_NUM_BEATS-1:0]    ic_miss_buff_data_error_in;
+   logic [pt.ICACHE_NUM_BEATS-1:0]    ic_miss_buff_data_error;
+   logic [pt.ICACHE_BEAT_ADDR_HI:1]    byp_fetch_index;
+   logic [pt.ICACHE_BEAT_ADDR_HI:2]    byp_fetch_index_0;
+   logic [pt.ICACHE_BEAT_ADDR_HI:2]    byp_fetch_index_1;
+   logic [pt.ICACHE_BEAT_ADDR_HI:3]    byp_fetch_index_inc;
+   logic [pt.ICACHE_BEAT_ADDR_HI:2]    byp_fetch_index_inc_0;
+   logic [pt.ICACHE_BEAT_ADDR_HI:2]    byp_fetch_index_inc_1;
+   logic          miss_buff_hit_unq_f ;
+   logic          stream_hit_f ;
+   logic          stream_miss_f ;
+   logic          stream_eol_f ;
+   logic          crit_byp_hit_f ;
+   logic [pt.IFU_BUS_TAG-1:0] other_tag ;
+   logic [(2*pt.ICACHE_NUM_BEATS)-1:0] [31:0] ic_miss_buff_data;
+   logic [63:0] ic_miss_buff_half;
+   logic        scnd_miss_req, scnd_miss_req_q;
+   logic        scnd_miss_req_in;
+
+
+   logic [pt.ICCM_BITS-1:2]                iccm_ecc_corr_index_ff;
+   logic [pt.ICCM_BITS-1:2]                iccm_ecc_corr_index_in;
+   logic [38:0]                         iccm_ecc_corr_data_ff;
+   logic                                iccm_ecc_write_status     ;
+   logic                                iccm_rd_ecc_single_err_ff   ;
+   logic                                iccm_error_start;     // start the error fsm
+   logic                                perr_state_en;
+   logic                                miss_state_en;
+
+   logic        busclk;
+   logic        busclk_force;
+   logic        busclk_reset;
+   logic        bus_ifu_bus_clk_en_ff;
+   logic        bus_ifu_bus_clk_en ;
+
+   logic        ifc_bus_ic_req_ff_in;
+   logic        ifu_bus_cmd_valid ;
+   logic        ifu_bus_cmd_ready ;
+
+   logic        bus_inc_data_beat_cnt     ;
+   logic        bus_reset_data_beat_cnt   ;
+   logic        bus_hold_data_beat_cnt    ;
+
+   logic        bus_inc_cmd_beat_cnt     ;
+   logic        bus_reset_cmd_beat_cnt_0   ;
+   logic        bus_reset_cmd_beat_cnt_secondlast   ;
+   logic        bus_hold_cmd_beat_cnt    ;
+
+   logic [pt.ICACHE_BEAT_BITS-1:0]  bus_new_data_beat_count  ;
+   logic [pt.ICACHE_BEAT_BITS-1:0]  bus_data_beat_count      ;
+
+   logic [pt.ICACHE_BEAT_BITS-1:0]  bus_new_cmd_beat_count  ;
+   logic [pt.ICACHE_BEAT_BITS-1:0]  bus_cmd_beat_count      ;
+
+
+   logic [pt.ICACHE_BEAT_BITS-1:0]  bus_new_rd_addr_count;
+   logic [pt.ICACHE_BEAT_BITS-1:0]  bus_rd_addr_count;
+
+
+   logic        bus_cmd_sent           ;
+   logic        bus_last_data_beat     ;
+
+
+   logic [pt.ICACHE_NUM_WAYS-1:0]       bus_wren            ;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0]       bus_wren_last       ;
+   logic [pt.ICACHE_NUM_WAYS-1:0]       wren_reset_miss      ;
+   logic        ifc_dma_access_ok_d;
+   logic        ifc_dma_access_ok_prev;
+
+   logic   bus_cmd_req_in ;
+   logic   bus_cmd_req_hold ;
+
+   logic   second_half_available ;
+   logic   write_ic_16_bytes ;
+
+   logic   ifc_region_acc_fault_final_bf;
+   logic   ifc_region_acc_fault_memory_bf;
+   logic   ifc_region_acc_fault_memory_f;
+   logic   ifc_region_acc_okay;
+
+   logic   iccm_correct_ecc;
+   logic   dma_sb_err_state, dma_sb_err_state_ff;
+   logic   two_byte_instr;
+
+   // States of the miss-handling FSM. All eight encodings are decoded by the
+   // MISS_SM case statement below; IDLE is the only state for which
+   // miss_pending is low.
+   typedef enum logic [2:0] {IDLE=3'b000, CRIT_BYP_OK=3'b001, HIT_U_MISS=3'b010, MISS_WAIT=3'b011,CRIT_WRD_RDY=3'b100,SCND_MISS=3'b101,STREAM=3'b110 , STALL_SCND_MISS=3'b111} miss_state_t;
+   // miss_state is the registered state, miss_nxtstate the combinational next state.
+   miss_state_t miss_state, miss_nxtstate;
+
+   // States of the error-stop FSM. In this part of the file err_stop_state is
+   // only read (ic_dma_active, ic_fetch_val_f[1]).
+   // NOTE(review): the FSM that drives these signals is outside this excerpt.
+   typedef enum logic [1:0] {ERR_STOP_IDLE=2'b00, ERR_FETCH1=2'b01 , ERR_FETCH2=2'b10 , ERR_STOP_FETCH=2'b11} err_stop_state_t;
+   err_stop_state_t err_stop_state, err_stop_nxtstate;
+   logic   err_stop_state_en ;
+   logic   err_stop_fetch ;
+
+   logic   ic_crit_wd_rdy;         // Critical fetch is ready to be bypassed.
+
+   logic   ifu_bp_hit_taken_q_f;
+   logic   ifu_bus_rvalid_unq;
+   logic   bus_cmd_beat_en;
+
+
+// ---- Clock gating section -----
+// c1 clock enables
+
+
+   // Fetch-path enable: asserted for any raw/registered fetch request, while a
+   // miss is outstanding, on a flush, or when a second-miss request is issued.
+   assign fetch_bf_f_c1_clken  = ifc_fetch_req_bf_raw | ifc_fetch_req_f | miss_pending | exu_flush_final | scnd_miss_req;
+   // Debug-path enable: asserted for a debug read or write of the I-cache.
+   assign debug_c1_clken       = ic_debug_rd_en | ic_debug_wr_en ;
+   // C1 - 1 clock pulse for data
+`ifdef RV_FPGA_OPTIMIZE
+   // With RV_FPGA_OPTIMIZE the gated clocks are tied off. The rvdff_fpga
+   // instances in this file are also given .clken and .rawclk(clk).
+   // NOTE(review): presumably they use enable + raw clock in this mode - confirm in the library.
+   assign fetch_bf_f_c1_clk = 1'b0;
+   assign debug_c1_clk      = 1'b0;
+`else
+   // Clock headers producing the gated clocks from the enables above.
+   rvclkhdr fetch_bf_f_c1_cgc    ( .en(fetch_bf_f_c1_clken),     .l1clk(fetch_bf_f_c1_clk), .* );
+   rvclkhdr debug_c1_cgc         ( .en(debug_c1_clken),          .l1clk(debug_c1_clk), .* );
+`endif
+
+
+// ------ end clock gating section ------------------------
+
+   logic [1:0]    iccm_single_ecc_error;
+   logic          dma_iccm_req_f ;
+   // Single-bit ECC error on either ICCM ECC flag while dma_iccm_req_f is set.
+   assign iccm_dma_sb_error     = (|iccm_single_ecc_error[1:0] )  & dma_iccm_req_f ;
+   // An asynchronous error sequence starts on an ICCM single-bit read error or an I-cache error.
+   assign ifu_async_error_start = iccm_rd_ecc_single_err | ic_error_start;
+
+
+   // States of the parity/ECC error FSM (the state machine itself follows later in the file).
+   typedef enum logic [2:0] {ERR_IDLE=3'b000, IC_WFF=3'b001 , ECC_WFF=3'b010 , ECC_CORR=3'b011, DMA_SB_ERR=3'b100} perr_state_t;
+   perr_state_t perr_state, perr_nxtstate;
+
+
+   // Asserted while ECC correction is running, while the perr FSM is in
+   // DMA_SB_ERR, while the error-stop FSM is in ERR_STOP_FETCH, or on err_stop_fetch.
+   assign ic_dma_active = iccm_correct_ecc | (perr_state == DMA_SB_ERR) | (err_stop_state == ERR_STOP_FETCH) | err_stop_fetch |
+                          dec_tlu_flush_err_wb; // The last term is to give a error-correction a chance to finish before refetch starts
+
+   // Second-miss request: a bus response is accepted (valid & ready on an
+   // enabled bus clock), the new data beat count is all ones, the current miss
+   // is cacheable, the FSM is in or entering SCND_MISS, and there is no flush.
+   assign scnd_miss_req_in     = ifu_bus_rsp_valid & bus_ifu_bus_clk_en & ifu_bus_rsp_ready &
+                                 (&bus_new_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]) &
+                                 ~uncacheable_miss_ff &  ((miss_state == SCND_MISS) | (miss_nxtstate == SCND_MISS)) & ~exu_flush_final;
+
+   // ifu_bp_hit_taken_f qualified with a fetch hit.
+   assign ifu_bp_hit_taken_q_f = ifu_bp_hit_taken_f & ic_hit_f ;
+
+   //////////////////////////////////// Create Miss State Machine ///////////////////////
+   //                                   Create Miss State Machine                      //
+   //                                   Create Miss State Machine                      //
+   //                                   Create Miss State Machine                      //
+   //////////////////////////////////// Create Miss State Machine ///////////////////////
+   // Miss state machine: next-state (miss_nxtstate) and state-update enable
+   // (miss_state_en) for each miss_state. The state flop below only loads
+   // miss_nxtstate when miss_state_en is set, so miss_nxtstate is a don't-care
+   // whenever the enable is low. "Last beat written" below means
+   // (bus_ifu_wr_en_ff & last_beat).
+   always_comb begin : MISS_SM
+      // Defaults; every case arm overrides both.
+      miss_nxtstate   = IDLE;
+      miss_state_en   = 1'b0;
+      case (miss_state)
+         // Leaves only on an actual miss that is not force-halted. A miss that
+         // coincides with a flush goes to HIT_U_MISS instead of CRIT_BYP_OK.
+         IDLE: begin : idle
+                  miss_nxtstate = (ic_act_miss_f & ~exu_flush_final) ? CRIT_BYP_OK : HIT_U_MISS ;
+                  miss_state_en = ic_act_miss_f & ~dec_tlu_force_halt ;
+         end
+         // Priority chain, first matching arm wins. Force halt always returns
+         // to IDLE. The arms qualified with uncacheable_miss_ff handle
+         // uncacheable misses; the arms with ~uncacheable_miss_ff handle cacheable ones.
+         // NOTE(review): the second-to-last arm (~ic_byp_hit_f ... ? IDLE) is
+         // subsumed by the earlier "last beat written & ~uncacheable_miss_ff" arm,
+         // which also yields IDLE, so it never decides the result.
+         CRIT_BYP_OK: begin : crit_byp_ok
+                  miss_nxtstate =  (dec_tlu_force_halt ) ?                                                                             IDLE :
+                                  ( ic_byp_hit_f &  (last_data_recieved_ff | (bus_ifu_wr_en_ff & last_beat)) &  uncacheable_miss_ff) ? IDLE :
+                                  ( ic_byp_hit_f &  ~last_data_recieved_ff                                &  uncacheable_miss_ff) ? MISS_WAIT :
+                                  (~ic_byp_hit_f &  ~exu_flush_final &  (bus_ifu_wr_en_ff & last_beat)       &  uncacheable_miss_ff) ? CRIT_WRD_RDY :
+                                  (                                      (bus_ifu_wr_en_ff & last_beat)       & ~uncacheable_miss_ff) ? IDLE :
+                                  ( ic_byp_hit_f  &  ~exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat)       & ~ifu_bp_hit_taken_q_f   & ~uncacheable_miss_ff) ? STREAM :
+                                  ( bus_ifu_wr_en_ff &  ~exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat)       & ~ifu_bp_hit_taken_q_f   & ~uncacheable_miss_ff) ? STREAM :
+                                  (~ic_byp_hit_f  &  ~exu_flush_final &  (bus_ifu_wr_en_ff & last_beat)       & ~uncacheable_miss_ff) ? IDLE :
+                                  ( (exu_flush_final | ifu_bp_hit_taken_q_f)  & ~(bus_ifu_wr_en_ff & last_beat)                      ) ? HIT_U_MISS : IDLE;
+                  miss_state_en =  dec_tlu_force_halt | exu_flush_final | ic_byp_hit_f | ifu_bp_hit_taken_q_f | (bus_ifu_wr_en_ff & last_beat) | (bus_ifu_wr_en_ff & ~uncacheable_miss_ff)  ;
+         end
+         // Always returns to IDLE, once a flush, a bypass hit or a force halt occurs.
+         CRIT_WRD_RDY: begin : crit_wrd_rdy
+                  miss_nxtstate =  IDLE ;
+                  miss_state_en =  exu_flush_final | flush_final_f | ic_byp_hit_f | dec_tlu_force_halt  ;
+         end
+         // Leaves on flush, qualified taken-branch hit, stream_eol_f, last beat
+         // written or force halt. Goes to HIT_U_MISS when one of the first
+         // three occurs before the last beat is written (and no force halt); otherwise IDLE.
+         STREAM: begin : stream
+                  miss_nxtstate =  ((exu_flush_final | ifu_bp_hit_taken_q_f  | stream_eol_f ) & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? HIT_U_MISS  : IDLE ;
+                  miss_state_en =    exu_flush_final | ifu_bp_hit_taken_q_f  | stream_eol_f   |  (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt ;
+         end
+         // Waits for the last beat; a flush before the last beat moves to HIT_U_MISS.
+         MISS_WAIT: begin : miss_wait
+                  miss_nxtstate =  (exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? HIT_U_MISS  : IDLE ;
+                  miss_state_en =   exu_flush_final | (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt ;
+         end
+         // A new miss seen here before the last beat is written becomes either
+         // SCND_MISS (ic_miss_under_miss_f) or STALL_SCND_MISS (ic_ignore_2nd_miss_f).
+         // Last beat written or force halt returns to IDLE.
+         HIT_U_MISS: begin : hit_u_miss
+                  miss_nxtstate =  ic_miss_under_miss_f & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt ? SCND_MISS :
+                                   ic_ignore_2nd_miss_f & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt ? STALL_SCND_MISS : IDLE  ;
+                  miss_state_en = (bus_ifu_wr_en_ff & last_beat) | ic_miss_under_miss_f | ic_ignore_2nd_miss_f | dec_tlu_force_halt;
+         end
+         // Without a flush or force halt, the last beat written moves to
+         // CRIT_BYP_OK. A flush goes to IDLE if the last beat is being written, else HIT_U_MISS.
+         SCND_MISS: begin : scnd_miss
+                  miss_nxtstate   = dec_tlu_force_halt ? IDLE  :
+                                    exu_flush_final ?  ((bus_ifu_wr_en_ff & last_beat) ? IDLE : HIT_U_MISS) : CRIT_BYP_OK;
+                  miss_state_en   = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt;
+         end
+         // Same exits as SCND_MISS, except that the non-flush exit goes to IDLE.
+         STALL_SCND_MISS: begin : stall_scnd_miss
+                  miss_nxtstate   =  dec_tlu_force_halt ? IDLE  :
+                                     exu_flush_final ?  ((bus_ifu_wr_en_ff & last_beat) ? IDLE : HIT_U_MISS) : IDLE;
+                  miss_state_en   = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt;
+         end
+         default: begin : def_case
+                  miss_nxtstate   = IDLE;
+                  miss_state_en   = 1'b0;
+         end
+      endcase
+   end
+   // State register, clocked by active_clk and loaded only when miss_state_en is set.
+   rvdffs #(($bits(miss_state_t))) miss_state_ff (.clk(active_clk), .din(miss_nxtstate), .dout({miss_state}), .en(miss_state_en),   .*);
+
+  // Hold select for the miss-buffer address/attribute registers (imb_in, uncacheable_miss_in).
+  logic    sel_hold_imb     ;
+
+   // A miss is outstanding whenever the miss FSM is out of IDLE.
+   assign miss_pending       =  (miss_state != IDLE) ;
+   // Critical-word bypass is allowed in CRIT_BYP_OK, and in CRIT_WRD_RDY unless flush_final_f is set.
+   assign crit_wd_byp_ok_ff  =  (miss_state == CRIT_BYP_OK) | ((miss_state == CRIT_WRD_RDY) & ~flush_final_f);
+   // Hold the miss buffer contents when:
+   //  - a miss is pending, the last beat is not being written, and the FSM is
+   //    not in CRIT_WRD_RDY with a flush or a critical bypass hit, or
+   //  - a new actual miss is being taken, or
+   //  - a miss is pending and the FSM is about to enter CRIT_WRD_RDY.
+   assign sel_hold_imb       =  (miss_pending & ~(bus_ifu_wr_en_ff & last_beat) & ~((miss_state == CRIT_WRD_RDY) & exu_flush_final) &
+                              ~((miss_state == CRIT_WRD_RDY) & crit_byp_hit_f) ) | ic_act_miss_f |
+                                (miss_pending & (miss_nxtstate == CRIT_WRD_RDY)) ;
+
+
+   // Shadow ("second miss") copies of the miss buffer state: address,
+   // uncacheable attribute, tag-valid bits and replacement way status. They are
+   // copied into the primary miss-buffer registers when scnd_miss_req is set
+   // (see imb_in / uncacheable_miss_in / way_status_mb_in / tagv_mb_in).
+   logic         sel_hold_imb_scnd;
+   logic  [31:1] imb_scnd_in;
+   logic  [31:1] imb_scnd_ff;
+   logic         uncacheable_miss_scnd_in ;
+   logic         uncacheable_miss_scnd_ff ;
+
+   logic  [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_in;
+   logic  [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_ff;
+
+   logic  [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_in;
+   logic  [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_ff;
+
+   // Hold the shadow address/attribute while in SCND_MISS or while a miss-under-miss
+   // is being detected, unless flush_final_f is set.
+   assign sel_hold_imb_scnd                                =((miss_state == SCND_MISS) | ic_miss_under_miss_f) & ~flush_final_f ;
+   // Way status and tag-valid shadows hold in SCND_MISS, otherwise track the
+   // live values. Tag-valids are forced to zero on reset_all_tags or a flush.
+   assign way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0] = (miss_state == SCND_MISS) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] : {way_status[pt.ICACHE_STATUS_BITS-1:0]} ;
+   assign tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0]          = (miss_state == SCND_MISS) ? tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0]          : ({ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0]} & {pt.ICACHE_NUM_WAYS{~reset_all_tags & ~exu_flush_final}});
+   assign uncacheable_miss_scnd_in   = sel_hold_imb_scnd ? uncacheable_miss_scnd_ff : ifc_fetch_uncacheable_bf ;
+
+
+   // Shadow registers; all are enabled by the fetch-path clock enable.
+   rvdff_fpga #(1)  unc_miss_scnd_ff    (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din (uncacheable_miss_scnd_in), .dout(uncacheable_miss_scnd_ff));
+   rvdffpcie #(31) imb_f_scnd_ff       (.*, .en(fetch_bf_f_c1_clken),  .din ({imb_scnd_in[31:1]}), .dout({imb_scnd_ff[31:1]}));
+   rvdff_fpga #(pt.ICACHE_STATUS_BITS)  mb_rep_wayf2_scnd_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din ({way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0]}), .dout({way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0]}));
+   rvdff_fpga #(pt.ICACHE_NUM_WAYS)     mb_tagv_scnd_ff      (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din ({tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0]}), .dout({tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0]}));
+
+
+
+
+   // Beat-select address bits: the bus request uses the read-address counter;
+   // the array write uses the registered response tag, zeroed when no write is in progress.
+   assign ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] = bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] ;
+   assign ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3]  = ifu_bus_rid_ff[pt.ICACHE_BEAT_BITS-1:0] & {pt.ICACHE_BEAT_BITS{bus_ifu_wr_en_ff}};
+   // NOTE: Cacheline size is 16 bytes in this example.
+   // Tag     Index  Bank Offset
+   // [31:16] [15:5] [4]  [3:0]
+   // NOTE(review): the field boundaries above are illustrative only; the logic
+   // below derives them from pt.ICACHE_* parameters.
+
+
+   // Split the registered fetch request by target: I-cache (no ICCM access and
+   // no region access fault) versus ICCM.
+   assign fetch_req_icache_f   = ifc_fetch_req_f & ~ifc_iccm_access_f & ~ifc_region_acc_fault_final_f;
+   assign fetch_req_iccm_f     = ifc_fetch_req_f &  ifc_iccm_access_f;
+
+   // ICCM hit: ICCM fetch while no miss is pending, or while in HIT_U_MISS / STREAM.
+   assign ic_iccm_hit_f        = fetch_req_iccm_f  &  (~miss_pending | (miss_state==HIT_U_MISS) | (miss_state==STREAM));
+   // Bypass hit: data served from the miss buffer (critical or stream hit) during a pending miss.
+   assign ic_byp_hit_f         = (crit_byp_hit_f | stream_hit_f)  & fetch_req_icache_f &  miss_pending ;
+   // Array hit: some way hits, tags are not being reset, no miss pending (or in
+   // HIT_U_MISS), and sel_mb_addr_ff is clear.
+   assign ic_act_hit_f         = (|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) & fetch_req_icache_f & ~reset_all_tags & (~miss_pending | (miss_state==HIT_U_MISS)) & ~sel_mb_addr_ff;
+   // Actual miss: no way hits (or tags are being reset) with no miss pending, or
+   // a second-miss request; never on a region access fault.
+   assign ic_act_miss_f        = (((~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & ~miss_pending) | scnd_miss_req) & ~ifc_region_acc_fault_final_f;
+   // Miss under miss: a miss in HIT_U_MISS to a different line (address bits
+   // [31:ICACHE_TAG_INDEX_LO] differ from the miss buffer) while the pending miss is cacheable.
+   assign ic_miss_under_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) &
+                                   (imb_ff[31:pt.ICACHE_TAG_INDEX_LO] != ifu_fetch_addr_int_f[31:pt.ICACHE_TAG_INDEX_LO]) & ~uncacheable_miss_ff & ~sel_mb_addr_ff & ~ifc_region_acc_fault_final_f;
+   // Ignored second miss: a miss in HIT_U_MISS to the same line as the miss
+   // buffer, or while the pending miss is uncacheable.
+   assign ic_ignore_2nd_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) &
+                                   ((imb_ff[31:pt.ICACHE_TAG_INDEX_LO] == ifu_fetch_addr_int_f[31:pt.ICACHE_TAG_INDEX_LO])  |   uncacheable_miss_ff) ;
+   // Overall hit: array, bypass or ICCM hit. A fetch with a region access fault
+   // is also reported as a hit.
+   assign ic_hit_f             =  ic_act_hit_f | ic_byp_hit_f | ic_iccm_hit_f | (ifc_region_acc_fault_final_f & ifc_fetch_req_f);
+
+   // Miss buffer address / uncacheable attribute. Priority: load the shadow
+   // (second-miss) copy on scnd_miss_req, else hold on sel_hold_imb, else track
+   // the incoming fetch.
+   assign uncacheable_miss_in   = scnd_miss_req ? uncacheable_miss_scnd_ff : sel_hold_imb ? uncacheable_miss_ff : ifc_fetch_uncacheable_bf ;
+   assign imb_in[31:1]          = scnd_miss_req ? imb_scnd_ff[31:1]        : sel_hold_imb ? imb_ff[31:1] : {ifc_fetch_addr_bf[31:1]} ;
+
+   // Shadow address: hold on sel_hold_imb_scnd, else track the incoming fetch address.
+   assign imb_scnd_in[31:1]     = sel_hold_imb_scnd ? imb_scnd_ff[31:1] : {ifc_fetch_addr_bf[31:1]} ;
+
+   // The second miss maps to the same cache index as the current miss (and no
+   // cumulative bus data error has been recorded).
+   assign scnd_miss_index_match  =  (imb_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == imb_scnd_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]) & scnd_miss_req & ~ifu_wr_cumulative_err_data;
+   // Replacement way status for the miss buffer: on a second-miss request take
+   // the shadow copy, or way_status_rep_new when the indices match; hold while
+   // a miss is pending; otherwise track the live way_status.
+   assign way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0] = (scnd_miss_req & ~scnd_miss_index_match) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] :
+                                                        (scnd_miss_req &  scnd_miss_index_match) ? way_status_rep_new[pt.ICACHE_STATUS_BITS-1:0] :
+                                                         miss_pending                            ? way_status_mb_ff[pt.ICACHE_STATUS_BITS-1:0] :
+                                                                                                  {way_status[pt.ICACHE_STATUS_BITS-1:0]} ;
+   // Tag-valid bits for the miss buffer: on a second-miss request take the
+   // shadow copy, additionally marking replace_way_mb_any as valid when the
+   // indices match; hold while a miss is pending; otherwise track ic_tag_valid
+   // (forced to zero on reset_all_tags or a flush).
+   assign tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0]          = scnd_miss_req ? (tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0] | ({pt.ICACHE_NUM_WAYS {scnd_miss_index_match}} & replace_way_mb_any[pt.ICACHE_NUM_WAYS-1:0])) :
+                                                         miss_pending ? tagv_mb_ff[pt.ICACHE_NUM_WAYS-1:0]  : ({ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0]} & {pt.ICACHE_NUM_WAYS{~reset_all_tags & ~exu_flush_final}}) ;
+
+   // Sticky while a miss is pending: set by reset_all_tags, cleared by scnd_miss_req_q.
+   assign reset_ic_in           = miss_pending & ~scnd_miss_req_q &  (reset_all_tags |  reset_ic_ff) ;
+
+
+
+   // Registered fetch address (bf -> f stage).
+   rvdffpcie #(31) ifu_fetch_addr_f_ff (.*, .en(fetch_bf_f_c1_clken), .din ({ifc_fetch_addr_bf[31:1]}), .dout({ifu_fetch_addr_int_f[31:1]}));
+
+   assign vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] = ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:1] ;
+
+   // Miss buffer address and uncacheable attribute registers.
+   rvdffpcie #(31) imb_f_ff        (.*, .en(fetch_bf_f_c1_clken), .din (imb_in[31:1]), .dout(imb_ff[31:1]));
+   rvdff_fpga #(1) unc_miss_ff     (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk),  .din ( uncacheable_miss_in),               .dout( uncacheable_miss_ff));
+
+
+   // Line address used for bus requests: follows imb_ff while no miss is
+   // pending, loads the shadow address on scnd_miss_req_q, otherwise holds.
+   assign miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1]      = (~miss_pending                    ) ? imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] :
+                                                           (                scnd_miss_req_q  ) ? imb_scnd_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] : miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1] ;
+
+
+   // Updated on an enabled bus clock, on an actual miss, or on a force halt.
+   rvdfflie #(.WIDTH(31-pt.ICACHE_BEAT_ADDR_HI),.LEFT(31-pt.ICACHE_BEAT_ADDR_HI-8)) miss_f_ff       (.*, .en(bus_ifu_bus_clk_en | ic_act_miss_f | dec_tlu_force_halt), .din ({miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1]}), .dout({miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1]}));
+
+
+
+
+
+   // Miss buffer replacement way status and tag-valid registers.
+   rvdff_fpga #(pt.ICACHE_STATUS_BITS)  mb_rep_wayf2_ff (.*, .clk(fetch_bf_f_c1_clk),  .clken(fetch_bf_f_c1_clken), .rawclk(clk),  .din ({way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0]}), .dout({way_status_mb_ff[pt.ICACHE_STATUS_BITS-1:0]}));
+   rvdff_fpga #(pt.ICACHE_NUM_WAYS)     mb_tagv_ff      (.*, .clk(fetch_bf_f_c1_clk),  .clken(fetch_bf_f_c1_clken), .rawclk(clk),  .din ({tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0]}), .dout({tagv_mb_ff[pt.ICACHE_NUM_WAYS-1:0]}));
+
+   // Qualified bf-stage fetch request: suppressed in CRIT_WRD_RDY with
+   // flush_final_f set, and on a stream miss.
+   assign ifc_fetch_req_qual_bf  = ifc_fetch_req_bf  & ~((miss_state == CRIT_WRD_RDY) & flush_final_f) & ~stream_miss_f ;// & ~exu_flush_final ;
+
+   // f-stage fetch request is killed by a flush.
+   assign ifc_fetch_req_f       = ifc_fetch_req_f_raw & ~exu_flush_final ;
+
+   // bf -> f staging of the ICCM-access and region-access-fault indications.
+   rvdff_fpga #(1) ifu_iccm_acc_ff     (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk),   .din(ifc_iccm_access_bf),      .dout(ifc_iccm_access_f));
+   rvdff_fpga #(1) ifu_iccm_reg_acc_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk),   .din(ifc_region_acc_fault_final_bf), .dout(ifc_region_acc_fault_final_f));
+   rvdff_fpga #(1) rgn_acc_ff          (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk),   .din(ifc_region_acc_fault_bf),       .dout(ifc_region_acc_fault_f));
+
+
+   // Bus request address: line address from miss_addr plus the beat-select bits.
+   assign ifu_ic_req_addr_f[31:3]  = {miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] };
+   // Miss buffer reported empty when no miss is pending, or in HIT_U_MISS /
+   // STREAM while the last beat is not being written.
+   assign ifu_ic_mb_empty          = (((miss_state == HIT_U_MISS) | (miss_state == STREAM)) & ~(bus_ifu_wr_en_ff & last_beat)) |  ~miss_pending ;
+   assign ifu_miss_state_idle      = (miss_state == IDLE) ;
+
+
+   // Array read/write address: the miss-buffer address (with the write beat
+   // bits) when a cacheable fill write of 16 bytes is happening or tag valids
+   // are being reset for the miss; otherwise the incoming fetch address.
+   assign sel_mb_addr  = ((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff) | reset_tag_valid_for_miss) ;
+   assign ifu_ic_rw_int_addr[31:1] = ({31{ sel_mb_addr}}  &  {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]})  |
+                                     ({31{~sel_mb_addr}}  &  ifc_fetch_addr_bf[31:1] )   ;
+
+   // Status write address: same miss-buffer address, but only selected on the
+   // last beat of the fill (or on reset_tag_valid_for_miss); otherwise the
+   // registered fetch address.
+   assign sel_mb_status_addr  = ((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff & last_beat & bus_ifu_wr_en_ff_q) | reset_tag_valid_for_miss) ;
+   assign ifu_status_wr_addr[31:1] = ({31{ sel_mb_status_addr}}  &  {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]})  |
+                                     ({31{~sel_mb_status_addr}}  &  ifu_fetch_addr_int_f[31:1] )   ;
+
+
+  assign ic_rw_addr[31:1]      = ifu_ic_rw_int_addr[31:1] ;
+
+
+// I-cache write data formation and error detection, in two flavours:
+//  - icache_ecc_1    : 7 ECC bits per 64-bit half (71 bits per bank, 142 total)
+//  - icache_parity_1 : 4 parity bits per 64-bit half, one per 16 bits
+//                      (68 bits per bank, 136 total, zero-padded to 71 on write)
+// In both, a 16-byte write is built from the data just received from the bus
+// (ifu_bus_rdata_ff) and the other half already in the miss buffer
+// (ic_miss_buff_half); ifu_bus_rid_ff[0] decides which goes in the upper half.
+// NOTE(review): ic_wr_16bytes_data holds exactly two slices, so the
+// ic_wr_data_loop loops assume pt.ICACHE_BANKS_WAY == 2 - confirm.
+if (pt.ICACHE_ECC == 1) begin: icache_ecc_1
+   logic [6:0]       ic_wr_ecc;
+   logic [6:0]       ic_miss_buff_ecc;
+   logic [141:0]     ic_wr_16bytes_data ;
+   logic [70:0]      ifu_ic_debug_rd_data_in   ;
+
+                // ECC for the incoming bus data and for the buffered half.
+                rvecc_encode_64  ic_ecc_encode_64_bus (
+                           .din    (ifu_bus_rdata_ff[63:0]),
+                           .ecc_out(ic_wr_ecc[6:0]));
+                rvecc_encode_64  ic_ecc_encode_64_buff (
+                           .din    (ic_miss_buff_half[63:0]),
+                           .ecc_out(ic_miss_buff_ecc[6:0]));
+
+   // One 71-bit {ecc, data} slice per bank.
+   for (genvar i=0; i < pt.ICACHE_BANKS_WAY ; i++) begin : ic_wr_data_loop
+      assign ic_wr_data[i][70:0]  =  ic_wr_16bytes_data[((71*i)+70): (71*i)];
+   end
+
+
+   assign ic_debug_wr_data[70:0]   = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]} ;
+   // Error start: an ECC error in any bank on an actual array hit, or a tag parity error.
+   assign ic_error_start           = ((|ic_eccerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f)  | ic_rd_parity_final_err;
+
+
+
+  // Debug read data: tag-array view (tag bits, way status, tag valid) when
+  // ic_debug_ict_array_sel_ff is set, otherwise the raw data-array read.
+  assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? {2'b0,ictag_debug_rd_data[25:21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}}, way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} :
+                                                                     ic_debug_rd_data[70:0];
+
+  rvdffe #(71) ifu_debug_data_ff (.*,
+                                  .en (debug_data_clken),
+                                  .din ({
+                                         ifu_ic_debug_rd_data_in[70:0]
+                                         }),
+                                  .dout({
+                                         ifu_ic_debug_rd_data[70:0]
+                                         })
+                                  );
+
+  // rid[0] == 1: bus data is the upper half, buffered data the lower half; otherwise swapped.
+  assign ic_wr_16bytes_data[141:0] =  ifu_bus_rid_ff[0] ? {ic_wr_ecc[6:0] , ifu_bus_rdata_ff[63:0] ,  ic_miss_buff_ecc[6:0] , ic_miss_buff_half[63:0] } :
+                                                        {ic_miss_buff_ecc[6:0] ,  ic_miss_buff_half[63:0] , ic_wr_ecc[6:0] , ifu_bus_rdata_ff[63:0] } ;
+
+
+end
+else begin : icache_parity_1
+   logic [3:0]   ic_wr_parity;
+   logic [3:0]   ic_miss_buff_parity;
+   logic [135:0] ic_wr_16bytes_data ;
+   logic [70:0]  ifu_ic_debug_rd_data_in   ;
+    // One parity bit per 16-bit chunk, for both the bus data and the buffered half.
+    for (genvar i=0 ; i < 4 ; i++) begin : DATA_PGEN
+       rveven_paritygen #(16) par_bus  (.data_in   (ifu_bus_rdata_ff[((16*i)+15):(16*i)]),
+                                      .parity_out(ic_wr_parity[i]));
+       rveven_paritygen #(16) par_buff  (.data_in   (ic_miss_buff_half[((16*i)+15):(16*i)]),
+                                      .parity_out(ic_miss_buff_parity[i]));
+    end
+
+
+   // One 68-bit {parity, data} slice per bank, zero-padded to the 71-bit write port.
+   for (genvar i=0; i < pt.ICACHE_BANKS_WAY ; i++) begin : ic_wr_data_loop
+      assign ic_wr_data[i][70:0]  =  {3'b0, ic_wr_16bytes_data[((68*i)+67): (68*i)]};
+   end
+
+
+
+
+
+   assign ic_debug_wr_data[70:0]   = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]} ;
+   // Error start: a parity error in any bank on an actual array hit, or a tag parity error.
+   assign ic_error_start           = ((|ic_parerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f) | ic_rd_parity_final_err;
+
+   // Debug read data: tag-array view when ic_debug_ict_array_sel_ff is set,
+   // otherwise the raw data-array read. Only tag bit [21] is placed in the
+   // upper field here, versus bits [25:21] in the ECC flavour.
+   assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? {6'b0,ictag_debug_rd_data[21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}},way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} :
+                                                                      ic_debug_rd_data[70:0] ;
+
+   rvdffe #(71) ifu_debug_data_ff (.*,
+                                   .en (debug_data_clken),
+                                   .din ({
+                                          ifu_ic_debug_rd_data_in[70:0]
+                                          }),
+                                   .dout({
+                                          ifu_ic_debug_rd_data[70:0]
+                                          })
+                                   );
+
+   // rid[0] == 1: bus data is the upper half, buffered data the lower half; otherwise swapped.
+   assign ic_wr_16bytes_data[135:0] =  ifu_bus_rid_ff[0] ? {ic_wr_parity[3:0] , ifu_bus_rdata_ff[63:0] ,  ic_miss_buff_parity[3:0] , ic_miss_buff_half[63:0] } :
+                                                        {ic_miss_buff_parity[3:0] ,  ic_miss_buff_half[63:0] , ic_wr_parity[3:0] , ifu_bus_rdata_ff[63:0] } ;
+
+end
+
+
+  // Bus write-data error bookkeeping.
+  //   ifu_wr_data_comb_err       : error flag of the beat currently being written
+  //   ifu_wr_cumulative_err_data : current error OR any error recorded earlier
+  //   ifu_wr_cumulative_err      : same, but cleared when the beat counter resets
+  assign ifu_wr_data_comb_err       = bus_ifu_wr_data_error_ff;
+  assign ifu_wr_cumulative_err_data = bus_ifu_wr_data_error_ff | ifu_wr_data_comb_err_ff;
+  assign ifu_wr_cumulative_err      = ifu_wr_cumulative_err_data & ~reset_beat_cnt;
+
+  // Fetch data source selects.
+  //   sel_byp_data : take data from the miss buffer (critical word ready, or
+  //                  the miss FSM is in STREAM / CRIT_BYP_OK)
+  //   sel_ic_data  : take data from the I-cache array; excluded whenever bypass
+  //                  is selected, in MISS_WAIT, on ICCM fetches and on region faults
+  assign sel_byp_data = (miss_state == CRIT_BYP_OK) | (miss_state == STREAM) | ic_crit_wd_rdy;
+  assign sel_ic_data  = ~sel_byp_data & (miss_state != MISS_WAIT) &
+                        ~(fetch_req_iccm_f | ifc_region_acc_fault_final_f);
+
+ // Final fetch-data muxing, one generate branch per memory configuration.
+ // ic_premux_data / ic_sel_premux_data carry the bypass and ICCM data;
+ // ic_final_data carries either the array read data or, in configurations
+ // without an I-cache, the bypass/ICCM data directly.
+
+ // ICCM and I-cache both present.
+ if (pt.ICCM_ICACHE == 1) begin : iccm_icache
+  assign sel_iccm_data        = fetch_req_iccm_f;
+  assign ic_sel_premux_data   = sel_byp_data | sel_iccm_data;
+  assign ic_premux_data[63:0] = ({64{sel_iccm_data}} & iccm_rd_data[63:0]) |
+                                ({64{sel_byp_data}}  & ic_byp_data_only_new[63:0]);
+  assign ic_final_data[63:0]  = (sel_ic_data | sel_iccm_data | sel_byp_data) ? ic_rd_data[63:0] : 64'h0;
+ end
+
+ // ICCM only: no pre-mux path, final data comes from bypass or ICCM.
+ if (pt.ICCM_ONLY == 1) begin : iccm_only
+  assign sel_iccm_data       = fetch_req_iccm_f;
+  assign ic_sel_premux_data  = '0;
+  assign ic_premux_data      = '0;
+  assign ic_final_data[63:0] = ({64{sel_iccm_data}} & iccm_rd_data[63:0]) |
+                               ({64{sel_byp_data}}  & ic_byp_data_only_new[63:0]);
+ end
+
+ // I-cache only: pre-mux carries the bypass data.
+ if (pt.ICACHE_ONLY == 1) begin : icache_only
+  assign ic_sel_premux_data   = sel_byp_data;
+  assign ic_premux_data[63:0] = sel_byp_data ? ic_byp_data_only_new[63:0] : 64'h0;
+  assign ic_final_data[63:0]  = (sel_ic_data | sel_byp_data) ? ic_rd_data[63:0] : 64'h0;
+ end
+
+ // Neither ICCM nor I-cache: only the bypass data exists.
+ if (pt.NO_ICCM_NO_ICACHE == 1) begin : no_iccm_no_icache
+  assign ic_sel_premux_data  = '0;
+  assign ic_premux_data      = '0;
+  assign ic_final_data[63:0] = sel_byp_data ? ic_byp_data_only_new[63:0] : 64'h0;
+ end
+
+
+  // Bus error flags are only meaningful when the data is served from the miss buffer.
+  assign ifc_bus_acc_fault_f[1:0] = ic_byp_hit_f ? ifu_byp_data_err_f[1:0] : 2'b00;
+  assign ic_data_f[31:0]          = ic_final_data[31:0];
+
+  // A fetch is only reported downstream when it hit and was not flushed.
+  assign fetch_req_f_qual = ~exu_flush_final & ic_hit_f;
+
+  // Access fault per fetch half: region fault (both halves) or bus error, masked by a flush.
+  assign ic_access_fault_f[1:0] = exu_flush_final ? 2'b00 :
+                                  (ifc_bus_acc_fault_f[1:0] | {2{ifc_region_acc_fault_final_f}});
+
+  // Fault type, highest priority first: ICCM double-bit ECC error (01),
+  // region access fault (10), region memory access fault (11), none (00).
+  assign ic_access_fault_type_f[1:0] = (|iccm_rd_ecc_double_err)       ? 2'b01 :
+                                       ifc_region_acc_fault_f          ? 2'b10 :
+                                       ifc_region_acc_fault_memory_f   ? 2'b11 :
+                                                                         2'b00;
+
+  // Fetch valids, right justified. The upper valid is dropped when the fetch
+  // address bits [ICACHE_BEAT_ADDR_HI:1] are all ones, when the branch
+  // predictor mask is clear, or while the error-stop FSM is in ERR_FETCH2.
+  assign ic_fetch_val_f[0] = fetch_req_f_qual;
+  assign ic_fetch_val_f[1] = fetch_req_f_qual & ifu_bp_inst_mask_f &
+                             (vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] != {pt.ICACHE_BEAT_ADDR_HI{1'b1}}) &
+                             (err_stop_state != ERR_FETCH2);
+
+  // Low two bits other than 2'b11 identify a two-byte instruction.
+  assign two_byte_instr = (ic_data_f[1:0] != 2'b11);
+
+/////////////////////////////////////////////////////////////////////////////////////
+//  Create full buffer...                                                          //
+/////////////////////////////////////////////////////////////////////////////////////
+     // Miss buffer: one 64-bit entry per bus beat, stored as two 32-bit words at
+     // ic_miss_buff_data[2*i] (low) and [2*i+1] (high), plus a valid and an
+     // error bit per beat.
+     logic [63:0]       ic_miss_buff_data_in;
+     assign ic_miss_buff_data_in[63:0] = ifu_bus_rsp_rdata[63:0];
+
+     for (genvar i=0; i<pt.ICACHE_NUM_BEATS; i++) begin :  wr_flop
+
+        // Beat i is written when a bus write is enabled and the response tag equals i.
+        assign write_fill_data[i]        =   bus_ifu_wr_en & (  (pt.IFU_BUS_TAG)'(i)  == ifu_bus_rsp_tag[pt.IFU_BUS_TAG-1:0]);
+
+        rvdffe #(32) byp_data_0_ff (.*,
+                                    .en (write_fill_data[i]),
+                                    .din (ic_miss_buff_data_in[31:0]),
+                                    .dout(ic_miss_buff_data[i*2][31:0])
+                                    );
+
+        rvdffe #(32) byp_data_1_ff (.*,
+                                    .en (write_fill_data[i]),
+                                    .din (ic_miss_buff_data_in[63:32]),
+                                    .dout(ic_miss_buff_data[i*2+1][31:0])
+                                    );
+
+        // Valid is set when the beat is written and cleared when a new miss
+        // starts (ic_act_miss_f); a write takes priority over the clear.
+        assign ic_miss_buff_data_valid_in[i]  = write_fill_data[i] ? 1'b1  : (ic_miss_buff_data_valid[i]  & ~ic_act_miss_f) ;
+
+        rvdff #(1) byp_data_valid_ff (.*,
+                  .clk (active_clk),
+                  .din (ic_miss_buff_data_valid_in[i]),
+                  .dout(ic_miss_buff_data_valid[i]));
+
+        // Error bit captures the bus error of the beat being written and is
+        // cleared the same way as the valid bit.
+        assign ic_miss_buff_data_error_in[i]  = write_fill_data[i] ? bus_ifu_wr_data_error  : (ic_miss_buff_data_error[i]  & ~ic_act_miss_f) ;
+
+        rvdff #(1) byp_data_error_ff (.*,
+                  .clk (active_clk),
+                  .din (ic_miss_buff_data_error_in[i] ),
+                  .dout(ic_miss_buff_data_error[i]));
+     end
+
+/////////////////////////////////////////////////////////////////////////////////////
+// New bypass ready                                                                //
+/////////////////////////////////////////////////////////////////////////////////////
+   // bypass_index is the halfword address of the missing fetch within the line
+   // (from the miss buffer address); bits [HI:3] select the 64-bit beat and
+   // bits [2:1] the halfword inside that beat. bypass_index_5_3_inc is the next
+   // beat (wrapping at the end of the line).
+   logic   [pt.ICACHE_BEAT_ADDR_HI:1]  bypass_index;
+   logic   [pt.ICACHE_BEAT_ADDR_HI:3]  bypass_index_5_3_inc;
+   logic   bypass_data_ready_in;
+   logic   ic_crit_wd_rdy_new_in;
+
+   assign bypass_index[pt.ICACHE_BEAT_ADDR_HI:1] = imb_ff[pt.ICACHE_BEAT_ADDR_HI:1] ;
+   assign bypass_index_5_3_inc[pt.ICACHE_BEAT_ADDR_HI:3] = bypass_index[pt.ICACHE_BEAT_ADDR_HI:3] + 1 ;
+
+
+   // The critical data is ready when the beat holding the fetch address is (or
+   // is becoming) valid. A fetch starting at the last halfword of a beat also
+   // needs the following beat, unless the beat is the last one of the line.
+   // The last-beat compare is BEAT_BITS wide, matching bypass_index[HI:3] and
+   // the equivalent term in miss_buff_hit_unq_f. The previous constant was
+   // ICACHE_BEAT_ADDR_HI bits wide, so the compare could never be true.
+   assign bypass_data_ready_in = ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                      & ~bypass_index[2] & ~bypass_index[1])) |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                      & ~bypass_index[2] &  bypass_index[1])) |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                      &  bypass_index[2] & ~bypass_index[1])) |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]] & ic_miss_buff_data_valid_in[bypass_index_5_3_inc[pt.ICACHE_BEAT_ADDR_HI:3]] &  bypass_index[2] & bypass_index[1])) |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]] & (bypass_index[pt.ICACHE_BEAT_ADDR_HI:3] == {pt.ICACHE_BEAT_BITS{1'b1}})))   ;
+
+
+
+   // Critical word ready (next value), only while bypass is allowed and no flush:
+   //  - uncacheable miss: the bypass data must be ready and no taken branch hit
+   //  - cacheable miss  : ready as soon as bypass is allowed and no taken branch hit
+   //  - hold            : stays set while there is no I-cache fetch request
+   assign    ic_crit_wd_rdy_new_in = ( bypass_data_ready_in & crit_wd_byp_ok_ff   &  uncacheable_miss_ff &  ~exu_flush_final & ~ifu_bp_hit_taken_q_f) |
+                                     (                        crit_wd_byp_ok_ff   & ~uncacheable_miss_ff &  ~exu_flush_final & ~ifu_bp_hit_taken_q_f) |
+                                     (ic_crit_wd_rdy_new_ff & ~fetch_req_icache_f & crit_wd_byp_ok_ff    &  ~exu_flush_final) ;
+
+
+  // Indices into the miss buffer derived from the registered fetch address:
+  //   byp_fetch_index         halfword address within the line
+  //   byp_fetch_index_0/_1    low / high 32-bit word of the addressed beat
+  //   byp_fetch_index_inc     next beat (wraps at the end of the line)
+  //   byp_fetch_index_inc_0/1 low / high 32-bit word of the next beat
+  assign byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:1]          =    ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:1]       ;
+  assign byp_fetch_index_0[pt.ICACHE_BEAT_ADDR_HI:2]        =   {ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:3],1'b0} ;
+  assign byp_fetch_index_1[pt.ICACHE_BEAT_ADDR_HI:2]        =   {ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:3],1'b1} ;
+  assign byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3]      =    ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:3]+1'b1 ;
+  assign byp_fetch_index_inc_0[pt.ICACHE_BEAT_ADDR_HI:2]    =   {byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3], 1'b0} ;
+  assign byp_fetch_index_inc_1[pt.ICACHE_BEAT_ADDR_HI:2]    =   {byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3], 1'b1} ;
+
+  // Bus error on the bypassed data: the addressed beat has an error, or, for a
+  // fetch starting at the last halfword of a beat, the next beat has one.
+  assign  ifu_byp_data_err_new = (~ifu_fetch_addr_int_f[2] &  ~ifu_fetch_addr_int_f[1] &                                                                           ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] )  |
+                                 (~ifu_fetch_addr_int_f[2] &   ifu_fetch_addr_int_f[1] &                                                                           ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] )  |
+                                 ( ifu_fetch_addr_int_f[2] &  ~ifu_fetch_addr_int_f[1] &                                                                           ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] )  |
+                                 ( ifu_fetch_addr_int_f[2] &   ifu_fetch_addr_int_f[1] & (ic_miss_buff_data_error[byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3]] | ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] )) ;
+
+  // Two-bit error flag: 11 when the addressed beat has an error; 10 when only
+  // the next beat has an error, the fetch starts at the last halfword of the
+  // beat and the fetch has not wrapped (miss_wrap_f); else 00.
+  assign  ifu_byp_data_err_f[1:0]  =   (ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] )  ? 2'b11 :
+                                      ( ifu_fetch_addr_int_f[2] &  ifu_fetch_addr_int_f[1] &   ~(ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] ) & (~miss_wrap_f & ic_miss_buff_data_error[byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3]])) ? 2'b10 : 2'b00;
+
+
+
+
+
+  // 80-bit window from the miss buffer, starting at the 32-bit word selected by
+  // fetch address bit [2]: 64 bits plus the low 16 bits of the following word.
+  assign ic_byp_data_only_pre_new[79:0] =  ({80{~ifu_fetch_addr_int_f[2]}} &   {ic_miss_buff_data[byp_fetch_index_inc_0][15:0],ic_miss_buff_data[byp_fetch_index_1][31:0]     , ic_miss_buff_data[byp_fetch_index_0][31:0]}) |
+                                           ({80{ ifu_fetch_addr_int_f[2]}} &   {ic_miss_buff_data[byp_fetch_index_inc_1][15:0],ic_miss_buff_data[byp_fetch_index_inc_0][31:0] , ic_miss_buff_data[byp_fetch_index_1][31:0]}) ;
+
+  // Final halfword alignment: shift right by 16 when fetch address bit [1] is set.
+  assign ic_byp_data_only_new[79:0]      = ~ifu_fetch_addr_int_f[1] ? {ic_byp_data_only_pre_new[79:0]} :
+                                                                      {16'b0,ic_byp_data_only_pre_new[79:16]} ;
+
+  // The fetch address no longer matches the miss buffer at bit ICACHE_TAG_INDEX_LO.
+  assign miss_wrap_f      =  (imb_ff[pt.ICACHE_TAG_INDEX_LO] != ifu_fetch_addr_int_f[pt.ICACHE_TAG_INDEX_LO] ) ;
+
+  // Unqualified miss-buffer hit: the addressed beat is valid; a fetch starting
+  // at the last halfword of a beat also needs the next beat valid, unless the
+  // addressed beat is the last beat of the line.
+  assign miss_buff_hit_unq_f  = ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                     & ~byp_fetch_index[2] & ~byp_fetch_index[1])) |
+                             ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                     & ~byp_fetch_index[2] &  byp_fetch_index[1])) |
+                             ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                     &  byp_fetch_index[2] & ~byp_fetch_index[1])) |
+                             ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] & ic_miss_buff_data_valid[byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3]] &  byp_fetch_index[2] &  byp_fetch_index[1])) |
+                             ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] &  (byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3] == {pt.ICACHE_BEAT_BITS{1'b1}})))   ;
+
+  // Stream hit / miss only apply in STREAM. stream_eol_f flags a stream hit on
+  // the last 32-bit word of the line (index bits [HI:2] all ones).
+  assign stream_hit_f     =  (miss_buff_hit_unq_f & ~miss_wrap_f ) & (miss_state==STREAM) ;
+  assign stream_miss_f    = ~(miss_buff_hit_unq_f & ~miss_wrap_f ) & (miss_state==STREAM) & ifc_fetch_req_f;
+  assign stream_eol_f     =  (byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:2] == {pt.ICACHE_BEAT_BITS+1{1'b1}}) & ifc_fetch_req_f & stream_hit_f;
+
+  // Critical bypass hit: miss-buffer hit while in CRIT_WRD_RDY or CRIT_BYP_OK.
+  assign crit_byp_hit_f   =  (miss_buff_hit_unq_f ) & ((miss_state == CRIT_WRD_RDY) | (miss_state==CRIT_BYP_OK)) ;
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Figure out if you have the data to write.                                       //
+/////////////////////////////////////////////////////////////////////////////////////
+
+// other_tag is the registered response tag with its LSB inverted, i.e. the
+// other 8-byte beat of the same 16-byte pair.
+assign other_tag[pt.IFU_BUS_TAG-1:0] = {ifu_bus_rid_ff[pt.IFU_BUS_TAG-1:1] , ~ifu_bus_rid_ff[0] } ;
+// A 16-byte array write is done once the other beat of the pair is already
+// valid in the miss buffer and the current beat is being written.
+assign second_half_available      = ic_miss_buff_data_valid[other_tag] ;
+assign write_ic_16_bytes          = second_half_available & bus_ifu_wr_en_ff ;
+// 64 bits of the other beat, assembled from its two 32-bit miss-buffer words.
+assign ic_miss_buff_half[63:0]    = {ic_miss_buff_data[{other_tag,1'b1}],ic_miss_buff_data[{other_tag,1'b0}] } ;
+
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Parity checking logic for Icache logic.                                         //
+/////////////////////////////////////////////////////////////////////////////////////
+// ic_rd_parity_final_err qualifies ic_tag_perr: no flush, I-cache data selected, no region/bus access fault,
+// and an I-cache fetch request with tags not being reset, miss buffer address not selected, and either no miss pending or miss_state == HIT_U_MISS.
+assign ic_rd_parity_final_err = ic_tag_perr & ~exu_flush_final & sel_ic_data & ~(ifc_region_acc_fault_final_f | (|ifc_bus_acc_fault_f)) &
+                                      (fetch_req_icache_f & ~reset_all_tags & (~miss_pending | (miss_state==HIT_U_MISS)) & ~sel_mb_addr_ff);
+
+logic [pt.ICACHE_NUM_WAYS-1:0]                   perr_err_inv_way;
+logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]   perr_ic_index_ff;
+logic                                         perr_sel_invalidate;
+logic                                         perr_sb_write_status   ;
+
+
+   // Capture the I-cache index in use when the error state machine asserts perr_sb_write_status.
+   rvdffe #(.WIDTH(pt.ICACHE_INDEX_HI-pt.ICACHE_TAG_INDEX_LO+1),.OVERRIDE(1)) perr_dat_ff    (.din(ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]), .dout(perr_ic_index_ff[pt.ICACHE_INDEX_HI : pt.ICACHE_TAG_INDEX_LO]), .en(perr_sb_write_status),  .*);
+   // All ways share the single invalidate select; the ECC flags below are decodes of perr_state.
+   assign perr_err_inv_way[pt.ICACHE_NUM_WAYS-1:0]   =  {pt.ICACHE_NUM_WAYS{perr_sel_invalidate}} ;
+   assign iccm_correct_ecc     = (perr_state == ECC_CORR);
+   assign dma_sb_err_state     = (perr_state == DMA_SB_ERR);
+   assign iccm_buf_correct_ecc = iccm_correct_ecc & ~dma_sb_err_state_ff;
+
+
+
+   //////////////////////////////////// Create Parity Error State Machine ///////////////////////
+   //                                   Create Parity Error State Machine                      //
+   //                                   Create Parity Error State Machine                      //
+   //                                   Create Parity Error State Machine                      //
+   //////////////////////////////////// Create Parity Error State Machine ///////////////////////
+   // States: ERR_IDLE -> IC_WFF (I-cache error) | ECC_WFF (ICCM single-bit error) | DMA_SB_ERR (DMA single-bit error);
+   // ECC_WFF and DMA_SB_ERR may pass through ECC_CORR before returning to ERR_IDLE.
+   // Parity/ECC error state machine: next-state and output decode (outputs default to 0, next state to ERR_IDLE)
+   always_comb begin  : ERROR_SM
+      perr_nxtstate            = ERR_IDLE;
+      perr_state_en            = 1'b0;
+      perr_sb_write_status     = 1'b0;
+      perr_sel_invalidate      = 1'b0;
+
+      case (perr_state)
+         ERR_IDLE: begin : err_idle
+                  perr_nxtstate         =  iccm_dma_sb_error ? DMA_SB_ERR : (ic_error_start & ~exu_flush_final) ? IC_WFF : ECC_WFF;   // priority: DMA single-bit error, then I-cache error, else ICCM ECC
+                  perr_state_en         =  (((iccm_error_start | ic_error_start) & ~exu_flush_final) | iccm_dma_sb_error) & ~dec_tlu_force_halt;
+                  perr_sb_write_status  =  perr_state_en;   // latch the failing index on the same cycle the state machine leaves idle
+         end
+         IC_WFF: begin : icache_wff    // All the I$ data and/or Tag errors ( parity/ECC ) will come to this state
+                  perr_nxtstate       =  ERR_IDLE ;
+                  perr_state_en       =   dec_tlu_flush_lower_wb | dec_tlu_force_halt ;
+                  perr_sel_invalidate =  (dec_tlu_flush_err_wb &  dec_tlu_flush_lower_wb);   // invalidate only when the flush is the error flush
+         end
+         ECC_WFF: begin : ecc_wff
+                  perr_nxtstate       =  ((~dec_tlu_flush_err_wb &  dec_tlu_flush_lower_wb ) | dec_tlu_force_halt) ? ERR_IDLE : ECC_CORR ;   // a non-error flush or force-halt abandons the correction
+                  perr_state_en       =   dec_tlu_flush_lower_wb | dec_tlu_force_halt  ;
+         end
+         DMA_SB_ERR : begin : dma_sb_ecc
+                 perr_nxtstate       = dec_tlu_force_halt ? ERR_IDLE : ECC_CORR;
+                 perr_state_en       = 1'b1;   // single-cycle state
+         end
+         ECC_CORR: begin : ecc_corr
+                  perr_nxtstate       =  ERR_IDLE  ;
+                  perr_state_en       =   1'b1   ;   // single-cycle state; iccm_correct_ecc is decoded from it elsewhere
+         end
+         default: begin : def_case
+                  perr_nxtstate            = ERR_IDLE;
+                  perr_state_en            = 1'b0;
+                  perr_sb_write_status     = 1'b0;
+                  perr_sel_invalidate      = 1'b0;
+         end
+      endcase
+   end
+   // State register: updates only when perr_state_en is set.
+   rvdffs #(($bits(perr_state_t))) perr_state_ff (.clk(active_clk), .din(perr_nxtstate), .dout({perr_state}), .en(perr_state_en),   .*);
+
+   //////////////////////////////////// Create stop fetch State Machine /////////////////////////
+   //////////////////////////////////// Create stop fetch State Machine /////////////////////////
+   //////////////////////////////////// Create stop fetch State Machine /////////////////////////
+   //////////////////////////////////// Create stop fetch State Machine /////////////////////////
+   //////////////////////////////////// Create stop fetch State Machine /////////////////////////
+   always_comb begin  : ERROR_STOP_FETCH
+      err_stop_nxtstate            = ERR_STOP_IDLE;
+      err_stop_state_en            = 1'b0;
+      err_stop_fetch               = 1'b0;
+      iccm_correction_state        = 1'b0;
+
+      case (err_stop_state)
+         ERR_STOP_IDLE: begin : err_stop_idle
+                  err_stop_nxtstate         =  ERR_FETCH1;
+                  err_stop_state_en         =  dec_tlu_flush_err_wb & (perr_state  == ECC_WFF) & ~dec_tlu_force_halt;   // start on the error flush of an ICCM ECC error
+         end
+         ERR_FETCH1: begin : err_fetch1    // Waiting for fetch: both halves valid (or one valid 2-byte instr) -> ERR_STOP_FETCH, only half 0 valid -> ERR_FETCH2
+                  err_stop_nxtstate       =  (dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | dec_tlu_force_halt) ? ERR_STOP_IDLE : ((ifu_fetch_val[1:0] == 2'b11) | (ifu_fetch_val[0] & two_byte_instr))   ?  ERR_STOP_FETCH : ifu_fetch_val[0] ? ERR_FETCH2 :  ERR_FETCH1;
+                  err_stop_state_en       =   dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | ifu_fetch_val[0] | ifu_bp_hit_taken_q_f | dec_tlu_force_halt;
+                  err_stop_fetch          =   ((ifu_fetch_val[1:0] == 2'b11) | (ifu_fetch_val[0] & two_byte_instr))  & ~(exu_flush_final | dec_tlu_i0_commit_cmt);
+                  iccm_correction_state   = 1'b1;
+
+        end
+         ERR_FETCH2: begin : err_fetch2    // Waiting for one more valid fetch (ifu_fetch_val[0]) before moving to ERR_STOP_FETCH
+                  err_stop_nxtstate       =  (dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | dec_tlu_force_halt) ? ERR_STOP_IDLE : ifu_fetch_val[0] ?  ERR_STOP_FETCH : ERR_FETCH2;
+                  err_stop_state_en       =   dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | ifu_fetch_val[0] | dec_tlu_force_halt ;
+                  err_stop_fetch          =   ifu_fetch_val[0] & ~exu_flush_final & ~dec_tlu_i0_commit_cmt ;
+                  iccm_correction_state   = 1'b1;
+
+         end
+         ERR_STOP_FETCH: begin : ecc_wff   // NOTE(review): block label reuses the name ecc_wff from ERROR_SM; looks like copy/paste - confirm before renaming
+                  err_stop_nxtstate       =  ( (dec_tlu_flush_lower_wb & ~dec_tlu_flush_err_wb) | dec_tlu_i0_commit_cmt | dec_tlu_force_halt) ? ERR_STOP_IDLE : dec_tlu_flush_err_wb ? ERR_FETCH1 : ERR_STOP_FETCH ;
+                  err_stop_state_en       =   dec_tlu_flush_lower_wb |  dec_tlu_i0_commit_cmt | dec_tlu_force_halt   ;
+                  err_stop_fetch          =  1'b1;   // fetch is held off for as long as this state is occupied
+                  iccm_correction_state   = 1'b1;
+
+         end
+         default: begin : def_case
+                  err_stop_nxtstate            = ERR_STOP_IDLE;
+                  err_stop_state_en            = 1'b0;
+                  err_stop_fetch               = 1'b0 ;
+                  iccm_correction_state   = 1'b1;   // NOTE(review): differs from the 1'b0 default at the top of this block - confirm intent
+
+         end
+      endcase
+   end
+   rvdffs #(($bits(err_stop_state_t))) err_stop_state_ff (.clk(active_clk), .din(err_stop_nxtstate), .dout({err_stop_state}), .en(err_stop_state_en),   .*);
+
+
+
+   assign bus_ifu_bus_clk_en =  ifu_bus_clk_en ;
+   // busclk is enabled by the bus clock enable; busclk_force is additionally enabled by dec_tlu_force_halt.
+`ifdef RV_FPGA_OPTIMIZE
+   assign busclk = 1'b0;
+   assign busclk_force = 1'b0;
+`else
+   rvclkhdr bus_clk_f(.en(bus_ifu_bus_clk_en), .l1clk(busclk), .*);
+   rvclkhdr bus_clk(.en(bus_ifu_bus_clk_en | dec_tlu_force_halt), .l1clk(busclk_force), .*);
+`endif
+   // NOTE(review): with RV_FPGA_OPTIMIZE the gated clocks are tied low; presumably the *_fpga flops then use rawclk with clken - confirm in their definitions.
+
+   // A registered second-miss request is dropped on a final flush.
+   assign  scnd_miss_req = scnd_miss_req_q & ~exu_flush_final;
+   // Request stays up for a new miss, a held request or an outstanding command; it drops on force-halt or when the command with beat count all ones is accepted while a miss is pending.
+   assign  ifc_bus_ic_req_ff_in  = (ic_act_miss_f | bus_cmd_req_hold | ifu_bus_cmd_valid) & ~dec_tlu_force_halt & ~((bus_cmd_beat_count== {pt.ICACHE_BEAT_BITS{1'b1}}) & ifu_bus_cmd_valid & ifu_bus_cmd_ready & miss_pending);
+
+   rvdff_fpga #(1) bus_ic_req_ff2(.*, .clk(busclk_force), .clken(bus_ifu_bus_clk_en | dec_tlu_force_halt), .rawclk(clk), .din(ifc_bus_ic_req_ff_in), .dout(ifu_bus_cmd_valid));
+
+   assign    bus_cmd_req_in  = (ic_act_miss_f | bus_cmd_req_hold) & ~bus_cmd_sent & ~dec_tlu_force_halt ; // hold until first command sent
+
+
+
+    // AXI command signals
+    //  Read Channel: ID and address are forced to zero whenever no command is valid
+    assign ifu_axi_arvalid               =  ifu_bus_cmd_valid ;
+    assign ifu_axi_arid[pt.IFU_BUS_TAG-1:0] = ((pt.IFU_BUS_TAG)'(bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0])) & {pt.IFU_BUS_TAG{ifu_bus_cmd_valid}};   // ID is the request address count cast to the tag width
+    assign ifu_axi_araddr[31:0]          =   {ifu_ic_req_addr_f[31:3],3'b0}  & {32{ifu_bus_cmd_valid}};   // low three address bits are always zero
+    assign ifu_axi_arsize[2:0]           =  3'b011;   // AXI size encoding for 8-byte transfers
+    assign ifu_axi_arprot[2:0]           = 3'b101;
+    assign ifu_axi_arcache[3:0]          = 4'b1111;
+    assign ifu_axi_arregion[3:0]         = ifu_ic_req_addr_f[31:28];   // region is the top address nibble (not gated by valid)
+    assign ifu_axi_arlen[7:0]            = '0;   // AXI length encoding for a single-beat transaction
+    assign ifu_axi_arburst[1:0]          = 2'b01;
+    assign ifu_axi_arqos[3:0]            = '0;
+    assign ifu_axi_arlock                = '0;
+    assign ifu_axi_rready                = 1'b1;   // read data is always accepted
+
+    //  Write Channel: every AW/W/B output of this block is tied to a constant zero
+    assign ifu_axi_awvalid                  = 1'b0;
+    assign ifu_axi_awid[pt.IFU_BUS_TAG-1:0] = {pt.IFU_BUS_TAG{1'b0}};
+    assign ifu_axi_awaddr[31:0]             = 32'h0;
+    assign ifu_axi_awsize[2:0]              = 3'h0;
+    assign ifu_axi_awprot[2:0]              = 3'h0;
+    assign ifu_axi_awcache[3:0]             = 4'h0;
+    assign ifu_axi_awregion[3:0]            = 4'h0;
+    assign ifu_axi_awlen[7:0]               = 8'h0;
+    assign ifu_axi_awburst[1:0]             = 2'h0;
+    assign ifu_axi_awqos[3:0]               = 4'h0;
+    assign ifu_axi_awlock                   = 1'b0;
+
+    assign ifu_axi_wvalid                = 1'b0;
+    assign ifu_axi_wstrb[7:0]            = 8'h0;
+    assign ifu_axi_wdata[63:0]           = 64'h0;
+    assign ifu_axi_wlast                 = 1'b0;
+    assign ifu_axi_bready                = 1'b0;
+
+
+   assign ifu_bus_arready_unq     =  ifu_axi_arready ;
+   assign ifu_bus_rvalid_unq      =  ifu_axi_rvalid ;
+   assign ifu_bus_arvalid         =  ifu_axi_arvalid ;
+   // Register the handshake, response code and response tag on busclk; read data is registered on clk, enabled by ifu_bus_clk_en & ifu_axi_rvalid.
+   rvdff_fpga #(1)               bus_rdy_ff      (.*, .clk(busclk),  .clken(bus_ifu_bus_clk_en), .rawclk(clk), .din(ifu_bus_arready_unq),            .dout(ifu_bus_arready_unq_ff));
+   rvdff_fpga #(1)               bus_rsp_vld_ff  (.*, .clk(busclk),  .clken(bus_ifu_bus_clk_en), .rawclk(clk), .din(ifu_bus_rvalid_unq),             .dout(ifu_bus_rvalid_unq_ff));
+   rvdff_fpga #(1)               bus_cmd_ff      (.*, .clk(busclk),  .clken(bus_ifu_bus_clk_en), .rawclk(clk), .din(ifu_bus_arvalid),                .dout(ifu_bus_arvalid_ff));
+   rvdff_fpga #(2)               bus_rsp_cmd_ff  (.*, .clk(busclk),  .clken(bus_ifu_bus_clk_en), .rawclk(clk), .din(ifu_axi_rresp[1:0]),             .dout(ifu_bus_rresp_ff[1:0]));
+   rvdff_fpga #(pt.IFU_BUS_TAG)  bus_rsp_tag_ff  (.*, .clk(busclk),  .clken(bus_ifu_bus_clk_en), .rawclk(clk), .din(ifu_axi_rid[pt.IFU_BUS_TAG-1:0]),.dout(ifu_bus_rid_ff[pt.IFU_BUS_TAG-1:0]));
+   rvdffe #(64)                  bus_data_ff     (.*, .clk(clk),     .din(ifu_axi_rdata[63:0]),            .dout(ifu_bus_rdata_ff[63:0]), .en(ifu_bus_clk_en & ifu_axi_rvalid));
+   // Unregistered aliases of the AXI read-channel signals under the internal bus names.
+   assign ifu_bus_cmd_ready = ifu_axi_arready ;
+   assign ifu_bus_rsp_valid = ifu_axi_rvalid ;
+   assign ifu_bus_rsp_ready = ifu_axi_rready ;
+   assign ifu_bus_rsp_tag[pt.IFU_BUS_TAG-1:0] = ifu_axi_rid[pt.IFU_BUS_TAG-1:0] ;
+   assign ifu_bus_rsp_rdata[63:0] = ifu_axi_rdata[63:0] ;
+   assign ifu_bus_rsp_opc[1:0] = {ifu_axi_rresp[1:0]} ;
+
+
+
+
+
+
+
+
+
+   // Create write signals so we can write to the miss-buffer directly from the bus.
+   // Each handshake signal is qualified with the bus clock enable of its own cycle (current enable for unregistered, _ff enable for registered).
+   assign ifu_bus_rvalid            =  ifu_bus_rsp_valid & bus_ifu_bus_clk_en ;
+
+
+
+   assign ifu_bus_arready            =  ifu_bus_arready_unq    & bus_ifu_bus_clk_en    ;
+   assign ifu_bus_arready_ff         =  ifu_bus_arready_unq_ff & bus_ifu_bus_clk_en_ff ;
+
+   assign ifu_bus_rvalid_ff          =  ifu_bus_rvalid_unq_ff & bus_ifu_bus_clk_en_ff ;
+   assign bus_cmd_sent               =  ifu_bus_arvalid & ifu_bus_arready & miss_pending & ~dec_tlu_force_halt;
+   assign bus_inc_data_beat_cnt      = (bus_ifu_wr_en_ff & ~bus_last_data_beat & ~dec_tlu_force_halt) ;
+   assign bus_reset_data_beat_cnt    =  ic_act_miss_f | (bus_ifu_wr_en_ff &  bus_last_data_beat) | dec_tlu_force_halt;
+   assign bus_hold_data_beat_cnt     = ~bus_inc_data_beat_cnt & ~bus_reset_data_beat_cnt ;
+   // Data beat counter next value as an AND-OR mux: reset -> 0, increment -> count+1, otherwise hold.
+   assign bus_new_data_beat_count[pt.ICACHE_BEAT_BITS-1:0] = ({pt.ICACHE_BEAT_BITS{bus_reset_data_beat_cnt}} & (pt.ICACHE_BEAT_BITS)'(0)) |
+                                                          ({pt.ICACHE_BEAT_BITS{bus_inc_data_beat_cnt}}   & (bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0] + {{pt.ICACHE_BEAT_BITS-1{1'b0}},1'b1})) |
+                                                          ({pt.ICACHE_BEAT_BITS{bus_hold_data_beat_cnt}}  &  bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]);
+
+   // Sticky flag: set on the last data beat (unless a second miss is requested), cleared by a new miss.
+   assign last_data_recieved_in =  (bus_ifu_wr_en_ff &  bus_last_data_beat & ~scnd_miss_req) | (last_data_recieved_ff & ~ic_act_miss_f) ;
+
+
+
+// Request Address Count: load from imb_ff when no miss is pending, from imb_scnd_ff on a registered second miss, else +1 per command sent, else hold.
+   assign bus_new_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] = (~miss_pending                    ) ? imb_ff[pt.ICACHE_BEAT_ADDR_HI:3] :
+                                                           (                scnd_miss_req_q  ) ? imb_scnd_ff[pt.ICACHE_BEAT_ADDR_HI:3] :
+                                                           ( bus_cmd_sent                    ) ? (bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] + {{pt.ICACHE_BEAT_BITS-1{1'b0}},1'b1}) :
+                                                                                                  bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0];
+   // The increment is sized from ICACHE_BEAT_BITS (same form as the data and command beat counters) instead of a fixed 3-bit literal, so every operand has the counter's width.
+   rvdff_fpga #(pt.ICACHE_BEAT_BITS)  bus_rd_addr_ff (.*,  .clk(busclk_reset),  .clken (bus_ifu_bus_clk_en | ic_act_miss_f | dec_tlu_force_halt), .rawclk(clk), .din ({bus_new_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0]}), .dout({bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0]}));
+
+
+
+// command beat Count: advances once per accepted command while a miss is pending
+   assign bus_inc_cmd_beat_cnt              =  ifu_bus_cmd_valid    &  ifu_bus_cmd_ready & miss_pending & ~dec_tlu_force_halt;
+   assign bus_reset_cmd_beat_cnt_0          =  (ic_act_miss_f        & ~uncacheable_miss_in) | dec_tlu_force_halt ;
+   assign bus_reset_cmd_beat_cnt_secondlast =  ic_act_miss_f        &  uncacheable_miss_in ;
+   assign bus_hold_cmd_beat_cnt             = ~bus_inc_cmd_beat_cnt & ~(ic_act_miss_f | scnd_miss_req | dec_tlu_force_halt) ;
+   assign bus_cmd_beat_en                   =  bus_inc_cmd_beat_cnt | ic_act_miss_f | dec_tlu_force_halt;
+   // AND-OR mux: cacheable miss or force-halt -> 0, uncacheable miss -> ICACHE_SCND_LAST, accepted command -> count+1, otherwise hold.
+   assign bus_new_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0] =  ({pt.ICACHE_BEAT_BITS{bus_reset_cmd_beat_cnt_0}}       & (pt.ICACHE_BEAT_BITS)'(0) ) |
+                                                          ({pt.ICACHE_BEAT_BITS{bus_reset_cmd_beat_cnt_secondlast}} & (pt.ICACHE_BEAT_BITS)'(pt.ICACHE_SCND_LAST)) |
+                                                          ({pt.ICACHE_BEAT_BITS{bus_inc_cmd_beat_cnt}}              & (bus_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0] + {{pt.ICACHE_BEAT_BITS-1{1'b0}}, 1'b1})) |
+                                                          ({pt.ICACHE_BEAT_BITS{bus_hold_cmd_beat_cnt}}             &  bus_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0]) ;
+   // busclk_reset also runs on a new miss or force-halt so both counters can be reloaded outside a bus clock-enable cycle.
+`ifdef RV_FPGA_OPTIMIZE
+   assign busclk_reset = 1'b0;
+`else
+   rvclkhdr bus_clk_reset(.en(bus_ifu_bus_clk_en | ic_act_miss_f | dec_tlu_force_halt), .l1clk(busclk_reset), .*);
+`endif
+
+
+   // Command beat counter register; it only updates when bus_cmd_beat_en is set.
+   rvdffs_fpga #(pt.ICACHE_BEAT_BITS)  bus_cmd_beat_ff (.*, .clk(busclk_reset), .clken (bus_ifu_bus_clk_en | ic_act_miss_f | dec_tlu_force_halt), .rawclk(clk), .en (bus_cmd_beat_en), .din ({bus_new_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0]}),
+                    .dout({bus_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0]}));
+
+
+    assign bus_last_data_beat     =  uncacheable_miss_ff ? (bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0] == {{pt.ICACHE_BEAT_BITS-1{1'b0}},1'b1}) : (&bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]);
+   // Last beat is count == 1 for an uncacheable miss, count all ones otherwise. Write enables below are the (registered) read-valid qualified by miss_pending.
+   assign  bus_ifu_wr_en            =  ifu_bus_rvalid     & miss_pending ;
+   assign  bus_ifu_wr_en_ff         =  ifu_bus_rvalid_ff  & miss_pending ;
+   assign  bus_ifu_wr_en_ff_q       =  ifu_bus_rvalid_ff  & miss_pending & ~uncacheable_miss_ff & ~(|ifu_bus_rresp_ff[1:0]) & write_ic_16_bytes; // qualify with no-error conditions ;
+   assign  bus_ifu_wr_en_ff_wo_err  =  ifu_bus_rvalid_ff & miss_pending &  ~uncacheable_miss_ff;
+
+
+   rvdffie #(10) misc_ff
+       ( .*,   // ten single-bit signals registered on free_l2clk; each dout is the registered copy of the din at the same position
+         .clk(free_l2clk),
+         .din( {ic_act_miss_f,        ifu_wr_cumulative_err,exu_flush_final,  ic_crit_wd_rdy_new_in,bus_ifu_bus_clk_en,   scnd_miss_req_in,bus_cmd_req_in,  last_data_recieved_in,
+ifc_dma_access_ok_d,   dma_iccm_req}),
+         .dout({ic_act_miss_f_delayed,ifu_wr_data_comb_err_ff,  flush_final_f,ic_crit_wd_rdy_new_ff,bus_ifu_bus_clk_en_ff,scnd_miss_req_q, bus_cmd_req_hold,last_data_recieved_ff,
+ifc_dma_access_ok_prev,dma_iccm_req_f})
+         );
+   // Width is ICACHE_BEAT_BITS + 5 single bits; ic_debug_rd_en passes through two stages here (-> ic_debug_rd_en_ff -> ifu_ic_debug_rd_data_valid).
+   rvdffie #(.WIDTH(pt.ICACHE_BEAT_BITS+5),.OVERRIDE(1)) misc1_ff
+       ( .*,
+         .clk(free_l2clk),
+         .din( {reset_ic_in,sel_mb_addr,   bus_new_data_beat_count[pt.ICACHE_BEAT_BITS-1:0],ifc_region_acc_fault_memory_bf,ic_debug_rd_en,       ic_debug_rd_en_ff}),
+         .dout({reset_ic_ff,sel_mb_addr_ff,bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0],    ifc_region_acc_fault_memory_f, ic_debug_rd_en_ff,ifu_ic_debug_rd_data_valid})
+         );
+
+   assign    reset_tag_valid_for_miss = ic_act_miss_f_delayed & (miss_state == CRIT_BYP_OK) & ~uncacheable_miss_ff;
+   assign    bus_ifu_wr_data_error    = |ifu_bus_rsp_opc[1:0] &  ifu_bus_rvalid  & miss_pending;
+   assign    bus_ifu_wr_data_error_ff = |ifu_bus_rresp_ff[1:0] &  ifu_bus_rvalid_ff  & miss_pending;
+   // Any non-zero response code on a valid read during a pending miss is flagged as a data error (unregistered and registered forms above).
+
+   assign ic_crit_wd_rdy   =  ic_crit_wd_rdy_new_in | ic_crit_wd_rdy_new_ff ;
+   assign last_beat        =  bus_last_data_beat & bus_ifu_wr_en_ff;
+   assign reset_beat_cnt    = bus_reset_data_beat_cnt ;
+
+// DMA
+   // Making sure that the dma_access is allowed when we have 2 back to back dma_access_ok. Also gating with current state == idle
+   assign ifc_dma_access_ok_d  = ifc_dma_access_ok &  ~iccm_correct_ecc & ~iccm_dma_sb_error;
+   assign ifc_dma_access_q_ok  = ifc_dma_access_ok &  ~iccm_correct_ecc & ifc_dma_access_ok_prev &  (perr_state == ERR_IDLE)  & ~iccm_dma_sb_error;
+   assign iccm_ready           = ifc_dma_access_q_ok ;
+
+   logic [1:0]        iccm_ecc_word_enable;
+   // ICCM DMA access, ECC check and single-bit correction path; the else branch ties the outputs off when pt.ICCM_ENABLE != 1.
+    if (pt.ICCM_ENABLE == 1 ) begin: iccm_enabled
+         logic  [3:2] dma_mem_addr_ff  ;
+         logic  iccm_dma_rden    ;
+
+         logic  iccm_dma_ecc_error_in;
+         logic  [13:0] dma_mem_ecc;
+         logic  [63:0] iccm_dma_rdata_in;
+         logic  [31:0] iccm_dma_rdata_1_muxed;
+         logic [1:0] [31:0] iccm_corrected_data;
+         logic [1:0] [06:0] iccm_corrected_ecc;
+
+
+         logic [1:0]        iccm_double_ecc_error;
+
+
+         logic [pt.ICCM_BITS-1:2]       iccm_rw_addr_f;
+
+         logic [31:0]       iccm_corrected_data_f_mux;
+         logic [06:0]       iccm_corrected_ecc_f_mux;
+         logic              iccm_dma_rvalid_in;
+         logic [77:0]       iccm_rdmux_data;
+         logic              iccm_rd_ecc_single_err_hold_in ;
+         logic [2:0]        dma_mem_tag_ff;
+
+
+
+         // ICCM is written by a granted DMA write or by the ECC correction state; it is read by a granted DMA read or by an ICCM fetch request.
+         assign iccm_wren          =  (ifc_dma_access_q_ok & dma_iccm_req &  dma_mem_write) | iccm_correct_ecc;
+         assign iccm_rden          =  (ifc_dma_access_q_ok & dma_iccm_req & ~dma_mem_write) | (ifc_iccm_access_bf & ifc_fetch_req_bf);
+         assign iccm_dma_rden      =  (ifc_dma_access_q_ok & dma_iccm_req & ~dma_mem_write)                     ;
+         assign iccm_wr_size[2:0]  =  {3{dma_iccm_req}}    & dma_mem_sz[2:0] ;
+         // One ECC encoder per 32-bit half of the DMA write data (7 check bits each).
+         rvecc_encode  iccm_ecc_encode0 (
+                           .din(dma_mem_wdata[31:0]),
+                           .ecc_out(dma_mem_ecc[6:0]));
+
+         rvecc_encode  iccm_ecc_encode1 (
+                           .din(dma_mem_wdata[63:32]),
+                           .ecc_out(dma_mem_ecc[13:7]));
+        // Write data: the saved corrected 39-bit word replicated into both halves during correction (when no DMA access is granted), otherwise DMA data with its ECC.
+        assign iccm_wr_data[77:0]   =  (iccm_correct_ecc & ~(ifc_dma_access_q_ok & dma_iccm_req)) ?  {iccm_ecc_corr_data_ff[38:0], iccm_ecc_corr_data_ff[38:0]} :
+                                       {dma_mem_ecc[13:7],dma_mem_wdata[63:32], dma_mem_ecc[6:0],dma_mem_wdata[31:0]};
+         // DMA read data: upper half is word 0 when registered address bit 2 is set, else word 1; on a double-bit error both halves carry dma_mem_addr instead.
+         assign iccm_dma_rdata_1_muxed[31:0] = dma_mem_addr_ff[2] ?  iccm_corrected_data[0][31:0] : iccm_corrected_data[1][31:0] ;
+         assign iccm_dma_rdata_in[63:0]      = iccm_dma_ecc_error_in ? {2{dma_mem_addr[31:0]}} : {iccm_dma_rdata_1_muxed[31:0], iccm_corrected_data[0]};
+         assign iccm_dma_ecc_error_in   =   |(iccm_double_ecc_error[1:0]);
+         // dma_misc_bits chains two register stages for the tag (dma_mem_tag -> _ff -> iccm_dma_rtag) and for the read valid (rden -> rvalid_in -> rvalid).
+         rvdffe    #(64) dma_data_ff      (.*, .clk(clk), .en(iccm_dma_rvalid_in),  .din(iccm_dma_rdata_in[63:0]), .dout(iccm_dma_rdata[63:0]));
+         rvdffie   #(11) dma_misc_bits    (.*, .clk(free_l2clk), .din({dma_mem_tag[2:0],
+                                                                       dma_mem_tag_ff[2:0],
+                                                                       dma_mem_addr[3:2],
+                                                                       iccm_dma_rden,
+                                                                       iccm_dma_rvalid_in,
+                                                                       iccm_dma_ecc_error_in }),
+                                                                .dout({dma_mem_tag_ff[2:0],
+                                                                       iccm_dma_rtag[2:0],
+                                                                       dma_mem_addr_ff[3:2],
+                                                                       iccm_dma_rvalid_in,
+                                                                       iccm_dma_rvalid,
+                                                                       iccm_dma_ecc_error }));
+         // Address select: granted DMA access (when not correcting), else the saved correction index, else the fetch address.
+         assign iccm_rw_addr[pt.ICCM_BITS-1:1]    = (  ifc_dma_access_q_ok & dma_iccm_req  & ~iccm_correct_ecc) ? dma_mem_addr[pt.ICCM_BITS-1:1] :
+                                                 (~(ifc_dma_access_q_ok & dma_iccm_req) &  iccm_correct_ecc) ? {iccm_ecc_corr_index_ff[pt.ICCM_BITS-1:2],1'b0} : ifc_fetch_addr_bf[pt.ICCM_BITS-1:1] ;
+
+
+
+
+/////////////////////////////////////////////////////////////////////////////////////
+// ECC checking logic for ICCM data.                                               //
+/////////////////////////////////////////////////////////////////////////////////////
+  // NOTE(review): despite its name, ic_fetch_val_shift_right is produced with a LEFT shift by fetch address bit 1.
+  logic [3:0] ic_fetch_val_int_f;
+  logic [3:0] ic_fetch_val_shift_right;
+  assign ic_fetch_val_int_f[3:0] = {2'b00 , ic_fetch_val_f[1:0] } ;
+  assign ic_fetch_val_shift_right[3:0] = {ic_fetch_val_int_f << ifu_fetch_addr_int_f[1] } ;
+   // One decoder per 39-bit word (32 data + 7 ECC); a word is checked when a fetch uses it or a DMA read is in flight, unless ECC is disabled.
+   assign iccm_rdmux_data[77:0] = iccm_rd_data_ecc[77:0];
+   for (genvar i=0; i < 2 ; i++) begin : ICCM_ECC_CHECK
+      assign iccm_ecc_word_enable[i] = ((|ic_fetch_val_shift_right[(2*i+1):(2*i)] & ~exu_flush_final & sel_iccm_data) | iccm_dma_rvalid_in) & ~dec_tlu_core_ecc_disable;
+   rvecc_decode  ecc_decode (
+                           .en(iccm_ecc_word_enable[i]),
+                           .sed_ded ( 1'b0 ),    // 1 : means only detection
+                           .din(iccm_rdmux_data[(39*i+31):(39*i)]),
+                           .ecc_in(iccm_rdmux_data[(39*i+38):(39*i+32)]),
+                           .dout(iccm_corrected_data[i][31:0]),
+                           .ecc_out(iccm_corrected_ecc[i][6:0]),
+                           .single_ecc_error(iccm_single_ecc_error[i]),
+                           .double_ecc_error(iccm_double_ecc_error[i]));
+end
+  // Fetch-side error flags; when fetch address bit 1 is clear, word 0's double-bit error is reported on both output bits.
+  assign iccm_rd_ecc_single_err  = (|iccm_single_ecc_error[1:0] ) & ifc_iccm_access_f & ifc_fetch_req_f;
+  assign iccm_rd_ecc_double_err[1:0]  = ~ifu_fetch_addr_int_f[1] ? ({iccm_double_ecc_error[0], iccm_double_ecc_error[0]} ) & {2{ifc_iccm_access_f}} :
+                                                                   ({iccm_double_ecc_error[1], iccm_double_ecc_error[0]} ) & {2{ifc_iccm_access_f}} ;
+  // A single-bit error in word 0 takes priority when choosing which corrected word to save.
+  assign iccm_corrected_data_f_mux[31:0] = iccm_single_ecc_error[0] ? iccm_corrected_data[0] : iccm_corrected_data[1];
+  assign iccm_corrected_ecc_f_mux[6:0]   = iccm_single_ecc_error[0] ? iccm_corrected_ecc[0]  : iccm_corrected_ecc[1];
+  // Save corrected data and index on the first cycle of a fetch single-bit error (not while flushing) or on a DMA single-bit error.
+  assign iccm_ecc_write_status           = ((iccm_rd_ecc_single_err & ~iccm_rd_ecc_single_err_ff)  & ~exu_flush_final) | iccm_dma_sb_error;
+  assign iccm_rd_ecc_single_err_hold_in  = (iccm_rd_ecc_single_err | iccm_rd_ecc_single_err_ff) & ~exu_flush_final ;
+  assign iccm_error_start                =  iccm_rd_ecc_single_err;
+  assign iccm_ecc_corr_index_in[pt.ICCM_BITS-1:2] = iccm_single_ecc_error[0] ? iccm_rw_addr_f[pt.ICCM_BITS-1:2] : iccm_rw_addr_f[pt.ICCM_BITS-1:2] + 1'b1 ;
+   // Registered ICCM word address and the sticky single-error flag.
+   rvdffie #(pt.ICCM_BITS-1) iccm_index_f   (.*, .clk(free_l2clk), .din({iccm_rw_addr[pt.ICCM_BITS-1:2],
+                                                                         iccm_rd_ecc_single_err_hold_in
+                                                                                                       }),
+                                                                  .dout({iccm_rw_addr_f[pt.ICCM_BITS-1:2],
+                                                                         iccm_rd_ecc_single_err_ff}));
+   // Holds the corrected word (7 ECC + 32 data bits) and its index until the correction write.
+   rvdffe #((39+(pt.ICCM_BITS-2)))      ecc_dat0_ff  (
+                                                      .clk(clk),
+                                                      .din({iccm_corrected_ecc_f_mux[6:0],  iccm_corrected_data_f_mux[31:0],iccm_ecc_corr_index_in[pt.ICCM_BITS-1:2]}),
+                                                      .dout({iccm_ecc_corr_data_ff[38:0]   ,iccm_ecc_corr_index_ff[pt.ICCM_BITS-1:2]}),
+                                                      .en(iccm_ecc_write_status),
+                                                      .*
+                                                      );
+     // No ICCM: drive the DMA response and ECC status outputs listed below to constant zero.
+     end else begin : iccm_disabled
+         assign iccm_dma_rvalid = 1'b0 ;
+         assign iccm_dma_ecc_error = 1'b0 ;
+         assign iccm_dma_rdata[63:0] = '0 ;
+         assign iccm_single_ecc_error = '0 ;
+         assign iccm_dma_rtag         = '0 ;
+
+
+
+
+
+
+         assign iccm_rd_ecc_single_err                 = 1'b0 ;
+         assign iccm_rd_ecc_double_err                 = '0 ;
+         assign iccm_rd_ecc_single_err_ff              = 1'b0 ;
+         assign iccm_error_start                         = 1'b0;
+         assign iccm_ecc_corr_index_ff[pt.ICCM_BITS-1:2]  =  '0;
+         assign iccm_ecc_corr_data_ff[38:0]            =  '0;
+         assign iccm_ecc_write_status                  =  '0;
+
+
+
+
+
+
+    end
+
+
+////// I-cache read / write enable signals
+// ic_rd_en: cacheable, non-ICCM fetch request that is either not blocked by one of the listed miss states, or accompanied by a final flush.
+
+ assign   ic_rd_en    =  (ifc_fetch_req_bf & ~ifc_fetch_uncacheable_bf & ~ifc_iccm_access_bf  &
+                            ~(((miss_state == STREAM) & ~miss_state_en)                                       |
+                              ((miss_state == CRIT_BYP_OK) & ~miss_state_en)                                  |
+                              ((miss_state == STALL_SCND_MISS) & ~miss_state_en)                              |
+                              ((miss_state == MISS_WAIT) & ~miss_state_en)                                    |
+                              ((miss_state == CRIT_WRD_RDY) & ~miss_state_en)  |
+                              ((miss_state == CRIT_BYP_OK) &  miss_state_en &  (miss_nxtstate == MISS_WAIT))  ))  |
+                             ( ifc_fetch_req_bf & exu_flush_final  & ~ifc_fetch_uncacheable_bf & ~ifc_iccm_access_bf )     ;
+// NOTE(review): ic_real_rd_wp_unused has no reader in the visible part of the file; the name suggests it is intentionally unused - confirm.
+logic   ic_real_rd_wp_unused;
+assign  ic_real_rd_wp_unused  =  (ifc_fetch_req_bf &  ~ifc_iccm_access_bf  &  ~ifc_region_acc_fault_final_bf & ~dec_tlu_fence_i_wb & ~stream_miss_f & ~ic_act_miss_f &
+                            ~(((miss_state == STREAM) & ~miss_state_en) |
+                              ((miss_state == CRIT_BYP_OK) & ~miss_state_en & ~(miss_nxtstate == MISS_WAIT)) |
+                              ((miss_state == CRIT_BYP_OK) &  miss_state_en &  (miss_nxtstate == MISS_WAIT)) |
+                              ((miss_state == MISS_WAIT) & ~miss_state_en) |
+                              ((miss_state == STALL_SCND_MISS) & ~miss_state_en)  |
+                              ((miss_state == CRIT_WRD_RDY) & ~miss_state_en)  |
+                              ((miss_nxtstate == STREAM) &  miss_state_en)  |
+                              ((miss_state == SCND_MISS) & ~miss_state_en))) |
+                          (ifc_fetch_req_bf &  ~ifc_iccm_access_bf  &  ~ifc_region_acc_fault_final_bf & ~dec_tlu_fence_i_wb & ~stream_miss_f & exu_flush_final)  ;
+
+// Way write enables are gated by write_ic_16_bytes; ic_write_stall is that write except in CRIT_BYP_OK or uninterrupted STREAM, where it still applies on the last cacheable beat.
+assign ic_wr_en[pt.ICACHE_NUM_WAYS-1:0] = bus_ic_wr_en[pt.ICACHE_NUM_WAYS-1:0] & {pt.ICACHE_NUM_WAYS{write_ic_16_bytes}};
+assign ic_write_stall                =  write_ic_16_bytes &  ~((((miss_state== CRIT_BYP_OK) | ((miss_state==STREAM) & ~(exu_flush_final | ifu_bp_hit_taken_q_f  | stream_eol_f ))) & ~(bus_ifu_wr_en_ff & last_beat & ~uncacheable_miss_ff)));
+
+
+
+
+///////////////////////////////////////////////////////////////
+// Icache status and LRU
+///////////////////////////////////////////////////////////////
+logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid_unq;
+if (pt.ICACHE_ENABLE == 1 ) begin: icache_enabled
+   assign  ic_valid  = ~ifu_wr_cumulative_err_data & ~(reset_ic_in | reset_ic_ff) & ~reset_tag_valid_for_miss;
+
+   assign ifu_status_wr_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] = ((ic_debug_rd_en | ic_debug_wr_en ) & ic_debug_tag_array) ?
+                                                                           ic_debug_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] :
+                                                                           ifu_status_wr_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO];
+
+   // status
+
+         assign way_status_wr_en_w_debug = way_status_wr_en | (ic_debug_wr_en  & ic_debug_tag_array);
+
+         assign way_status_new_w_debug[pt.ICACHE_STATUS_BITS-1:0]  = (ic_debug_wr_en  & ic_debug_tag_array) ? (pt.ICACHE_STATUS_BITS == 1) ? ic_debug_wr_data[4] : ic_debug_wr_data[6:4] :
+                                                way_status_new[pt.ICACHE_STATUS_BITS-1:0] ;
+
+   rvdffie #(.WIDTH(pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO+1+pt.ICACHE_STATUS_BITS),.OVERRIDE(1))  status_misc_ff
+     (.*,
+      .clk(free_l2clk),
+      .din({ ifu_status_wr_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO], way_status_wr_en_w_debug, way_status_new_w_debug[pt.ICACHE_STATUS_BITS-1:0]}),
+      .dout({ifu_status_wr_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO],      way_status_wr_en_ff,      way_status_new_ff[pt.ICACHE_STATUS_BITS-1:0]} )
+      );
+
+   logic [(pt.ICACHE_TAG_DEPTH/8)-1 : 0] way_status_clken;   // clock enable, one bit per group of 8 tag indices
+   logic [(pt.ICACHE_TAG_DEPTH/8)-1 : 0] way_status_clk;     // clock handed to the status flops of each group
+   // Way-status storage: one ICACHE_STATUS_BITS-wide register per tag index, built as ICACHE_TAG_DEPTH/8 groups of 8.
+   for (genvar i=0 ; i<pt.ICACHE_TAG_DEPTH/8 ; i++) begin : CLK_GRP_WAY_STATUS
+      assign way_status_clken[i] = (ifu_status_wr_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO+3] == i );   // depends only on the registered address, not on way_status_wr_en_ff
+     `ifdef RV_FPGA_OPTIMIZE
+        assign way_status_clk[i] = 1'b0;   // gated clock tied off; presumably rvdffs_fpga then uses rawclk with clken - confirm in the flop library
+     `else
+           rvclkhdr way_status_cgc ( .en(way_status_clken[i]),   .l1clk(way_status_clk[i]), .* );
+     `endif
+      // Within a group the low three index bits pick the entry. An entry loads way_status_new_ff
+      // only when its index matches the registered write address and way_status_wr_en_ff is set.
+      for (genvar j=0 ; j<8 ; j++) begin : WAY_STATUS
+         rvdffs_fpga #(pt.ICACHE_STATUS_BITS) ic_way_status (.*,
+                   .clk(way_status_clk[i]),
+                   .clken(way_status_clken[i]),
+                   .rawclk(clk),
+                   .en(((ifu_status_wr_addr_ff[pt.ICACHE_TAG_INDEX_LO+2:pt.ICACHE_TAG_INDEX_LO] == j) & way_status_wr_en_ff)),
+                   .din(way_status_new_ff[pt.ICACHE_STATUS_BITS-1:0]),
+                   .dout(way_status_out[8*i+j]));
+      end  // WAY_STATUS
+   end  // CLK_GRP_WAY_STATUS
+
+  // Read mux: way_status is the stored status of the entry addressed by the registered tag index (zero if no entry matches).
+  always_comb begin : way_status_out_mux
+      way_status[pt.ICACHE_STATUS_BITS-1:0] = '0 ;
+      for (int set_idx = 0; set_idx < pt.ICACHE_TAG_DEPTH; set_idx++) begin : status_mux_loop
+        if ((pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO)'(set_idx) == ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO])
+          way_status[pt.ICACHE_STATUS_BITS-1:0] = way_status_out[set_idx];
+      end
+  end
+
+         assign ifu_ic_rw_int_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] = ((ic_debug_rd_en | ic_debug_wr_en ) & ic_debug_tag_array) ?   // debug tag-array access overrides the functional index
+                                                                        ic_debug_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] :
+                                                                        ifu_ic_rw_int_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO];
+         assign ifu_tag_wren_w_debug[pt.ICACHE_NUM_WAYS-1:0] = ifu_tag_wren[pt.ICACHE_NUM_WAYS-1:0] | ic_debug_tag_wr_en[pt.ICACHE_NUM_WAYS-1:0] ;   // per-way tag write: functional or debug
+         // A debug tag-array write takes the valid bit from ic_debug_wr_data[0]; otherwise ic_valid is used.
+         assign ic_valid_w_debug = (ic_debug_wr_en & ic_debug_tag_array) ? ic_debug_wr_data[0] : ic_valid;
+         // Register {index, per-way write enables, valid bit} on free_l2clk; the *_ff copies drive the tag-valid flops and read muxes.
+         rvdffie #(pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO+pt.ICACHE_NUM_WAYS+1) tag_addr_ff (.*,
+                                                                                              .clk(free_l2clk),
+                                                                                              .din({ifu_ic_rw_int_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO],
+                                                                                                    ifu_tag_wren_w_debug[pt.ICACHE_NUM_WAYS-1:0],
+                                                                                                    ic_valid_w_debug}),
+                                                                                              .dout({ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO],
+                                                                                                     ifu_tag_wren_ff[pt.ICACHE_NUM_WAYS-1:0],
+                                                                                                     ic_valid_ff})
+                                                                                              );
+
+
+   logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_DEPTH-1:0] ic_tag_valid_out ;   // stored valid bit, indexed [way][tag index]
+   // Clock enable and clock per (group of 32 tag indices, way).
+   logic [(pt.ICACHE_TAG_DEPTH/32)-1:0] [pt.ICACHE_NUM_WAYS-1:0] tag_valid_clken ;
+   logic [(pt.ICACHE_TAG_DEPTH/32)-1:0] [pt.ICACHE_NUM_WAYS-1:0] tag_valid_clk   ;
+   // Tag-valid storage: one 1-bit register per way and tag index, built as ICACHE_TAG_DEPTH/32 groups of 32 per way.
+   for (genvar i=0 ; i<pt.ICACHE_TAG_DEPTH/32 ; i++) begin : CLK_GRP_TAG_VALID
+      for (genvar j=0; j<pt.ICACHE_NUM_WAYS; j++) begin : way_clken
+      if (pt.ICACHE_TAG_DEPTH == 32 ) begin   // single group: no group compare; presumably no index bits remain above the low five - confirm
+        assign tag_valid_clken[i][j] =  ifu_tag_wren_ff[j] | perr_err_inv_way[j] | reset_all_tags;
+      end else begin
+         assign tag_valid_clken[i][j] = (((ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO+5] == i ) &  ifu_tag_wren_ff[j] ) |
+                                        ((perr_ic_index_ff     [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO+5] == i ) &  perr_err_inv_way[j]) | reset_all_tags);
+      end
+      // NOTE(review): the clock header below reuses the instance name way_status_cgc from the way-status loop; here it sits in a different generate scope.
+     `ifdef RV_FPGA_OPTIMIZE
+        assign tag_valid_clk[i][j]  = 1'b0;
+     `else
+           rvclkhdr way_status_cgc ( .en(tag_valid_clken[i][j]),   .l1clk(tag_valid_clk[i][j]), .* );
+     `endif
+      // Entry k of group i holds the valid bit of tag index k + 32*i for way j.
+      // It loads on a tag write to that index and way, on a parity-error invalidation of that index and way, or on reset_all_tags.
+      // The loaded value is ic_valid_ff, forced to 0 while reset_all_tags or perr_sel_invalidate is asserted.
+      for (genvar k=0 ; k<32 ; k++) begin : TAG_VALID
+         rvdffs_fpga #(1) ic_way_tagvalid_dup (.*,
+                   .clk(tag_valid_clk[i][j]),
+                   .clken(tag_valid_clken[i][j]),
+                   .rawclk(clk),
+                   .en(((ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == (k + 32*i)) & ifu_tag_wren_ff[j] ) |
+                       ((perr_ic_index_ff     [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == (k + 32*i)) & perr_err_inv_way[j]) | reset_all_tags),
+                   .din(ic_valid_ff & ~reset_all_tags & ~perr_sel_invalidate),
+                   .dout(ic_tag_valid_out[j][32*i+k]));
+      end
+      end
+   end
+
+
+  // Read mux: for every way, pick the stored valid bit of the entry addressed by the registered tag index (zero if no entry matches).
+  always_comb begin : tag_valid_out_mux
+      ic_tag_valid_unq[pt.ICACHE_NUM_WAYS-1:0] = '0;
+      for (int set_idx = 0; set_idx < pt.ICACHE_TAG_DEPTH; set_idx++) begin : tag_valid_loop
+        if ((pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO)'(set_idx) == ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]) begin : valid_out
+          for (int way_idx = 0; way_idx < pt.ICACHE_NUM_WAYS; way_idx++)
+            ic_tag_valid_unq[way_idx] = ic_tag_valid_unq[way_idx] | ic_tag_valid_out[way_idx][set_idx];
+        end
+      end
+  end
+   //   four-way set associative - three bits
+//   each bit represents one branch point in a binary decision tree; let 1
+//   represent that the left side has been referenced more recently than the
+//   right side, and 0 vice-versa
+//
+//              are all 4 ways valid?
+//                   /       \
+//                  |        no, use an invalid way.
+//                  |
+//                  |
+//             bit_0 == 0?             state | replace      ref to | next state
+//               /       \             ------+--------      -------+-----------
+//              y         n             x00  |  way_0      way_0 |    _11
+//             /           \            x10  |  way_1      way_1 |    _01
+//      bit_1 == 0?    bit_2 == 0?      0x1  |  way_2      way_2 |    1_0
+//        /    \          /    \        1x1  |  way_3      way_3 |    0_0
+//       y      n        y      n
+//      /        \      /        \        ('x' means don't care)      ('_' means unchanged)
+//    way_0    way_1  way_2     way_3
+
+   if (pt.ICACHE_NUM_WAYS == 4) begin: four_way_plru   // victim: PLRU state when all four ways are valid, else the lowest-numbered invalid way
+   assign replace_way_mb_any[3] = ( way_status_mb_ff[2]  & way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) |
+                                  (~tagv_mb_ff[3]& tagv_mb_ff[2] &  tagv_mb_ff[1] &  tagv_mb_ff[0]) ;
+   assign replace_way_mb_any[2] = (~way_status_mb_ff[2]  & way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) |
+                                  (~tagv_mb_ff[2]& tagv_mb_ff[1] &  tagv_mb_ff[0]) ;
+   assign replace_way_mb_any[1] = ( way_status_mb_ff[1] & ~way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) |
+                                  (~tagv_mb_ff[1]& tagv_mb_ff[0] ) ;
+   assign replace_way_mb_any[0] = (~way_status_mb_ff[1] & ~way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) |
+                                  (~tagv_mb_ff[0] ) ;
+   // Hit update: the way flagged in ic_rd_hit rewrites two of the three status bits and keeps the third from way_status; every term is zero while exu_flush_final is set.
+   assign way_status_hit_new[pt.ICACHE_STATUS_BITS-1:0] = ({3{~exu_flush_final & ic_rd_hit[0]}} & {way_status[2] , 1'b1 , 1'b1}) |
+                                                          ({3{~exu_flush_final & ic_rd_hit[1]}} & {way_status[2] , 1'b0 , 1'b1}) |
+                                                          ({3{~exu_flush_final & ic_rd_hit[2]}} & {1'b1 ,way_status[1]  , 1'b0}) |
+                                                          ({3{~exu_flush_final & ic_rd_hit[3]}} & {1'b0 ,way_status[1]  , 1'b0}) ;
+   // Fill update: same encoding, selected by the chosen victim way and keeping the third bit from way_status_mb_ff.
+  assign way_status_rep_new[pt.ICACHE_STATUS_BITS-1:0] = ({3{replace_way_mb_any[0]}} & {way_status_mb_ff[2] , 1'b1 , 1'b1}) |
+                                   ({3{replace_way_mb_any[1]}} & {way_status_mb_ff[2] , 1'b0 , 1'b1}) |
+                                   ({3{replace_way_mb_any[2]}} & {1'b1 ,way_status_mb_ff[1]  , 1'b0}) |
+                                   ({3{replace_way_mb_any[3]}} & {1'b0 ,way_status_mb_ff[1]  , 1'b0}) ;
+  end
+   else begin : two_ways_plru   // with both ways valid: status 0 replaces way 0, status 1 replaces way 1; otherwise the lowest-numbered invalid way
+      assign replace_way_mb_any[0]                      = (~way_status_mb_ff  & tagv_mb_ff[0] & tagv_mb_ff[1]) | ~tagv_mb_ff[0];
+      assign replace_way_mb_any[1]                      = ( way_status_mb_ff  & tagv_mb_ff[0] & tagv_mb_ff[1]) | ~tagv_mb_ff[1] & tagv_mb_ff[0];   // '&' binds tighter than '|'
+      assign way_status_hit_new[pt.ICACHE_STATUS_BITS-1:0] = ic_rd_hit[0];
+      assign way_status_rep_new[pt.ICACHE_STATUS_BITS-1:0] = replace_way_mb_any[0];
+      // NOTE(review): unlike the four-way branch, way_status_hit_new is not qualified with ~exu_flush_final here - confirm intent.
+   end
+  // Status source: the replacement value when bus_ifu_wr_en_ff_q & last_beat, else the hit value (this keeps way_status_hit_new selected during hit_under_miss).
+  assign way_status_new[pt.ICACHE_STATUS_BITS-1:0]     = (bus_ifu_wr_en_ff_q  & last_beat )  ? way_status_rep_new[pt.ICACHE_STATUS_BITS-1:0] :
+                                                          way_status_hit_new[pt.ICACHE_STATUS_BITS-1:0] ;
+
+  // The status is written when bus_ifu_wr_en_ff_q & last_beat, or on ic_act_hit_f.
+  assign way_status_wr_en  = (bus_ifu_wr_en_ff_q  & last_beat) | ic_act_hit_f;
+  // Per-way write enables; only the way selected in replace_way_mb_any is ever enabled.
+   for (genvar i=0; i<pt.ICACHE_NUM_WAYS; i++) begin  : bus_wren_loop
+      assign bus_wren[i]           = bus_ifu_wr_en_ff_q & replace_way_mb_any[i] & miss_pending ;   // bus write enable for the victim way while a miss is pending
+      assign bus_wren_last[i]      = bus_ifu_wr_en_ff_wo_err & replace_way_mb_any[i] & miss_pending & bus_last_data_beat;   // uses the *_wo_err enable, last data beat only
+      assign ifu_tag_wren[i]       = bus_wren_last[i] | wren_reset_miss[i];   // tag write: last beat, or tag-valid reset for a miss
+      assign wren_reset_miss[i]    = replace_way_mb_any[i] & reset_tag_valid_for_miss ;
+
+   end
+   assign bus_ic_wr_en[pt.ICACHE_NUM_WAYS-1:0] = bus_wren[pt.ICACHE_NUM_WAYS-1:0];
+
+
+end else begin: icache_disabled   // constant-zero tie-offs that replace the tag-valid, way-status and replacement logic of the other branch
+   assign ic_tag_valid_unq[pt.ICACHE_NUM_WAYS-1:0]      = '0;
+   assign way_status[pt.ICACHE_STATUS_BITS-1:0]         = '0;
+   assign replace_way_mb_any[pt.ICACHE_NUM_WAYS-1:0]    = '0;
+   assign way_status_hit_new[pt.ICACHE_STATUS_BITS-1:0] = '0;
+   assign way_status_rep_new[pt.ICACHE_STATUS_BITS-1:0] = '0;
+   assign way_status_new[pt.ICACHE_STATUS_BITS-1:0]     = '0;
+   assign way_status_wr_en                           = '0;
+   assign bus_wren[pt.ICACHE_NUM_WAYS-1:0]              = '0;
+   // NOTE(review): bus_wren_last, ifu_tag_wren, wren_reset_miss and bus_ic_wr_en are driven in the other branch but not here - confirm they are unused in this configuration.
+end
+   // ic_tag_valid is zeroed unless ifc_fetch_req_f_raw is set and fetch_uncacheable_ff is clear; the debug read-out ORs the valid bits of the registered debug way while ic_debug_rd_en_ff is set.
+   assign ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0] = ic_tag_valid_unq[pt.ICACHE_NUM_WAYS-1:0]   & {pt.ICACHE_NUM_WAYS{(~fetch_uncacheable_ff & ifc_fetch_req_f_raw) }} ;
+   assign ic_debug_tag_val_rd_out           = |(ic_tag_valid_unq[pt.ICACHE_NUM_WAYS-1:0] &  ic_debug_way_ff[pt.ICACHE_NUM_WAYS-1:0]   & {pt.ICACHE_NUM_WAYS{ic_debug_rd_en_ff}}) ;
+///////////////////////////////////////////
+// PMU signals
+///////////////////////////////////////////
+// The *_in signals below are the unregistered PMU events; ifu_pmu_sigs_ff produces the registered ifu_pmu_* versions.
+ assign ifu_pmu_ic_miss_in   = ic_act_miss_f ;
+ assign ifu_pmu_ic_hit_in    = ic_act_hit_f  ;
+ assign ifu_pmu_bus_error_in = |ifc_bus_acc_fault_f;   // any bit of the fault vector
+ assign ifu_pmu_bus_trxn_in  = bus_cmd_sent ;
+ assign ifu_pmu_bus_busy_in  = ifu_bus_arvalid_ff & ~ifu_bus_arready_ff & miss_pending ;   // arvalid without arready while a miss is pending
+   // 9-bit register on free_l2clk: four non-PMU signals (note dec_tlu_fence_i_wb -> reset_all_tags) followed by the five PMU events.
+   rvdffie #(9) ifu_pmu_sigs_ff (.*,
+                    .clk (free_l2clk),
+                    .din ({ifc_fetch_uncacheable_bf, ifc_fetch_req_qual_bf, dma_sb_err_state, dec_tlu_fence_i_wb,
+                           ifu_pmu_ic_miss_in,
+                           ifu_pmu_ic_hit_in,
+                           ifu_pmu_bus_error_in,
+                           ifu_pmu_bus_busy_in,
+                           ifu_pmu_bus_trxn_in
+                          }),
+                    .dout({fetch_uncacheable_ff, ifc_fetch_req_f_raw, dma_sb_err_state_ff, reset_all_tags,
+                           ifu_pmu_ic_miss,
+                           ifu_pmu_ic_hit,
+                           ifu_pmu_bus_error,
+                           ifu_pmu_bus_busy,
+                           ifu_pmu_bus_trxn
+                           }));
+
+
+///////////////////////////////////////////////////////
+// Cache debug logic                                 //
+///////////////////////////////////////////////////////
+assign ic_debug_addr[pt.ICACHE_INDEX_HI:3] = dec_tlu_ic_diag_pkt.icache_dicawics[pt.ICACHE_INDEX_HI-3:0] ;   // low dicawics bits map to address bits [ICACHE_INDEX_HI:3]
+assign ic_debug_way_enc[01:00]             = dec_tlu_ic_diag_pkt.icache_dicawics[15:14] ;   // binary-encoded way
+
+// dicawics[16] selects the tag array; rd_valid / wr_valid of the diagnostic packet are the debug read / write requests.
+assign ic_debug_tag_array       = dec_tlu_ic_diag_pkt.icache_dicawics[16] ;
+assign ic_debug_rd_en           = dec_tlu_ic_diag_pkt.icache_rd_valid ;
+assign ic_debug_wr_en           = dec_tlu_ic_diag_pkt.icache_wr_valid ;
+
+// One-hot way select. The concatenation is four bits wide; when ICACHE_NUM_WAYS is smaller the assignment keeps only the low bits.
+assign ic_debug_way[pt.ICACHE_NUM_WAYS-1:0]        = {(ic_debug_way_enc[1:0] == 2'b11),
+                                                      (ic_debug_way_enc[1:0] == 2'b10),
+                                                      (ic_debug_way_enc[1:0] == 2'b01),
+                                                      (ic_debug_way_enc[1:0] == 2'b00) };
+// Per-way debug tag write enable: asserted only for debug writes that target the tag array.
+assign ic_debug_tag_wr_en[pt.ICACHE_NUM_WAYS-1:0] = {pt.ICACHE_NUM_WAYS{ic_debug_wr_en & ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ;
+// Debug read of the tag array.
+assign ic_debug_ict_array_sel_in      =  ic_debug_rd_en & ic_debug_tag_array ;
+// Register the tag-array read select and the one-hot way on debug_c1_clk.
+rvdff_fpga #(01+pt.ICACHE_NUM_WAYS) ifu_debug_sel_ff (.*, .clk (debug_c1_clk),
+                    .clken(debug_c1_clken), .rawclk(clk),
+                    .din ({ic_debug_ict_array_sel_in,
+                           ic_debug_way[pt.ICACHE_NUM_WAYS-1:0]
+                          }),
+                    .dout({ic_debug_ict_array_sel_ff,
+                           ic_debug_way_ff[pt.ICACHE_NUM_WAYS-1:0]
+                           }));
+
+
+
+// The debug-data clock enable follows the registered debug read enable.
+assign debug_data_clken  =  ic_debug_rd_en_ff;
+
+
+
+
+// memory protection  - equation to look identical to the LSU equation. Okay when no INST_ACCESS window is enabled, or the fetch address (bit 0 forced to 0) matches an enabled window under its mask.
+   assign ifc_region_acc_okay = (~(|{pt.INST_ACCESS_ENABLE0,pt.INST_ACCESS_ENABLE1,pt.INST_ACCESS_ENABLE2,pt.INST_ACCESS_ENABLE3,pt.INST_ACCESS_ENABLE4,pt.INST_ACCESS_ENABLE5,pt.INST_ACCESS_ENABLE6,pt.INST_ACCESS_ENABLE7})) |
+                               (pt.INST_ACCESS_ENABLE0 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK0)) == (pt.INST_ACCESS_ADDR0 | pt.INST_ACCESS_MASK0)) |
+                               (pt.INST_ACCESS_ENABLE1 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK1)) == (pt.INST_ACCESS_ADDR1 | pt.INST_ACCESS_MASK1)) |
+                               (pt.INST_ACCESS_ENABLE2 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK2)) == (pt.INST_ACCESS_ADDR2 | pt.INST_ACCESS_MASK2)) |
+                               (pt.INST_ACCESS_ENABLE3 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK3)) == (pt.INST_ACCESS_ADDR3 | pt.INST_ACCESS_MASK3)) |
+                               (pt.INST_ACCESS_ENABLE4 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK4)) == (pt.INST_ACCESS_ADDR4 | pt.INST_ACCESS_MASK4)) |
+                               (pt.INST_ACCESS_ENABLE5 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK5)) == (pt.INST_ACCESS_ADDR5 | pt.INST_ACCESS_MASK5)) |
+                               (pt.INST_ACCESS_ENABLE6 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK6)) == (pt.INST_ACCESS_ADDR6 | pt.INST_ACCESS_MASK6)) |
+                               (pt.INST_ACCESS_ENABLE7 & (({ifc_fetch_addr_bf[31:1],1'b0} | pt.INST_ACCESS_MASK7)) == (pt.INST_ACCESS_ADDR7 | pt.INST_ACCESS_MASK7));
+// In each term above '==' binds tighter than '&', so a term reads ENABLEn & ((addr | MASKn) == (ADDRn | MASKn)) despite the parenthesisation.
+   assign ifc_region_acc_fault_memory_bf   =  ~ifc_iccm_access_bf & ~ifc_region_acc_okay & ifc_fetch_req_bf;
+// Final fault: ifc_region_acc_fault_bf OR the memory-protection fault (a fetch request that is not an ICCM access and is not okay per the windows).
+   assign ifc_region_acc_fault_final_bf = ifc_region_acc_fault_bf | ifc_region_acc_fault_memory_bf;
+
+
+
+
+endmodule  // eb1_ifu_mem_ctl
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lib.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lib.sv
new file mode 100644
index 0000000..3aee6f3
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lib.sv
@@ -0,0 +1,64 @@
+module eb1_btb_tag_hash #(
+`include "eb1_param.vh"
+ ) (
+                       input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
+                       output logic [pt.BTB_BTAG_SIZE-1:0] hash
+                       );
+    // BTB tag hash: XOR of the three consecutive BTB_BTAG_SIZE-wide slices of pc that start at bit BTB_ADDR_HI+1.
+    localparam int TAG_W  = pt.BTB_BTAG_SIZE;     // width of one slice and of the hash
+    localparam int TAG_LO = pt.BTB_ADDR_HI + 1;   // lowest pc bit taking part in the hash
+    assign hash = pc[TAG_LO +: TAG_W] ^ pc[TAG_LO+TAG_W +: TAG_W] ^ pc[TAG_LO+2*TAG_W +: TAG_W];
+endmodule
+
+module eb1_btb_tag_hash_fold  #(
+`include "eb1_param.vh"
+ )(
+                       input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
+                       output logic [pt.BTB_BTAG_SIZE-1:0] hash
+                       );
+    // Folded BTB tag hash: XOR of the two consecutive BTB_BTAG_SIZE-wide slices of pc that start at bit BTB_ADDR_HI+1.
+    localparam int TAG_W  = pt.BTB_BTAG_SIZE;     // width of one slice and of the hash
+    localparam int TAG_LO = pt.BTB_ADDR_HI + 1;   // lowest pc bit taking part in the hash
+    assign hash = pc[TAG_LO +: TAG_W] ^ pc[TAG_LO+TAG_W +: TAG_W];
+
+endmodule
+
+module eb1_btb_addr_hash  #(
+`include "eb1_param.vh"
+ )(
+                        input logic [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO] pc,
+                        output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hash
+                        );
+// BTB index hash: XOR of the INDEX1 and INDEX3 slices of pc, plus the INDEX2 slice unless
+// pt.BTB_FOLD2_INDEX_HASH is set. Both generate branches are labelled so neither gets a tool-generated scope name.
+if(pt.BTB_FOLD2_INDEX_HASH) begin : fold2
+   assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
+                                                pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO];
+end
+   else begin : fold3
+   assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
+                                                pc[pt.BTB_INDEX2_HI:pt.BTB_INDEX2_LO] ^
+                                                pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO];
+end
+
+endmodule
+
+module eb1_btb_ghr_hash  #(
+`include "eb1_param.vh"
+ )(
+                       input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hashin,
+                       input logic [pt.BHT_GHR_SIZE-1:0] ghr,
+                       output logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] hash
+                       );
+   // BHT index hash: combines the BTB index hash (hashin) with the global history register (ghr).
+   // The hash function is too complex to write in verilog for all cases.
+   // The config script generates the logic string based on the bp config.
+   if(pt.BHT_GHR_HASH_1) begin : ghrhash_cfg1
+     assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { ghr[pt.BHT_GHR_SIZE-1:pt.BTB_INDEX1_HI-1],     // upper history bits pass through unhashed
+                                                    ghr[0 +: pt.BTB_INDEX1_HI-1] ^ hashin[2 +: pt.BTB_INDEX1_HI-1] };
+   end
+   else begin : ghrhash_cfg2
+     assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = ghr[0 +: pt.BHT_GHR_SIZE] ^ hashin[2 +: pt.BHT_GHR_SIZE];
+   end
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu.sv
new file mode 100644
index 0000000..31d1148
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu.sv
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Function: Top level file for load store unit
+// Comments:
+//
+//
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+//
+//********************************************************************************
+
+module eb1_lsu
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+(
+
+   input logic                             clk_override,             // Override non-functional clock gating
+   input logic                             dec_tlu_flush_lower_r,    // I0/I1 writeback flush. This is used to flush the old packets only
+   input logic                             dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
+   input logic                             dec_tlu_force_halt,       // This will be high till TLU goes to debug halt
+
+   // chicken signals
+   input logic                             dec_tlu_external_ldfwd_disable,     // disable load to load forwarding for externals
+   input logic                             dec_tlu_wb_coalescing_disable,     // disable the write buffer coalesce
+   input logic                             dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
+   input logic                             dec_tlu_core_ecc_disable,          // disable the generation of the ecc
+
+   input logic [31:0]                      exu_lsu_rs1_d,        // address rs operand
+   input logic [31:0]                      exu_lsu_rs2_d,        // store data
+   input logic [11:0]                      dec_lsu_offset_d,     // address offset operand
+
+   input                                   eb1_lsu_pkt_t lsu_p,  // lsu control packet
+   input logic                             dec_lsu_valid_raw_d,   // Raw valid for address computation
+   input logic [31:0]                      dec_tlu_mrac_ff,       // CSR for memory region control
+
+   output logic [31:0]                     lsu_result_m,          // lsu load data
+   output logic [31:0]                     lsu_result_corr_r,     // This is the ECC corrected data going to RF
+   output logic                            lsu_load_stall_any,    // This is for blocking loads in the decode
+   output logic                            lsu_store_stall_any,   // This is for blocking stores in the decode
+   output logic                            lsu_fastint_stall_any, // Stall the fastint in decode-1 stage
+   output logic                            lsu_idle_any,          // lsu buffers are empty and no instruction in the pipeline. Doesn't include DMA
+   output logic                            lsu_active,            // Used to turn off top level clk
+
+   output logic [31:1]                     lsu_fir_addr,        // fast interrupt address
+   output logic [1:0]                      lsu_fir_error,       // Error during fast interrupt lookup
+
+   output logic                            lsu_single_ecc_error_incr,     // Increment the ecc counter
+   output eb1_lsu_error_pkt_t             lsu_error_pkt_r,               // lsu exception packet
+   output logic                            lsu_imprecise_error_load_any,  // bus load imprecise error
+   output logic                            lsu_imprecise_error_store_any, // bus store imprecise error
+   output logic [31:0]                     lsu_imprecise_error_addr_any,  // bus store imprecise error address
+
+   // Non-blocking loads
+   output logic                               lsu_nonblock_load_valid_m,      // there is an external load -> put in the cam
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m,        // the tag of the external non block load
+   output logic                               lsu_nonblock_load_inv_r,        // invalidate signal for the cam entry for non block loads
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r,    // tag of the entry which needs to be invalidated
+   output logic                               lsu_nonblock_load_data_valid,   // the non block is valid - sending information back to the cam
+   output logic                               lsu_nonblock_load_data_error,   // non block load has an error
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag,     // the tag of the non block load sending the data/error
+   output logic [31:0]                        lsu_nonblock_load_data,         // Data of the non block load
+
+   output logic                            lsu_pmu_load_external_m,        // PMU : Bus loads
+   output logic                            lsu_pmu_store_external_m,       // PMU : Bus stores
+   output logic                            lsu_pmu_misaligned_m,           // PMU : misaligned
+   output logic                            lsu_pmu_bus_trxn,               // PMU : bus transaction
+   output logic                            lsu_pmu_bus_misaligned,         // PMU : misaligned access going to the bus
+   output logic                            lsu_pmu_bus_error,              // PMU : bus sending error back
+   output logic                            lsu_pmu_bus_busy,               // PMU : bus is not ready
+
+   // Trigger signals
+   input                                   eb1_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode
+   output logic [3:0]                      lsu_trigger_match_m,                      // lsu trigger hit (one bit per trigger)
+
+   // DCCM ports
+   output logic                            dccm_wren,       // DCCM write enable
+   output logic                            dccm_rden,       // DCCM read enable
+   output logic [pt.DCCM_BITS-1:0]         dccm_wr_addr_lo, // DCCM write address low bank
+   output logic [pt.DCCM_BITS-1:0]         dccm_wr_addr_hi, // DCCM write address hi bank
+   output logic [pt.DCCM_BITS-1:0]         dccm_rd_addr_lo, // DCCM read address low bank
+   output logic [pt.DCCM_BITS-1:0]         dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read)
+   output logic [pt.DCCM_FDATA_WIDTH-1:0]  dccm_wr_data_lo, // DCCM write data for lo bank
+   output logic [pt.DCCM_FDATA_WIDTH-1:0]  dccm_wr_data_hi, // DCCM write data for hi bank
+
+   input logic [pt.DCCM_FDATA_WIDTH-1:0]   dccm_rd_data_lo, // DCCM read data low bank
+   input logic [pt.DCCM_FDATA_WIDTH-1:0]   dccm_rd_data_hi, // DCCM read data hi bank
+
+   // PIC ports
+   output logic                            picm_wren,    // PIC memory write enable
+   output logic                            picm_rden,    // PIC memory read enable
+   output logic                            picm_mken,    // Need to read the mask for stores to determine which bits to write/forward
+   output logic [31:0]                     picm_rdaddr,  // address for pic read access
+   output logic [31:0]                     picm_wraddr,  // address for pic write access
+   output logic [31:0]                     picm_wr_data, // PIC memory write data
+   input logic [31:0]                      picm_rd_data, // PIC memory read/mask data
+
+   // AXI Write Channels
+   output logic                            lsu_axi_awvalid,
+   input  logic                            lsu_axi_awready,
+   output logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_awid,
+   output logic [31:0]                     lsu_axi_awaddr,
+   output logic [3:0]                      lsu_axi_awregion,
+   output logic [7:0]                      lsu_axi_awlen,
+   output logic [2:0]                      lsu_axi_awsize,
+   output logic [1:0]                      lsu_axi_awburst,
+   output logic                            lsu_axi_awlock,
+   output logic [3:0]                      lsu_axi_awcache,
+   output logic [2:0]                      lsu_axi_awprot,
+   output logic [3:0]                      lsu_axi_awqos,
+
+   output logic                            lsu_axi_wvalid,
+   input  logic                            lsu_axi_wready,
+   output logic [63:0]                     lsu_axi_wdata,
+   output logic [7:0]                      lsu_axi_wstrb,
+   output logic                            lsu_axi_wlast,
+
+   input  logic                            lsu_axi_bvalid,
+   output logic                            lsu_axi_bready,
+   input  logic [1:0]                      lsu_axi_bresp,
+   input  logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_bid,
+
+   // AXI Read Channels
+   output logic                            lsu_axi_arvalid,
+   input  logic                            lsu_axi_arready,
+   output logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_arid,
+   output logic [31:0]                     lsu_axi_araddr,
+   output logic [3:0]                      lsu_axi_arregion,
+   output logic [7:0]                      lsu_axi_arlen,
+   output logic [2:0]                      lsu_axi_arsize,
+   output logic [1:0]                      lsu_axi_arburst,
+   output logic                            lsu_axi_arlock,
+   output logic [3:0]                      lsu_axi_arcache,
+   output logic [2:0]                      lsu_axi_arprot,
+   output logic [3:0]                      lsu_axi_arqos,
+
+   input  logic                            lsu_axi_rvalid,
+   output logic                            lsu_axi_rready,
+   input  logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_rid,
+   input  logic [63:0]                     lsu_axi_rdata,
+   input  logic [1:0]                      lsu_axi_rresp,
+   input  logic                            lsu_axi_rlast,
+
+   input logic                             lsu_bus_clk_en,    // external drives a clock_en to control bus ratio
+
+   // DMA slave
+   input logic                             dma_dccm_req,       // DMA read/write to dccm
+   input logic [2:0]                       dma_mem_tag,        // DMA request tag
+   input logic [31:0]                      dma_mem_addr,       // DMA address
+   input logic [2:0]                       dma_mem_sz,         // DMA access size
+   input logic                             dma_mem_write,      // DMA access is a write
+   input logic [63:0]                      dma_mem_wdata,      // DMA write data
+
+   output logic                            dccm_dma_rvalid,     // lsu data valid for DMA dccm read
+   output logic                            dccm_dma_ecc_error,  // DMA load had ecc error
+   output logic [2:0]                      dccm_dma_rtag,       // DMA request tag
+   output logic [63:0]                     dccm_dma_rdata,      // lsu data for DMA dccm read
+   output logic                            dccm_ready,          // lsu ready for DMA access
+
+   input logic                             scan_mode,           // scan mode
+   input logic                             clk,                 // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic                             active_clk,          // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic                             rst_l                // reset, active low
+
+   );
+
+
+   logic        lsu_dccm_rden_m;
+   logic        lsu_dccm_rden_r;
+   logic [31:0] store_data_m;
+   logic [31:0] store_data_r;
+   logic [31:0] store_data_hi_r, store_data_lo_r;
+   logic [31:0] store_datafn_hi_r, store_datafn_lo_r;
+   logic [31:0] sec_data_lo_m, sec_data_hi_m;
+   logic [31:0] sec_data_lo_r, sec_data_hi_r;
+
+   logic [31:0] lsu_ld_data_m;
+   logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m;
+   logic [6:0]  dccm_data_ecc_hi_m, dccm_data_ecc_lo_m;
+   logic        lsu_single_ecc_error_m;
+   logic        lsu_double_ecc_error_m;
+
+   logic [31:0] lsu_ld_data_r;
+   logic [31:0] lsu_ld_data_corr_r;
+   logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r;
+   logic [6:0]  dccm_data_ecc_hi_r, dccm_data_ecc_lo_r;
+   logic        single_ecc_error_hi_r, single_ecc_error_lo_r;
+   logic        lsu_single_ecc_error_r;
+   logic        lsu_double_ecc_error_r;
+   logic        ld_single_ecc_error_r, ld_single_ecc_error_r_ff;
+
+   logic [31:0] picm_mask_data_m;
+
+   logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r;
+   logic [31:0] end_addr_d, end_addr_m, end_addr_r;
+
+   eb1_lsu_pkt_t    lsu_pkt_d, lsu_pkt_m, lsu_pkt_r;
+   logic        lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r;
+
+   // Store Buffer signals
+   logic        store_stbuf_reqvld_r;
+   logic        ldst_stbuf_reqvld_r;
+
+   logic        lsu_commit_r;
+   logic        lsu_exc_m;
+
+   logic        addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r;
+   logic        addr_in_pic_d, addr_in_pic_m, addr_in_pic_r;
+   logic        ldst_dual_d, ldst_dual_m, ldst_dual_r;
+   logic        addr_external_m;
+
+   logic                          stbuf_reqvld_any;
+   logic                          stbuf_reqvld_flushed_any;
+   logic [pt.LSU_SB_BITS-1:0]     stbuf_addr_any;
+   logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any;
+   logic [pt.DCCM_ECC_WIDTH-1:0]  stbuf_ecc_any;
+   logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff;
+   logic [pt.DCCM_ECC_WIDTH-1:0]  sec_data_ecc_hi_r_ff, sec_data_ecc_lo_r_ff;
+
+   logic                          lsu_cmpen_m;
+   logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m;
+   logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m;
+   logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m;
+   logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m;
+
+   logic        lsu_stbuf_commit_any;
+   logic        lsu_stbuf_empty_any;   // This is for blocking loads
+   logic        lsu_stbuf_full_any;
+
+    // Bus signals
+   logic        lsu_busreq_r;
+   logic        lsu_bus_buffer_pend_any;
+   logic        lsu_bus_buffer_empty_any;
+   logic        lsu_bus_buffer_full_any;
+   logic        lsu_busreq_m;
+   logic [31:0] bus_read_data_m;
+
+   logic        flush_m_up, flush_r;
+   logic        is_sideeffects_m;
+   logic [2:0]  dma_mem_tag_d, dma_mem_tag_m;
+   logic        ldst_nodma_mtor;
+   logic        dma_dccm_wen, dma_pic_wen;
+   logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi;
+   logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi;
+
+   // Clocks
+   logic        lsu_busm_clken;
+   logic        lsu_bus_obuf_c1_clken;
+   logic        lsu_c1_m_clk, lsu_c1_r_clk;
+   logic        lsu_c2_m_clk, lsu_c2_r_clk;
+   logic        lsu_store_c1_m_clk, lsu_store_c1_r_clk;
+
+   logic        lsu_stbuf_c1_clk;
+   logic        lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk;
+   logic        lsu_busm_clk;
+   logic        lsu_free_c2_clk;
+
+   logic        lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m;
+   logic        lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r;
+
+   assign       lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]);
+   assign       lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]);
+
+   eb1_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*);
+
+   // block stores in decode  - for either bus or stbuf reasons
+   assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
+   assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
+   assign lsu_fastint_stall_any = ld_single_ecc_error_r;    // Stall the fastint in decode-1 stage
+
+   // Ready to accept dma trxns
+   // There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m
+   assign dma_mem_tag_d[2:0]   = dma_mem_tag[2:0];
+   assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) & lsu_pkt_m.store);
+
+   assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff);
+
+   assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d & dma_mem_sz[1];   // Perform DMA writes only for word/dword
+   assign dma_pic_wen  = dma_dccm_req & dma_mem_write & addr_in_pic_d;
+   assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000};   // Shift the dma data to lower bits to make it consistent to lsu stores
+
+
+   // Generate per cycle flush signals
+   assign flush_m_up = dec_tlu_flush_lower_r;
+   assign flush_r    = dec_tlu_i0_kill_writeb_r;
+
+   // lsu idle
+   // lsu halt idle. This is used for entering the halt mode. Also, DMA accesses are allowed during fence.
+   // Indicates non-idle if there is an instruction valid in d-r or read/write buffers are non-empty since they can come with error
+   // Store buffer now have only non-dma dccm stores
+   // stbuf_empty not needed since it has only dccm stores
+   assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) |
+                           (lsu_pkt_r.valid & ~lsu_pkt_r.dma)) &
+                           lsu_bus_buffer_empty_any;
+
+   assign lsu_active = (lsu_pkt_m.valid | lsu_pkt_r.valid | ld_single_ecc_error_r_ff) | ~lsu_bus_buffer_empty_any;  // This includes DMA. Used for gating top clock
+
+   // Instantiate the store buffer
+   assign store_stbuf_reqvld_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~flush_r & (~lsu_pkt_r.dma | ((lsu_pkt_r.by | lsu_pkt_r.half) & ~lsu_double_ecc_error_r));
+
+   // Disable Forwarding for now
+   assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & (addr_in_dccm_m | addr_in_pic_m);
+
+   // Bus signals
+   assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | lsu_pkt_m.store) & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int;
+
+   // Dual signals
+   assign ldst_dual_d  = (lsu_addr_d[2] != end_addr_d[2]);
+   assign ldst_dual_m  = (lsu_addr_m[2] != end_addr_m[2]);
+   assign ldst_dual_r  = (lsu_addr_r[2] != end_addr_r[2]);
+
+   // PMU signals
+   assign lsu_pmu_misaligned_m     = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0])));
+   assign lsu_pmu_load_external_m  = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m;
+   assign lsu_pmu_store_external_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_external_m;
+
+   eb1_lsu_dccm_ctl #(.pt(pt)) dccm_ctl (
+      .lsu_addr_d(lsu_addr_d[31:0]),
+      .end_addr_d(end_addr_d[pt.DCCM_BITS-1:0]),
+      .lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]),
+      .lsu_addr_r(lsu_addr_r[31:0]),
+
+      .end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]),
+      .end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]),
+      .*
+   );
+
+   eb1_lsu_stbuf #(.pt(pt)) stbuf (
+      .lsu_addr_d(lsu_addr_d[pt.LSU_SB_BITS-1:0]),
+      .end_addr_d(end_addr_d[pt.LSU_SB_BITS-1:0]),
+
+      .*
+
+   );
+
+   eb1_lsu_ecc #(.pt(pt)) ecc (
+      .lsu_addr_r(lsu_addr_r[pt.DCCM_BITS-1:0]),
+      .end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]),
+      .lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]),
+      .end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]),
+      .*
+   );
+
+   eb1_lsu_trigger #(.pt(pt)) trigger (
+      .store_data_m(store_data_m[31:0]),
+      .*
+   );
+
+   // Clk domain
+   eb1_lsu_clkdomain #(.pt(pt)) clkdomain (.*);
+
+   // Bus interface
+   eb1_lsu_bus_intf #(.pt(pt)) bus_intf (
+      .lsu_addr_m(lsu_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
+      .lsu_addr_r(lsu_addr_r[31:0] & {32{lsu_busreq_r}}),
+
+      .end_addr_m(end_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
+      .end_addr_r(end_addr_r[31:0] & {32{lsu_busreq_r}}),
+
+      .store_data_r(store_data_r[31:0] & {32{lsu_busreq_r}}),
+      .*
+   );
+
+   //Flops
+   rvdff #(3) dma_mem_tag_mff     (.*, .din(dma_mem_tag_d[2:0]), .dout(dma_mem_tag_m[2:0]), .clk(lsu_c1_m_clk));
+   rvdff #(2) lsu_raw_fwd_r_ff    (.*, .din({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}),     .dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}),     .clk(lsu_c2_r_clk));
+
+`ifdef RV_ASSERT_ON
+   logic [1:0] store_data_bypass_sel;
+   assign store_data_bypass_sel[1:0] =  {lsu_p.store_data_bypass_d, lsu_p.store_data_bypass_m};
+
+   property exception_no_lsu_flush;
+      @(posedge clk)  disable iff(~rst_l) lsu_lsc_ctl.lsu_error_pkt_m.exc_valid |-> ##[1:2] (flush_r );
+   endproperty
+   assert_exception_no_lsu_flush: assert property (exception_no_lsu_flush) else
+      $display("No flush within 2 cycles of exception");
+
+   // offset should be zero for fast interrupt
+   property offset_0_fastint;
+      @(posedge clk) disable iff(~rst_l) (lsu_p.valid & lsu_p.fast_int) |-> (dec_lsu_offset_d[11:0] == 12'b0);
+   endproperty
+   assert_offset_0_fastint: assert property (offset_0_fastint) else
+      $display("dec_tlu_offset_d not zero for fast interrupt redirect");
+
+   // DMA req should assert dccm rden/wren
+   property dmareq_dccm_wren_or_rden;
+      @(posedge clk) disable iff(~rst_l) dma_dccm_req |-> (dccm_rden | dccm_wren | addr_in_pic_d);
+   endproperty
+   assert_dmareq_dccm_wren_or_rden: assert property(dmareq_dccm_wren_or_rden) else
+      $display("dccm rden or wren not asserted during DMA request");
+
+   // fastint_stall should cause load/store stall next cycle
+   property fastint_stall_imply_loadstore_stall;
+      @(posedge clk) disable iff(~rst_l) (lsu_fastint_stall_any & (lsu_commit_r | lsu_pkt_r.dma)) |-> ##1 ((lsu_load_stall_any | lsu_store_stall_any) | ~ld_single_ecc_error_r_ff);
+   endproperty
+   assert_fastint_stall_imply_loadstore_stall: assert property (fastint_stall_imply_loadstore_stall) else
+      $display("fastint_stall should be followed by lsu_load/store_stall_any");
+
+   // Single ECC error implies rfnpc flush
+   property single_ecc_error_rfnpc_flush;
+      @(posedge clk) disable iff(~rst_l) (lsu_error_pkt_r.single_ecc_error & lsu_pkt_r.load) |=> ~lsu_commit_r;
+   endproperty
+   assert_single_ecc_error_rfnpc_flush: assert property (single_ecc_error_rfnpc_flush) else
+     $display("LSU commit next cycle after single ecc error");
+
+`endif
+
+endmodule // eb1_lsu
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_addrcheck.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_addrcheck.sv
new file mode 100644
index 0000000..010779b
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_addrcheck.sv
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: Checks the memory map for the address
+// Comments:
+//
+//********************************************************************************
+module eb1_lsu_addrcheck
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+   input logic          lsu_c2_m_clk,              // clock for the is_sideeffects_d -> is_sideeffects_m flop
+   input logic          rst_l,                     // reset
+
+   input logic [31:0]   start_addr_d,              // start address for lsu
+   input logic [31:0]   end_addr_d,                // end address for lsu
+   input eb1_lsu_pkt_t lsu_pkt_d,                 // packet in d
+   input logic [31:0]   dec_tlu_mrac_ff,           // CSR read; bit {start_addr_d[31:28], 1'b1} is used as the side-effect attribute
+   input logic [3:0]    rs1_region_d,              // address rs operand [31:28]
+
+   input logic [31:0]   rs1_d,                     // address rs operand
+
+   output logic         is_sideeffects_m,          // access is to side-effects space (flopped copy of is_sideeffects_d)
+   output logic         addr_in_dccm_d,            // start and end address are both in dccm
+   output logic         addr_in_pic_d,             // start and end address are both in pic
+   output logic         addr_external_d,           // start address is in neither the dccm region nor the pic region
+
+   output logic         access_fault_d,            // access fault
+   output logic         misaligned_fault_d,        // misaligned
+   output logic [3:0]   exc_mscause_d,             // mscause for access/misaligned faults
+
+   output logic         fir_dccm_access_error_d,   // Fast interrupt dccm access error
+   output logic         fir_nondccm_access_error_d,// Fast interrupt non-dccm access error
+
+   input  logic         scan_mode                  // Scan mode
+);
+
+
+   logic        non_dccm_access_ok;
+   logic        is_sideeffects_d, is_aligned_d;
+   logic        start_addr_in_dccm_d, end_addr_in_dccm_d;
+   logic        start_addr_in_dccm_region_d, end_addr_in_dccm_region_d;
+   logic        start_addr_in_pic_d, end_addr_in_pic_d;
+   logic        start_addr_in_pic_region_d, end_addr_in_pic_region_d;
+   logic [4:0]  csr_idx;
+   logic        addr_in_iccm;
+   logic        start_addr_dccm_or_pic;
+   logic        base_reg_dccm_or_pic;
+   logic        unmapped_access_fault_d, mpu_access_fault_d, picm_access_fault_d, regpred_access_fault_d;
+   logic        regcross_misaligned_fault_d, sideeffect_misaligned_fault_d;
+   logic [3:0]  access_fault_mscause_d;
+   logic [3:0]  misaligned_fault_mscause_d;
+
+   if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
+      // Start address check
+      rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
+                     .CCM_SIZE(pt.DCCM_SIZE)) start_addr_dccm_rangecheck (
+         .addr(start_addr_d[31:0]),
+         .in_range(start_addr_in_dccm_d),
+         .in_region(start_addr_in_dccm_region_d)
+      );
+
+      // End address check
+      rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
+                     .CCM_SIZE(pt.DCCM_SIZE)) end_addr_dccm_rangecheck (
+         .addr(end_addr_d[31:0]),
+         .in_range(end_addr_in_dccm_d),
+         .in_region(end_addr_in_dccm_region_d)
+      );
+   end else begin: Gen_dccm_disable // DCCM not enabled: tie off every dccm range/region hit to 0
+      assign start_addr_in_dccm_d = '0;
+      assign start_addr_in_dccm_region_d = '0;
+      assign end_addr_in_dccm_d = '0;
+      assign end_addr_in_dccm_region_d = '0;
+   end
+
+   if (pt.ICCM_ENABLE == 1) begin : check_iccm
+      assign addr_in_iccm =  (start_addr_d[31:28] == pt.ICCM_REGION);   // compares only the 4-bit region of the start address
+   end else begin
+     assign addr_in_iccm = 1'b0;
+   end
+
+   // PIC memory check
+   // Start address check
+   rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
+                  .CCM_SIZE(pt.PIC_SIZE)) start_addr_pic_rangecheck (
+      .addr(start_addr_d[31:0]),
+      .in_range(start_addr_in_pic_d),
+      .in_region(start_addr_in_pic_region_d)
+   );
+
+   // End address check
+   rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
+                  .CCM_SIZE(pt.PIC_SIZE)) end_addr_pic_rangecheck (
+      .addr(end_addr_d[31:0]),
+      .in_range(end_addr_in_pic_d),
+      .in_region(end_addr_in_pic_region_d)
+   );
+
+   assign start_addr_dccm_or_pic  = start_addr_in_dccm_region_d | start_addr_in_pic_region_d;
+   assign base_reg_dccm_or_pic    = ((rs1_region_d[3:0] == pt.DCCM_REGION) & pt.DCCM_ENABLE) | (rs1_region_d[3:0] == pt.PIC_REGION);
+   assign addr_in_dccm_d          = (start_addr_in_dccm_d & end_addr_in_dccm_d);
+   assign addr_in_pic_d           = (start_addr_in_pic_d & end_addr_in_pic_d);
+
+   assign addr_external_d   = ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d);   // region-based; only the start address is examined
+   assign csr_idx[4:0]       = {start_addr_d[31:28], 1'b1};   // index = 2*region + 1, i.e. the upper bit of that region's 2-bit field
+   assign is_sideeffects_d = dec_tlu_mrac_ff[csr_idx] & ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d | addr_in_iccm) & lsu_pkt_d.valid & (lsu_pkt_d.store | lsu_pkt_d.load);  // every region has 2 bits in mrac ( bit 1: sideeffects/no_side effects, and bit 0: cacheable ). Ignored for the dccm/pic regions and the iccm region
+   assign is_aligned_d    = (lsu_pkt_d.word & (start_addr_d[1:0] == 2'b0)) |
+                              (lsu_pkt_d.half & (start_addr_d[0] == 1'b0)) |
+                              lsu_pkt_d.by;
+   // Access is ok if no DATA_ACCESS window is enabled, or if the start address and the end address each match some enabled window (address bits set in the window's MASK are ignored by the compare)
+   assign non_dccm_access_ok = (~(|{pt.DATA_ACCESS_ENABLE0,pt.DATA_ACCESS_ENABLE1,pt.DATA_ACCESS_ENABLE2,pt.DATA_ACCESS_ENABLE3,pt.DATA_ACCESS_ENABLE4,pt.DATA_ACCESS_ENABLE5,pt.DATA_ACCESS_ENABLE6,pt.DATA_ACCESS_ENABLE7})) |
+                               (((pt.DATA_ACCESS_ENABLE0 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) |
+                                 (pt.DATA_ACCESS_ENABLE1 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
+                                 (pt.DATA_ACCESS_ENABLE2 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
+                                 (pt.DATA_ACCESS_ENABLE3 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK3)) == (pt.DATA_ACCESS_ADDR3 | pt.DATA_ACCESS_MASK3)) |
+                                 (pt.DATA_ACCESS_ENABLE4 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK4)) == (pt.DATA_ACCESS_ADDR4 | pt.DATA_ACCESS_MASK4)) |
+                                 (pt.DATA_ACCESS_ENABLE5 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK5)) == (pt.DATA_ACCESS_ADDR5 | pt.DATA_ACCESS_MASK5)) |
+                                 (pt.DATA_ACCESS_ENABLE6 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
+                                 (pt.DATA_ACCESS_ENABLE7 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7)))   &
+                                ((pt.DATA_ACCESS_ENABLE0 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) |
+                                 (pt.DATA_ACCESS_ENABLE1 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
+                                 (pt.DATA_ACCESS_ENABLE2 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
+                                 (pt.DATA_ACCESS_ENABLE3 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK3)) == (pt.DATA_ACCESS_ADDR3 | pt.DATA_ACCESS_MASK3)) |
+                                 (pt.DATA_ACCESS_ENABLE4 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK4)) == (pt.DATA_ACCESS_ADDR4 | pt.DATA_ACCESS_MASK4)) |
+                                 (pt.DATA_ACCESS_ENABLE5 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK5)) == (pt.DATA_ACCESS_ADDR5 | pt.DATA_ACCESS_MASK5)) |
+                                 (pt.DATA_ACCESS_ENABLE6 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
+                                 (pt.DATA_ACCESS_ENABLE7 & ((end_addr_d[31:0]   | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7))));
+
+   // Access fault logic
+   // 0. Unmapped local memory (reported as mscause 4'h2 below) : Addr in dccm region but not in dccm offset OR Addr in picm region but not in picm offset OR DCCM -> PIC cross when DCCM/PIC in same region
+   // 1. Uncorrectable (double bit) ECC error (not generated in this module)
+   // 3. Address is not in a populated non-dccm region
+   // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
+   // 6. Ld/St access to picm are not word aligned or word size
+   assign regpred_access_fault_d  = (start_addr_dccm_or_pic ^ base_reg_dccm_or_pic);                   // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
+   assign picm_access_fault_d     = (addr_in_pic_d & ((start_addr_d[1:0] != 2'b0) | ~lsu_pkt_d.word));                                               // 6. Ld/St access to picm are not word aligned or word size
+
+   if (pt.DCCM_ENABLE & (pt.DCCM_REGION == pt.PIC_REGION)) begin   // DCCM enabled and sharing its region with the PIC
+      assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~(start_addr_in_dccm_d | start_addr_in_pic_d)) |   // 0. Addr in dccm/pic region but not in dccm/pic offset
+                                        (end_addr_in_dccm_region_d & ~(end_addr_in_dccm_d | end_addr_in_pic_d))       |   // 0. Addr in dccm/pic region but not in dccm/pic offset
+                                        (start_addr_in_dccm_d & end_addr_in_pic_d)                                    |   // 0. DCCM -> PIC cross when DCCM/PIC in same region
+                                        (start_addr_in_pic_d  & end_addr_in_dccm_d));                                     // 0. PIC -> DCCM cross when DCCM/PIC in same region
+      assign mpu_access_fault_d      = (~start_addr_in_dccm_region_d & ~non_dccm_access_ok);                              // 3. Address is not in a populated non-dccm region
+   end else begin   // DCCM disabled, or DCCM and PIC in different regions
+      assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d)                              |   // 0. Addr in dccm region but not in dccm offset
+                                        (end_addr_in_dccm_region_d & ~end_addr_in_dccm_d)                                  |   // 0. Addr in dccm region but not in dccm offset
+                                        (start_addr_in_pic_region_d & ~start_addr_in_pic_d)                                |   // 0. Addr in picm region but not in picm offset
+                                        (end_addr_in_pic_region_d & ~end_addr_in_pic_d));                                      // 0. Addr in picm region but not in picm offset
+      assign mpu_access_fault_d      = (~start_addr_in_pic_region_d & ~start_addr_in_dccm_region_d & ~non_dccm_access_ok);     // 3. Address is not in a populated non-dccm region
+   end
+
+   assign access_fault_d = (unmapped_access_fault_d | mpu_access_fault_d | picm_access_fault_d | regpred_access_fault_d) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;   // only for valid non-dma packets
+   assign access_fault_mscause_d[3:0] = unmapped_access_fault_d ? 4'h2 : mpu_access_fault_d ? 4'h3 : regpred_access_fault_d ? 4'h5 : picm_access_fault_d ? 4'h6 : 4'h0;   // priority: unmapped > mpu > regpred > picm
+
+   // Misaligned happens due to 2 reasons
+   // 0. Region cross (reported as mscause 4'h2 below)
+   // 1. sideeffects access which are not aligned (reported as mscause 4'h1 below; only raised when addr_external_d is set)
+   assign regcross_misaligned_fault_d = (start_addr_d[31:28] != end_addr_d[31:28]);
+   assign sideeffect_misaligned_fault_d = (is_sideeffects_d & ~is_aligned_d);
+   assign misaligned_fault_d = (regcross_misaligned_fault_d | (sideeffect_misaligned_fault_d & addr_external_d)) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;
+   assign misaligned_fault_mscause_d[3:0] = regcross_misaligned_fault_d ? 4'h2 : sideeffect_misaligned_fault_d ? 4'h1 : 4'h0;   // region cross takes priority
+
+   assign exc_mscause_d[3:0] = misaligned_fault_d ? misaligned_fault_mscause_d[3:0] : access_fault_mscause_d[3:0];   // misaligned fault cause takes priority over access fault cause
+
+   // Fast interrupt error logic
+   assign fir_dccm_access_error_d    = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) |
+                                        (end_addr_in_dccm_region_d   & ~end_addr_in_dccm_d)) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
+   assign fir_nondccm_access_error_d = ~(start_addr_in_dccm_region_d & end_addr_in_dccm_region_d) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;   // start or end address is outside the dccm region
+
+   rvdff #(.WIDTH(1))   is_sideeffects_mff (.din(is_sideeffects_d), .dout(is_sideeffects_m), .clk(lsu_c2_m_clk), .*);   // rvdff is defined elsewhere; presumably a plain D flop carrying the flag from d to m
+
+endmodule // eb1_lsu_addrcheck
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_bus_buffer.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_bus_buffer.sv
new file mode 100644
index 0000000..1293f6e
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_bus_buffer.sv
@@ -0,0 +1,936 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: lsu interface with interface queue
+// Comments:
+//
+//********************************************************************************
+
+module eb1_lsu_bus_buffer
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+   input logic                          clk,                                // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic                          clk_override,                       // Override non-functional clock gating
+   input logic                          rst_l,                              // reset, active low
+   input logic                          scan_mode,                          // scan mode
+   input logic                          dec_tlu_external_ldfwd_disable,     // disable load to load forwarding for externals
+   input logic                          dec_tlu_wb_coalescing_disable,      // disable write buffer coalescing
+   input logic                          dec_tlu_sideeffect_posted_disable,  // Don't block the sideeffect load store to the bus
+   input logic                          dec_tlu_force_halt,
+
+   // various clocks needed for the bus reads and writes
+   input logic                          lsu_bus_obuf_c1_clken,
+   input logic                          lsu_busm_clken,
+   input logic                          lsu_c2_r_clk,
+   input logic                          lsu_bus_ibuf_c1_clk,
+   input logic                          lsu_bus_obuf_c1_clk,
+   input logic                          lsu_bus_buf_c1_clk,
+   input logic                          lsu_free_c2_clk,
+   input logic                          lsu_busm_clk,
+
+
+   input logic                          dec_lsu_valid_raw_d,            // Raw valid for address computation
+   input eb1_lsu_pkt_t                 lsu_pkt_m,                      // lsu packet flowing down the pipe
+   input eb1_lsu_pkt_t                 lsu_pkt_r,                      // lsu packet flowing down the pipe
+
+   input logic [31:0]                   lsu_addr_m,                     // lsu address flowing down the pipe
+   input logic [31:0]                   end_addr_m,                     // lsu address flowing down the pipe
+   input logic [31:0]                   lsu_addr_r,                     // lsu address flowing down the pipe
+   input logic [31:0]                   end_addr_r,                     // lsu address flowing down the pipe
+   input logic [31:0]                   store_data_r,                   // store data flowing down the pipe
+
+   input logic                          no_word_merge_r,                // r store doesn't need to wait in ibuf since it will not coalesce
+   input logic                          no_dword_merge_r,               // r store doesn't need to wait in ibuf since it will not coalesce
+   input logic                          lsu_busreq_m,                   // bus request is in m
+   output logic                         lsu_busreq_r,                   // bus request is in r
+   input logic                          ld_full_hit_m,                  // load can get all its byte from a write buffer entry
+   input logic                          flush_m_up,                     // flush
+   input logic                          flush_r,                        // flush
+   input logic                          lsu_commit_r,                   // lsu instruction in r commits
+   input logic                          is_sideeffects_r,               // lsu attribute is side_effects
+   input logic                          ldst_dual_d,                    // load/store is unaligned at 32 bit boundary
+   input logic                          ldst_dual_m,                    // load/store is unaligned at 32 bit boundary
+   input logic                          ldst_dual_r,                    // load/store is unaligned at 32 bit boundary
+
+   input logic [7:0]                    ldst_byteen_ext_m,              // HI and LO signals
+
+   output logic                         lsu_bus_buffer_pend_any,          // bus buffer has a pending bus entry
+   output logic                         lsu_bus_buffer_full_any,          // bus buffer is full
+   output logic                         lsu_bus_buffer_empty_any,         // bus buffer is empty
+
+   output logic [3:0]                   ld_byte_hit_buf_lo, ld_byte_hit_buf_hi,    // Byte enables for forwarding data
+   output logic [31:0]                  ld_fwddata_buf_lo, ld_fwddata_buf_hi,      // load forwarding data
+
+   output logic                         lsu_imprecise_error_load_any,     // imprecise load bus error
+   output logic                         lsu_imprecise_error_store_any,    // imprecise store bus error
+   output logic [31:0]                  lsu_imprecise_error_addr_any,     // address of the imprecise error
+
+   // Non-blocking loads
+   output logic                               lsu_nonblock_load_valid_m,     // there is an external load -> put in the cam
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m,       // the tag of the external non block load
+   output logic                               lsu_nonblock_load_inv_r,       // invalidate signal for the cam entry for non block loads
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r,   // tag of the enrty which needs to be invalidated
+   output logic                               lsu_nonblock_load_data_valid,  // the non block is valid - sending information back to the cam
+   output logic                               lsu_nonblock_load_data_error,  // non block load has an error
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag,    // the tag of the non block load sending the data/error
+   output logic [31:0]                        lsu_nonblock_load_data,        // Data of the non block load
+
+   // PMU events
+   output logic                         lsu_pmu_bus_trxn,
+   output logic                         lsu_pmu_bus_misaligned,
+   output logic                         lsu_pmu_bus_error,
+   output logic                         lsu_pmu_bus_busy,
+
+   // AXI Write Channels
+   output logic                            lsu_axi_awvalid,
+   input  logic                            lsu_axi_awready,
+   output logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_awid,
+   output logic [31:0]                     lsu_axi_awaddr,
+   output logic [3:0]                      lsu_axi_awregion,
+   output logic [7:0]                      lsu_axi_awlen,
+   output logic [2:0]                      lsu_axi_awsize,
+   output logic [1:0]                      lsu_axi_awburst,
+   output logic                            lsu_axi_awlock,
+   output logic [3:0]                      lsu_axi_awcache,
+   output logic [2:0]                      lsu_axi_awprot,
+   output logic [3:0]                      lsu_axi_awqos,
+
+   output logic                            lsu_axi_wvalid,
+   input  logic                            lsu_axi_wready,
+   output logic [63:0]                     lsu_axi_wdata,
+   output logic [7:0]                      lsu_axi_wstrb,
+   output logic                            lsu_axi_wlast,
+
+   input  logic                            lsu_axi_bvalid,
+   output logic                            lsu_axi_bready,
+   input  logic [1:0]                      lsu_axi_bresp,
+   input  logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_bid,
+
+   // AXI Read Channels
+   output logic                            lsu_axi_arvalid,
+   input  logic                            lsu_axi_arready,
+   output logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_arid,
+   output logic [31:0]                     lsu_axi_araddr,
+   output logic [3:0]                      lsu_axi_arregion,
+   output logic [7:0]                      lsu_axi_arlen,
+   output logic [2:0]                      lsu_axi_arsize,
+   output logic [1:0]                      lsu_axi_arburst,
+   output logic                            lsu_axi_arlock,
+   output logic [3:0]                      lsu_axi_arcache,
+   output logic [2:0]                      lsu_axi_arprot,
+   output logic [3:0]                      lsu_axi_arqos,
+
+   input  logic                            lsu_axi_rvalid,
+   output logic                            lsu_axi_rready,
+   input  logic [pt.LSU_BUS_TAG-1:0]       lsu_axi_rid,
+   input  logic [63:0]                     lsu_axi_rdata,
+   input  logic [1:0]                      lsu_axi_rresp,
+
+   input logic                             lsu_bus_clk_en,
+   input logic                             lsu_bus_clk_en_q
+
+);
+
+   // For Ld: IDLE -> WAIT -> CMD -> RESP -> DONE_PARTIAL(?) -> DONE_WAIT(?) -> DONE -> IDLE
+   // For St: IDLE -> WAIT -> CMD -> RESP(?) -> IDLE
+   typedef enum logic [2:0] {IDLE=3'b000, WAIT=3'b001, CMD=3'b010, RESP=3'b011, DONE_PARTIAL=3'b100, DONE_WAIT=3'b101, DONE=3'b110} state_t;
+
+   localparam DEPTH     = pt.LSU_NUM_NBLOAD;
+   localparam DEPTH_LOG2 = pt.LSU_NUM_NBLOAD_WIDTH;
+   localparam TIMER     = 8;   // This can be only power of 2
+   localparam TIMER_MAX = TIMER - 1;  // Maximum value of timer
+   localparam TIMER_LOG2 = (TIMER < 2) ? 1 : $clog2(TIMER);
+
+   logic [3:0]                          ldst_byteen_hi_m, ldst_byteen_lo_m;          // Bits [7:4] / [3:0] of ldst_byteen_ext_m
+   logic [DEPTH-1:0]                    ld_addr_hitvec_lo, ld_addr_hitvec_hi;        // Per entry: word address of the m request matches a non-IDLE store entry
+   logic [3:0][DEPTH-1:0]               ld_byte_hitvec_lo, ld_byte_hitvec_hi;        // [byte][entry]: address hit qualified by entry and request byte enables
+   logic [3:0][DEPTH-1:0]               ld_byte_hitvecfn_lo, ld_byte_hitvecfn_hi;    // [byte][entry]: final hit, killed when a younger entry or ibuf also hits the byte
+
+   logic                                ld_addr_ibuf_hit_lo, ld_addr_ibuf_hit_hi;    // Word address of the m request matches a valid store in ibuf
+   logic [3:0]                          ld_byte_ibuf_hit_lo, ld_byte_ibuf_hit_hi;    // Per byte: ibuf address hit qualified by ibuf and request byte enables
+
+   logic [3:0]                          ldst_byteen_r;                               // Size-based byte enables (by/half/word) before shifting by lsu_addr_r[1:0]
+   logic [3:0]                          ldst_byteen_hi_r, ldst_byteen_lo_r;          // Shifted byte enables: bits that spill past the low word / bits in the low word
+   logic [31:0]                         store_data_hi_r, store_data_lo_r;            // store_data_r shifted left by 8*lsu_addr_r[1:0], upper / lower 32 bits
+   logic                                is_aligned_r;                   // Aligned load/store
+   logic                                ldst_samedw_r;                               // lsu_addr_r[3] equals end_addr_r[3]
+
+   logic                                lsu_nonblock_load_valid_r;
+   logic [31:0]                         lsu_nonblock_load_data_hi, lsu_nonblock_load_data_lo, lsu_nonblock_data_unalgn;
+   logic [1:0]                          lsu_nonblock_addr_offset;
+   logic [1:0]                          lsu_nonblock_sz;
+   logic                                lsu_nonblock_unsign;
+   logic                                lsu_nonblock_load_data_ready;
+
+   logic [DEPTH-1:0]                    CmdPtr0Dec, CmdPtr1Dec;                      // Decoded (expected one-hot) selection of the first / second CMD entry to send
+   logic [DEPTH-1:0]                    RspPtrDec;                                   // Decoded selection among entries in DONE_WAIT
+   logic [DEPTH_LOG2-1:0]               CmdPtr0, CmdPtr1;                            // f_Enc8to3 encoding of CmdPtr0Dec / CmdPtr1Dec
+   logic [DEPTH_LOG2-1:0]               RspPtr;                                      // f_Enc8to3 encoding of RspPtrDec
+   logic [DEPTH_LOG2-1:0]               WrPtr0_m, WrPtr0_r;                          // First allocation pointer (m value is computed by the WrPtr search)
+   logic [DEPTH_LOG2-1:0]               WrPtr1_m, WrPtr1_r;                          // Second allocation pointer (m value is computed by the WrPtr search)
+   logic                                found_wrptr0, found_wrptr1, found_cmdptr0, found_cmdptr1;   // Qualifiers: the matching pointer refers to a real entry
+   logic [3:0]                          buf_numvld_any, buf_numvld_wrcmd_any, buf_numvld_cmd_any, buf_numvld_pend_any;
+   logic                                any_done_wait_state;
+   logic                                bus_sideeffect_pend;
+   logic                                bus_coalescing_disable;                      // dec_tlu_wb_coalescing_disable | pt.BUILD_AHB_LITE
+
+   logic                                bus_addr_match_pending;
+   logic                                bus_cmd_sent, bus_cmd_ready;
+   logic                                bus_wcmd_sent, bus_wdata_sent;
+   logic                                bus_rsp_read, bus_rsp_write;
+   logic [pt.LSU_BUS_TAG-1:0]           bus_rsp_read_tag, bus_rsp_write_tag;
+   logic                                bus_rsp_read_error, bus_rsp_write_error;
+   logic [63:0]                         bus_rsp_rdata;
+
+   // Bus buffer signals (one element per buffer entry; *_in are the values presented for writing, *_en the write enables)
+   state_t [DEPTH-1:0]                  buf_state;                // Current state of each entry
+   logic   [DEPTH-1:0][1:0]             buf_sz;
+   logic   [DEPTH-1:0][31:0]            buf_addr;                 // Entry address; bits [31:2] are compared for load forwarding
+   logic   [DEPTH-1:0][3:0]             buf_byteen;               // Byte enables within the entry's 32-bit word
+   logic   [DEPTH-1:0]                  buf_sideeffect;
+   logic   [DEPTH-1:0]                  buf_write;                // Entry is a store (only such entries can hit for load forwarding)
+   logic   [DEPTH-1:0]                  buf_unsign;
+   logic   [DEPTH-1:0]                  buf_dual;
+   logic   [DEPTH-1:0]                  buf_samedw;
+   logic   [DEPTH-1:0]                  buf_nomerge;
+   logic   [DEPTH-1:0]                  buf_dualhi;
+   logic   [DEPTH-1:0][DEPTH_LOG2-1:0]  buf_dualtag;
+   logic   [DEPTH-1:0]                  buf_ldfwd;
+   logic   [DEPTH-1:0][DEPTH_LOG2-1:0]  buf_ldfwdtag;
+   logic   [DEPTH-1:0]                  buf_error;
+   logic   [DEPTH-1:0][31:0]            buf_data;                 // Entry data word; source of forwarded load bytes
+   logic   [DEPTH-1:0][DEPTH-1:0]       buf_age, buf_age_younger; // [i][j]: entry j is older than i and not yet sent / entry j is non-IDLE and not marked older than i
+   logic   [DEPTH-1:0][DEPTH-1:0]       buf_rspage, buf_rsp_pickage;   // [i][j]: response-order age of j relative to i / same, restricted to j in DONE_WAIT
+
+   state_t [DEPTH-1:0]                  buf_nxtstate;
+   logic   [DEPTH-1:0]                  buf_rst;
+   logic   [DEPTH-1:0]                  buf_state_en;
+   logic   [DEPTH-1:0]                  buf_cmd_state_bus_en;
+   logic   [DEPTH-1:0]                  buf_resp_state_bus_en;
+   logic   [DEPTH-1:0]                  buf_state_bus_en;
+   logic   [DEPTH-1:0]                  buf_dual_in;
+   logic   [DEPTH-1:0]                  buf_samedw_in;
+   logic   [DEPTH-1:0]                  buf_nomerge_in;
+   logic   [DEPTH-1:0]                  buf_sideeffect_in;
+   logic   [DEPTH-1:0]                  buf_unsign_in;
+   logic   [DEPTH-1:0][1:0]             buf_sz_in;
+   logic   [DEPTH-1:0]                  buf_write_in;
+   logic   [DEPTH-1:0]                  buf_wr_en;
+   logic   [DEPTH-1:0]                  buf_dualhi_in;
+   logic   [DEPTH-1:0][DEPTH_LOG2-1:0]  buf_dualtag_in;
+   logic   [DEPTH-1:0]                  buf_ldfwd_en;
+   logic   [DEPTH-1:0]                  buf_ldfwd_in;
+   logic   [DEPTH-1:0][DEPTH_LOG2-1:0]  buf_ldfwdtag_in;
+   logic   [DEPTH-1:0][3:0]             buf_byteen_in;
+   logic   [DEPTH-1:0][31:0]            buf_addr_in;
+   logic   [DEPTH-1:0][31:0]            buf_data_in;
+   logic   [DEPTH-1:0]                  buf_error_en;
+   logic   [DEPTH-1:0]                  buf_data_en;
+   logic   [DEPTH-1:0][DEPTH-1:0]       buf_age_in;               // New age bits set at allocation, ORed with the surviving buf_age bits
+   logic   [DEPTH-1:0][DEPTH-1:0]       buf_ageQ;                 // Age bits before the reset term is applied (presumably the flopped buf_age_in - flop not visible here)
+   logic   [DEPTH-1:0][DEPTH-1:0]       buf_rspage_set;           // Response-age bits set when entry i is allocated
+   logic   [DEPTH-1:0][DEPTH-1:0]       buf_rspage_in;            // buf_rspage_set | buf_rspage
+   logic   [DEPTH-1:0][DEPTH-1:0]       buf_rspageQ;              // Response-age bits before the reset term is applied (presumably flopped - flop not visible here)
+
+   // Input buffer signals (state captured when ibuf_wr_en is asserted)
+   logic                               ibuf_valid;
+   logic                               ibuf_dual;
+   logic                               ibuf_samedw;
+   logic                               ibuf_nomerge;
+   logic [DEPTH_LOG2-1:0]              ibuf_tag;          // Bus buffer entry the ibuf contents drain into
+   logic [DEPTH_LOG2-1:0]              ibuf_dualtag;
+   logic                               ibuf_sideeffect;
+   logic                               ibuf_unsign;
+   logic                               ibuf_write;        // ibuf holds a store (captured from lsu_pkt_r.store)
+   logic [1:0]                         ibuf_sz;           // {word, half} of the captured request
+   logic [3:0]                         ibuf_byteen;
+   logic [31:0]                        ibuf_addr;
+   logic [31:0]                        ibuf_data;
+   logic [TIMER_LOG2-1:0]              ibuf_timer;        // Cleared on ibuf_wr_en, otherwise counts up and saturates at TIMER_MAX
+
+   logic                               ibuf_byp;          // r-stage request (load or no_word_merge_r) while ibuf is empty: not written into ibuf
+   logic                               ibuf_wr_en;        // Committed r-stage request is written into ibuf
+   logic                               ibuf_rst;          // ibuf drains without a new write, or dec_tlu_force_halt
+   logic                               ibuf_force_drain;  // m-stage request cannot coalesce with ibuf and nothing is in r
+   logic                               ibuf_drain_vld;    // ibuf contents move to the bus buffer this cycle
+   logic [DEPTH-1:0]                   ibuf_drainvec_vld; // ibuf_drain_vld decoded by ibuf_tag
+   logic [DEPTH_LOG2-1:0]              ibuf_tag_in;
+   logic [DEPTH_LOG2-1:0]              ibuf_dualtag_in;
+   logic [1:0]                         ibuf_sz_in;
+   logic [31:0]                        ibuf_addr_in;
+   logic [3:0]                         ibuf_byteen_in;
+   logic [31:0]                        ibuf_data_in;
+   logic [TIMER_LOG2-1:0]              ibuf_timer_in;
+   logic [3:0]                         ibuf_byteen_out;   // ibuf byte enables presented to the bus buffer (merged with the r store when merging on the way out)
+   logic [31:0]                        ibuf_data_out;     // ibuf data presented to the bus buffer (merged with the r store when merging on the way out)
+   logic                               ibuf_merge_en, ibuf_merge_in;   // r store can merge with ibuf / merge happens inside ibuf (request is not dual)
+
+   // Output buffer signals (state captured when obuf_wr_en is asserted unless noted)
+   logic                               obuf_valid;
+   logic                               obuf_write;
+   logic                               obuf_nosend;        // Captured obuf_nosend_in
+   logic                               obuf_rdrsp_pend;    // Set when a read command is sent, cleared by the read response carrying obuf_rdrsp_tag
+   logic                               obuf_sideeffect;
+   logic [31:0]                        obuf_addr;
+   logic [63:0]                        obuf_data;
+   logic [1:0]                         obuf_sz;
+   logic [7:0]                         obuf_byteen;        // One enable per byte of obuf_data
+   logic                               obuf_merge;
+   logic                               obuf_cmd_done, obuf_data_done;   // Sticky bus_wcmd_sent / bus_wdata_sent, cleared on obuf_wr_en or obuf_rst
+   logic [pt.LSU_BUS_TAG-1:0]          obuf_tag0;
+   logic [pt.LSU_BUS_TAG-1:0]          obuf_tag1;
+   logic [pt.LSU_BUS_TAG-1:0]          obuf_rdrsp_tag;     // obuf_tag0 of the most recently sent read command
+
+   logic                               ibuf_buf_byp;       // r-stage request bypasses both ibuf and the bus buffer and goes straight to obuf
+   logic                               obuf_force_wr_en;
+   logic                               obuf_wr_wait;       // Hold a lone store in the buffer while obuf_wr_timer has not reached TIMER_MAX
+   logic                               obuf_wr_en, obuf_wr_enQ;
+   logic                               obuf_rst;
+   logic                               obuf_write_in;
+   logic                               obuf_nosend_in;
+   logic                               obuf_rdrsp_pend_en;
+   logic                               obuf_rdrsp_pend_in;
+   logic                               obuf_sideeffect_in;
+   logic                               obuf_aligned_in;
+   logic [31:0]                        obuf_addr_in;
+   logic [63:0]                        obuf_data_in;
+   logic [1:0]                         obuf_sz_in;
+   logic [7:0]                         obuf_byteen_in;
+   logic                               obuf_merge_in;
+   logic                               obuf_cmd_done_in, obuf_data_done_in;
+   logic [pt.LSU_BUS_TAG-1:0]          obuf_tag0_in;
+   logic [pt.LSU_BUS_TAG-1:0]          obuf_tag1_in;
+   logic [pt.LSU_BUS_TAG-1:0]          obuf_rdrsp_tag_in;
+
+   logic                               obuf_merge_en;      // Second source (CmdPtr1 entry or hi half of a bypassing dual access) is merged into the obuf write
+   logic [TIMER_LOG2-1:0]              obuf_wr_timer, obuf_wr_timer_in;   // Cleared on obuf_wr_en, counts while commands are pending, saturates at TIMER_MAX
+   logic [7:0]                         obuf_byteen0_in, obuf_byteen1_in;  // 64-bit-lane byte enables of the first / second source
+   logic [63:0]                        obuf_data0_in, obuf_data1_in;      // 64-bit-lane data of the first / second source
+
+   logic                               lsu_axi_awvalid_q, lsu_axi_awready_q;
+   logic                               lsu_axi_wvalid_q, lsu_axi_wready_q;
+   logic                               lsu_axi_arvalid_q, lsu_axi_arready_q;
+   logic                               lsu_axi_bvalid_q, lsu_axi_bready_q;
+   logic                               lsu_axi_rvalid_q, lsu_axi_rready_q;
+   logic [pt.LSU_BUS_TAG-1:0]          lsu_axi_bid_q, lsu_axi_rid_q;
+   logic [1:0]                         lsu_axi_bresp_q, lsu_axi_rresp_q;
+   logic [pt.LSU_BUS_TAG-1:0]          lsu_imprecise_error_store_tag;
+   logic [63:0]                        lsu_axi_rdata_q;
+
+   //------------------------------------------------------------------------------
+   // Load forwarding logic start
+   //------------------------------------------------------------------------------
+
+   // 8-to-3 encoder: returns the index of the bit that is set in Dec_value.
+   // Dec_value[0] does not contribute, so both 8'h01 and 8'h00 encode to 3'd0;
+   // if several bits are set the result is the bitwise OR of their indices.
+   function automatic logic [2:0] f_Enc8to3(input logic [7:0] Dec_value);
+      logic enc_b2, enc_b1, enc_b0;
+      enc_b0 = |{Dec_value[7], Dec_value[5], Dec_value[3], Dec_value[1]};   // indices with bit 0 set
+      enc_b1 = |{Dec_value[7], Dec_value[6], Dec_value[3], Dec_value[2]};   // indices with bit 1 set
+      enc_b2 = |Dec_value[7:4];                                             // indices with bit 2 set
+
+      return {enc_b2, enc_b1, enc_b0};
+   endfunction // f_Enc8to3
+
+   // Buffer hit logic for bus load forwarding: the m-stage bus request is compared against every non-IDLE store entry
+   assign ldst_byteen_hi_m[3:0]   = ldst_byteen_ext_m[7:4];   // Byte enables checked against end_addr_m
+   assign ldst_byteen_lo_m[3:0]   = ldst_byteen_ext_m[3:0];   // Byte enables checked against lsu_addr_m
+   for (genvar i=0; i<DEPTH; i++) begin
+      assign ld_addr_hitvec_lo[i] = (lsu_addr_m[31:2] == buf_addr[i][31:2]) & buf_write[i] & (buf_state[i] != IDLE) & lsu_busreq_m;
+      assign ld_addr_hitvec_hi[i] = (end_addr_m[31:2] == buf_addr[i][31:2]) & buf_write[i] & (buf_state[i] != IDLE) & lsu_busreq_m;
+   end
+   // Per byte lane j: report a hit when any buffer entry (after age filtering) or the ibuf supplies the byte
+   for (genvar j=0; j<4; j++) begin
+     assign ld_byte_hit_buf_lo[j] = |(ld_byte_hitvecfn_lo[j]) | ld_byte_ibuf_hit_lo[j];
+     assign ld_byte_hit_buf_hi[j] = |(ld_byte_hitvecfn_hi[j]) | ld_byte_ibuf_hit_hi[j];
+     for (genvar i=0; i<DEPTH; i++) begin
+         assign ld_byte_hitvec_lo[j][i] = ld_addr_hitvec_lo[i] & buf_byteen[i][j] & ldst_byteen_lo_m[j];
+         assign ld_byte_hitvec_hi[j][i] = ld_addr_hitvec_hi[i] & buf_byteen[i][j] & ldst_byteen_hi_m[j];
+         // Entry i keeps its hit only if no entry flagged in buf_age_younger[i] also hits byte j and the ibuf does not hit byte j
+         assign ld_byte_hitvecfn_lo[j][i] = ld_byte_hitvec_lo[j][i] & ~(|(ld_byte_hitvec_lo[j] & buf_age_younger[i])) & ~ld_byte_ibuf_hit_lo[j];  // Kill the byte enable if younger entry exists or byte exists in ibuf
+         assign ld_byte_hitvecfn_hi[j][i] = ld_byte_hitvec_hi[j][i] & ~(|(ld_byte_hitvec_hi[j] & buf_age_younger[i])) & ~ld_byte_ibuf_hit_hi[j];  // Kill the byte enable if younger entry exists or byte exists in ibuf
+      end
+   end
+
+   // Hit in the ibuf: word address ([31:2]) of the m-stage bus request matches a valid store held in ibuf
+   assign ld_addr_ibuf_hit_lo = (lsu_addr_m[31:2] == ibuf_addr[31:2]) & ibuf_write & ibuf_valid & lsu_busreq_m;
+   assign ld_addr_ibuf_hit_hi = (end_addr_m[31:2] == ibuf_addr[31:2]) & ibuf_write & ibuf_valid & lsu_busreq_m;
+   // Per byte: the ibuf holds the byte (ibuf_byteen) and the request needs it (ldst_byteen_*_m)
+   for (genvar i=0; i<4; i++) begin
+      assign ld_byte_ibuf_hit_lo[i] = ld_addr_ibuf_hit_lo & ibuf_byteen[i] & ldst_byteen_lo_m[i];
+      assign ld_byte_ibuf_hit_hi[i] = ld_addr_ibuf_hit_hi & ibuf_byteen[i] & ldst_byteen_hi_m[i];
+   end
+
+   // Forwarded load data, built one byte lane at a time: each lane starts from the ibuf byte
+   // (when that byte hits in ibuf) and then ORs in the byte of every buffer entry whose final hit bit is set.
+   always_comb begin
+      for (int lane=0; lane<4; lane++) begin
+         ld_fwddata_buf_lo[(8*lane) +: 8] = {8{ld_byte_ibuf_hit_lo[lane]}} & ibuf_data[(8*lane) +: 8];
+         ld_fwddata_buf_hi[(8*lane) +: 8] = {8{ld_byte_ibuf_hit_hi[lane]}} & ibuf_data[(8*lane) +: 8];
+      end
+      for (int entry=0; entry<DEPTH; entry++) begin
+         for (int lane=0; lane<4; lane++) begin
+            ld_fwddata_buf_lo[(8*lane) +: 8] |= {8{ld_byte_hitvecfn_lo[lane][entry]}} & buf_data[entry][(8*lane) +: 8];
+            ld_fwddata_buf_hi[(8*lane) +: 8] |= {8{ld_byte_hitvecfn_hi[lane][entry]}} & buf_data[entry][(8*lane) +: 8];
+         end
+      end
+   end
+
+
+   //------------------------------------------------------------------------------
+   // Load forwarding logic end
+   //------------------------------------------------------------------------------
+
+   // Coalescing is disabled by the TLU control bit, and whenever pt.BUILD_AHB_LITE is set
+   assign bus_coalescing_disable = pt.BUILD_AHB_LITE | dec_tlu_wb_coalescing_disable;
+
+   // Size-based byte enables before alignment: by -> 0001, half -> 0011, word -> 1111
+   assign ldst_byteen_r[3:0] = {lsu_pkt_r.word, lsu_pkt_r.word, (lsu_pkt_r.half | lsu_pkt_r.word), (lsu_pkt_r.by | lsu_pkt_r.half | lsu_pkt_r.word)};
+
+   // Shift enables by the byte offset and data by 8x the byte offset; whatever spills past the low word lands in the hi half
+   assign {ldst_byteen_hi_r[3:0], ldst_byteen_lo_r[3:0]} = {4'b0000, ldst_byteen_r[3:0]} << lsu_addr_r[1:0];
+   assign {store_data_hi_r[31:0], store_data_lo_r[31:0]} = {32'b0, store_data_r[31:0]} << {lsu_addr_r[1:0], 3'b000};
+   assign ldst_samedw_r = (end_addr_r[3] == lsu_addr_r[3]);
+   assign is_aligned_r  = lsu_pkt_r.by |
+                          (lsu_pkt_r.half & ~lsu_addr_r[0]) |
+                          (lsu_pkt_r.word & (lsu_addr_r[1:0] == 2'b00));
+
+   //------------------------------------------------------------------------------
+   // Input buffer logic starts here
+   //------------------------------------------------------------------------------
+
+   assign ibuf_byp = lsu_busreq_r & (lsu_pkt_r.load | no_word_merge_r) & ~ibuf_valid;   // Load or non-mergeable request with an empty ibuf skips the ibuf
+   assign ibuf_wr_en = lsu_busreq_r & lsu_commit_r & ~ibuf_byp;   // Committed request that does not bypass is written into ibuf
+   assign ibuf_rst   = (ibuf_drain_vld & ~ibuf_wr_en) | dec_tlu_force_halt;   // Empty the ibuf when it drains without being refilled, or on force halt
+   assign ibuf_force_drain = lsu_busreq_m & ~lsu_busreq_r & ibuf_valid & (lsu_pkt_m.load | (ibuf_addr[31:2] != lsu_addr_m[31:2]));  // Move the ibuf to buf if there is a non-coalescable ld/st in m but nothing in r
+   assign ibuf_drain_vld = ibuf_valid & (((ibuf_wr_en | (ibuf_timer == TIMER_MAX)) & ~(ibuf_merge_en & ibuf_merge_in)) | ibuf_byp | ibuf_force_drain | ibuf_sideeffect | ~ibuf_write | bus_coalescing_disable);
+   assign ibuf_tag_in[DEPTH_LOG2-1:0] = (ibuf_merge_en & ibuf_merge_in) ? ibuf_tag[DEPTH_LOG2-1:0] : (ldst_dual_r ? WrPtr1_r : WrPtr0_r);   // Keep the tag when merging in place
+   assign ibuf_dualtag_in[DEPTH_LOG2-1:0] = WrPtr0_r;
+   assign ibuf_sz_in[1:0]   = {lsu_pkt_r.word, lsu_pkt_r.half};
+   assign ibuf_addr_in[31:0] = ldst_dual_r ? end_addr_r[31:0] : lsu_addr_r[31:0];   // A dual access stores its hi half (end_addr_r) in the ibuf
+   assign ibuf_byteen_in[3:0] = (ibuf_merge_en & ibuf_merge_in) ? (ibuf_byteen[3:0] | ldst_byteen_lo_r[3:0]) : (ldst_dual_r ? ldst_byteen_hi_r[3:0] : ldst_byteen_lo_r[3:0]);
+   for (genvar i=0; i<4; i++) begin
+      assign ibuf_data_in[(8*i)+7:(8*i)] = (ibuf_merge_en & ibuf_merge_in) ? (ldst_byteen_lo_r[i] ? store_data_lo_r[(8*i)+7:(8*i)] : ibuf_data[(8*i)+7:(8*i)]) :
+                                                                             (ldst_dual_r ? store_data_hi_r[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)]);
+   end
+   assign ibuf_timer_in = ibuf_wr_en ? '0 : (ibuf_timer < TIMER_MAX) ? (ibuf_timer + 1'b1) : ibuf_timer;   // Restart on every write, then count up and saturate at TIMER_MAX
+
+   // Merge condition: committed, non-side-effect store in r to the same word ([31:2]) as a valid store in ibuf, with coalescing enabled
+   assign ibuf_merge_en = lsu_busreq_r & lsu_commit_r & lsu_pkt_r.store & ibuf_valid & ibuf_write & (lsu_addr_r[31:2] == ibuf_addr[31:2]) & ~is_sideeffects_r & ~bus_coalescing_disable;
+   assign ibuf_merge_in = ~ldst_dual_r;   // If it's an unaligned store, merge needs to happen on the way out of ibuf
+
+   // ibuf signals going to bus buffer after merging (the r-stage lo bytes are overlaid only when ibuf_merge_en & ~ibuf_merge_in)
+   for (genvar i=0; i<4; i++) begin
+      assign ibuf_byteen_out[i] = (ibuf_merge_en & ~ibuf_merge_in) ? (ibuf_byteen[i] | ldst_byteen_lo_r[i]) : ibuf_byteen[i];
+      assign ibuf_data_out[(8*i)+7:(8*i)] = (ibuf_merge_en & ~ibuf_merge_in) ? (ldst_byteen_lo_r[i] ? store_data_lo_r[(8*i)+7:(8*i)] : ibuf_data[(8*i)+7:(8*i)]) :
+                                                                                                        ibuf_data[(8*i)+7:(8*i)];
+   end
+   // ibuf state flops: ibuf_valid uses en(ibuf_wr_en)/clear(ibuf_rst); every payload field is enabled by ibuf_wr_en; ibuf_timer has no enable
+   rvdffsc #(.WIDTH(1))              ibuf_valid_ff     (.din(1'b1),                      .dout(ibuf_valid),      .en(ibuf_wr_en), .clear(ibuf_rst), .clk(lsu_free_c2_clk), .*);
+   rvdffs  #(.WIDTH(DEPTH_LOG2))     ibuf_tagff        (.din(ibuf_tag_in),               .dout(ibuf_tag),        .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(DEPTH_LOG2))     ibuf_dualtagff    (.din(ibuf_dualtag_in),           .dout(ibuf_dualtag),    .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(1))              ibuf_dualff       (.din(ldst_dual_r),               .dout(ibuf_dual),       .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(1))              ibuf_samedwff     (.din(ldst_samedw_r),             .dout(ibuf_samedw),     .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(1))              ibuf_nomergeff    (.din(no_dword_merge_r),          .dout(ibuf_nomerge),    .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(1))              ibuf_sideeffectff (.din(is_sideeffects_r),          .dout(ibuf_sideeffect), .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(1))              ibuf_unsignff     (.din(lsu_pkt_r.unsign),          .dout(ibuf_unsign),     .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(1))              ibuf_writeff      (.din(lsu_pkt_r.store),           .dout(ibuf_write),      .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffs  #(.WIDTH(2))              ibuf_szff         (.din(ibuf_sz_in[1:0]),           .dout(ibuf_sz),         .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffe  #(.WIDTH(32))             ibuf_addrff       (.din(ibuf_addr_in[31:0]),        .dout(ibuf_addr),       .en(ibuf_wr_en),                                              .*);
+   rvdffs  #(.WIDTH(4))              ibuf_byteenff     (.din(ibuf_byteen_in[3:0]),       .dout(ibuf_byteen),     .en(ibuf_wr_en),                   .clk(lsu_bus_ibuf_c1_clk), .*);
+   rvdffe  #(.WIDTH(32))             ibuf_dataff       (.din(ibuf_data_in[31:0]),        .dout(ibuf_data),       .en(ibuf_wr_en),                                              .*);
+   rvdff   #(.WIDTH(TIMER_LOG2))     ibuf_timerff      (.din(ibuf_timer_in),             .dout(ibuf_timer),                                         .clk(lsu_free_c2_clk),     .*);
+
+
+   //------------------------------------------------------------------------------
+   // Input buffer logic ends here
+   //------------------------------------------------------------------------------
+
+
+   //------------------------------------------------------------------------------
+   // Output buffer logic starts here
+   //------------------------------------------------------------------------------
+
+   assign obuf_wr_wait = (buf_numvld_wrcmd_any[3:0] == 4'b1) & (buf_numvld_cmd_any[3:0] == 4'b1) & (obuf_wr_timer != TIMER_MAX) &   // Both counts equal 1 and the timer has not expired: hold the entry back
+                         ~bus_coalescing_disable & ~buf_nomerge[CmdPtr0] & ~buf_sideeffect[CmdPtr0] & ~obuf_force_wr_en;           // ...unless coalescing is off, the entry is nomerge/side-effect, or a write is forced
+   assign obuf_wr_timer_in = obuf_wr_en ? '0 : (((buf_numvld_cmd_any > 4'b0) & (obuf_wr_timer < TIMER_MAX)) ? (obuf_wr_timer + 1'b1) : obuf_wr_timer);   // Width-agnostic reset ('0) so the literal follows TIMER_LOG2; counts while commands are pending, saturates at TIMER_MAX
+   assign obuf_force_wr_en = lsu_busreq_m & ~lsu_busreq_r & ~ibuf_valid & (buf_numvld_cmd_any[3:0] == 4'b1) & (lsu_addr_m[31:2] != buf_addr[CmdPtr0][31:2]);   // Entry in m can't merge with entry going to obuf and there is no entry in between
+   assign ibuf_buf_byp = ibuf_byp & (buf_numvld_pend_any[3:0] == 4'b0) & (~lsu_pkt_r.store | no_dword_merge_r);   // ibuf bypass can also skip the bus buffer when buf_numvld_pend_any is zero and the request is not a mergeable store
+
+   assign obuf_wr_en = ((ibuf_buf_byp & lsu_commit_r & ~(is_sideeffects_r & bus_sideeffect_pend)) |                                                                  // Source 1: committed r-stage request bypassing straight to obuf
+                        ((buf_state[CmdPtr0] == CMD) & found_cmdptr0 & ~buf_cmd_state_bus_en[CmdPtr0] & ~(buf_sideeffect[CmdPtr0] & bus_sideeffect_pend) &          // Source 2: oldest unsent CMD entry
+                         (~(buf_dual[CmdPtr0] & buf_samedw[CmdPtr0] & ~buf_write[CmdPtr0]) | found_cmdptr1 | buf_nomerge[CmdPtr0] | obuf_force_wr_en))) &           // A dual same-DW load waits for its second entry unless nomerge or forced
+                       (bus_cmd_ready | ~obuf_valid | obuf_nosend) & ~obuf_wr_wait  & ~bus_addr_match_pending & lsu_bus_clk_en;                                      // obuf must be free or being freed, and no wait/address-match hold
+   // obuf empties when its command was sent (or it was marked nosend) and nothing replaces it in the same bus clock, or on force halt
+   assign obuf_rst   = ((bus_cmd_sent | (obuf_valid & obuf_nosend)) & ~obuf_wr_en & lsu_bus_clk_en) | dec_tlu_force_halt;
+
+   assign obuf_write_in      = ibuf_buf_byp ? lsu_pkt_r.store : buf_write[CmdPtr0];   // Each *_in below selects the r-stage request on bypass, else the CmdPtr0 entry
+   assign obuf_sideeffect_in = ibuf_buf_byp ? is_sideeffects_r : buf_sideeffect[CmdPtr0];
+   assign obuf_addr_in[31:0] = ibuf_buf_byp ? lsu_addr_r[31:0] : buf_addr[CmdPtr0];
+   assign obuf_sz_in[1:0]    = ibuf_buf_byp ? {lsu_pkt_r.word, lsu_pkt_r.half} : buf_sz[CmdPtr0];
+   assign obuf_merge_in      = obuf_merge_en;
+   assign obuf_tag0_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr0_r) : (pt.LSU_BUS_TAG)'(CmdPtr0);
+   assign obuf_tag1_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr1_r) : (pt.LSU_BUS_TAG)'(CmdPtr1);
+   // Sticky "write command sent" / "write data sent" flags, cleared whenever the obuf is rewritten or reset
+   assign obuf_cmd_done_in    = ~(obuf_wr_en | obuf_rst) & (obuf_cmd_done | bus_wcmd_sent);
+   assign obuf_data_done_in   = ~(obuf_wr_en | obuf_rst) & (obuf_data_done | bus_wdata_sent);
+   // Alignment of the incoming obuf request: byte always, half needs addr[0]==0, word needs addr[1:0]==0
+   assign obuf_aligned_in    = ibuf_buf_byp ? is_aligned_r : ((obuf_sz_in[1:0] == 2'b0) |
+                                                              (obuf_sz_in[0] & ~obuf_addr_in[0]) |
+                                                              (obuf_sz_in[1] & ~(|obuf_addr_in[1:0])));
+   // Read-response pending: set when a read command is sent; cleared by the matching read response, by a new obuf write that will be sent, or by force halt
+   assign obuf_rdrsp_pend_in  = ((~(obuf_wr_en & ~obuf_nosend_in) & obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag))) | (bus_cmd_sent & ~obuf_write)) & ~dec_tlu_force_halt;
+   assign obuf_rdrsp_pend_en  = lsu_bus_clk_en | dec_tlu_force_halt;
+   assign obuf_rdrsp_tag_in[pt.LSU_BUS_TAG-1:0] = (bus_cmd_sent & ~obuf_write) ? obuf_tag0[pt.LSU_BUS_TAG-1:0] : obuf_rdrsp_tag[pt.LSU_BUS_TAG-1:0];
+   // nosend: incoming aligned load to the same doubleword ([31:3]) as a load that is still in obuf or awaiting its read response. NOTE(review): original comment read "No ld to ld fwd for aligned", which does not match the expression - confirm intent
+   assign obuf_nosend_in      = (obuf_addr_in[31:3] == obuf_addr[31:3]) & obuf_aligned_in & ~obuf_sideeffect & ~obuf_write & ~obuf_write_in & ~dec_tlu_external_ldfwd_disable &
+                                ((obuf_valid & ~obuf_nosend) | (obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag))));
+
+   assign obuf_byteen0_in[7:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {ldst_byteen_lo_r[3:0],4'b0} : {4'b0,ldst_byteen_lo_r[3:0]}) :   // Address bit 2 selects the upper or lower 32-bit lane of the 64-bit obuf
+                                                (buf_addr[CmdPtr0][2] ? {buf_byteen[CmdPtr0],4'b0} : {4'b0,buf_byteen[CmdPtr0]});
+   assign obuf_byteen1_in[7:0] = ibuf_buf_byp ? (end_addr_r[2] ? {ldst_byteen_hi_r[3:0],4'b0} : {4'b0,ldst_byteen_hi_r[3:0]}) :
+                                                (buf_addr[CmdPtr1][2] ? {buf_byteen[CmdPtr1],4'b0} : {4'b0,buf_byteen[CmdPtr1]});
+   assign obuf_data0_in[63:0]  = ibuf_buf_byp ? (lsu_addr_r[2] ? {store_data_lo_r[31:0],32'b0} : {32'b0,store_data_lo_r[31:0]}) :
+                                                (buf_addr[CmdPtr0][2] ? {buf_data[CmdPtr0],32'b0} : {32'b0,buf_data[CmdPtr0]});
+   assign obuf_data1_in[63:0]  = ibuf_buf_byp ? (end_addr_r[2] ? {store_data_hi_r[31:0],32'b0} :{32'b0,store_data_hi_r[31:0]}) :
+                                                (buf_addr[CmdPtr1][2] ? {buf_data[CmdPtr1],32'b0} : {32'b0,buf_data[CmdPtr1]});
+   // Per byte: source 0 always contributes; source 1 contributes (and wins the data mux) only when obuf_merge_en is set
+   for (genvar i=0 ;i<8; i++) begin
+      assign obuf_byteen_in[i] = obuf_byteen0_in[i] | (obuf_merge_en & obuf_byteen1_in[i]);
+      assign obuf_data_in[(8*i)+7:(8*i)] = (obuf_merge_en & obuf_byteen1_in[i]) ? obuf_data1_in[(8*i)+7:(8*i)] : obuf_data0_in[(8*i)+7:(8*i)];
+   end
+
+   // No store obuf merging for AXI since all stores are sent non-posted. Can't track the second id right now
+   assign obuf_merge_en = ((CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) &
+                           ~buf_cmd_state_bus_en[CmdPtr0] & ~buf_sideeffect[CmdPtr0] &
+                           (~buf_write[CmdPtr0] & buf_dual[CmdPtr0] & ~buf_dualhi[CmdPtr0] & buf_samedw[CmdPtr0])) |  // CmdPtr0/CmdPtr1 are for same load which is within a DW
+                          (ibuf_buf_byp & ldst_samedw_r & ldst_dual_r);                                               // or a bypassing dual access whose two halves share a DW
+
+   // obuf state flops: obuf_valid uses en(obuf_wr_en)/clear(obuf_rst); payload fields are enabled by obuf_wr_en; wr_enQ, cmd/data done, rdrsp tag and timer have no enable port
+   rvdff_fpga  #(.WIDTH(1))              obuf_wren_ff      (.din(obuf_wr_en),                  .dout(obuf_wr_enQ),                                        .clk(lsu_busm_clk),        .clken(lsu_busm_clken), .rawclk(clk),        .*);
+   rvdffsc     #(.WIDTH(1))              obuf_valid_ff     (.din(1'b1),                        .dout(obuf_valid),      .en(obuf_wr_en), .clear(obuf_rst), .clk(lsu_free_c2_clk),                                                  .*);
+   rvdffs      #(.WIDTH(1))              obuf_nosend_ff    (.din(obuf_nosend_in),              .dout(obuf_nosend),     .en(obuf_wr_en),                   .clk(lsu_free_c2_clk),                                                  .*);
+   rvdffs      #(.WIDTH(1))              obuf_rdrsp_pend_ff(.din(obuf_rdrsp_pend_in),          .dout(obuf_rdrsp_pend), .en(obuf_rdrsp_pend_en),           .clk(lsu_free_c2_clk),                                                  .*);
+   rvdff_fpga  #(.WIDTH(1))              obuf_cmd_done_ff  (.din(obuf_cmd_done_in),            .dout(obuf_cmd_done),                                      .clk(lsu_busm_clk),        .clken(lsu_busm_clken),        .rawclk(clk), .*);
+   rvdff_fpga  #(.WIDTH(1))              obuf_data_done_ff (.din(obuf_data_done_in),           .dout(obuf_data_done),                                     .clk(lsu_busm_clk),        .clken(lsu_busm_clken),        .rawclk(clk), .*);
+   rvdff_fpga  #(.WIDTH(pt.LSU_BUS_TAG)) obuf_rdrsp_tagff  (.din(obuf_rdrsp_tag_in),           .dout(obuf_rdrsp_tag),                                     .clk(lsu_busm_clk),        .clken(lsu_busm_clken),        .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag0ff       (.din(obuf_tag0_in),                .dout(obuf_tag0),       .en(obuf_wr_en),                   .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag1ff       (.din(obuf_tag1_in),                .dout(obuf_tag1),       .en(obuf_wr_en),                   .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(1))              obuf_mergeff      (.din(obuf_merge_in),               .dout(obuf_merge),      .en(obuf_wr_en),                   .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(1))              obuf_writeff      (.din(obuf_write_in),               .dout(obuf_write),      .en(obuf_wr_en),                   .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(1))              obuf_sideeffectff (.din(obuf_sideeffect_in),          .dout(obuf_sideeffect), .en(obuf_wr_en),                   .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(2))              obuf_szff         (.din(obuf_sz_in[1:0]),             .dout(obuf_sz),         .en(obuf_wr_en),                   .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
+   rvdffs_fpga #(.WIDTH(8))              obuf_byteenff     (.din(obuf_byteen_in[7:0]),         .dout(obuf_byteen),     .en(obuf_wr_en),                   .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
+   rvdffe     #(.WIDTH(32))              obuf_addrff       (.din(obuf_addr_in[31:0]),          .dout(obuf_addr),       .en(obuf_wr_en),                                                                                           .*);
+   rvdffe     #(.WIDTH(64))              obuf_dataff       (.din(obuf_data_in[63:0]),          .dout(obuf_data),       .en(obuf_wr_en),                                                                                           .*);
+   rvdff_fpga #(.WIDTH(TIMER_LOG2))      obuf_timerff      (.din(obuf_wr_timer_in),            .dout(obuf_wr_timer),                                      .clk(lsu_busm_clk),        .clken(lsu_busm_clken), .rawclk(clk),        .*);
+
+
+   //------------------------------------------------------------------------------
+   // Output buffer logic ends here
+   //------------------------------------------------------------------------------
+
+   // Allocation pointers for the request in the m stage.
+   // Each pointer is the lowest-numbered IDLE entry that is not already reserved by the
+   // ibuf (ibuf_tag) or by the request in r (WrPtr0_r, plus WrPtr1_r when it is dual);
+   // WrPtr1_m additionally skips WrPtr0_m while lsu_busreq_m is set.
+   // When no entry qualifies the pointer is left at DEPTH-1 with its found flag low.
+   always_comb begin
+      found_wrptr0 = 1'b0;
+      found_wrptr1 = 1'b0;
+      WrPtr0_m[DEPTH_LOG2-1:0] = '0;
+      WrPtr1_m[DEPTH_LOG2-1:0] = '0;
+      for (int e=0; e<DEPTH; e++) begin
+         if (!found_wrptr0) begin
+            WrPtr0_m[DEPTH_LOG2-1:0] = DEPTH_LOG2'(e);
+            found_wrptr0 = (buf_state[e] == IDLE) & ~(ibuf_valid & (ibuf_tag == e)) &
+                           ~(lsu_busreq_r & ((WrPtr0_r == e) | (ldst_dual_r & (WrPtr1_r == e))));
+         end
+      end
+
+      for (int e=0; e<DEPTH; e++) begin
+         if (!found_wrptr1) begin
+            WrPtr1_m[DEPTH_LOG2-1:0] = DEPTH_LOG2'(e);
+            found_wrptr1 = (buf_state[e] == IDLE) & ~(ibuf_valid & (ibuf_tag == e)) & ~(lsu_busreq_m & (WrPtr0_m == e)) &
+                           ~(lsu_busreq_r & ((WrPtr0_r == e) | (ldst_dual_r & (WrPtr1_r == e))));
+         end
+      end
+   end
+
+   // Get the command ptr (decoded per entry, then encoded to an index)
+   for (genvar i=0; i<DEPTH; i++) begin
+      // These should be one-hot
+      assign CmdPtr0Dec[i] = ~(|buf_age[i]) & (buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i];                                     // Unsent CMD entry with no age bits set
+      assign CmdPtr1Dec[i] = ~(|(buf_age[i] & ~CmdPtr0Dec)) & ~CmdPtr0Dec[i] & (buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i];   // Unsent CMD entry whose only age bits (if any) point at CmdPtr0
+      assign RspPtrDec[i]  = ~(|buf_rsp_pickage[i]) & (buf_state[i] == DONE_WAIT);                                                 // DONE_WAIT entry with no pick-age bits set
+   end
+   // f_Enc8to3 returns 0 for entry 0 and also for an all-zero vector, so found_cmdptr* must qualify CmdPtr*
+   assign found_cmdptr0 = |CmdPtr0Dec;
+   assign found_cmdptr1 = |CmdPtr1Dec;
+   assign CmdPtr0 = f_Enc8to3(8'(CmdPtr0Dec[DEPTH-1:0]));
+   assign CmdPtr1 = f_Enc8to3(8'(CmdPtr1Dec[DEPTH-1:0]));
+   assign RspPtr  = f_Enc8to3(8'(RspPtrDec[DEPTH-1:0]));
+
+   // Age vector: buf_age[i][j] marks entry j as older than entry i and not yet sent on the bus
+   for (genvar i=0; i<DEPTH; i++) begin: GenAgeVec
+      for (genvar j=0; j<DEPTH; j++) begin
+         assign buf_age_in[i][j] = (((buf_state[i] == IDLE) & buf_state_en[i]) &
+                                    (((buf_state[j] == WAIT) | ((buf_state[j] == CMD) & ~buf_cmd_state_bus_en[j]))                   |       // Set age bit for older entries
+                                     (ibuf_drain_vld & lsu_busreq_r & (ibuf_byp | ldst_dual_r) & (i == WrPtr0_r) & (j == ibuf_tag))  |       // Set case for dual lo
+                                     (ibuf_byp & lsu_busreq_r & ldst_dual_r & (i == WrPtr1_r) & (j == WrPtr0_r))))                      |     // ibuf bypass case
+                                   buf_age[i][j];
+
+         // Bit [i][j] is dropped once entry j is in CMD with buf_cmd_state_bus_en[j] set, or on force halt
+         assign buf_age[i][j]    = buf_ageQ[i][j] & ~((buf_state[j] == CMD) & buf_cmd_state_bus_en[j]) & ~dec_tlu_force_halt;  // Reset case
+         // Entry j counts as younger than i when it is in use and not marked older; an entry is never younger than itself
+         assign buf_age_younger[i][j] = (i == j) ? 1'b0: (~buf_age[i][j] & (buf_state[j] != IDLE));   // Younger entries
+      end
+   end
+
+   // Age vector for responses: buf_rspage[i][j] marks entry j as allocated before entry i and not yet DONE or IDLE
+   for (genvar i=0; i<DEPTH; i++) begin: GenRspAgeVec
+      for (genvar j=0; j<DEPTH; j++) begin
+         assign buf_rspage_set[i][j] = ((buf_state[i] == IDLE) & buf_state_en[i]) &
+                                           (~((buf_state[j] == IDLE) | (buf_state[j] == DONE))                                         |       // Set age bit for older entries
+                                            (ibuf_drain_vld & lsu_busreq_r & (ibuf_byp | ldst_dual_r) & (DEPTH_LOG2'(i) == WrPtr0_r) & (DEPTH_LOG2'(j) == ibuf_tag))  |       // Set case for dual lo
+                                            (ibuf_byp & lsu_busreq_r & ldst_dual_r & (DEPTH_LOG2'(i) == WrPtr1_r) & (DEPTH_LOG2'(j) == WrPtr0_r)));                            // ibuf bypass case (dual hi is after dual lo)
+         assign buf_rspage_in[i][j] = buf_rspage_set[i][j] | buf_rspage[i][j];                                                                                                   // Newly set bits plus surviving bits
+         assign buf_rspage[i][j]    = buf_rspageQ[i][j] & ~((buf_state[j] == DONE) | (buf_state[j] == IDLE)) & ~dec_tlu_force_halt;  // Reset case
+         assign buf_rsp_pickage[i][j] = buf_rspageQ[i][j] & (buf_state[j] == DONE_WAIT);                                                                                         // Older entries that are themselves in DONE_WAIT (used by RspPtrDec)
+     end
+   end
+
+   //------------------------------------------------------------------------------
+   // Buffer logic
+   //------------------------------------------------------------------------------
+   for (genvar i=0; i<DEPTH; i++) begin
+
+      assign ibuf_drainvec_vld[i] = (ibuf_drain_vld & (i == ibuf_tag));   // ibuf is draining into this entry; selects the ibuf_* source in every mux below
+      assign buf_byteen_in[i]     = ibuf_drainvec_vld[i] ? ibuf_byteen_out[3:0] : ((ibuf_byp & ldst_dual_r & (i == WrPtr1_r)) ? ldst_byteen_hi_r[3:0] : ldst_byteen_lo_r[3:0]);   // Otherwise hi byte enables for the WrPtr1 entry of a bypassed dual, lo byte enables else
+      assign buf_addr_in[i]       = ibuf_drainvec_vld[i] ? ibuf_addr[31:0] : ((ibuf_byp & ldst_dual_r & (i == WrPtr1_r)) ? end_addr_r[31:0] : lsu_addr_r[31:0]);   // Otherwise end_addr_r for the WrPtr1 entry of a bypassed dual, lsu_addr_r else
+      assign buf_dual_in[i]       = ibuf_drainvec_vld[i] ? ibuf_dual : ldst_dual_r;
+      assign buf_samedw_in[i]     = ibuf_drainvec_vld[i] ? ibuf_samedw : ldst_samedw_r;
+      assign buf_nomerge_in[i]    = ibuf_drainvec_vld[i] ? (ibuf_nomerge | ibuf_force_drain) : no_dword_merge_r;   // A forced ibuf drain also marks the entry as no-merge
+      assign buf_dualhi_in[i]     = ibuf_drainvec_vld[i] ? ibuf_dual : (ibuf_byp & ldst_dual_r & (i == WrPtr1_r));   // If it's dual, ibuf will always have the high
+      assign buf_dualtag_in[i]    = ibuf_drainvec_vld[i] ? ibuf_dualtag : ((ibuf_byp & ldst_dual_r & (i == WrPtr1_r)) ? WrPtr0_r : WrPtr1_r);   // Tag of the partner entry: the WrPtr1 entry points at WrPtr0 and vice versa
+      assign buf_sideeffect_in[i] = ibuf_drainvec_vld[i] ? ibuf_sideeffect : is_sideeffects_r;
+      assign buf_unsign_in[i]     = ibuf_drainvec_vld[i] ? ibuf_unsign : lsu_pkt_r.unsign;
+      assign buf_sz_in[i]         = ibuf_drainvec_vld[i] ? ibuf_sz : {lsu_pkt_r.word, lsu_pkt_r.half};   // Size encoding from the packet: {word, half}
+      assign buf_write_in[i]      = ibuf_drainvec_vld[i] ? ibuf_write : lsu_pkt_r.store;
+
+      // Buffer entry state machine (per entry i). Defaults are assigned first and overridden per state; dec_tlu_force_halt sends WAIT/CMD/RESP/DONE_PARTIAL/DONE_WAIT back to IDLE
+      always_comb begin
+         buf_nxtstate[i]          = IDLE;
+         buf_state_en[i]          = '0;
+         buf_resp_state_bus_en[i] = '0;
+         buf_state_bus_en[i]      = '0;
+         buf_wr_en[i]             = '0;
+         buf_data_in[i]           = '0;
+         buf_data_en[i]           = '0;
+         buf_error_en[i]          = '0;
+         buf_rst[i]               = dec_tlu_force_halt;   // buf_rst drives the clear of the sticky error flop
+         buf_ldfwd_en[i]          = dec_tlu_force_halt;   // With buf_ldfwd_in defaulting to 0, force halt clears the load-forward flag
+         buf_ldfwd_in[i]          = '0;
+         buf_ldfwdtag_in[i]       = '0;
+
+         case (buf_state[i])
+            IDLE: begin
+                     buf_nxtstate[i] = lsu_bus_clk_en ? CMD : WAIT;   // Go through WAIT when the bus clock enable is not asserted
+                     buf_state_en[i] = (lsu_busreq_r & lsu_commit_r & (((ibuf_byp | ldst_dual_r) & ~ibuf_merge_en & (i == WrPtr0_r)) | (ibuf_byp & ldst_dual_r & (i == WrPtr1_r)))) |   // Committed r-stage request targets this entry
+                                       (ibuf_drain_vld & (i == ibuf_tag));   // or the ibuf drains into this entry
+                     buf_wr_en[i]    = buf_state_en[i];   // Capture the entry attributes on allocation
+                     buf_data_en[i]  = buf_state_en[i];
+                     buf_data_in[i]   = (ibuf_drain_vld & (i == ibuf_tag)) ? ibuf_data_out[31:0] : store_data_lo_r[31:0];
+                     buf_cmd_state_bus_en[i]  = '0;
+            end
+            WAIT: begin
+                     buf_nxtstate[i] = dec_tlu_force_halt ? IDLE : CMD;
+                     buf_state_en[i] = lsu_bus_clk_en | dec_tlu_force_halt;   // Leave WAIT when the bus clock enable arrives (or on force halt)
+                     buf_cmd_state_bus_en[i]  = '0;
+            end
+            CMD: begin
+                     buf_nxtstate[i]          = dec_tlu_force_halt ? IDLE : (obuf_nosend & bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag)) ? DONE_WAIT : RESP;   // Not sent and the matching read response arrives now: skip RESP
+                     buf_cmd_state_bus_en[i]  = ((obuf_tag0 == i) | (obuf_merge & (obuf_tag1 == i))) & obuf_valid & obuf_wr_enQ;  // Just use the recently written obuf_valid
+                     buf_state_bus_en[i]      = buf_cmd_state_bus_en[i];
+                     buf_state_en[i]          = (buf_state_bus_en[i] & lsu_bus_clk_en) | dec_tlu_force_halt;
+                     buf_ldfwd_in[i]          = 1'b1;
+                     buf_ldfwd_en[i]          = buf_state_en[i] & ~buf_write[i] & obuf_nosend & ~dec_tlu_force_halt;   // Mark a load that is not sent (obuf_nosend) as forwarded
+                     buf_ldfwdtag_in[i]       = DEPTH_LOG2'(obuf_rdrsp_tag[pt.LSU_BUS_TAG-2:0]);   // Tag whose read response this entry will take its data from
+                     buf_data_en[i]           = buf_state_bus_en[i] & lsu_bus_clk_en & obuf_nosend & bus_rsp_read;
+                     buf_error_en[i]          = buf_state_bus_en[i] & lsu_bus_clk_en & obuf_nosend & bus_rsp_read_error;
+                     buf_data_in[i]           = buf_error_en[i] ? bus_rsp_rdata[31:0] : (buf_addr[i][2] ? bus_rsp_rdata[63:32] : bus_rsp_rdata[31:0]);   // Address bit 2 picks the upper or lower word of the 64-bit read data
+            end
+            RESP: begin
+                     buf_nxtstate[i]           = (dec_tlu_force_halt | (buf_write[i] & ~bus_rsp_write_error)) ? IDLE :    // Side-effect writes will be non-posted
+                                                      (buf_dual[i] & ~buf_samedw[i] & ~buf_write[i] & (buf_state[buf_dualtag[i]] != DONE_PARTIAL)) ? DONE_PARTIAL : // Goto DONE_PARTIAL if this is the first return of dual
+                                                           (buf_ldfwd[i] | any_done_wait_state |
+                                                            (buf_dual[i] & ~buf_samedw[i] & ~buf_write[i] & buf_ldfwd[buf_dualtag[i]] &
+                                                             (buf_state[buf_dualtag[i]] == DONE_PARTIAL) & any_done_wait_state)) ? DONE_WAIT : DONE;
+                     buf_resp_state_bus_en[i]  = (bus_rsp_write & (bus_rsp_write_tag == (pt.LSU_BUS_TAG)'(i))) |   // Write response for this entry
+                                                 (bus_rsp_read  & ((bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(i)) |   // Read response for this entry
+                                                                   (buf_ldfwd[i] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_ldfwdtag[i]))) |   // or for the tag this entry forwards from
+                                                                   (buf_dual[i] & buf_dualhi[i] & ~buf_write[i] & buf_samedw[i] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_dualtag[i])))));   // or for the partner of a same-dword dual load (hi half)
+                     buf_state_bus_en[i]       = buf_resp_state_bus_en[i];
+                     buf_state_en[i]           = (buf_state_bus_en[i] & lsu_bus_clk_en) | dec_tlu_force_halt;
+                     buf_data_en[i]            = buf_state_bus_en[i] & bus_rsp_read & lsu_bus_clk_en;
+                      // Need to capture the error for stores as well for AXI
+                     buf_error_en[i]           = buf_state_bus_en[i] & lsu_bus_clk_en & ((bus_rsp_read_error  & (bus_rsp_read_tag  == (pt.LSU_BUS_TAG)'(i))) |
+                                                                                         (bus_rsp_read_error  & buf_ldfwd[i] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_ldfwdtag[i]))) |
+                                                                                         (bus_rsp_write_error & (bus_rsp_write_tag == (pt.LSU_BUS_TAG)'(i))));
+                     buf_data_in[i][31:0]      = (buf_state_en[i] & ~buf_error_en[i]) ? (buf_addr[i][2] ? bus_rsp_rdata[63:32] : bus_rsp_rdata[31:0]) : bus_rsp_rdata[31:0];   // Word select by address bit 2 unless an error is being captured
+                     buf_cmd_state_bus_en[i]  = '0;
+            end
+            DONE_PARTIAL: begin   // Other part of dual load hasn't returned
+                     buf_nxtstate[i]           = dec_tlu_force_halt ? IDLE : (buf_ldfwd[i] | buf_ldfwd[buf_dualtag[i]] | any_done_wait_state) ? DONE_WAIT : DONE;
+                     buf_state_bus_en[i]       = bus_rsp_read & ((bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_dualtag[i])) |   // Read response for the partner entry
+                                                                 (buf_ldfwd[buf_dualtag[i]] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_ldfwdtag[buf_dualtag[i]]))));   // or for the tag the partner forwards from
+                     buf_state_en[i]           = (buf_state_bus_en[i] & lsu_bus_clk_en) | dec_tlu_force_halt;
+                     buf_cmd_state_bus_en[i]  = '0;
+            end
+            DONE_WAIT: begin  // WAIT state if there are multiple outstanding nb returns
+                      buf_nxtstate[i]           = dec_tlu_force_halt ? IDLE : DONE;
+                      buf_state_en[i]           = ((RspPtr == DEPTH_LOG2'(i)) | (buf_dual[i] & (buf_dualtag[i] == RspPtr))) | dec_tlu_force_halt;   // Released when RspPtr selects this entry or its dual partner
+                      buf_cmd_state_bus_en[i]  = '0;
+            end
+            DONE: begin
+                     buf_nxtstate[i]           = IDLE;
+                     buf_rst[i]                = 1'b1;   // Clear the sticky error flop as the entry retires
+                     buf_state_en[i]           = 1'b1;   // DONE unconditionally returns to IDLE
+                     buf_ldfwd_in[i]           = 1'b0;
+                     buf_ldfwd_en[i]           = buf_state_en[i];   // Clear the load-forward flag as the entry retires
+                     buf_cmd_state_bus_en[i]  = '0;
+            end
+            default : begin
+                     buf_nxtstate[i]          = IDLE;
+                     buf_state_en[i]          = '0;
+                     buf_resp_state_bus_en[i] = '0;
+                     buf_state_bus_en[i]      = '0;
+                     buf_wr_en[i]             = '0;
+                     buf_data_in[i]           = '0;
+                     buf_data_en[i]           = '0;
+                     buf_error_en[i]          = '0;
+                     buf_rst[i]               = '0;
+                     buf_cmd_state_bus_en[i]  = '0;
+            end
+         endcase
+      end
+
+      rvdffs  #(.WIDTH($bits(state_t))) buf_state_ff     (.din(buf_nxtstate[i]),             .dout({buf_state[i]}),    .en(buf_state_en[i]),                                        .clk(lsu_bus_buf_c1_clk), .*);   // Entry state, updated only when buf_state_en is set
+      rvdff   #(.WIDTH(DEPTH))          buf_ageff        (.din(buf_age_in[i]),               .dout(buf_ageQ[i]),                                                                    .clk(lsu_bus_buf_c1_clk), .*);   // Command age vector row (no enable port connected)
+      rvdff   #(.WIDTH(DEPTH))          buf_rspageff     (.din(buf_rspage_in[i]),            .dout(buf_rspageQ[i]),                                                                 .clk(lsu_bus_buf_c1_clk), .*);   // Response age vector row (no enable port connected)
+      rvdffs  #(.WIDTH(DEPTH_LOG2))     buf_dualtagff    (.din(buf_dualtag_in[i]),           .dout(buf_dualtag[i]),    .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_dualff       (.din(buf_dual_in[i]),              .dout(buf_dual[i]),       .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_samedwff     (.din(buf_samedw_in[i]),            .dout(buf_samedw[i]),     .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_nomergeff    (.din(buf_nomerge_in[i]),           .dout(buf_nomerge[i]),    .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_dualhiff     (.din(buf_dualhi_in[i]),            .dout(buf_dualhi[i]),     .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_ldfwdff      (.din(buf_ldfwd_in[i]),             .dout(buf_ldfwd[i]),      .en(buf_ldfwd_en[i]),                                        .clk(lsu_bus_buf_c1_clk), .*);   // Load-forward flag and tag use buf_ldfwd_en, not buf_wr_en
+      rvdffs  #(.WIDTH(DEPTH_LOG2))     buf_ldfwdtagff   (.din(buf_ldfwdtag_in[i]),          .dout(buf_ldfwdtag[i]),   .en(buf_ldfwd_en[i]),                                        .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_sideeffectff (.din(buf_sideeffect_in[i]),        .dout(buf_sideeffect[i]), .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_unsignff     (.din(buf_unsign_in[i]),            .dout(buf_unsign[i]),     .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(1))              buf_writeff      (.din(buf_write_in[i]),             .dout(buf_write[i]),      .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffs  #(.WIDTH(2))              buf_szff         (.din(buf_sz_in[i]),                .dout(buf_sz[i]),         .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffe  #(.WIDTH(32))             buf_addrff       (.din(buf_addr_in[i][31:0]),        .dout(buf_addr[i]),       .en(buf_wr_en[i]),                                                                     .*);   // No explicit .clk here: the clock comes from the .* implicit connection
+      rvdffs  #(.WIDTH(4))              buf_byteenff     (.din(buf_byteen_in[i][3:0]),       .dout(buf_byteen[i]),     .en(buf_wr_en[i]),                                           .clk(lsu_bus_buf_c1_clk), .*);
+      rvdffe  #(.WIDTH(32))             buf_dataff       (.din(buf_data_in[i][31:0]),        .dout(buf_data[i]),       .en(buf_data_en[i]),                                                                   .*);   // Data uses its own enable (allocation or read return); clock via .*
+      rvdffsc #(.WIDTH(1))              buf_errorff      (.din(1'b1),                        .dout(buf_error[i]),      .en(buf_error_en[i]),                    .clear(buf_rst[i]), .clk(lsu_bus_buf_c1_clk), .*);   // Sticky error: loads constant 1 on buf_error_en, cleared through buf_rst
+
+   end
+
+   // buffer full logic: tally occupied, command-pending and wait-state entries for the full/pending checks
+   always_comb begin
+      any_done_wait_state       = 1'b0;
+      buf_numvld_pend_any[3:0]  = 4'b0;
+      buf_numvld_cmd_any[3:0]   = 4'b0;
+      buf_numvld_wrcmd_any[3:0] = 4'b0;
+      // Seed the total with the m/r stage requests (shifted left by one when dual) and the ibuf valid
+      buf_numvld_any[3:0]       = ({1'b0,lsu_busreq_m} << ldst_dual_m) + ({1'b0,lsu_busreq_r} << ldst_dual_r) + ibuf_valid;
+      for (int ent=0; ent<DEPTH; ent++) begin
+         // One increment per entry for each category it currently belongs to
+         any_done_wait_state       = any_done_wait_state | (buf_state[ent] == DONE_WAIT);
+         buf_numvld_pend_any[3:0]  = buf_numvld_pend_any[3:0]  + {3'b0, ((buf_state[ent] == WAIT) | ((buf_state[ent] == CMD) & ~buf_cmd_state_bus_en[ent]))};
+         buf_numvld_cmd_any[3:0]   = buf_numvld_cmd_any[3:0]   + {3'b0, ((buf_state[ent] == CMD) & ~buf_cmd_state_bus_en[ent])};
+         buf_numvld_wrcmd_any[3:0] = buf_numvld_wrcmd_any[3:0] + {3'b0, (buf_write[ent] & (buf_state[ent] == CMD) & ~buf_cmd_state_bus_en[ent])};
+         buf_numvld_any[3:0]       = buf_numvld_any[3:0]       + {3'b0, (buf_state[ent] != IDLE)};
+      end
+   end
+
+   assign lsu_bus_buffer_pend_any = (buf_numvld_pend_any != 0);   // Some entry is in WAIT, or in CMD and not being issued this cycle
+   assign lsu_bus_buffer_full_any = (ldst_dual_d & dec_lsu_valid_raw_d) ? (buf_numvld_any[3:0] >= (DEPTH-1)) : (buf_numvld_any[3:0] == DEPTH);   // A valid dual access in d uses the tighter DEPTH-1 threshold
+   assign lsu_bus_buffer_empty_any = ~(|buf_state[DEPTH-1:0]) & ~ibuf_valid & ~obuf_valid;   // All state bits zero (presumably IDLE encodes as 0 - TODO confirm) and both ibuf and obuf invalid
+
+
+   // Non blocking ports: load valid/tag in the m stage and invalidate/tag in the r stage
+   assign lsu_nonblock_load_valid_m = lsu_busreq_m & lsu_pkt_m.valid & lsu_pkt_m.load & ~flush_m_up & ~ld_full_hit_m;   // Valid bus load in m that is not flushed and not fully hit (ld_full_hit_m)
+   assign lsu_nonblock_load_tag_m[DEPTH_LOG2-1:0] = WrPtr0_m[DEPTH_LOG2-1:0];   // Tag is the first write pointer in m
+   assign lsu_nonblock_load_inv_r = lsu_nonblock_load_valid_r & ~lsu_commit_r;   // Invalidate a load that was valid in m but does not commit in r
+   assign lsu_nonblock_load_inv_tag_r[DEPTH_LOG2-1:0] = WrPtr0_r[DEPTH_LOG2-1:0];      // r tag needs to be accurate even if there is no invalidate
+
+   always_comb begin   // OR-reduce the load entries currently in DONE into the non-blocking load return signals
+      lsu_nonblock_load_data_ready = '0;
+      lsu_nonblock_load_data_error = '0;
+      lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0] = '0;
+      lsu_nonblock_load_data_lo[31:0] = '0;
+      lsu_nonblock_load_data_hi[31:0] = '0;
+      for (int i=0; i<DEPTH; i++) begin
+          // NOTE(review): the original note here read "Use buf_rst[i] instead of buf_state_en[i] for timing", but the terms below qualify on (buf_state[i] == DONE) directly; the entry state machine sets both buf_rst[i] and buf_state_en[i] in DONE
+          lsu_nonblock_load_data_ready      |= (buf_state[i] == DONE) & ~buf_write[i];   // Any load entry in DONE
+          lsu_nonblock_load_data_error      |= (buf_state[i] == DONE) & buf_error[i] & ~buf_write[i];   // Any load entry in DONE with its error flag set
+          lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0]   |= DEPTH_LOG2'(i) & {DEPTH_LOG2{((buf_state[i] == DONE) & ~buf_write[i] & (~buf_dual[i] | ~buf_dualhi[i]))}};   // Tag comes from the non-dual or dual-lo entry
+          lsu_nonblock_load_data_lo[31:0]     |= buf_data[i][31:0] & {32{((buf_state[i] == DONE) & ~buf_write[i] & (~buf_dual[i] | ~buf_dualhi[i]))}};   // Low word from the non-dual or dual-lo entry
+          lsu_nonblock_load_data_hi[31:0]     |= buf_data[i][31:0] & {32{((buf_state[i] == DONE) & ~buf_write[i] & (buf_dual[i] & buf_dualhi[i]))}};   // High word from the dual-hi entry
+      end
+   end
+
+   assign lsu_nonblock_addr_offset[1:0] = buf_addr[lsu_nonblock_load_data_tag][1:0];   // Byte offset, size and signedness are read from the entry selected by the return tag
+   assign lsu_nonblock_sz[1:0]          = buf_sz[lsu_nonblock_load_data_tag][1:0];
+   assign lsu_nonblock_unsign           = buf_unsign[lsu_nonblock_load_data_tag];
+   assign lsu_nonblock_data_unalgn[31:0] = 32'({lsu_nonblock_load_data_hi[31:0], lsu_nonblock_load_data_lo[31:0]} >> 8*lsu_nonblock_addr_offset[1:0]);   // Shift {hi,lo} right by the byte offset and keep the low 32 bits
+
+   assign lsu_nonblock_load_data_valid = lsu_nonblock_load_data_ready & ~lsu_nonblock_load_data_error;   // Data is valid only when the return carries no error
+   assign lsu_nonblock_load_data[31:0] = ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {24'b0,lsu_nonblock_data_unalgn[7:0]}) |   // sz 2'b00, unsigned: zero-extend the low byte
+                                         ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {16'b0,lsu_nonblock_data_unalgn[15:0]}) |   // sz 2'b01, unsigned: zero-extend the low halfword
+                                         ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {{24{lsu_nonblock_data_unalgn[7]}}, lsu_nonblock_data_unalgn[7:0]}) |   // sz 2'b00, signed: sign-extend from bit 7
+                                         ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {{16{lsu_nonblock_data_unalgn[15]}},lsu_nonblock_data_unalgn[15:0]}) |   // sz 2'b01, signed: sign-extend from bit 15
+                                         ({32{(lsu_nonblock_sz[1:0] == 2'b10)}} & lsu_nonblock_data_unalgn[31:0]);   // sz 2'b10: full 32-bit value
+
+   // Flag an outstanding side-effect load/store while posted side-effects are disabled: either valid in the obuf or sitting in RESP in a buffer entry
+   always_comb begin
+      bus_sideeffect_pend = dec_tlu_sideeffect_posted_disable & obuf_sideeffect & obuf_valid;
+      for (int ent=0; ent<DEPTH; ent++) begin
+         bus_sideeffect_pend = bus_sideeffect_pend | (dec_tlu_sideeffect_posted_disable & buf_sideeffect[ent] & (buf_state[ent] == RESP));
+      end
+   end
+
+   // AXI gives no ordering guarantee, so detect an obuf transaction whose dword address matches another entry that is still awaiting its response
+   always_comb begin
+      bus_addr_match_pending = '0;
+      for (int ent=0; ent<DEPTH; ent++) begin
+         bus_addr_match_pending = bus_addr_match_pending | (obuf_valid & (buf_state[ent] == RESP) & (obuf_addr[31:3] == buf_addr[ent][31:3]) & ~(obuf_tag0 == (pt.LSU_BUS_TAG)'(ent)) & ~(obuf_merge & (obuf_tag1 == (pt.LSU_BUS_TAG)'(ent))));
+      end
+   end
+
+   // Generic bus signals: protocol-neutral command/response strobes derived from the AXI handshakes
+   assign bus_cmd_ready                      = obuf_write ? ((obuf_cmd_done | obuf_data_done) ? (obuf_cmd_done ? lsu_axi_wready : lsu_axi_awready) : (lsu_axi_awready & lsu_axi_wready)) : lsu_axi_arready;   // Write: ready of the channel(s) still outstanding; read: arready
+   assign bus_wcmd_sent                      = lsu_axi_awvalid & lsu_axi_awready;   // Write address handshake this cycle
+   assign bus_wdata_sent                     = lsu_axi_wvalid & lsu_axi_wready;   // Write data handshake this cycle
+   assign bus_cmd_sent                       = ((obuf_cmd_done | bus_wcmd_sent) & (obuf_data_done | bus_wdata_sent)) | (lsu_axi_arvalid & lsu_axi_arready);   // Write: both address and data done (earlier or now); read: address handshake
+
+   assign bus_rsp_read                       = lsu_axi_rvalid & lsu_axi_rready;   // Read response handshake
+   assign bus_rsp_write                      = lsu_axi_bvalid & lsu_axi_bready;   // Write response handshake
+   assign bus_rsp_read_tag[pt.LSU_BUS_TAG-1:0]  = lsu_axi_rid[pt.LSU_BUS_TAG-1:0];
+   assign bus_rsp_write_tag[pt.LSU_BUS_TAG-1:0] = lsu_axi_bid[pt.LSU_BUS_TAG-1:0];
+   assign bus_rsp_write_error                = bus_rsp_write & (lsu_axi_bresp[1:0] != 2'b0);   // Any non-zero BRESP counts as an error
+   assign bus_rsp_read_error                 = bus_rsp_read  & (lsu_axi_rresp[1:0] != 2'b0);   // Any non-zero RRESP counts as an error
+   assign bus_rsp_rdata[63:0]                = lsu_axi_rdata[63:0];
+
+   // AXI command signals: all three request channels are driven from the obuf and held off while bus_addr_match_pending is set
+   assign lsu_axi_awvalid               = obuf_valid & obuf_write & ~obuf_cmd_done & ~bus_addr_match_pending;   // Write address not yet accepted
+   assign lsu_axi_awid[pt.LSU_BUS_TAG-1:0] = (pt.LSU_BUS_TAG)'(obuf_tag0);
+   assign lsu_axi_awaddr[31:0]          = obuf_sideeffect ? obuf_addr[31:0] : {obuf_addr[31:3],3'b0};   // Side-effect: exact address; otherwise aligned to the 8-byte dword
+   assign lsu_axi_awsize[2:0]           = obuf_sideeffect ? {1'b0, obuf_sz[1:0]} : 3'b011;   // Side-effect: actual access size; otherwise 3'b011 (8 bytes per the AXI size encoding)
+   assign lsu_axi_awprot[2:0]           = 3'b001;
+   assign lsu_axi_awcache[3:0]          = obuf_sideeffect ? 4'b0 : 4'b1111;
+   assign lsu_axi_awregion[3:0]         = obuf_addr[31:28];   // Region taken from the top four address bits
+   assign lsu_axi_awlen[7:0]            = '0;   // AWLEN 0: single-beat transfers only
+   assign lsu_axi_awburst[1:0]          = 2'b01;   // 2'b01 is INCR in the AXI burst encoding
+   assign lsu_axi_awqos[3:0]            = '0;
+   assign lsu_axi_awlock                = '0;
+
+   assign lsu_axi_wvalid                = obuf_valid & obuf_write & ~obuf_data_done & ~bus_addr_match_pending;   // Write data not yet accepted
+   assign lsu_axi_wstrb[7:0]            = obuf_byteen[7:0] & {8{obuf_write}};   // Byte strobes forced to zero when the obuf holds a read
+   assign lsu_axi_wdata[63:0]           = obuf_data[63:0];
+   assign lsu_axi_wlast                 = '1;   // Constant 1, consistent with the single-beat AWLEN above
+
+   assign lsu_axi_arvalid               = obuf_valid & ~obuf_write & ~obuf_nosend & ~bus_addr_match_pending;   // Reads flagged obuf_nosend are never put on the bus
+   assign lsu_axi_arid[pt.LSU_BUS_TAG-1:0] = (pt.LSU_BUS_TAG)'(obuf_tag0);
+   assign lsu_axi_araddr[31:0]          = obuf_sideeffect ? obuf_addr[31:0] : {obuf_addr[31:3],3'b0};   // Same address rule as the write channel
+   assign lsu_axi_arsize[2:0]           = obuf_sideeffect ? {1'b0, obuf_sz[1:0]} : 3'b011;   // Same size rule as the write channel
+   assign lsu_axi_arprot[2:0]           = 3'b001;
+   assign lsu_axi_arcache[3:0]          = obuf_sideeffect ? 4'b0 : 4'b1111;
+   assign lsu_axi_arregion[3:0]         = obuf_addr[31:28];
+   assign lsu_axi_arlen[7:0]            = '0;   // ARLEN 0: single-beat transfers only
+   assign lsu_axi_arburst[1:0]          = 2'b01;   // 2'b01 is INCR in the AXI burst encoding
+   assign lsu_axi_arqos[3:0]            = '0;
+   assign lsu_axi_arlock                = '0;
+
+   assign lsu_axi_bready = 1;   // Write responses are always accepted
+   assign lsu_axi_rready = 1;   // Read responses are always accepted
+
+   always_comb begin   // OR-reduce store entries that reached DONE with their error flag set
+      lsu_imprecise_error_store_any = '0;
+      lsu_imprecise_error_store_tag = '0;
+      for (int i=0; i<DEPTH; i++) begin
+         lsu_imprecise_error_store_any |= lsu_bus_clk_en_q & (buf_state[i] == DONE) & buf_error[i] & buf_write[i];   // Qualified with lsu_bus_clk_en_q, unlike the tag on the next line
+         lsu_imprecise_error_store_tag |= DEPTH_LOG2'(i) & {DEPTH_LOG2{((buf_state[i] == DONE) & buf_error[i] & buf_write[i])}};   // Index of the failing store entry (tags are ORed if several match)
+      end
+   end
+   assign lsu_imprecise_error_load_any       = lsu_nonblock_load_data_error & ~lsu_imprecise_error_store_any;   // This is to make sure we send only one imprecise error for load/store
+   assign lsu_imprecise_error_addr_any[31:0] = lsu_imprecise_error_store_any ? buf_addr[lsu_imprecise_error_store_tag] : buf_addr[lsu_nonblock_load_data_tag];   // Store error address has priority; otherwise the address of the returning load entry
+
+   // PMU signals: performance-monitor events derived from the AXI handshakes and the r-stage request
+   assign lsu_pmu_bus_trxn  = (lsu_axi_awvalid & lsu_axi_awready) | (lsu_axi_wvalid & lsu_axi_wready) | (lsu_axi_arvalid & lsu_axi_arready);   // Any AW, W or AR handshake this cycle
+   assign lsu_pmu_bus_misaligned = lsu_busreq_r & ldst_dual_r & lsu_commit_r;   // Committed dual bus request in r
+   assign lsu_pmu_bus_error = lsu_imprecise_error_load_any | lsu_imprecise_error_store_any;   // Any imprecise load or store error
+   assign lsu_pmu_bus_busy  = (lsu_axi_awvalid & ~lsu_axi_awready) | (lsu_axi_wvalid & ~lsu_axi_wready) | (lsu_axi_arvalid & ~lsu_axi_arready);   // A request channel has valid asserted while ready is low
+
+   rvdff_fpga #(.WIDTH(1))               lsu_axi_awvalid_ff (.din(lsu_axi_awvalid),                .dout(lsu_axi_awvalid_q),                .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);   // Registered (_q) copies of the AXI request handshakes on lsu_busm_clk; their consumers are outside this view
+   rvdff_fpga #(.WIDTH(1))               lsu_axi_awready_ff (.din(lsu_axi_awready),                .dout(lsu_axi_awready_q),                .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(1))               lsu_axi_wvalid_ff  (.din(lsu_axi_wvalid),                 .dout(lsu_axi_wvalid_q),                 .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(1))               lsu_axi_wready_ff  (.din(lsu_axi_wready),                 .dout(lsu_axi_wready_q),                 .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(1))               lsu_axi_arvalid_ff (.din(lsu_axi_arvalid),                .dout(lsu_axi_arvalid_q),                .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga #(.WIDTH(1))               lsu_axi_arready_ff (.din(lsu_axi_arready),                .dout(lsu_axi_arready_q),                .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+
+   rvdff_fpga  #(.WIDTH(1))              lsu_axi_bvalid_ff  (.din(lsu_axi_bvalid),                 .dout(lsu_axi_bvalid_q),                 .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);   // Registered copies of the write response channel
+   rvdff_fpga  #(.WIDTH(1))              lsu_axi_bready_ff  (.din(lsu_axi_bready),                 .dout(lsu_axi_bready_q),                 .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga  #(.WIDTH(2))              lsu_axi_bresp_ff   (.din(lsu_axi_bresp[1:0]),             .dout(lsu_axi_bresp_q[1:0]),             .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga  #(.WIDTH(pt.LSU_BUS_TAG)) lsu_axi_bid_ff     (.din(lsu_axi_bid[pt.LSU_BUS_TAG-1:0]),.dout(lsu_axi_bid_q[pt.LSU_BUS_TAG-1:0]),.clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdffe      #(.WIDTH(64))             lsu_axi_rdata_ff   (.din(lsu_axi_rdata[63:0]),            .dout(lsu_axi_rdata_q[63:0]),            .en((lsu_axi_rvalid | clk_override) & lsu_bus_clk_en), .*);   // Read data captured on rvalid (or clk_override), gated by the bus clock enable
+
+   rvdff_fpga  #(.WIDTH(1))              lsu_axi_rvalid_ff  (.din(lsu_axi_rvalid),                 .dout(lsu_axi_rvalid_q),                 .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);   // Registered copies of the read response channel
+   rvdff_fpga  #(.WIDTH(1))              lsu_axi_rready_ff  (.din(lsu_axi_rready),                 .dout(lsu_axi_rready_q),                 .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga  #(.WIDTH(2))              lsu_axi_rresp_ff   (.din(lsu_axi_rresp[1:0]),             .dout(lsu_axi_rresp_q[1:0]),             .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+   rvdff_fpga  #(.WIDTH(pt.LSU_BUS_TAG)) lsu_axi_rid_ff     (.din(lsu_axi_rid[pt.LSU_BUS_TAG-1:0]),.dout(lsu_axi_rid_q[pt.LSU_BUS_TAG-1:0]),.clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
+
+   rvdff #(.WIDTH(DEPTH_LOG2)) lsu_WrPtr0_rff (.din(WrPtr0_m), .dout(WrPtr0_r), .clk(lsu_c2_r_clk), .*);   // Carry both write pointers from the m stage to the r stage
+   rvdff #(.WIDTH(DEPTH_LOG2)) lsu_WrPtr1_rff (.din(WrPtr1_m), .dout(WrPtr1_r), .clk(lsu_c2_r_clk), .*);
+
+   rvdff #(.WIDTH(1)) lsu_busreq_rff (.din(lsu_busreq_m & ~flush_r & ~ld_full_hit_m),      .dout(lsu_busreq_r), .clk(lsu_c2_r_clk), .*);   // The m-stage request is dropped on flush_r or ld_full_hit_m before entering r
+   rvdff #(.WIDTH(1)) lsu_nonblock_load_valid_rff  (.din(lsu_nonblock_load_valid_m),  .dout(lsu_nonblock_load_valid_r), .clk(lsu_c2_r_clk), .*);
+
+`ifdef RV_ASSERT_ON   // Immediate assertions, compiled in only when RV_ASSERT_ON is defined
+
+   for (genvar i=0; i<4; i++) begin: GenByte   // For each of the four byte lanes, at most one buffer entry may hit
+      assert_ld_byte_hitvecfn_lo_onehot: assert #0 ($onehot0(ld_byte_hitvecfn_lo[i][DEPTH-1:0]));
+      assert_ld_byte_hitvecfn_hi_onehot: assert #0 ($onehot0(ld_byte_hitvecfn_hi[i][DEPTH-1:0]));
+   end
+
+   for (genvar i=0; i<DEPTH; i++) begin: GenAssertAge   // No age bit may be set while the buffer reports empty
+      assert_bufempty_agevec: assert #0 (~(lsu_bus_buffer_empty_any & |(buf_age[i])));
+   end
+
+   assert_CmdPtr0Dec_onehot: assert #0 ($onehot0(CmdPtr0Dec[DEPTH-1:0] & ~{DEPTH{dec_tlu_force_halt}}));   // Command pointer decodes select at most one entry; masked during force halt
+   assert_CmdPtr1Dec_onehot: assert #0 ($onehot0(CmdPtr1Dec[DEPTH-1:0] & ~{DEPTH{dec_tlu_force_halt}}));
+
+`endif
+
+endmodule // eb1_lsu_bus_buffer
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_bus_intf.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_bus_intf.sv
new file mode 100644
index 0000000..fe80ab0
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_bus_intf.sv
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: lsu interface with interface queue
+// Comments:
+//
+//********************************************************************************
+module eb1_lsu_bus_intf
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+   input logic                          clk,                                // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic                          clk_override,                       // Override non-functional clock gating
+   input logic                          rst_l,                              // reset, active low
+   input logic                          scan_mode,                          // scan mode
+   input logic                          dec_tlu_external_ldfwd_disable,     // disable load to load forwarding for externals
+   input logic                          dec_tlu_wb_coalescing_disable,      // disable write buffer coalescing
+   input logic                          dec_tlu_sideeffect_posted_disable,  // disable the posted sideeffect load store to the bus
+
+   // various clocks needed for the bus reads and writes
+   input logic                          lsu_bus_obuf_c1_clken,              // obuf clock enable
+   input logic                          lsu_busm_clken,                     // bus clock enable
+
+   input logic                          lsu_c1_r_clk,                       // r pipe single pulse clock
+   input logic                          lsu_c2_r_clk,                       // r pipe double pulse clock
+   input logic                          lsu_bus_ibuf_c1_clk,                // ibuf single pulse clock
+   input logic                          lsu_bus_obuf_c1_clk,                // obuf single pulse clock
+   input logic                          lsu_bus_buf_c1_clk,                 // buf  single pulse clock
+   input logic                          lsu_free_c2_clk,                    // free clock double pulse clock
+   input logic                          active_clk,                         // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic                          lsu_busm_clk,                       // bus clock
+
+   input logic                          dec_lsu_valid_raw_d,               // Raw valid for address computation
+   input logic                          lsu_busreq_m,                      // bus request is in m
+
+   input                                eb1_lsu_pkt_t lsu_pkt_m,          // lsu packet flowing down the pipe
+   input                                eb1_lsu_pkt_t lsu_pkt_r,          // lsu packet flowing down the pipe
+
+   input logic [31:0]                   lsu_addr_m,                        // lsu address flowing down the pipe
+   input logic [31:0]                   lsu_addr_r,                        // lsu address flowing down the pipe
+
+   input logic [31:0]                   end_addr_m,                        // lsu address flowing down the pipe
+   input logic [31:0]                   end_addr_r,                        // lsu address flowing down the pipe
+
+   input logic [31:0]                   store_data_r,                      // store data flowing down the pipe
+   input logic                          dec_tlu_force_halt,                // This will be high till TLU goes to debug halt
+
+   input logic                          lsu_commit_r,                      // lsu instruction in r commits
+   input logic                          is_sideeffects_m,                  // lsu attribute is side_effects
+   input logic                          flush_m_up,                        // flush
+   input logic                          flush_r,                           // flush
+   input logic                          ldst_dual_d, ldst_dual_m, ldst_dual_r,
+
+   output logic                         lsu_busreq_r,                      // bus request is in r
+   output logic                         lsu_bus_buffer_pend_any,           // bus buffer has a pending bus entry
+   output logic                         lsu_bus_buffer_full_any,           // write buffer is full
+   output logic                         lsu_bus_buffer_empty_any,          // write buffer is empty
+   output logic [31:0]                  bus_read_data_m,                   // the bus return data
+
+
+   output logic                         lsu_imprecise_error_load_any,      // imprecise load bus error
+   output logic                         lsu_imprecise_error_store_any,     // imprecise store bus error
+   output logic [31:0]                  lsu_imprecise_error_addr_any,      // address of the imprecise error
+
+   // Non-blocking loads
+   output logic                               lsu_nonblock_load_valid_m,   // there is an external load -> put in the cam
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m,     // the tag of the external non block load
+   output logic                               lsu_nonblock_load_inv_r,     // invalidate signal for the cam entry for non block loads
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the entry which needs to be invalidated
+   output logic                               lsu_nonblock_load_data_valid,// the non block is valid - sending information back to the cam
+   output logic                               lsu_nonblock_load_data_error,// non block load has an error
+   output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag,  // the tag of the non block load sending the data/error
+   output logic [31:0]                        lsu_nonblock_load_data,      // Data of the non block load
+
+   // PMU events
+   output logic                         lsu_pmu_bus_trxn,
+   output logic                         lsu_pmu_bus_misaligned,
+   output logic                         lsu_pmu_bus_error,
+   output logic                         lsu_pmu_bus_busy,
+
+   // AXI Write Channels
+   output logic                        lsu_axi_awvalid,
+   input  logic                        lsu_axi_awready,
+   output logic [pt.LSU_BUS_TAG-1:0]   lsu_axi_awid,
+   output logic [31:0]                 lsu_axi_awaddr,
+   output logic [3:0]                  lsu_axi_awregion,
+   output logic [7:0]                  lsu_axi_awlen,
+   output logic [2:0]                  lsu_axi_awsize,
+   output logic [1:0]                  lsu_axi_awburst,
+   output logic                        lsu_axi_awlock,
+   output logic [3:0]                  lsu_axi_awcache,
+   output logic [2:0]                  lsu_axi_awprot,
+   output logic [3:0]                  lsu_axi_awqos,
+
+   output logic                        lsu_axi_wvalid,
+   input  logic                        lsu_axi_wready,
+   output logic [63:0]                 lsu_axi_wdata,
+   output logic [7:0]                  lsu_axi_wstrb,
+   output logic                        lsu_axi_wlast,
+
+   input  logic                        lsu_axi_bvalid,
+   output logic                        lsu_axi_bready,
+   input  logic [1:0]                  lsu_axi_bresp,
+   input  logic [pt.LSU_BUS_TAG-1:0]   lsu_axi_bid,
+
+   // AXI Read Channels
+   output logic                        lsu_axi_arvalid,
+   input  logic                        lsu_axi_arready,
+   output logic [pt.LSU_BUS_TAG-1:0]   lsu_axi_arid,
+   output logic [31:0]                 lsu_axi_araddr,
+   output logic [3:0]                  lsu_axi_arregion,
+   output logic [7:0]                  lsu_axi_arlen,
+   output logic [2:0]                  lsu_axi_arsize,
+   output logic [1:0]                  lsu_axi_arburst,
+   output logic                        lsu_axi_arlock,
+   output logic [3:0]                  lsu_axi_arcache,
+   output logic [2:0]                  lsu_axi_arprot,
+   output logic [3:0]                  lsu_axi_arqos,
+
+   input  logic                        lsu_axi_rvalid,
+   output logic                        lsu_axi_rready,
+   input  logic [pt.LSU_BUS_TAG-1:0]   lsu_axi_rid,
+   input  logic [63:0]                 lsu_axi_rdata,
+   input  logic [1:0]                  lsu_axi_rresp,
+
+   input logic                         lsu_bus_clk_en                      // bus clock enable
+
+);
+   // Derives lo/hi byte enables and store data for the bus buffer, and forwards store data (from the
+   // r-stage store or from the bus buffer) to a load in m; ld_full_hit_m flags a fully forwarded load.
+
+   logic              lsu_bus_clk_en_q;                                     // lsu_bus_clk_en registered by clken_ff
+
+   logic [3:0]        ldst_byteen_m, ldst_byteen_r;
+   logic [7:0]        ldst_byteen_ext_m, ldst_byteen_ext_r;
+   logic [3:0]        ldst_byteen_hi_m, ldst_byteen_hi_r;
+   logic [3:0]        ldst_byteen_lo_m, ldst_byteen_lo_r;
+   logic              is_sideeffects_r;
+
+   logic [63:0]       store_data_ext_r;
+   logic [31:0]       store_data_hi_r;
+   logic [31:0]       store_data_lo_r;
+
+   logic              addr_match_dw_lo_r_m;
+   logic              addr_match_word_lo_r_m;
+   logic              no_word_merge_r, no_dword_merge_r;
+
+   logic              ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
+   logic [3:0]        ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
+
+   logic [3:0]        ld_byte_hit_lo, ld_byte_rhit_lo;
+   logic [3:0]        ld_byte_hit_hi, ld_byte_rhit_hi;
+
+   logic [31:0]       ld_fwddata_rpipe_lo;
+   logic [31:0]       ld_fwddata_rpipe_hi;
+
+   logic [3:0]        ld_byte_hit_buf_lo, ld_byte_hit_buf_hi;
+   logic [31:0]       ld_fwddata_buf_lo, ld_fwddata_buf_hi;
+
+   logic [63:0]       ld_fwddata_lo, ld_fwddata_hi;
+   logic [63:0]       ld_fwddata_m;
+
+   logic              ld_full_hit_hi_m, ld_full_hit_lo_m;
+   logic              ld_full_hit_m;
+
+   assign ldst_byteen_m[3:0] = ({4{lsu_pkt_m.by}}   & 4'b0001) |
+                                 ({4{lsu_pkt_m.half}} & 4'b0011) |
+                                 ({4{lsu_pkt_m.word}} & 4'b1111);
+
+   // Read/Write Buffer
+   eb1_lsu_bus_buffer #(.pt(pt)) bus_buffer (
+      .*
+   );
+
+   // Logic to determine if dc5 store can be coalesced or not with younger stores. Bypass ibuf if it cannot be coalesced
+   assign addr_match_dw_lo_r_m = (lsu_addr_r[31:3] == lsu_addr_m[31:3]);
+   assign addr_match_word_lo_r_m = addr_match_dw_lo_r_m & ~(lsu_addr_r[2]^lsu_addr_m[2]);
+
+   assign no_word_merge_r  = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_word_lo_r_m);
+   assign no_dword_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_dw_lo_r_m);
+
+   // Create Hi/Lo signals
+   assign ldst_byteen_ext_m[7:0] = {4'b0,ldst_byteen_m[3:0]} << lsu_addr_m[1:0];   // byte enables moved to the access' byte offset
+   assign ldst_byteen_ext_r[7:0] = {4'b0,ldst_byteen_r[3:0]} << lsu_addr_r[1:0];
+
+   assign store_data_ext_r[63:0] = {32'b0,store_data_r[31:0]} << {lsu_addr_r[1:0],3'b0};   // shift by byte offset * 8 bits
+
+   assign ldst_byteen_hi_m[3:0]   = ldst_byteen_ext_m[7:4];
+   assign ldst_byteen_lo_m[3:0]   = ldst_byteen_ext_m[3:0];
+   assign ldst_byteen_hi_r[3:0]   = ldst_byteen_ext_r[7:4];
+   assign ldst_byteen_lo_r[3:0]   = ldst_byteen_ext_r[3:0];
+
+   assign store_data_hi_r[31:0]   = store_data_ext_r[63:32];
+   assign store_data_lo_r[31:0]   = store_data_ext_r[31:0];
+
+   assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m;
+   assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m;
+   assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m;
+   assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m;
+
+   for (genvar i=0; i<4; i++) begin: GenBusBufFwd
+      assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i] & ldst_byteen_lo_m[i];
+      assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i] & ldst_byteen_hi_m[i];
+      assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i] & ldst_byteen_lo_m[i];
+      assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i] & ldst_byteen_hi_m[i];
+
+      assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i] |
+                                 ld_byte_hit_buf_lo[i];
+
+      assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i] |
+                                 ld_byte_hit_buf_hi[i];
+
+      assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
+      assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
+
+      assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
+                                                    ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
+
+      assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
+                                                    ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
+
+      // Final muxing per byte: r-pipe store data wins over bus-buffer data
+      assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i]    ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : ld_fwddata_buf_lo[(8*i)+7:(8*i)];
+
+      assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i]    ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : ld_fwddata_buf_hi[(8*i)+7:(8*i)];
+
+   end
+
+   always_comb begin
+      ld_full_hit_lo_m = 1'b1;
+      ld_full_hit_hi_m = 1'b1;
+      for (int i=0; i<4; i++) begin
+         ld_full_hit_lo_m &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_m[i]);
+         ld_full_hit_hi_m &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_m[i]);
+      end
+   end
+
+   // This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf)
+   assign ld_full_hit_m = ld_full_hit_lo_m & ld_full_hit_hi_m & lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m;
+
+   assign ld_fwddata_m[63:0] = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8*lsu_addr_m[1:0]);   // realign to the load's byte offset
+   assign bus_read_data_m[31:0]                        = ld_fwddata_m[31:0];
+
+   // Fifo flops
+
+   rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(active_clk), .*);
+
+   rvdff #(.WIDTH(1)) is_sideeffects_rff (.din(is_sideeffects_m), .dout(is_sideeffects_r), .clk(lsu_c1_r_clk), .*);
+
+   rvdff #(4) lsu_byten_rff (.*, .din(ldst_byteen_m[3:0]), .dout(ldst_byteen_r[3:0]), .clk(lsu_c1_r_clk));
+
+`ifdef RV_ASSERT_ON
+
+  // Assertion to check AXI write address is aligned to size
+  property lsu_axi_awaddr_aligned;
+    @(posedge lsu_busm_clk) disable iff(~rst_l) lsu_axi_awvalid |-> ((lsu_axi_awsize[2:0] == 3'h0)                                   |
+                                                                     ((lsu_axi_awsize[2:0] == 3'h1) & (lsu_axi_awaddr[0] == 1'b0))   |
+                                                                     ((lsu_axi_awsize[2:0] == 3'h2) & (lsu_axi_awaddr[1:0] == 2'b0)) |
+                                                                     ((lsu_axi_awsize[2:0] == 3'h3) & (lsu_axi_awaddr[2:0] == 3'b0)));
+  endproperty
+  assert_lsu_axi_awaddr_aligned: assert property (lsu_axi_awaddr_aligned) else
+    $display("Assertion lsu_axi_awaddr_aligned failed: lsu_axi_awvalid=1'b%b, lsu_axi_awsize=3'h%h, lsu_axi_awaddr=32'h%h",lsu_axi_awvalid, lsu_axi_awsize[2:0], lsu_axi_awaddr[31:0]);
+
+
+  // Assertion to check AXI read address is aligned to size
+  property lsu_axi_araddr_aligned;
+    @(posedge lsu_busm_clk) disable iff(~rst_l) lsu_axi_arvalid |-> ((lsu_axi_arsize[2:0] == 3'h0)                                   |
+                                                                     ((lsu_axi_arsize[2:0] == 3'h1) & (lsu_axi_araddr[0] == 1'b0))   |
+                                                                     ((lsu_axi_arsize[2:0] == 3'h2) & (lsu_axi_araddr[1:0] == 2'b0)) |
+                                                                     ((lsu_axi_arsize[2:0] == 3'h3) & (lsu_axi_araddr[2:0] == 3'b0)));
+  endproperty
+  assert_lsu_axi_araddr_aligned: assert property (lsu_axi_araddr_aligned) else
+    $display("Assertion lsu_axi_araddr_aligned failed: lsu_axi_arvalid=1'b%b, lsu_axi_arsize=3'h%h, lsu_axi_araddr=32'h%h",lsu_axi_arvalid, lsu_axi_arsize[2:0], lsu_axi_araddr[31:0]);
+
+  // Assertion to check awvalid stays stable during entire bus clock
+  property lsu_axi_awvalid_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_awvalid != $past(lsu_axi_awvalid)) |-> ($past(lsu_bus_clk_en) | dec_tlu_force_halt);
+  endproperty
+  assert_lsu_axi_awvalid_stable: assert property (lsu_axi_awvalid_stable) else
+     $display("LSU AXI awvalid changed in middle of bus clock");
+
+  // Assertion to check awid stays stable during entire bus clock
+  property lsu_axi_awid_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_awvalid & (lsu_axi_awid[pt.LSU_BUS_TAG-1:0] != $past(lsu_axi_awid[pt.LSU_BUS_TAG-1:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_awid_stable: assert property (lsu_axi_awid_stable) else
+     $display("LSU AXI awid changed in middle of bus clock");
+
+  // Assertion to check awaddr stays stable during entire bus clock
+  property lsu_axi_awaddr_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_awvalid & (lsu_axi_awaddr[31:0] != $past(lsu_axi_awaddr[31:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_awaddr_stable: assert property (lsu_axi_awaddr_stable) else
+     $display("LSU AXI awaddr changed in middle of bus clock");
+
+  // Assertion to check awsize stays stable during entire bus clock
+  property lsu_axi_awsize_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_awvalid & (lsu_axi_awsize[2:0] != $past(lsu_axi_awsize[2:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_awsize_stable: assert property (lsu_axi_awsize_stable) else
+     $display("LSU AXI awsize changed in middle of bus clock");
+
+  // Assertion to check wstrb stays stable during entire bus clock
+  property lsu_axi_wstrb_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_wvalid & (lsu_axi_wstrb[7:0] != $past(lsu_axi_wstrb[7:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_wstrb_stable: assert property (lsu_axi_wstrb_stable) else
+     $display("LSU AXI wstrb changed in middle of bus clock");
+
+  // Assertion to check wdata stays stable during entire bus clock
+  property lsu_axi_wdata_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_wvalid & (lsu_axi_wdata[63:0] != $past(lsu_axi_wdata[63:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_wdata_stable: assert property (lsu_axi_wdata_stable) else
+     $display("LSU AXI wdata changed in middle of bus clock");
+
+  // Assertion to check arvalid stays stable during entire bus clock
+  property lsu_axi_arvalid_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_arvalid != $past(lsu_axi_arvalid)) |-> ($past(lsu_bus_clk_en) | dec_tlu_force_halt);
+  endproperty
+  assert_lsu_axi_arvalid_stable: assert property (lsu_axi_arvalid_stable) else
+     $display("LSU AXI arvalid changed in middle of bus clock");
+
+  // Assertion to check arid stays stable during entire bus clock
+  property lsu_axi_arid_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_arvalid & (lsu_axi_arid[pt.LSU_BUS_TAG-1:0] != $past(lsu_axi_arid[pt.LSU_BUS_TAG-1:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_arid_stable: assert property (lsu_axi_arid_stable) else
+     $display("LSU AXI arid changed in middle of bus clock");
+
+  // Assertion to check araddr stays stable during entire bus clock
+  property lsu_axi_araddr_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_arvalid & (lsu_axi_araddr[31:0] != $past(lsu_axi_araddr[31:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_araddr_stable: assert property (lsu_axi_araddr_stable) else
+     $display("LSU AXI araddr changed in middle of bus clock");
+
+  // Assertion to check arsize stays stable during entire bus clock (qualified by arvalid, not awvalid)
+  property lsu_axi_arsize_stable;
+     @(posedge clk) disable iff(~rst_l)  (lsu_axi_arvalid & (lsu_axi_arsize[2:0] != $past(lsu_axi_arsize[2:0]))) |-> $past(lsu_bus_clk_en);
+  endproperty
+  assert_lsu_axi_arsize_stable: assert property (lsu_axi_arsize_stable) else
+     $display("LSU AXI arsize changed in middle of bus clock");
+
+`endif
+
+endmodule // eb1_lsu_bus_intf
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_clkdomain.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_clkdomain.sv
new file mode 100644
index 0000000..afa1be9
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_clkdomain.sv
@@ -0,0 +1,145 @@
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: Clock Generation Block
+// Comments: All the clocks are generated here
+//
+// //********************************************************************************
+
+
+module eb1_lsu_clkdomain
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+)(
+   input logic      clk,                               // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic      active_clk,                        // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic      rst_l,                             // reset, active low
+   input logic      dec_tlu_force_halt,                // This will be high till TLU goes to debug halt
+
+   // Inputs
+   input logic      clk_override,                      // chicken bit to turn off clock gating
+   input logic      dma_dccm_req,                      // dma is active
+   input logic      ldst_stbuf_reqvld_r,               // allocating in to the store queue
+
+   input logic      stbuf_reqvld_any,                  // stbuf is draining
+   input logic      stbuf_reqvld_flushed_any,          // instruction going to stbuf is flushed
+   input logic      lsu_busreq_r,                      // busreq in r
+   input logic      lsu_bus_buffer_pend_any,           // bus buffer has a pending bus entry
+   input logic      lsu_bus_buffer_empty_any,          // external bus buffer is empty
+   input logic      lsu_stbuf_empty_any,               // stbuf is empty
+
+   input logic      lsu_bus_clk_en,                    // bus clock enable
+
+   input eb1_lsu_pkt_t  lsu_p,                        // lsu packet in decode
+   input eb1_lsu_pkt_t  lsu_pkt_d,                    // lsu packet in d
+   input eb1_lsu_pkt_t  lsu_pkt_m,                    // lsu packet in m
+   input eb1_lsu_pkt_t  lsu_pkt_r,                    // lsu packet in r
+
+   // Outputs
+   output logic     lsu_bus_obuf_c1_clken,             // obuf clock enable
+   output logic     lsu_busm_clken,                    // bus clock enable
+
+   output logic     lsu_c1_m_clk,                      // m pipe single pulse clock
+   output logic     lsu_c1_r_clk,                      // r pipe single pulse clock
+
+   output logic     lsu_c2_m_clk,                      // m pipe double pulse clock
+   output logic     lsu_c2_r_clk,                      // r pipe double pulse clock
+
+   output logic     lsu_store_c1_m_clk,                // store in m
+   output logic     lsu_store_c1_r_clk,                // store in r
+
+   output logic     lsu_stbuf_c1_clk,                  // stbuf clock (gated by lsu_stbuf_c1_clken)
+   output logic     lsu_bus_obuf_c1_clk,               // obuf single pulse clock
+   output logic     lsu_bus_ibuf_c1_clk,               // ibuf single pulse clock
+   output logic     lsu_bus_buf_c1_clk,                // buf single pulse clock
+   output logic     lsu_busm_clk,                      // bus clock
+
+   output logic     lsu_free_c2_clk,                   // free double pulse clock
+
+   input  logic     scan_mode                          // Scan mode
+);
+
+   logic lsu_c1_m_clken, lsu_c1_r_clken;
+   logic lsu_c2_m_clken, lsu_c2_r_clken;
+   logic lsu_c1_m_clken_q, lsu_c1_r_clken_q;
+   logic lsu_store_c1_m_clken, lsu_store_c1_r_clken;
+
+
+   logic lsu_stbuf_c1_clken;
+   logic lsu_bus_ibuf_c1_clken, lsu_bus_buf_c1_clken;
+
+   logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken;
+
+   //-------------------------------------------------------------------------------------------
+   // Clock Enable logic
+   //-------------------------------------------------------------------------------------------
+
+   assign lsu_c1_m_clken = lsu_p.valid | dma_dccm_req | clk_override;
+   assign lsu_c1_r_clken = lsu_pkt_m.valid | lsu_c1_m_clken_q | clk_override;
+
+   assign lsu_c2_m_clken = lsu_c1_m_clken | lsu_c1_m_clken_q | clk_override;   // c2 enable = c1 enable OR its registered copy
+   assign lsu_c2_r_clken = lsu_c1_r_clken | lsu_c1_r_clken_q | clk_override;
+
+   assign lsu_store_c1_m_clken = ((lsu_c1_m_clken & lsu_pkt_d.store) | clk_override) ;
+   assign lsu_store_c1_r_clken = ((lsu_c1_r_clken & lsu_pkt_m.store) | clk_override) ;
+
+   assign lsu_stbuf_c1_clken = ldst_stbuf_reqvld_r | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override;
+   assign lsu_bus_ibuf_c1_clken = lsu_busreq_r | clk_override;
+   assign lsu_bus_obuf_c1_clken = (lsu_bus_buffer_pend_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;   // qualified by the bus clock enable
+   assign lsu_bus_buf_c1_clken  = ~lsu_bus_buffer_empty_any | lsu_busreq_r | dec_tlu_force_halt | clk_override;
+
+   assign lsu_free_c1_clken = (lsu_p.valid | lsu_pkt_d.valid | lsu_pkt_m.valid | lsu_pkt_r.valid) |
+                              ~lsu_bus_buffer_empty_any | ~lsu_stbuf_empty_any | clk_override;
+   assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override;
+
+    // Flops
+   rvdff #(1) lsu_free_c1_clkenff (.din(lsu_free_c1_clken), .dout(lsu_free_c1_clken_q), .clk(active_clk), .*);
+
+   rvdff #(1) lsu_c1_m_clkenff (.din(lsu_c1_m_clken), .dout(lsu_c1_m_clken_q), .clk(lsu_free_c2_clk), .*);
+   rvdff #(1) lsu_c1_r_clkenff (.din(lsu_c1_r_clken), .dout(lsu_c1_r_clken_q), .clk(lsu_free_c2_clk), .*);
+
+   // Clock Headers
+   rvoclkhdr lsu_c1m_cgc ( .en(lsu_c1_m_clken), .l1clk(lsu_c1_m_clk), .* );
+   rvoclkhdr lsu_c1r_cgc ( .en(lsu_c1_r_clken), .l1clk(lsu_c1_r_clk), .* );
+
+   rvoclkhdr lsu_c2m_cgc ( .en(lsu_c2_m_clken), .l1clk(lsu_c2_m_clk), .* );
+   rvoclkhdr lsu_c2r_cgc ( .en(lsu_c2_r_clken), .l1clk(lsu_c2_r_clk), .* );
+
+   rvoclkhdr lsu_store_c1m_cgc (.en(lsu_store_c1_m_clken), .l1clk(lsu_store_c1_m_clk), .*);
+   rvoclkhdr lsu_store_c1r_cgc (.en(lsu_store_c1_r_clken), .l1clk(lsu_store_c1_r_clk), .*);
+
+   rvoclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* );
+   rvoclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* );
+   rvoclkhdr lsu_bus_buf_c1_cgc  ( .en(lsu_bus_buf_c1_clken),  .l1clk(lsu_bus_buf_c1_clk), .* );
+
+   assign lsu_busm_clken = (~lsu_bus_buffer_empty_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;   // qualified by the bus clock enable
+
+`ifdef RV_FPGA_OPTIMIZE
+   assign lsu_busm_clk = 1'b0;          // no gated bus clock in this configuration; tied low (NOTE(review): consumers presumably use the clken outputs instead - confirm)
+   assign lsu_bus_obuf_c1_clk = 1'b0;   // no gated obuf clock in this configuration; tied low
+`else
+   rvclkhdr  lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* );
+   rvclkhdr  lsu_busm_cgc (.en(lsu_busm_clken), .l1clk(lsu_busm_clk), .*);
+`endif
+
+   rvoclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*);
+
+endmodule
+
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_dccm_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_dccm_ctl.sv
new file mode 100644
index 0000000..ba070bc
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_dccm_ctl.sv
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: DCCM for LSU pipe
+// Comments: Single ported memory
+//
+//
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+//
+// //********************************************************************************
+
+module eb1_lsu_dccm_ctl   // Drives the DCCM and PIC read/write ports for the LSU, builds load/DMA return data and merges store data
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+  (
+   input logic                             lsu_c2_m_clk,            // clocks
+   input logic                             lsu_c2_r_clk,            // clocks
+   input logic                             lsu_c1_r_clk,            // clocks
+   input logic                             lsu_store_c1_r_clk,      // clocks
+   input logic                             lsu_free_c2_clk,         // clocks
+   input logic                             clk_override,            // Override non-functional clock gating
+   input logic                             clk,                     // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+
+   input logic                             rst_l,                   // reset, active low
+
+   input                                   eb1_lsu_pkt_t lsu_pkt_r,// lsu packets
+   input                                   eb1_lsu_pkt_t lsu_pkt_m,// lsu packets
+   input                                   eb1_lsu_pkt_t lsu_pkt_d,// lsu packets
+   input logic                             addr_in_dccm_d,          // address maps to dccm
+   input logic                             addr_in_pic_d,           // address maps to pic
+   input logic                             addr_in_pic_m,           // address maps to pic
+   input logic                             addr_in_dccm_m, addr_in_dccm_r,   // address in dccm per pipe stage
+   input logic                             addr_in_pic_r,                    // address in pic  per pipe stage
+   input logic                             lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r, // when set, the single-bit ecc error of the lo/hi half is not reported for the load in r
+   input logic                             lsu_commit_r,            // lsu instruction in r commits
+   input logic                             ldst_dual_m, ldst_dual_r,// load/store is unaligned at 32 bit boundary per pipe stage
+
+   // lsu address down the pipe
+   input logic [31:0]                      lsu_addr_d,
+   input logic [pt.DCCM_BITS-1:0]          lsu_addr_m,
+   input logic [31:0]                      lsu_addr_r,
+
+   // lsu address down the pipe - needed to check unaligned
+   input logic [pt.DCCM_BITS-1:0]          end_addr_d,
+   input logic [pt.DCCM_BITS-1:0]          end_addr_m,
+   input logic [pt.DCCM_BITS-1:0]          end_addr_r,
+
+
+   input logic                             stbuf_reqvld_any,        // write enable
+   input logic [pt.LSU_SB_BITS-1:0]        stbuf_addr_any,          // stbuf address (aligned)
+
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    stbuf_data_any,          // the read out from stbuf
+   input logic [pt.DCCM_ECC_WIDTH-1:0]     stbuf_ecc_any,           // the encoded data with ECC bits
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    stbuf_fwddata_hi_m,      // stbuf forwarding to load
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    stbuf_fwddata_lo_m,      // stbuf forwarding to load
+   input logic [pt.DCCM_BYTE_WIDTH-1:0]    stbuf_fwdbyteen_hi_m,    // stbuf forwarding to load
+   input logic [pt.DCCM_BYTE_WIDTH-1:0]    stbuf_fwdbyteen_lo_m,    // stbuf forwarding to load
+
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   dccm_rdata_hi_r,         // data from the dccm
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   dccm_rdata_lo_r,         // data from the dccm
+   output logic [pt.DCCM_ECC_WIDTH-1:0]    dccm_data_ecc_hi_r,      // data from the dccm + ecc
+   output logic [pt.DCCM_ECC_WIDTH-1:0]    dccm_data_ecc_lo_r,      // data from the dccm + ecc
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   lsu_ld_data_r,           // right justified, ie load byte will have data at 7:0
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   lsu_ld_data_corr_r,      // right justified & ECC corrected, ie load byte will have data at 7:0
+
+   input logic                             lsu_double_ecc_error_r,  // lsu has a DED
+   input logic                             single_ecc_error_hi_r,   // sec detected on hi dccm bank
+   input logic                             single_ecc_error_lo_r,   // sec detected on lower dccm bank
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    sec_data_hi_r,           // corrected dccm data
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    sec_data_lo_r,           // corrected dccm data
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    sec_data_hi_r_ff,        // corrected dccm data
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    sec_data_lo_r_ff,        // corrected dccm data
+   input logic [pt.DCCM_ECC_WIDTH-1:0]     sec_data_ecc_hi_r_ff,    // the encoded data with ECC bits
+   input logic [pt.DCCM_ECC_WIDTH-1:0]     sec_data_ecc_lo_r_ff,    // the encoded data with ECC bits
+
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   dccm_rdata_hi_m,         // data from the dccm
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   dccm_rdata_lo_m,         // data from the dccm
+   output logic [pt.DCCM_ECC_WIDTH-1:0]    dccm_data_ecc_hi_m,      // data from the dccm + ecc
+   output logic [pt.DCCM_ECC_WIDTH-1:0]    dccm_data_ecc_lo_m,      // data from the dccm + ecc
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   lsu_ld_data_m,           // right justified, ie load byte will have data at 7:0
+
+   input logic                             lsu_double_ecc_error_m,  // lsu has a DED
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    sec_data_hi_m,           // corrected dccm data
+   input logic [pt.DCCM_DATA_WIDTH-1:0]    sec_data_lo_m,           // corrected dccm data
+
+   input logic [31:0]                      store_data_m,            // Store data M-stage
+   input logic                             dma_dccm_wen,            // Perform DMA writes only for word/dword
+   input logic                             dma_pic_wen,             // Perform PIC writes
+   input logic [2:0]                       dma_mem_tag_m,           // DMA Buffer entry number M-stage
+   input logic [31:0]                      dma_mem_addr,            // DMA request address
+   input logic [63:0]                      dma_mem_wdata,           // DMA write data
+   input logic [31:0]                      dma_dccm_wdata_lo,       // Shift the dma data to lower bits to make it consistent to lsu stores
+   input logic [31:0]                      dma_dccm_wdata_hi,       // Shift the dma data to lower bits to make it consistent to lsu stores
+   input logic [pt.DCCM_ECC_WIDTH-1:0]     dma_dccm_wdata_ecc_hi,   // ECC bits for the DMA wdata
+   input logic [pt.DCCM_ECC_WIDTH-1:0]     dma_dccm_wdata_ecc_lo,   // ECC bits for the DMA wdata
+
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   store_data_hi_r,         // store bytes merged with bypassed/corrected dccm bytes, hi word
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   store_data_lo_r,         // store bytes merged with bypassed/corrected dccm bytes, lo word
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   store_datafn_hi_r,       // merged hi-word store data that also bypasses a stbuf write committing this cycle
+   output logic [pt.DCCM_DATA_WIDTH-1:0]   store_datafn_lo_r,       // merged lo-word store data that also bypasses a stbuf write committing this cycle
+   output logic [31:0]                     store_data_r,            // raw store data to be sent to bus
+   output logic                            ld_single_ecc_error_r,   // load in r has a single-bit ecc error and no double-bit error
+   output logic                            ld_single_ecc_error_r_ff,// flopped, qualified single-bit error; selects the ecc correction write on the dccm write port
+
+   output logic [31:0]                     picm_mask_data_m,        // pic data to stbuf
+   output logic                            lsu_stbuf_commit_any,    // stbuf wins the dccm port or is to pic
+   output logic                            lsu_dccm_rden_m,         // dccm read
+   output logic                            lsu_dccm_rden_r,         // dccm read
+
+   output logic                            dccm_dma_rvalid,         // dccm servicing the dma load
+   output logic                            dccm_dma_ecc_error,      // DMA load had ecc error
+   output logic [2:0]                      dccm_dma_rtag,           // DMA return tag
+   output logic [63:0]                     dccm_dma_rdata,          // dccm data to dma request
+
+   // DCCM ports
+   output logic                            dccm_wren,               // dccm interface -- write
+   output logic                            dccm_rden,               // dccm interface -- read
+   output logic [pt.DCCM_BITS-1:0]         dccm_wr_addr_lo,         // dccm interface -- wr addr for lo bank
+   output logic [pt.DCCM_BITS-1:0]         dccm_wr_addr_hi,         // dccm interface -- wr addr for hi bank
+   output logic [pt.DCCM_BITS-1:0]         dccm_rd_addr_lo,         // dccm interface -- read address for lo bank
+   output logic [pt.DCCM_BITS-1:0]         dccm_rd_addr_hi,         // dccm interface -- read address for hi bank
+   output logic [pt.DCCM_FDATA_WIDTH-1:0]  dccm_wr_data_lo,         // dccm write data for lo bank
+   output logic [pt.DCCM_FDATA_WIDTH-1:0]  dccm_wr_data_hi,         // dccm write data for hi bank
+
+   input logic [pt.DCCM_FDATA_WIDTH-1:0]   dccm_rd_data_lo,         // dccm read data back from the dccm
+   input logic [pt.DCCM_FDATA_WIDTH-1:0]   dccm_rd_data_hi,         // dccm read data back from the dccm
+
+   // PIC ports
+   output logic                            picm_wren,               // write to pic
+   output logic                            picm_rden,               // read to pic
+   output logic                            picm_mken,               // write to pic need a mask
+   output logic [31:0]                     picm_rdaddr,             // address for pic read access
+   output logic [31:0]                     picm_wraddr,             // address for pic write access
+   output logic [31:0]                     picm_wr_data,            // write data
+   input logic [31:0]                      picm_rd_data,            // read data
+
+   input logic                             scan_mode                // scan mode
+);
+
+
+   localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH);   // NOTE(review): never referenced in this module (the bank compare uses pt.DCCM_WIDTH_BITS) - confirm it can be removed
+
+   logic                           lsu_dccm_rden_d, lsu_dccm_wren_d;                           // D-stage dccm read request / DMA dccm write request
+   logic                           ld_single_ecc_error_lo_r, ld_single_ecc_error_hi_r;         // per-half single-bit ecc error seen by a load in r
+   logic                           ld_single_ecc_error_lo_r_ns, ld_single_ecc_error_hi_r_ns;   // next-state: error qualified by commit/dma and not killed
+   logic                           ld_single_ecc_error_lo_r_ff, ld_single_ecc_error_hi_r_ff;   // flopped _ns values (tied to 0 when pt.DCCM_ENABLE is not 1)
+   logic                           lsu_double_ecc_error_r_ff;                                  // flopped lsu_double_ecc_error_r
+   logic [pt.DCCM_BITS-1:0]        ld_sec_addr_lo_r_ff, ld_sec_addr_hi_r_ff;                   // lsu_addr_r / end_addr_r captured when ld_single_ecc_error_r is set
+   logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_lo_r_in, store_data_hi_r_in ;                    // M-stage merged store data (only driven in the L2U1_Plus1_0 branch)
+   logic [63:0]                    picm_rd_data_m;                                             // picm_rd_data replicated into both 32-bit halves
+
+   logic                           dccm_wr_bypass_d_m_hi, dccm_wr_bypass_d_r_hi;               // stbuf write word address equals end_addr_m / end_addr_r (in dccm)
+   logic                           dccm_wr_bypass_d_m_lo, dccm_wr_bypass_d_r_lo;               // stbuf write word address equals lsu_addr_m / lsu_addr_r (in dccm)
+   logic                           kill_ecc_corr_lo_r, kill_ecc_corr_hi_r;                     // DMA store in d/m hits the word that would be corrected
+
+    // byte_en flowing down
+   logic [3:0]                     store_byteen_m ,store_byteen_r;                             // byte enables from the store size, before the address shift
+   logic [7:0]                     store_byteen_ext_m, store_byteen_ext_r;                     // byte enables shifted by addr[1:0]: [3:0] lo word, [7:4] hi word
+
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1   // load / DMA return data is formed from the r-stage signals
+      logic [63:0]  lsu_rdata_r, lsu_rdata_corr_r;
+      logic [63:0]  dccm_rdata_r, dccm_rdata_corr_r;
+      logic [63:0]  stbuf_fwddata_r;
+      logic [7:0]   stbuf_fwdbyteen_r;
+      logic [31:0]  stbuf_fwddata_lo_r, stbuf_fwddata_hi_r;
+      logic [3:0]   stbuf_fwdbyteen_lo_r, stbuf_fwdbyteen_hi_r;
+      logic [31:0]  lsu_rdata_lo_r, lsu_rdata_hi_r;
+      logic [63:0]  picm_rd_data_r;
+      logic [63:32] lsu_ld_data_r_nc, lsu_ld_data_corr_r_nc;   // upper halves of the shifted data; assigned but never read
+      logic [2:0]   dma_mem_tag_r;
+      logic         stbuf_fwddata_en;
+
+      assign dccm_dma_rvalid      = lsu_pkt_r.valid & lsu_pkt_r.load & lsu_pkt_r.dma;
+      assign dccm_dma_ecc_error   = lsu_double_ecc_error_r;
+      assign dccm_dma_rtag[2:0]   = dma_mem_tag_r[2:0];
+      assign dccm_dma_rdata[63:0] = ldst_dual_r ? lsu_rdata_corr_r[63:0] : {2{lsu_rdata_corr_r[31:0]}};
+      assign {lsu_ld_data_r_nc[63:32], lsu_ld_data_r[31:0]}           = lsu_rdata_r[63:0] >> 8*lsu_addr_r[1:0];
+      assign {lsu_ld_data_corr_r_nc[63:32], lsu_ld_data_corr_r[31:0]} = lsu_rdata_corr_r[63:0] >> 8*lsu_addr_r[1:0];
+      // picm_rd_data_r[31:0] is the flopped pic read data (picm_rddata_rff); the upper word mirrors it
+      assign picm_rd_data_r[63:32]   = picm_rd_data_r[31:0];
+      assign dccm_rdata_r[63:0]      = {dccm_rdata_hi_r[31:0],dccm_rdata_lo_r[31:0]};
+      assign dccm_rdata_corr_r[63:0] = {sec_data_hi_r[31:0],sec_data_lo_r[31:0]};
+      assign stbuf_fwddata_r[63:0]   = {stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]};
+      assign stbuf_fwdbyteen_r[7:0]  = {stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]};
+      assign stbuf_fwddata_en        = (|stbuf_fwdbyteen_hi_m[3:0]) | (|stbuf_fwdbyteen_lo_m[3:0]) | clk_override;
+      // per byte: stbuf forward data if its byte enable is set, else pic read data if the address is in pic, else dccm data (zero when not in dccm)
+      for (genvar i=0; i<8; i++) begin: GenDMAData
+         assign lsu_rdata_corr_r[(8*i)+7:8*i]  = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] :
+                                                                        (addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] :  ({8{addr_in_dccm_r}} & dccm_rdata_corr_r[(8*i)+7:8*i]));
+
+         assign lsu_rdata_r[(8*i)+7:8*i]       = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] :
+                                                                        (addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] :  ({8{addr_in_dccm_r}} & dccm_rdata_r[(8*i)+7:8*i]));
+      end
+      rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_hi_r_ff    (.*, .din(dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en((lsu_dccm_rden_m & ldst_dual_m) | clk_override));
+      rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_lo_r_ff    (.*, .din(dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_dccm_rden_m | clk_override));
+      rvdffe #(2*pt.DCCM_ECC_WIDTH)  dccm_data_ecc_r_ff  (.*, .din({dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]}),
+                                                              .dout({dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]}),                                  .en(lsu_dccm_rden_m | clk_override));
+      rvdff #(8)                   stbuf_fwdbyteen_ff    (.*, .din({stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}), .dout({stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}), .clk(lsu_c2_r_clk));
+      rvdffe #(64)                 stbuf_fwddata_ff      (.*, .din({stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}),   .dout({stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}),   .en(stbuf_fwddata_en));
+      rvdffe #(32)                 picm_rddata_rff       (.*, .din(picm_rd_data_m[31:0]),                                   .dout(picm_rd_data_r[31:0]),                                   .en(addr_in_pic_m | clk_override));
+      rvdff #(3)                   dma_mem_tag_rff       (.*, .din(dma_mem_tag_m[2:0]),                                     .dout(dma_mem_tag_r[2:0]),                                     .clk(lsu_c1_r_clk));
+      // NOTE(review): lsu_ld_data_m is not driven in this branch - presumably unused by the parent in this configuration; confirm
+   end else begin: L2U_Plus1_0   // load / DMA return data is formed from the m-stage signals
+
+      logic [63:0]  lsu_rdata_m, lsu_rdata_corr_m;
+      logic [63:0]  dccm_rdata_m, dccm_rdata_corr_m;
+      logic [63:0]  stbuf_fwddata_m;
+      logic [7:0]   stbuf_fwdbyteen_m;
+      logic [63:32] lsu_ld_data_m_nc, lsu_ld_data_corr_m_nc;   // upper halves of the shifted data; assigned but never read
+      logic [31:0]  lsu_ld_data_corr_m;
+
+      assign dccm_dma_rvalid      = lsu_pkt_m.valid & lsu_pkt_m.load & lsu_pkt_m.dma;
+      assign dccm_dma_ecc_error   = lsu_double_ecc_error_m;
+      assign dccm_dma_rtag[2:0]   = dma_mem_tag_m[2:0];
+      assign dccm_dma_rdata[63:0] = ldst_dual_m ? lsu_rdata_corr_m[63:0] : {2{lsu_rdata_corr_m[31:0]}};
+      assign {lsu_ld_data_m_nc[63:32], lsu_ld_data_m[31:0]} = lsu_rdata_m[63:0] >> 8*lsu_addr_m[1:0];
+      assign {lsu_ld_data_corr_m_nc[63:32], lsu_ld_data_corr_m[31:0]} = lsu_rdata_corr_m[63:0] >> 8*lsu_addr_m[1:0];
+
+      assign dccm_rdata_m[63:0]      = {dccm_rdata_hi_m[31:0],dccm_rdata_lo_m[31:0]};
+      assign dccm_rdata_corr_m[63:0] = {sec_data_hi_m[31:0],sec_data_lo_m[31:0]};
+      assign stbuf_fwddata_m[63:0]   = {stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]};
+      assign stbuf_fwdbyteen_m[7:0]  = {stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]};
+      // per byte: stbuf forward data if its byte enable is set, else pic read data if the address is in pic, else dccm data (zero when not in dccm)
+      for (genvar i=0; i<8; i++) begin: GenLoop
+         assign lsu_rdata_corr_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] :
+                                                                       (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_corr_m[(8*i)+7:8*i]));
+
+         assign lsu_rdata_m[(8*i)+7:8*i]      = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] :
+                                                                       (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_m[(8*i)+7:8*i]));
+      end
+      // only the corrected load data is flopped into r here. NOTE(review): lsu_ld_data_r, dccm_rdata_*_r and dccm_data_ecc_*_r are not driven in this branch - confirm they are unused
+      rvdffe #(32) lsu_ld_data_corr_rff(.*, .din(lsu_ld_data_corr_m[31:0]), .dout(lsu_ld_data_corr_r[31:0]), .en((lsu_pkt_m.valid & lsu_pkt_m.load & (addr_in_pic_m | addr_in_dccm_m)) | clk_override));
+   end
+
+   assign kill_ecc_corr_lo_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) |
+                               (((lsu_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m);
+   // kill_ecc_corr_{lo,hi}_r: a valid DMA store to dccm in d or m whose start or end word address equals lsu_addr_r (lo) / end_addr_r (hi)
+   assign kill_ecc_corr_hi_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) |
+                               (((lsu_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m);
+   // a single-bit error is reported only for loads and only when the matching lsu_raw_fwd_*_r flag is clear; a double-bit error masks the combined flag
+   assign ld_single_ecc_error_lo_r = lsu_pkt_r.load & single_ecc_error_lo_r & ~lsu_raw_fwd_lo_r;
+   assign ld_single_ecc_error_hi_r = lsu_pkt_r.load & single_ecc_error_hi_r & ~lsu_raw_fwd_hi_r;
+   assign ld_single_ecc_error_r    = (ld_single_ecc_error_lo_r | ld_single_ecc_error_hi_r) & ~lsu_double_ecc_error_r;
+   // next state of the correction request: the load must commit (or be a DMA access) and the correction must not be killed
+   assign ld_single_ecc_error_lo_r_ns = ld_single_ecc_error_lo_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_lo_r;
+   assign ld_single_ecc_error_hi_r_ns = ld_single_ecc_error_hi_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_hi_r;
+   assign ld_single_ecc_error_r_ff = (ld_single_ecc_error_lo_r_ff | ld_single_ecc_error_hi_r_ff) & ~lsu_double_ecc_error_r_ff;
+   // stbuf commits when nothing else wants the dccm (no d-stage read, DMA write or ecc correction), or when the d-stage read's bank bits differ from the stbuf address for both start and end address
+   assign lsu_stbuf_commit_any = stbuf_reqvld_any &
+                                 (~(lsu_dccm_rden_d | lsu_dccm_wren_d | ld_single_ecc_error_r_ff) |
+                                  (lsu_dccm_rden_d & ~((stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == lsu_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]) |
+                                                       (stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == end_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]))));
+
+   // No need to read for aligned word/dword stores since ECC will come by new data completely
+   assign lsu_dccm_rden_d = lsu_pkt_d.valid & (lsu_pkt_d.load | (lsu_pkt_d.store & (~(lsu_pkt_d.word | lsu_pkt_d.dword) | (lsu_addr_d[1:0] != 2'b0)))) & addr_in_dccm_d;
+
+   // DMA will read/write in decode stage
+   assign lsu_dccm_wren_d = dma_dccm_wen;
+
+   // DCCM inputs. Write address/data select order: ecc correction (ld_single_ecc_error_r_ff), then DMA write, then stbuf
+   assign dccm_wren                             = lsu_dccm_wren_d | lsu_stbuf_commit_any | ld_single_ecc_error_r_ff;
+   assign dccm_rden                             = lsu_dccm_rden_d & addr_in_dccm_d;
+   assign dccm_wr_addr_lo[pt.DCCM_BITS-1:0]     = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]) :
+                                                                             lsu_dccm_wren_d ? lsu_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0];
+   assign dccm_wr_addr_hi[pt.DCCM_BITS-1:0]     = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]) :
+                                                                             lsu_dccm_wren_d ? end_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0];
+   assign dccm_rd_addr_lo[pt.DCCM_BITS-1:0]     = lsu_addr_d[pt.DCCM_BITS-1:0];
+   assign dccm_rd_addr_hi[pt.DCCM_BITS-1:0]     = end_addr_d[pt.DCCM_BITS-1:0];
+   assign dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]} :
+                                                                                                               {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) :
+                                                                                (dma_dccm_wen ? {dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0]} :
+                                                                                                {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]});
+   assign dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]} :
+                                                                                                               {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) :
+                                                                                (dma_dccm_wen ? {dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0]} :
+                                                                                                {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]});
+
+   // DCCM outputs. Store byte enables before the address shift: by -> 0001, half -> 0011, word -> 1111 (zero when the packet is not a store)
+   assign store_byteen_m[3:0] = {4{lsu_pkt_m.store}} &
+                                (({4{lsu_pkt_m.by}}    & 4'b0001) |
+                                 ({4{lsu_pkt_m.half}}  & 4'b0011) |
+                                 ({4{lsu_pkt_m.word}}  & 4'b1111));
+
+   assign store_byteen_r[3:0] =  {4{lsu_pkt_r.store}} &
+                                 (({4{lsu_pkt_r.by}}    & 4'b0001) |
+                                  ({4{lsu_pkt_r.half}}  & 4'b0011) |
+                                  ({4{lsu_pkt_r.word}}  & 4'b1111));
+   // shift by the low two address bits so the enables cover the lo word (bits 3:0) and spill into the hi word (bits 7:4)
+   assign store_byteen_ext_m[7:0] = {4'b0,store_byteen_m[3:0]} << lsu_addr_m[1:0];      // The packet in m
+   assign store_byteen_ext_r[7:0] = {4'b0,store_byteen_r[3:0]} << lsu_addr_r[1:0];
+
+   // bypass detect: stbuf write address equals (at word granularity) the m/r-stage start (lo) or end (hi) address, and that access is in dccm
+
+   assign dccm_wr_bypass_d_m_lo   = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m;
+   assign dccm_wr_bypass_d_m_hi   = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m;
+
+   assign dccm_wr_bypass_d_r_lo   = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r;
+   assign dccm_wr_bypass_d_r_hi   = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r;
+
+
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1   // store data is flopped raw into r and merged there
+      logic        dccm_wren_Q;
+      logic [31:0] dccm_wr_data_Q;
+      logic        dccm_wr_bypass_d_m_lo_Q, dccm_wr_bypass_d_m_hi_Q;
+      logic [31:0] store_data_pre_hi_r, store_data_pre_lo_r;
+      // place the 32-bit store data at its byte offset inside the 64-bit {hi,lo} pair
+      assign {store_data_pre_hi_r[31:0], store_data_pre_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0];
+      // per byte: store data where the byte enable is set; otherwise the previous cycle's stbuf write data if it hit this word, else the corrected dccm data
+      for (genvar i=0; i<4; i++) begin
+          assign store_data_lo_r[(8*i)+7:(8*i)]   = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]);
+          assign store_data_hi_r[(8*i)+7:(8*i)]   = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]);
+          // store_datafn_*: same merge, but a stbuf write committing this cycle to the r-stage word is taken ahead of the flopped bypass data
+          assign store_datafn_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo) ? stbuf_data_any[(8*i)+7:(8*i)] :
+                                                                                                                    ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]));
+          assign store_datafn_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi) ? stbuf_data_any[(8*i)+7:(8*i)] :
+                                                                                                                    ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]));
+      end
+      // one-cycle-delayed copies of the stbuf commit, its data and the m-stage bypass hits, used by the merge above
+      rvdff #(1)   dccm_wren_ff       (.*, .din(lsu_stbuf_commit_any),  .dout(dccm_wren_Q),             .clk(lsu_free_c2_clk));   // ECC load errors writing to dccm shouldn't fwd to stores in pipe
+      rvdffe #(32) dccm_wrdata_ff     (.*, .din(stbuf_data_any[31:0]),  .dout(dccm_wr_data_Q[31:0]),    .en(lsu_stbuf_commit_any | clk_override), .clk(clk));
+      rvdff #(1)   dccm_wrbyp_dm_loff (.*, .din(dccm_wr_bypass_d_m_lo), .dout(dccm_wr_bypass_d_m_lo_Q), .clk(lsu_free_c2_clk));
+      rvdff #(1)   dccm_wrbyp_dm_hiff (.*, .din(dccm_wr_bypass_d_m_hi), .dout(dccm_wr_bypass_d_m_hi_Q), .clk(lsu_free_c2_clk));
+      rvdff #(32)  store_data_rff     (.*, .din(store_data_m[31:0]),    .dout(store_data_r[31:0]),      .clk(lsu_store_c1_r_clk));
+
+   end else begin: L2U1_Plus1_0   // store data is merged in m and the merged words are flopped into r
+
+      logic [31:0] store_data_hi_m, store_data_lo_m;
+      logic [63:0] store_data_mask;
+      assign {store_data_hi_m[31:0] , store_data_lo_m[31:0]} = {32'b0,store_data_m[31:0]} << 8*lsu_addr_m[1:0];
+      // per byte in m: store data where the byte enable is set; otherwise stbuf data committing this cycle to the same word, else the corrected dccm data
+      for (genvar i=0; i<4; i++) begin
+         assign store_data_hi_r_in[(8*i)+7:(8*i)]  = store_byteen_ext_m[i+4] ? store_data_hi_m[(8*i)+7:(8*i)] :
+                                                                               ((lsu_stbuf_commit_any &  dccm_wr_bypass_d_m_hi)   ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_hi_m[(8*i)+7:(8*i)]);
+         assign store_data_lo_r_in[(8*i)+7:(8*i)]  = store_byteen_ext_m[i]   ? store_data_lo_m[(8*i)+7:(8*i)] :
+                                                                               ((lsu_stbuf_commit_any &  dccm_wr_bypass_d_m_lo) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_lo_m[(8*i)+7:(8*i)]);
+         // in r: bytes this store does not write take stbuf data committing this cycle to the same word
+         assign store_datafn_lo_r[(8*i)+7:(8*i)]   = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo & ~store_byteen_ext_r[i])   ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)];
+         assign store_datafn_hi_r[(8*i)+7:(8*i)]   = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi & ~store_byteen_ext_r[i+4]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_hi_r[(8*i)+7:(8*i)];
+      end // for (genvar i=0; i<4; i++)
+      // store_data_r: merged {hi,lo} data shifted back down by the byte offset and masked to the bytes the store writes (only mask bits 31:0 are driven and used)
+      for (genvar i=0; i<4; i++) begin
+         assign store_data_mask[(8*i)+7:(8*i)] = {8{store_byteen_r[i]}};
+      end
+      assign store_data_r[31:0]      = 32'({store_data_hi_r[31:0],store_data_lo_r[31:0]} >> 8*lsu_addr_r[1:0]) & store_data_mask[31:0];
+      // the hi word is captured only for valid stores with ldst_dual_m set (or under clk_override)
+      rvdffe #(pt.DCCM_DATA_WIDTH) store_data_hi_rff (.*, .din(store_data_hi_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en((ldst_dual_m & lsu_pkt_m.valid & lsu_pkt_m.store) | clk_override), .clk(clk));
+      rvdff  #(pt.DCCM_DATA_WIDTH) store_data_lo_rff (.*, .din(store_data_lo_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .clk(lsu_store_c1_r_clk));
+
+   end
+
+   assign dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]   = dccm_rd_data_lo[pt.DCCM_DATA_WIDTH-1:0]; // for ld choose dccm_out
+   assign dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]   = dccm_rd_data_hi[pt.DCCM_DATA_WIDTH-1:0]; // for ld this is used for ecc
+   // the bits of each bank's read data above DCCM_DATA_WIDTH are passed out as the ecc bits
+   assign dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH];
+   assign dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH];
+
+   // PIC signals. PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits
+   assign picm_wren          = (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r) | dma_pic_wen;
+   assign picm_rden          = lsu_pkt_d.valid & lsu_pkt_d.load  & addr_in_pic_d;
+   assign picm_mken          = lsu_pkt_d.valid & lsu_pkt_d.store & addr_in_pic_d;  // Get the mask for stores
+   assign picm_rdaddr[31:0]  = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_d[pt.PIC_BITS-1:0]};
+   // for the write address and data, a DMA pic write (dma_pic_wen) is selected ahead of the r-stage store
+   assign picm_wraddr[31:0]  = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},(dma_pic_wen ? dma_mem_addr[pt.PIC_BITS-1:0] : lsu_addr_r[pt.PIC_BITS-1:0])};
+
+   assign picm_wr_data[31:0] = dma_pic_wen ? dma_mem_wdata[31:0] : store_datafn_lo_r[31:0];
+   // the 32-bit pic read data is duplicated into both words of picm_rd_data_m; the low word is also sent out as picm_mask_data_m
+   assign picm_mask_data_m[31:0] = picm_rd_data_m[31:0];
+   assign picm_rd_data_m[63:0]   = {picm_rd_data[31:0],picm_rd_data[31:0]};
+
+   if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
+      rvdff #(1) dccm_rden_mff (.*, .din(lsu_dccm_rden_d), .dout(lsu_dccm_rden_m), .clk(lsu_c2_m_clk));
+      rvdff #(1) dccm_rden_rff (.*, .din(lsu_dccm_rden_m), .dout(lsu_dccm_rden_r), .clk(lsu_c2_r_clk));
+
+      // ECC correction flops since dccm write happens next cycle
+      // We are writing to dccm in r+1 for ecc correction since fast_int needs to be blocked in decode - 1. We can probably write in r for plus0 configuration since we know ecc error in M.
+      // In that case these (_ff) flops are needed only in plus1 configuration
+      rvdff #(1) ld_double_ecc_error_rff    (.*, .din(lsu_double_ecc_error_r),   .dout(lsu_double_ecc_error_r_ff),   .clk(lsu_free_c2_clk));
+      rvdff #(1) ld_single_ecc_error_hi_rff (.*, .din(ld_single_ecc_error_hi_r_ns), .dout(ld_single_ecc_error_hi_r_ff), .clk(lsu_free_c2_clk));
+      rvdff #(1) ld_single_ecc_error_lo_rff (.*, .din(ld_single_ecc_error_lo_r_ns), .dout(ld_single_ecc_error_lo_r_ff), .clk(lsu_free_c2_clk));
+      rvdffe #(pt.DCCM_BITS) ld_sec_addr_hi_rff (.*, .din(end_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk));
+      rvdffe #(pt.DCCM_BITS) ld_sec_addr_lo_rff (.*, .din(lsu_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk));
+
+   end else begin: Gen_dccm_disable
+      assign lsu_dccm_rden_m = '0;
+      assign lsu_dccm_rden_r = '0;
+      // with pt.DCCM_ENABLE not 1 the ecc correction state is tied off, so ld_single_ecc_error_r_ff stays 0
+      assign lsu_double_ecc_error_r_ff = 1'b0;
+      assign ld_single_ecc_error_hi_r_ff = 1'b0;
+      assign ld_single_ecc_error_lo_r_ff = 1'b0;
+      assign ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] = '0;
+      assign ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] = '0;
+   end
+
+`ifdef RV_ASSERT_ON
+   // Compiled only when RV_ASSERT_ON is defined
+   // Load single ECC error correction implies commit/dma
+   property ld_single_ecc_error_commit;
+      @(posedge clk) disable iff(~rst_l) (ld_single_ecc_error_r_ff & dccm_wren) |-> ($past(lsu_commit_r | lsu_pkt_r.dma));
+   endproperty
+   assert_ld_single_ecc_error_commit: assert property (ld_single_ecc_error_commit) else
+     $display("No commit or DMA but ECC correction happened");
+   // i.e. whenever the correction write is active, lsu_commit_r or lsu_pkt_r.dma must have been set on the previous clk edge
+
+`endif
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_dccm_mem.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_dccm_mem.sv
new file mode 100644
index 0000000..4be942d
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_dccm_mem.sv
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: DCCM for LSU pipe
+// Comments: Single ported memory
+//
+//
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+//
+//********************************************************************************
+
+
+`define eb1_LOCAL_DCCM_RAM_TEST_PORTS    .TEST1(dccm_ext_in_pkt[i].TEST1),                      \
+                                     .RME(dccm_ext_in_pkt[i].RME),                      \
+                                     .RM(dccm_ext_in_pkt[i].RM),                        \
+                                     .LS(dccm_ext_in_pkt[i].LS),                        \
+                                     .DS(dccm_ext_in_pkt[i].DS),                        \
+                                     .SD(dccm_ext_in_pkt[i].SD),                        \
+                                     .TEST_RNM(dccm_ext_in_pkt[i].TEST_RNM),            \
+                                     .BC1(dccm_ext_in_pkt[i].BC1),                      \
+                                     .BC2(dccm_ext_in_pkt[i].BC2),                      \
+
+
+
+module eb1_lsu_dccm_mem   // Banked DCCM storage: per-bank enable/address/write-data steering around one RAM instance per bank
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+   input logic         vccd1,                                           // power pin, forwarded to the SRAM macro only when USE_POWER_PINS is defined
+   input logic         vssd1,                                           // power pin, forwarded to the SRAM macro only when USE_POWER_PINS is defined
+   input logic         clk,                                             // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic         active_clk,                                      // Clock only while core active.  Through two clock headers. For flops without second clock header built in.
+   input logic         rst_l,                                           // reset, active low
+   input logic         clk_override,                                    // Override non-functional clock gating
+
+   input logic         dccm_wren,                                       // write enable
+   input logic         dccm_rden,                                       // read enable
+   input logic [pt.DCCM_BITS-1:0]  dccm_wr_addr_lo,                     // write address
+   input logic [pt.DCCM_BITS-1:0]  dccm_wr_addr_hi,                     // write address
+   input logic [pt.DCCM_BITS-1:0]  dccm_rd_addr_lo,                     // read address
+   input logic [pt.DCCM_BITS-1:0]  dccm_rd_addr_hi,                     // read address for the upper bank in case of a misaligned access
+   input logic [pt.DCCM_FDATA_WIDTH-1:0]  dccm_wr_data_lo,              // write data
+   input logic [pt.DCCM_FDATA_WIDTH-1:0]  dccm_wr_data_hi,              // write data
+   input eb1_dccm_ext_in_pkt_t  [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt,    // the dccm packet from the soc
+
+   output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo,              // read data from the lo bank
+   output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi,              // read data from the hi bank
+
+   input  logic         scan_mode
+);
+
+   // Address split used below: [DCCM_WIDTH_BITS-1:0] byte offset, then pt.DCCM_BANK_BITS of bank select, then DCCM_INDEX_BITS of row index.
+   localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH);
+   localparam DCCM_INDEX_BITS = (pt.DCCM_BITS - pt.DCCM_BANK_BITS - DCCM_WIDTH_BITS);
+   localparam DCCM_INDEX_DEPTH = ((pt.DCCM_SIZE)*1024)/((pt.DCCM_BYTE_WIDTH)*(pt.DCCM_NUM_BANKS));  // Depth of memory bank
+
+   logic [pt.DCCM_NUM_BANKS-1:0]                                        wren_bank;
+   logic [pt.DCCM_NUM_BANKS-1:0]                                        rden_bank;
+   logic [pt.DCCM_NUM_BANKS-1:0] [pt.DCCM_BITS-1:(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)] addr_bank;
+   logic [pt.DCCM_BITS-1:(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)]           rd_addr_even, rd_addr_odd;
+   logic                                                                rd_unaligned, wr_unaligned;
+   logic [pt.DCCM_NUM_BANKS-1:0] [pt.DCCM_FDATA_WIDTH-1:0]              dccm_bank_dout;
+   logic [pt.DCCM_FDATA_WIDTH-1:0]                                      wrdata;
+
+   logic [pt.DCCM_NUM_BANKS-1:0][pt.DCCM_FDATA_WIDTH-1:0]               wr_data_bank;
+
+   logic [(DCCM_WIDTH_BITS+pt.DCCM_BANK_BITS-1):DCCM_WIDTH_BITS]        dccm_rd_addr_lo_q;
+   logic [(DCCM_WIDTH_BITS+pt.DCCM_BANK_BITS-1):DCCM_WIDTH_BITS]        dccm_rd_addr_hi_q;
+
+   logic [pt.DCCM_NUM_BANKS-1:0]            dccm_clken;
+
+   assign rd_unaligned = (dccm_rd_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] != dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]);
+   assign wr_unaligned = (dccm_wr_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] != dccm_wr_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]);
+
+   // Align the read data: pick each bank output with the bank-select bits registered by rd_addr_lo_ff / rd_addr_hi_ff
+   assign dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:0]  = dccm_bank_dout[dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]][pt.DCCM_FDATA_WIDTH-1:0];
+   assign dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:0]  = dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]][pt.DCCM_FDATA_WIDTH-1:0];
+
+
+   // One RAM per bank: pt.DCCM_NUM_BANKS banks, DCCM_INDEX_DEPTH rows each, pt.DCCM_FDATA_WIDTH-bit bank data buses
+   for (genvar i=0; i<pt.DCCM_NUM_BANKS; i++) begin: mem_bank
+      assign  wren_bank[i]        = dccm_wren & ((dccm_wr_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == i) | (dccm_wr_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == i));
+      assign  rden_bank[i]        = dccm_rden & ((dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == i) | (dccm_rd_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == i));
+      assign  addr_bank[i][(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] = wren_bank[i] ? (((dccm_wr_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == i) & wr_unaligned) ?
+                                                                                                        dccm_wr_addr_hi[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] :
+                                                                                                        dccm_wr_addr_lo[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS])  :
+                                                                                                  (((dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == i) & rd_unaligned) ?
+                                                                                                        dccm_rd_addr_hi[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] :
+                                                                                                        dccm_rd_addr_lo[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS]);
+
+      assign wr_data_bank[i]     = ((dccm_wr_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == i) & wr_unaligned) ? dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] : dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0];
+
+      // clock gating section
+      assign  dccm_clken[i] = (wren_bank[i] | rden_bank[i] | clk_override) ;
+      // end clock gating section
+
+`ifdef VERILATOR
+      // The behavioural eb1_ram instance is kept commented out below; the Sky130 SRAM macro is instantiated in its place.
+       /* eb1_ram #(DCCM_INDEX_DEPTH,39)  ram (
+                                  // Primary ports
+                                  .ME(dccm_clken[i]),
+                                  .CLK(clk),
+                                  .WE(wren_bank[i]),
+                                  .ADR(addr_bank[i]),
+                                  .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                  .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                  .ROP ( ),
+                                  // These are used by SoC
+                                  `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                  .*
+                                  );
+              */
+      // NOTE(review): bank buses are pt.DCCM_FDATA_WIDTH bits / DCCM_INDEX_BITS address bits; macro name suggests 32x256 - confirm ECC and upper index bits are not silently dropped.
+      sky130_sram_1kbyte_1rw1r_32x256_8 sram(
+         `ifdef USE_POWER_PINS
+         .vccd1(vccd1),
+         .vssd1(vssd1),
+         `endif
+         .clk0(clk),
+         .csb0(~dccm_clken[i]),          // port 0 select, driven with the inverted bank enable
+         .web0(~wren_bank[i]),           // port 0 write control, driven with the inverted bank write enable
+         .wmask0(4'hf),                  // all write-mask bits set
+         .addr0(addr_bank[i]),
+         .din0(wr_data_bank[i]),
+         .dout0(dccm_bank_dout[i]),
+         .clk1(clk),
+         .csb1(1'b1),                    // port 1 is unused: select held constant, output left open
+         .addr1('0),                     // unsized zero so the tie-off matches whatever address width the macro declares
+         .dout1()
+      );
+
+`else
+
+      if (DCCM_INDEX_DEPTH == 32768) begin : dccm
+         ram_32768x39  dccm_bank (
+                                  // Primary ports
+                                  .ME(dccm_clken[i]),
+                                  .CLK(clk),
+                                  .WE(wren_bank[i]),
+                                  .ADR(addr_bank[i]),
+                                  .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                  .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                  .ROP ( ),
+                                  // These are used by SoC
+                                  `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                  .*
+                                  );
+      end
+      else if (DCCM_INDEX_DEPTH == 16384) begin : dccm
+         ram_16384x39  dccm_bank (
+                                  // Primary ports
+                                  .ME(dccm_clken[i]),
+                                  .CLK(clk),
+                                  .WE(wren_bank[i]),
+                                  .ADR(addr_bank[i]),
+                                  .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                  .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                  .ROP ( ),
+                                  // These are used by SoC
+                                  `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                  .*
+                                  );
+      end
+      else if (DCCM_INDEX_DEPTH == 8192) begin : dccm
+         ram_8192x39  dccm_bank (
+                                 // Primary ports
+                                 .ME(dccm_clken[i]),
+                                 .CLK(clk),
+                                 .WE(wren_bank[i]),
+                                 .ADR(addr_bank[i]),
+                                 .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .ROP ( ),
+                                 // These are used by SoC
+                                 `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                 .*
+                                 );
+      end
+      else if (DCCM_INDEX_DEPTH == 4096) begin : dccm
+         ram_4096x39  dccm_bank (
+                                 // Primary ports
+                                 .ME(dccm_clken[i]),
+                                 .CLK(clk),
+                                 .WE(wren_bank[i]),
+                                 .ADR(addr_bank[i]),
+                                 .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .ROP ( ),
+                                 // These are used by SoC
+                                 `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                 .*
+                                 );
+      end
+      else if (DCCM_INDEX_DEPTH == 3072) begin : dccm
+         ram_3072x39  dccm_bank (
+                                 // Primary ports
+                                 .ME(dccm_clken[i]),
+                                 .CLK(clk),
+                                 .WE(wren_bank[i]),
+                                 .ADR(addr_bank[i]),
+                                 .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .ROP ( ),
+                                 // These are used by SoC
+                                 `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                 .*
+                                 );
+      end
+      else if (DCCM_INDEX_DEPTH == 2048) begin : dccm
+         ram_2048x39  dccm_bank (
+                                 // Primary ports
+                                 .ME(dccm_clken[i]),
+                                 .CLK(clk),
+                                 .WE(wren_bank[i]),
+                                 .ADR(addr_bank[i]),
+                                 .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .ROP ( ),
+                                 // These are used by SoC
+                                 `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                 .*
+                                 );
+      end
+      else if (DCCM_INDEX_DEPTH == 1024) begin : dccm   // Sky130 SRAM macro used here; the ram_1024x39 instance is kept commented out for reference
+         /*ram_1024x39  dccm_bank (
+                                 // Primary ports
+                                 .ME(dccm_clken[i]),
+                                 .CLK(clk),
+                                 .WE(wren_bank[i]),
+                                 .ADR(addr_bank[i]),
+                                 .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                 .ROP ( ),
+                                 // These are used by SoC
+                                 `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                 .*
+                                 );
+                                 */
+         sky130_sram_1kbyte_1rw1r_32x256_8 sram(   // NOTE(review): same data/address width concern as the VERILATOR instance above - confirm against the macro
+            `ifdef USE_POWER_PINS
+            .vccd1(vccd1),
+            .vssd1(vssd1),
+            `endif
+            .clk0(clk),
+            .csb0(~dccm_clken[i]),       // port 0 select, driven with the inverted bank enable
+            .web0(~wren_bank[i]),        // port 0 write control, driven with the inverted bank write enable
+            .wmask0(4'hf),               // all write-mask bits set
+            .addr0(addr_bank[i]),
+            .din0(wr_data_bank[i]),
+            .dout0(dccm_bank_dout[i]),
+            .clk1(clk),
+            .csb1(1'b1),                 // port 1 is unused: select held constant, output left open
+            .addr1('0),                  // unsized zero so the tie-off matches whatever address width the macro declares
+            .dout1()
+         );
+      end
+      else if (DCCM_INDEX_DEPTH == 512) begin : dccm
+         ram_512x39  dccm_bank (
+                                // Primary ports
+                                .ME(dccm_clken[i]),
+                                .CLK(clk),
+                                .WE(wren_bank[i]),
+                                .ADR(addr_bank[i]),
+                                .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                .ROP ( ),
+                                // These are used by SoC
+                                `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                .*
+                                );
+      end
+      else if (DCCM_INDEX_DEPTH == 256) begin : dccm
+         ram_256x39  dccm_bank (
+                                // Primary ports
+                                .ME(dccm_clken[i]),
+                                .CLK(clk),
+                                .WE(wren_bank[i]),
+                                .ADR(addr_bank[i]),
+                                .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                .ROP ( ),
+                                // These are used by SoC
+                                `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                .*
+                                );
+      end
+      else if (DCCM_INDEX_DEPTH == 128) begin : dccm
+         ram_128x39  dccm_bank (
+                                // Primary ports
+                                .ME(dccm_clken[i]),
+                                .CLK(clk),
+                                .WE(wren_bank[i]),
+                                .ADR(addr_bank[i]),
+                                .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
+                                .ROP ( ),
+                                // These are used by SoC
+                                `eb1_LOCAL_DCCM_RAM_TEST_PORTS
+                                .*
+                                );
+      end
+`endif
+
+   end : mem_bank
+
+   // Flops: register the bank-select bits of both read addresses for the read-data mux above
+   rvdff  #(pt.DCCM_BANK_BITS) rd_addr_lo_ff (.*, .din(dccm_rd_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .dout(dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .clk(active_clk));
+   rvdff  #(pt.DCCM_BANK_BITS) rd_addr_hi_ff (.*, .din(dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .dout(dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .clk(active_clk));
+
+`undef eb1_LOCAL_DCCM_RAM_TEST_PORTS
+
+endmodule // eb1_lsu_dccm_mem
+
+
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_ecc.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_ecc.sv
new file mode 100644
index 0000000..c91a01f
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_ecc.sv
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: ECC decode (detect/correct) and encode for the load store unit DCCM path
+// Comments:
+//
+//
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+//
+//********************************************************************************
+module eb1_lsu_ecc   // Decodes ECC on DCCM read data (hi/lo halves) and encodes ECC for DCCM write data
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+(
+   input logic                           clk,                // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+   input logic                           lsu_c2_r_clk,       // clock
+   input logic                           clk_override,       // Override non-functional clock gating
+   input logic                           rst_l,              // reset, active low
+   input logic                           scan_mode,          // scan mode
+
+   input eb1_lsu_pkt_t                  lsu_pkt_m,          // packet in m
+   input eb1_lsu_pkt_t                  lsu_pkt_r,          // packet in r
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  stbuf_data_any,
+
+   input logic                           dec_tlu_core_ecc_disable,  // disables the ecc computation and error flagging
+
+   input logic                           lsu_dccm_rden_r,          // dccm rden
+   input logic                           addr_in_dccm_r,           // address in dccm
+   input logic  [pt.DCCM_BITS-1:0]       lsu_addr_r,               // start address
+   input logic  [pt.DCCM_BITS-1:0]       end_addr_r,               // end address
+   input logic  [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r,          // data from the dccm
+   input logic  [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r,          // data from the dccm
+   input logic  [pt.DCCM_ECC_WIDTH-1:0]  dccm_data_ecc_hi_r,       // data from the dccm + ecc
+   input logic  [pt.DCCM_ECC_WIDTH-1:0]  dccm_data_ecc_lo_r,       // data from the dccm + ecc
+   output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r,            // corrected dccm data R-stage
+   output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r,            // corrected dccm data R-stage
+   output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff,         // corrected dccm data R+1 stage
+   output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff,         // corrected dccm data R+1 stage
+
+   input logic                           ld_single_ecc_error_r,     // ld has a single ecc error
+   input logic                           ld_single_ecc_error_r_ff,  // ld has a single ecc error
+   input logic                           lsu_dccm_rden_m,           // dccm rden
+   input logic                           addr_in_dccm_m,            // address in dccm
+   input logic  [pt.DCCM_BITS-1:0]       lsu_addr_m,                // start address
+   input logic  [pt.DCCM_BITS-1:0]       end_addr_m,                // end address
+   input logic  [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m,           // raw data from mem
+   input logic  [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m,           // raw data from mem
+   input logic  [pt.DCCM_ECC_WIDTH-1:0]  dccm_data_ecc_hi_m,        // ecc read out from mem
+   input logic  [pt.DCCM_ECC_WIDTH-1:0]  dccm_data_ecc_lo_m,        // ecc read out from mem
+   output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m,             // corrected dccm data M-stage
+   output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m,             // corrected dccm data M-stage
+
+   input logic                           dma_dccm_wen,              // Perform DMA writes only for word/dword
+   input logic  [31:0]                   dma_dccm_wdata_lo,         // Shifted dma data to lower bits to make it consistent to lsu stores
+   input logic  [31:0]                   dma_dccm_wdata_hi,         // Shifted dma data to lower bits to make it consistent to lsu stores
+   output logic [pt.DCCM_ECC_WIDTH-1:0]  dma_dccm_wdata_ecc_hi,     // ECC bits for the DMA wdata
+   output logic [pt.DCCM_ECC_WIDTH-1:0]  dma_dccm_wdata_ecc_lo,     // ECC bits for the DMA wdata
+
+   output logic [pt.DCCM_ECC_WIDTH-1:0]  stbuf_ecc_any,             // Encoded data with ECC bits
+   output logic [pt.DCCM_ECC_WIDTH-1:0]  sec_data_ecc_hi_r_ff,      // Encoded data with ECC bits
+   output logic [pt.DCCM_ECC_WIDTH-1:0]  sec_data_ecc_lo_r_ff,      // Encoded data with ECC bits
+
+   output logic                          single_ecc_error_hi_r,                   // sec detected
+   output logic                          single_ecc_error_lo_r,                   // sec detected on lower dccm bank
+   output logic                          lsu_single_ecc_error_r,                  // or of the 2
+   output logic                          lsu_double_ecc_error_r,                   // double error detected
+
+   output logic                          lsu_single_ecc_error_m,                  // or of the 2
+   output logic                          lsu_double_ecc_error_m                   // double error detected
+
+ );
+
+   logic                           is_ldst_r;
+   logic                           is_ldst_hi_any, is_ldst_lo_any;
+   logic [pt.DCCM_DATA_WIDTH-1:0]  dccm_wdata_hi_any, dccm_wdata_lo_any;
+   logic [pt.DCCM_ECC_WIDTH-1:0]   dccm_wdata_ecc_hi_any, dccm_wdata_ecc_lo_any;
+   logic [pt.DCCM_DATA_WIDTH-1:0]  dccm_rdata_hi_any, dccm_rdata_lo_any;
+   logic [pt.DCCM_ECC_WIDTH-1:0]   dccm_data_ecc_hi_any, dccm_data_ecc_lo_any;
+   logic [pt.DCCM_DATA_WIDTH-1:0]  sec_data_hi_any, sec_data_lo_any;
+   logic                           single_ecc_error_hi_any, single_ecc_error_lo_any;
+   logic                           double_ecc_error_hi_any, double_ecc_error_lo_any;
+
+   logic                           double_ecc_error_hi_m, double_ecc_error_lo_m;
+   logic                           double_ecc_error_hi_r, double_ecc_error_lo_r;
+   logic [6:0]                     ecc_out_hi_nc, ecc_out_lo_nc;   // decoder ecc_out ports land here; not read anywhere in this module
+
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1   // decoders are fed from the R-stage data/ecc inputs
+      logic        ldst_dual_r;
+      logic        is_ldst_hi_r, is_ldst_lo_r;
+
+      assign ldst_dual_r                                 = (lsu_addr_r[2] != end_addr_r[2]);
+      assign is_ldst_r                                   = lsu_pkt_r.valid & (lsu_pkt_r.load | lsu_pkt_r.store) & addr_in_dccm_r & lsu_dccm_rden_r;
+      assign is_ldst_lo_r                                = is_ldst_r & ~dec_tlu_core_ecc_disable;
+      assign is_ldst_hi_r                                = is_ldst_r & ldst_dual_r & ~dec_tlu_core_ecc_disable;   // Always check the ECC Hi/Lo for DMA since we don't align for DMA
+
+      assign is_ldst_hi_any                              = is_ldst_hi_r;
+      assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]   = dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0];
+      assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0];
+      assign is_ldst_lo_any                              = is_ldst_lo_r;
+      assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]   = dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0];
+      assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0];
+
+      assign sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]       = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
+      assign single_ecc_error_hi_r                       = single_ecc_error_hi_any;
+      assign double_ecc_error_hi_r                       = double_ecc_error_hi_any;
+      assign sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]       = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
+      assign single_ecc_error_lo_r                       = single_ecc_error_lo_any;
+      assign double_ecc_error_lo_r                       = double_ecc_error_lo_any;
+
+      assign lsu_single_ecc_error_r                      = single_ecc_error_hi_r | single_ecc_error_lo_r;
+      assign lsu_double_ecc_error_r                      = double_ecc_error_hi_r | double_ecc_error_lo_r;
+
+      assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]       = '0;   // M-stage outputs have no source in this branch: tie them off instead of leaving them undriven
+      assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]       = '0;
+      assign lsu_single_ecc_error_m                      = 1'b0;
+      assign lsu_double_ecc_error_m                      = 1'b0;
+   end else begin: L2U_Plus1_0   // decoders are fed from the M-stage data/ecc inputs; results are flopped into R below
+      logic        ldst_dual_m;
+      logic        is_ldst_m;
+      logic        is_ldst_hi_m, is_ldst_lo_m;
+
+      assign ldst_dual_m                                 = (lsu_addr_m[2] != end_addr_m[2]);
+      assign is_ldst_m                                   = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & addr_in_dccm_m & lsu_dccm_rden_m;
+      assign is_ldst_lo_m                                = is_ldst_m & ~dec_tlu_core_ecc_disable;
+      assign is_ldst_hi_m                                = is_ldst_m & (ldst_dual_m | lsu_pkt_m.dma) & ~dec_tlu_core_ecc_disable;   // Always check the ECC Hi/Lo for DMA since we don't align for DMA
+
+      assign is_ldst_hi_any                              = is_ldst_hi_m;
+      assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]   = dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0];
+      assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0];
+      assign is_ldst_lo_any                              = is_ldst_lo_m;
+      assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]   = dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0];
+      assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0];
+
+      assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]       = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
+      assign double_ecc_error_hi_m                       = double_ecc_error_hi_any;
+      assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]       = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
+      assign double_ecc_error_lo_m                       = double_ecc_error_lo_any;
+
+      assign lsu_single_ecc_error_m                      = single_ecc_error_hi_any | single_ecc_error_lo_any;
+      assign lsu_double_ecc_error_m                      = double_ecc_error_hi_m   | double_ecc_error_lo_m;
+
+      // Flops
+      rvdff  #(1) lsu_single_ecc_err_r    (.din(lsu_single_ecc_error_m), .dout(lsu_single_ecc_error_r), .clk(lsu_c2_r_clk), .*);
+      rvdff  #(1) lsu_double_ecc_err_r    (.din(lsu_double_ecc_error_m), .dout(lsu_double_ecc_error_r), .clk(lsu_c2_r_clk), .*);
+      rvdff  #(.WIDTH(1)) ldst_sec_lo_rff (.din(single_ecc_error_lo_any),  .dout(single_ecc_error_lo_r),  .clk(lsu_c2_r_clk), .*);
+      rvdff  #(.WIDTH(1)) ldst_sec_hi_rff (.din(single_ecc_error_hi_any),  .dout(single_ecc_error_hi_r),  .clk(lsu_c2_r_clk), .*);
+      rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rff (.din(sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*);
+      rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rff (.din(sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*);
+   end
+
+   // Logic for ECC generation during write: corrected R+1 data wins, then DMA write data, then store-buffer data (lo) / zero (hi)
+   assign dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]);
+   assign dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0] : 32'h0);
+
+   assign sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0]  = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
+   assign sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0]  = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
+   assign stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0]         = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
+   assign dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
+   assign dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
+
+   // Instantiate ECC blocks
+   if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
+
+      //Detect/Repair for Hi
+      rvecc_decode lsu_ecc_decode_hi (
+         // Inputs
+         .en(is_ldst_hi_any),
+         .sed_ded (1'b0),    // 1 : means only detection
+         .din(dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]),
+         .ecc_in(dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]),
+         // Outputs
+         .dout(sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]),
+         .ecc_out (ecc_out_hi_nc[6:0]),
+         .single_ecc_error(single_ecc_error_hi_any),
+         .double_ecc_error(double_ecc_error_hi_any),
+         .*
+      );
+
+      //Detect/Repair for Lo
+      rvecc_decode lsu_ecc_decode_lo (
+         // Inputs
+         .en(is_ldst_lo_any),
+         .sed_ded (1'b0),    // 1 : means only detection
+         .din(dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] ),
+         .ecc_in(dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]),
+         // Outputs
+         .dout(sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]),
+         .ecc_out (ecc_out_lo_nc[6:0]),
+         .single_ecc_error(single_ecc_error_lo_any),
+         .double_ecc_error(double_ecc_error_lo_any),
+         .*
+      );
+
+      rvecc_encode lsu_ecc_encode_hi (
+         //Inputs
+         .din(dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]),
+         //Outputs
+         .ecc_out(dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]),
+         .*
+      );
+      rvecc_encode lsu_ecc_encode_lo (
+         //Inputs
+         .din(dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]),
+         //Outputs
+         .ecc_out(dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]),
+         .*
+      );
+   end else begin: Gen_dccm_disable // block: Gen_dccm_enable
+      assign sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
+      assign sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
+      assign single_ecc_error_hi_any = '0;
+      assign double_ecc_error_hi_any = '0;
+      assign single_ecc_error_lo_any = '0;
+      assign double_ecc_error_lo_any = '0;
+      assign dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = '0;   // no encoder in this branch: tie off so the ECC outputs fed from these nets are driven
+      assign dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = '0;
+   end
+
+   rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rplus1ff (.din(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*);
+   rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rplus1ff (.din(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*);
+
+endmodule // eb1_lsu_ecc
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_lsc_ctl.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_lsc_ctl.sv
new file mode 100644
index 0000000..7b0517b
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_lsc_ctl.sv
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: LSU control
+// Comments:
+//
+//
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+//
+//********************************************************************************
+module eb1_lsu_lsc_ctl                                   // LSU control: address generation, D->M->R packet/address pipeline, load result formatting, exception packet
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+   input logic                rst_l,                     // reset, active low
+   input logic                clk_override,              // Override non-functional clock gating
+   input logic                clk,                       // Clock only while core active.  Through one clock header.  For flops with    second clock header built in.  Connected to ACTIVE_L2CLK.
+
+   // clocks per pipe
+   input logic                lsu_c1_m_clk,              // M-stage data flops (packet body, address, region/fault flags)
+   input logic                lsu_c1_r_clk,              // R-stage data flops (packet body, address, region/fault flags)
+   input logic                lsu_c2_m_clk,              // M-stage packet valid flop
+   input logic                lsu_c2_r_clk,              // R-stage packet valid flop (and error valid/fir error flops when LOAD_TO_USE_PLUS1 == 0)
+   input logic                lsu_store_c1_m_clk,        // M-stage store data flop
+
+   input logic [31:0]         lsu_ld_data_r,             // Load data R-stage
+   input logic [31:0]         lsu_ld_data_corr_r,        // ECC corrected data R-stage
+   input logic                lsu_single_ecc_error_r,    // ECC single bit error R-stage
+   input logic                lsu_double_ecc_error_r,    // ECC double bit error R-stage
+
+   input logic [31:0]         lsu_ld_data_m,             // Load data M-stage
+   input logic                lsu_single_ecc_error_m,    // ECC single bit error M-stage
+   input logic                lsu_double_ecc_error_m,    // ECC double bit error M-stage
+
+   input logic                flush_m_up,                // Flush M and D stage
+   input logic                flush_r,                   // Flush R-stage
+   input logic                ldst_dual_d,               // load/store is unaligned at 32 bit boundary D-stage
+   input logic                ldst_dual_m,               // load/store is unaligned at 32 bit boundary M-stage
+   input logic                ldst_dual_r,               // load/store is unaligned at 32 bit boundary R-stage
+
+   input logic [31:0]         exu_lsu_rs1_d,             // address
+   input logic [31:0]         exu_lsu_rs2_d,             // store data
+
+   input eb1_lsu_pkt_t       lsu_p,                     // lsu control packet
+   input logic                dec_lsu_valid_raw_d,       // Raw valid for address computation
+   input logic [11:0]         dec_lsu_offset_d,          // 12b offset for load/store addresses
+
+   input  logic [31:0]        picm_mask_data_m,          // PIC data M-stage
+   input  logic [31:0]        bus_read_data_m,           // the bus return data
+   output logic [31:0]        lsu_result_m,              // lsu load data (built from R-stage signals when LOAD_TO_USE_PLUS1 == 1)
+   output logic [31:0]        lsu_result_corr_r,         // This is the ECC corrected data going to RF
+   // lsu address down the pipe
+   output logic [31:0]        lsu_addr_d,
+   output logic [31:0]        lsu_addr_m,
+   output logic [31:0]        lsu_addr_r,
+   // lsu end address down the pipe - needed to check unaligned
+   output logic [31:0]        end_addr_d,
+   output logic [31:0]        end_addr_m,
+   output logic [31:0]        end_addr_r,
+   // store data down the pipe
+   output logic [31:0]        store_data_m,
+
+   input  logic [31:0]         dec_tlu_mrac_ff,          // CSR for memory region control
+   output logic                lsu_exc_m,                // Access or misaligned fault
+   output logic                is_sideeffects_m,         // is side-effects space
+   output logic                lsu_commit_r,             // lsu instruction in r commits
+   output logic                lsu_single_ecc_error_incr,// LSU inc SB error counter
+   output eb1_lsu_error_pkt_t lsu_error_pkt_r,          // lsu exception packet
+
+   output logic [31:1]         lsu_fir_addr,             // fast interrupt address
+   output logic [1:0]          lsu_fir_error,            // Error during fast interrupt lookup
+
+   // address in dccm/pic/external per pipe stage
+   output logic               addr_in_dccm_d,
+   output logic               addr_in_dccm_m,
+   output logic               addr_in_dccm_r,
+
+   output logic               addr_in_pic_d,
+   output logic               addr_in_pic_m,
+   output logic               addr_in_pic_r,
+
+   output logic               addr_external_m,
+
+   // DMA slave
+   input logic                dma_dccm_req,              // DMA request; forces the D-stage packet valid
+   input logic [31:0]         dma_mem_addr,              // DMA address; used as rs1 when dec_lsu_valid_raw_d is low
+   input logic [2:0]          dma_mem_sz,                // DMA size: 0=byte, 1=half, 2=word, 3=dword
+   input logic                dma_mem_write,             // 1 = DMA store, 0 = DMA load
+   input logic [63:0]         dma_mem_wdata,             // DMA write data (shifted down by the byte offset before use)
+
+   // Store buffer related signals
+   output eb1_lsu_pkt_t      lsu_pkt_d,
+   output eb1_lsu_pkt_t      lsu_pkt_m,
+   output eb1_lsu_pkt_t      lsu_pkt_r,
+
+   input  logic               scan_mode                  // Scan mode
+
+   );
+
+   logic [31:3]        end_addr_pre_m, end_addr_pre_r;   // flopped upper end-address bits, only updated for dual accesses
+   logic [31:0]        full_addr_d;
+   logic [31:0]        full_end_addr_d;
+   logic [31:0]        lsu_rs1_d;
+   logic [11:0]        lsu_offset_d;
+   logic [31:0]        rs1_d;
+   logic [11:0]        offset_d;
+   logic [12:0]        end_addr_offset_d;
+   logic [2:0]         addr_offset_d;
+
+   logic [63:0]        dma_mem_wdata_shifted;
+   logic               addr_external_d;
+   logic               addr_external_r;
+   logic               access_fault_d, misaligned_fault_d;
+   logic               access_fault_m, misaligned_fault_m;
+
+   logic               fir_dccm_access_error_d, fir_nondccm_access_error_d;
+   logic               fir_dccm_access_error_m, fir_nondccm_access_error_m;
+
+   logic [3:0]         exc_mscause_d, exc_mscause_m;
+   logic [31:0]        rs1_d_raw;
+   logic [31:0]        store_data_d, store_data_pre_m, store_data_m_in;
+   logic [31:0]        bus_read_data_r;
+
+   eb1_lsu_pkt_t           dma_pkt_d;
+   eb1_lsu_pkt_t           lsu_pkt_m_in, lsu_pkt_r_in;
+   eb1_lsu_error_pkt_t     lsu_error_pkt_m;
+
+
+   // Premux the rs1/offset for dma: without a core request the DMA address is used as rs1 and the offset is forced to zero
+   assign lsu_rs1_d[31:0]    = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
+   assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
+   assign rs1_d_raw[31:0]    = lsu_rs1_d[31:0];
+   assign offset_d[11:0]     = lsu_offset_d[11:0];
+
+   assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];   // bypass the load result into the address base
+
+   // generate the ls address
+   rvlsadder   lsadder  (.rs1(rs1_d[31:0]),
+                       .offset(offset_d[11:0]),
+                       .dout(full_addr_d[31:0])
+                       );
+   // NOTE(review): addr_in_dccm_d, addr_in_pic_d, addr_external_d, access_fault_d, misaligned_fault_d, exc_mscause_d, fir_*_error_d and is_sideeffects_m have no driver in this module; presumably addrcheck outputs connected through .* - confirm
+   // Module to generate the memory map of the address
+   eb1_lsu_addrcheck addrcheck (
+              .start_addr_d(full_addr_d[31:0]),
+              .end_addr_d(full_end_addr_d[31:0]),
+              .rs1_region_d(rs1_d[31:28]),
+              .*
+  );
+
+   // Calculate start/end address for load/store
+   assign addr_offset_d[2:0]      = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);   // access size - 1 in bytes (0 for byte)
+   assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]};                                          // sign-extended offset + (size - 1)
+   assign full_end_addr_d[31:0]   = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};                                // address of the last byte accessed
+   assign end_addr_d[31:0]        = full_end_addr_d[31:0];
+   assign lsu_exc_m               = access_fault_m | misaligned_fault_m;
+
+   // Goes to TLU to increment the ECC error counter
+   assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;
+
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1     // exception packet is built directly from R-stage signals
+      logic               access_fault_r, misaligned_fault_r;
+      logic [3:0]         exc_mscause_r;
+      logic               fir_dccm_access_error_r, fir_nondccm_access_error_r;
+
+      // Generate exception packet (R-stage): DMA and fast-interrupt accesses never raise exc_valid
+      assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
+      assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
+      assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store;
+      assign lsu_error_pkt_r.exc_type  = ~misaligned_fault_r;   // 0 when the fault is a misaligned access, 1 otherwise
+      assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0];   // 4'h1 for a double-bit ECC error with no other fault
+      assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];
+      // Fast-interrupt lookup error, in priority order: 2'b11 non-DCCM access error, 2'b10 DCCM access error, 2'b01 double-bit ECC error on a fast_int access, 2'b00 none
+      assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));
+
+      rvdff #(1) access_fault_rff             (.din(access_fault_m),             .dout(access_fault_r),             .clk(lsu_c1_r_clk), .*);
+      rvdff #(1) misaligned_fault_rff         (.din(misaligned_fault_m),         .dout(misaligned_fault_r),         .clk(lsu_c1_r_clk), .*);
+      rvdff #(4) exc_mscause_rff              (.din(exc_mscause_m[3:0]),         .dout(exc_mscause_r[3:0]),         .clk(lsu_c1_r_clk), .*);
+      rvdff #(1) fir_dccm_access_error_mff    (.din(fir_dccm_access_error_m),    .dout(fir_dccm_access_error_r),    .clk(lsu_c1_r_clk), .*);   // M->R flop despite the _mff instance name
+      rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*);   // M->R flop despite the _mff instance name
+
+   end else begin: L2U_Plus1_0                           // exception packet is built in M-stage and flopped into R-stage
+      logic [1:0] lsu_fir_error_m;
+
+      // Generate exception packet (M-stage): DMA, fast-interrupt and flushed (flush_m_up) accesses never raise exc_valid
+      assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
+      assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
+      assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store;
+      assign lsu_error_pkt_m.exc_type  = ~misaligned_fault_m;   // 0 when the fault is a misaligned access, 1 otherwise
+      assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0];   // 4'h1 for a double-bit ECC error with no other fault
+      assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];
+      // Fast-interrupt lookup error, in priority order: 2'b11 non-DCCM access error, 2'b10 DCCM access error, 2'b01 double-bit ECC error on a fast_int access, 2'b00 none
+      assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));
+      // Valid bits flop on every lsu_c2_r_clk; the rest of the packet only when an error is reported. NOTE(review): the [..:2] slice assumes exc_valid/single_ecc_error are bits [1:0] of eb1_lsu_error_pkt_t - confirm against eb1_pkg
+      rvdff  #(1)                             lsu_exc_valid_rff       (.*, .din(lsu_error_pkt_m.exc_valid),                        .dout(lsu_error_pkt_r.exc_valid),                        .clk(lsu_c2_r_clk));
+      rvdff  #(1)                             lsu_single_ecc_error_rff(.*, .din(lsu_error_pkt_m.single_ecc_error),                 .dout(lsu_error_pkt_r.single_ecc_error),                 .clk(lsu_c2_r_clk));
+      rvdffe #($bits(eb1_lsu_error_pkt_t)-2) lsu_error_pkt_rff       (.*, .din(lsu_error_pkt_m[$bits(eb1_lsu_error_pkt_t)-1:2]), .dout(lsu_error_pkt_r[$bits(eb1_lsu_error_pkt_t)-1:2]), .en(lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override));
+      rvdff #(2)                              lsu_fir_error_rff       (.*, .din(lsu_fir_error_m[1:0]),                             .dout(lsu_fir_error[1:0]),                               .clk(lsu_c2_r_clk));
+   end
+
+   // Create DMA packet: all fields zero except valid/dma/store/load and the one-hot size decoded from dma_mem_sz
+   always_comb begin
+      dma_pkt_d = '0;
+      dma_pkt_d.valid   = dma_dccm_req;
+      dma_pkt_d.dma     = 1'b1;
+      dma_pkt_d.store   = dma_mem_write;
+      dma_pkt_d.load    = ~dma_mem_write;
+      dma_pkt_d.by      = (dma_mem_sz[2:0] == 3'b0);
+      dma_pkt_d.half    = (dma_mem_sz[2:0] == 3'b1);
+      dma_pkt_d.word    = (dma_mem_sz[2:0] == 3'b10);
+      dma_pkt_d.dword   = (dma_mem_sz[2:0] == 3'b11);
+   end
+   // D-stage packet is the core packet when dec_lsu_valid_raw_d is set, otherwise the DMA packet; the valid bits assigned last override the copied ones
+   always_comb begin
+      lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
+      lsu_pkt_m_in = lsu_pkt_d;
+      lsu_pkt_r_in = lsu_pkt_m;
+
+      lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;   // fast_int requests are not killed by flush_m_up in D
+      lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);               // DMA packets are not killed by flush_m_up
+      lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ;
+   end
+
+   // C2 clock for valid and C1 for other bits of packet
+   rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk));
+   rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk));
+   // NOTE(review): the [..:1] slices below assume valid is bit 0 of eb1_lsu_pkt_t - confirm against eb1_pkg
+   rvdff #($bits(eb1_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(eb1_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(eb1_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk));
+   rvdff #($bits(eb1_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(eb1_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(eb1_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk));
+
+
+
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1    // both load results are formed from R-stage data
+      logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;
+      // Use the flopped bus return data for external addresses, otherwise the LSU load data
+      assign lsu_ld_datafn_r[31:0]  = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
+      assign lsu_ld_datafn_corr_r[31:0]  = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
+
+      // this is really R stage signal: byte/half loads are zero- or sign-extended per unsign, words pass through
+      assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
+                                  ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
+                                  ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
+                                  ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
+                                  ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_r[31:0]);
+
+      // this signal is used for gpr update
+      assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);
+
+   end else begin: L2U1_Plus1_0 // block: L2U1_Plus1_1
+      logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;
+      // Use the bus return data for external addresses (M-stage data for lsu_result_m, flopped R-stage data for the corrected result)
+      assign lsu_ld_datafn_m[31:0] = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
+      assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
+
+      // this result must look at prior stores and merge them in
+      assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
+                                  ({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
+                                  ({32{~lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {{24{  lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
+                                  ({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{  lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
+                                  ({32{lsu_pkt_m.word}}                     & lsu_ld_datafn_m[31:0]);
+
+      // this signal is used for gpr update
+      assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);
+   end
+
+   // Fast interrupt address: bits [31:1] of the ECC corrected R-stage load data
+   assign lsu_fir_addr[31:1]    = lsu_ld_data_corr_r[31:1];
+
+   // absence load/store all 0's
+   assign lsu_addr_d[31:0] = full_addr_d[31:0];
+
+   // Interrupt as a flush source allows the WB to occur
+   assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;
+
+   assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000};   // Shift the dma data to lower bits to make it consistent to lsu stores
+   assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0];  // Write to PIC still happens in r stage
+
+   assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];
+   // M-stage store data: optionally bypassed from the load result, and ANDed with picm_mask_data_m only when the address is in the PIC
+   assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);
+
+
+   rvdff #(32)  sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]),                       .clk(lsu_store_c1_m_clk));
+
+   rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk));
+   rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk));
+   // Upper end-address bits are only flopped for valid dual accesses; for non-dual accesses the start address bits are reused instead
+   assign end_addr_m[31:3] = ldst_dual_m ? end_addr_pre_m[31:3] : lsu_addr_m[31:3];       // This is for power saving
+   assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3];       // This is for power saving
+
+   rvdffe #(29) end_addr_hi_mff (.*, .din(end_addr_d[31:3]), .dout(end_addr_pre_m[31:3]), .en((lsu_pkt_d.valid & ldst_dual_d) | clk_override));
+   rvdffe #(29) end_addr_hi_rff (.*, .din(end_addr_m[31:3]), .dout(end_addr_pre_r[31:3]), .en((lsu_pkt_m.valid & ldst_dual_m) | clk_override));
+
+   rvdff #(3)  end_addr_lo_mff (.*, .din(end_addr_d[2:0]), .dout(end_addr_m[2:0]), .clk(lsu_c1_m_clk));
+   rvdff #(3)  end_addr_lo_rff (.*, .din(end_addr_m[2:0]), .dout(end_addr_r[2:0]), .clk(lsu_c1_r_clk));
+
+   rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*);
+
+   rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*);
+
+   rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*);
+
+   rvdff #(1) access_fault_mff     (.din(access_fault_d),     .dout(access_fault_m),     .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(4) exc_mscause_mff      (.din(exc_mscause_d[3:0]), .dout(exc_mscause_m[3:0]), .clk(lsu_c1_m_clk), .*);
+
+   rvdff #(1) fir_dccm_access_error_mff    (.din(fir_dccm_access_error_d),    .dout(fir_dccm_access_error_m),    .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*);
+   // Bus return data is captured for R-stage only when the M-stage access is external (or clk_override)
+   rvdffe #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .en(addr_external_m | clk_override));
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_stbuf.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_stbuf.sv
new file mode 100644
index 0000000..1704a45
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_stbuf.sv
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: Store Buffer
+// Comments: Dual writes and single drain
+//
+//
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+//
+//********************************************************************************
+
+
+module eb1_lsu_stbuf
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )
+(
+   input logic                           clk,                         // core clock
+   input logic                           rst_l,                       // reset
+
+   input logic                           lsu_stbuf_c1_clk,            // stbuf clock
+   input logic                           lsu_free_c2_clk,             // free clk
+
+   // Store Buffer input
+   input logic                           store_stbuf_reqvld_r,        // core instruction goes to stbuf
+   input logic                           lsu_commit_r,                // lsu commits
+   input logic                           dec_lsu_valid_raw_d,         // Speculative decode valid
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_hi_r,             // merged data from the dccm for stores. This is used for fwding
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_lo_r,             // merged data from the dccm for stores. This is used for fwding
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_hi_r,           // merged data from the dccm for stores
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_lo_r,           // merged data from the dccm for stores
+
+   // Store Buffer output
+   output logic                          stbuf_reqvld_any,            // stbuf is draining
+   output logic                          stbuf_reqvld_flushed_any,    // Top entry is flushed
+   output logic [pt.LSU_SB_BITS-1:0]     stbuf_addr_any,              // address
+   output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any,              // stbuf data
+
+   input  logic                          lsu_stbuf_commit_any,        // pop the stbuf as it commite
+   output logic                          lsu_stbuf_full_any,          // stbuf is full
+   output logic                          lsu_stbuf_empty_any,         // stbuf is empty
+   output logic                          ldst_stbuf_reqvld_r,         // needed for clocking
+
+   input logic [pt.LSU_SB_BITS-1:0]      lsu_addr_d,                  // lsu address D-stage
+   input logic [31:0]                    lsu_addr_m,                  // lsu address M-stage
+   input logic [31:0]                    lsu_addr_r,                  // lsu address R-stage
+
+   input logic [pt.LSU_SB_BITS-1:0]      end_addr_d,                  // lsu end address D-stage - needed to check unaligned
+   input logic [31:0]                    end_addr_m,                  // lsu end address M-stage - needed to check unaligned
+   input logic [31:0]                    end_addr_r,                  // lsu end address R-stage - needed to check unaligned
+
+   input logic                           ldst_dual_d, ldst_dual_m, ldst_dual_r,
+   input logic                           addr_in_dccm_m,              // address is in dccm
+   input logic                           addr_in_dccm_r,              // address is in dccm
+
+   // Forwarding signals
+   input logic                           lsu_cmpen_m,                 // needed for forwarding stbuf - load
+   input eb1_lsu_pkt_t                  lsu_pkt_m,                   // LSU packet M-stage
+   input eb1_lsu_pkt_t                  lsu_pkt_r,                   // LSU packet R-stage
+
+   output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m,          // stbuf data
+   output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m,          // stbuf data
+   output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m,        // stbuf data
+   output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m,        // stbuf data
+
+   input  logic       scan_mode                                       // Scan mode
+
+);
+
+
+   localparam DEPTH      = pt.LSU_STBUF_DEPTH;
+   localparam DATA_WIDTH = pt.DCCM_DATA_WIDTH;
+   localparam BYTE_WIDTH = pt.DCCM_BYTE_WIDTH;
+   localparam DEPTH_LOG2 = $clog2(DEPTH);
+
+   // These are the fields in the store queue
+   logic [DEPTH-1:0]                     stbuf_vld;
+   logic [DEPTH-1:0]                     stbuf_dma_kill;
+   logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr;
+   logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteen;
+   logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_data;
+
+   logic [DEPTH-1:0]                     sel_lo;
+   logic [DEPTH-1:0]                     stbuf_wr_en;
+   logic [DEPTH-1:0]                     stbuf_dma_kill_en;
+   logic [DEPTH-1:0]                     stbuf_reset;
+   logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin;
+   logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_datain;
+   logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteenin;
+
+   logic [7:0]             store_byteen_ext_r;
+   logic [BYTE_WIDTH-1:0]  store_byteen_hi_r;
+   logic [BYTE_WIDTH-1:0]  store_byteen_lo_r;
+
+   logic                   WrPtrEn, RdPtrEn;
+   logic [DEPTH_LOG2-1:0]  WrPtr, RdPtr;
+   logic [DEPTH_LOG2-1:0]  NxtWrPtr, NxtRdPtr;
+   logic [DEPTH_LOG2-1:0]  WrPtrPlus1, WrPtrPlus2, RdPtrPlus1;
+
+   logic                   dual_stbuf_write_r;
+
+   logic                   isdccmst_m, isdccmst_r;
+   logic [3:0]             stbuf_numvld_any, stbuf_specvld_any;
+   logic [1:0]             stbuf_specvld_m, stbuf_specvld_r;
+
+   logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m;
+
+   // variables to detect matching from the store queue
+   logic [DEPTH-1:0]                 stbuf_match_hi, stbuf_match_lo;
+   logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo;
+   logic [DATA_WIDTH-1:0]            stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m;
+   logic [BYTE_WIDTH-1:0]            stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m;
+
+   // logic to detect matching from the pipe - needed for store - load forwarding
+   logic [BYTE_WIDTH-1:0]  ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
+   logic                   ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
+
+   logic [BYTE_WIDTH-1:0]  ld_byte_hit_lo, ld_byte_rhit_lo;
+   logic [BYTE_WIDTH-1:0]  ld_byte_hit_hi, ld_byte_rhit_hi;
+
+   logic [BYTE_WIDTH-1:0]  ldst_byteen_hi_r;
+   logic [BYTE_WIDTH-1:0]  ldst_byteen_lo_r;
+   // byte_en flowing down
+   logic [7:0]             ldst_byteen_r;
+   logic [7:0]             ldst_byteen_ext_r;
+   // fwd data through the pipe
+   logic [31:0]       ld_fwddata_rpipe_lo;
+   logic [31:0]       ld_fwddata_rpipe_hi;
+
+   // coalescing signals
+   logic [DEPTH-1:0]      store_matchvec_lo_r, store_matchvec_hi_r;
+   logic                  store_coalesce_lo_r, store_coalesce_hi_r;
+
+   //----------------------------------------
+   // Logic starts here
+   //----------------------------------------
+   // Create high/low byte enables
+   assign store_byteen_ext_r[7:0]           = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
+   assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{lsu_pkt_r.store}};
+   assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{lsu_pkt_r.store}};
+
+   assign RdPtrPlus1[DEPTH_LOG2-1:0]     = RdPtr[DEPTH_LOG2-1:0] + 1'b1;
+   assign WrPtrPlus1[DEPTH_LOG2-1:0]     = WrPtr[DEPTH_LOG2-1:0] + 1'b1;
+   assign WrPtrPlus2[DEPTH_LOG2-1:0]     = WrPtr[DEPTH_LOG2-1:0] + 2'b10;
+
+   // ecc error on both hi/lo
+   assign dual_stbuf_write_r   = ldst_dual_r & store_stbuf_reqvld_r;
+   assign ldst_stbuf_reqvld_r  = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r);
+
+  // Store Buffer coalescing
+   for (genvar i=0; i<DEPTH; i++) begin: FindMatchEntry
+       assign store_matchvec_lo_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == lsu_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & ~stbuf_reset[i];
+       assign store_matchvec_hi_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == end_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & dual_stbuf_write_r & ~stbuf_reset[i];
+   end: FindMatchEntry
+
+   assign store_coalesce_lo_r = |store_matchvec_lo_r[DEPTH-1:0];
+   assign store_coalesce_hi_r = |store_matchvec_hi_r[DEPTH-1:0];
+
+
+   if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
+      // Allocate new in this entry if :
+      // 1. wrptr, single allocate, lo did not coalesce
+      // 2. wrptr, double allocate, lo ^ hi coalesced
+      // 3. wrptr + 1, double allocate, neither lo nor hi coalesced
+      // Also update if there is a hi or a lo coalesce to this entry
+      // Store Buffer instantiation
+      for (genvar i=0; i<DEPTH; i++) begin: GenStBuf
+         assign stbuf_wr_en[i] = ldst_stbuf_reqvld_r & (
+                                   ( (i == WrPtr[DEPTH_LOG2-1:0])      &  ~store_coalesce_lo_r)   |                                                    // Allocate : new Lo
+                                   ( (i == WrPtr[DEPTH_LOG2-1:0])      &  dual_stbuf_write_r & ~store_coalesce_hi_r) |                               // Allocate : only 1 new Write Either
+                                   ( (i == WrPtrPlus1[DEPTH_LOG2-1:0]) &  dual_stbuf_write_r & ~(store_coalesce_lo_r | store_coalesce_hi_r)) |     // Allocate2 : 2 new so Write Hi
+                                   store_matchvec_lo_r[i] | store_matchvec_hi_r[i]);                                                                 // Coalesced Write Lo or Hi
+         assign stbuf_reset[i] = (lsu_stbuf_commit_any | stbuf_reqvld_flushed_any) & (i == RdPtr[DEPTH_LOG2-1:0]);
+
+         // Mux select for start/end address
+         assign sel_lo[i]                         = ((~ldst_dual_r | store_stbuf_reqvld_r) & (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) |   // lo allocated new entry
+                                                    store_matchvec_lo_r[i];                                                                                                           // lo coalesced in to this entry
+         assign stbuf_addrin[i][pt.LSU_SB_BITS-1:0]  = sel_lo[i] ? lsu_addr_r[pt.LSU_SB_BITS-1:0]       : end_addr_r[pt.LSU_SB_BITS-1:0];
+         assign stbuf_byteenin[i][BYTE_WIDTH-1:0] = sel_lo[i] ? (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_lo_r[BYTE_WIDTH-1:0])          : (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_hi_r[BYTE_WIDTH-1:0]);
+         assign stbuf_datain[i][7:0]              = sel_lo[i] ? ((~stbuf_byteen[i][0] | store_byteen_lo_r[0]) ? store_datafn_lo_r[7:0]   : stbuf_data[i][7:0])    :
+                                                                ((~stbuf_byteen[i][0] | store_byteen_hi_r[0]) ? store_datafn_hi_r[7:0]   : stbuf_data[i][7:0]);
+         assign stbuf_datain[i][15:8]             = sel_lo[i] ? ((~stbuf_byteen[i][1] | store_byteen_lo_r[1]) ? store_datafn_lo_r[15:8]  : stbuf_data[i][15:8])    :
+                                                                ((~stbuf_byteen[i][1] | store_byteen_hi_r[1]) ? store_datafn_hi_r[15:8]  : stbuf_data[i][15:8]);
+         assign stbuf_datain[i][23:16]            = sel_lo[i] ? ((~stbuf_byteen[i][2] | store_byteen_lo_r[2]) ? store_datafn_lo_r[23:16] : stbuf_data[i][23:16])    :
+                                                                ((~stbuf_byteen[i][2] | store_byteen_hi_r[2]) ? store_datafn_hi_r[23:16] : stbuf_data[i][23:16]);
+         assign stbuf_datain[i][31:24]            = sel_lo[i] ? ((~stbuf_byteen[i][3] | store_byteen_lo_r[3]) ? store_datafn_lo_r[31:24] : stbuf_data[i][31:24])    :
+                                                                ((~stbuf_byteen[i][3] | store_byteen_hi_r[3]) ? store_datafn_hi_r[31:24] : stbuf_data[i][31:24]);
+
+         rvdffsc #(.WIDTH(1))              stbuf_vldff         (.din(1'b1),                                .dout(stbuf_vld[i]),                      .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
+         rvdffsc #(.WIDTH(1))              stbuf_killff        (.din(1'b1),                                .dout(stbuf_dma_kill[i]),                 .en(stbuf_dma_kill_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
+         rvdffe  #(.WIDTH(pt.LSU_SB_BITS)) stbuf_addrff        (.din(stbuf_addrin[i][pt.LSU_SB_BITS-1:0]), .dout(stbuf_addr[i][pt.LSU_SB_BITS-1:0]), .en(stbuf_wr_en[i]), .*);
+         rvdffsc #(.WIDTH(BYTE_WIDTH))     stbuf_byteenff      (.din(stbuf_byteenin[i][BYTE_WIDTH-1:0]),   .dout(stbuf_byteen[i][BYTE_WIDTH-1:0]),   .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_stbuf_c1_clk), .*);
+         rvdffe  #(.WIDTH(DATA_WIDTH))     stbuf_dataff        (.din(stbuf_datain[i][DATA_WIDTH-1:0]),     .dout(stbuf_data[i][DATA_WIDTH-1:0]),     .en(stbuf_wr_en[i]), .*);
+      end
+   end else begin: Gen_dccm_disable
+      assign stbuf_wr_en[DEPTH-1:0] = '0;
+      assign stbuf_reset[DEPTH-1:0] = '0;
+      assign stbuf_vld[DEPTH-1:0]   = '0;
+      assign stbuf_dma_kill[DEPTH-1:0] = '0;
+      assign stbuf_addr[DEPTH-1:0]  = '0;
+      assign stbuf_byteen[DEPTH-1:0] = '0;
+      assign stbuf_data[DEPTH-1:0]   = '0;
+   end
+
+   // Store Buffer drain logic
+   assign stbuf_reqvld_flushed_any            = stbuf_vld[RdPtr] & stbuf_dma_kill[RdPtr];
+   assign stbuf_reqvld_any                    = stbuf_vld[RdPtr] & ~stbuf_dma_kill[RdPtr] & ~(|stbuf_dma_kill_en[DEPTH-1:0]);  // Don't drain if some kill bit is being set this cycle
+   assign stbuf_addr_any[pt.LSU_SB_BITS-1:0]  = stbuf_addr[RdPtr][pt.LSU_SB_BITS-1:0];
+   assign stbuf_data_any[DATA_WIDTH-1:0]      = stbuf_data[RdPtr][DATA_WIDTH-1:0];
+
+   // Update the RdPtr/WrPtr logic
+   // Need to revert the WrPtr for flush cases. Also revert the pipe WrPtrs
+   assign WrPtrEn                  = (ldst_stbuf_reqvld_r  & ~dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r))  |  // writing 1 and did not coalesce
+                                     (ldst_stbuf_reqvld_r  &  dual_stbuf_write_r & ~(store_coalesce_hi_r & store_coalesce_lo_r));    // writing 2 and atleast 1 did not coalesce
+   assign NxtWrPtr[DEPTH_LOG2-1:0] = (ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) ? WrPtrPlus2[DEPTH_LOG2-1:0] : WrPtrPlus1[DEPTH_LOG2-1:0];
+   assign RdPtrEn                  = lsu_stbuf_commit_any | stbuf_reqvld_flushed_any;
+   assign NxtRdPtr[DEPTH_LOG2-1:0] = RdPtrPlus1[DEPTH_LOG2-1:0];
+
+   always_comb begin   // Count how many store buffer entries currently have their valid bit set
+      stbuf_numvld_any[3:0] = '0;
+      for (int i=0; i<DEPTH; i++) begin
+         stbuf_numvld_any[3:0] += {3'b0, stbuf_vld[i]};   // zero-extend the valid bit to the 4-bit count width
+      end
+   end
+
+    // These go to store buffer to detect full
+   assign isdccmst_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_in_dccm_m & ~lsu_pkt_m.dma;
+   assign isdccmst_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma;
+
+   assign stbuf_specvld_m[1:0] = {1'b0,isdccmst_m} << (isdccmst_m & ldst_dual_m);
+   assign stbuf_specvld_r[1:0] = {1'b0,isdccmst_r} << (isdccmst_r & ldst_dual_r);
+   assign stbuf_specvld_any[3:0] = stbuf_numvld_any[3:0] +  {2'b0, stbuf_specvld_m[1:0]} + {2'b0, stbuf_specvld_r[1:0]};
+
+   assign lsu_stbuf_full_any  = (~ldst_dual_d & dec_lsu_valid_raw_d) ? (stbuf_specvld_any[3:0] >= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1));
+   assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0);
+
+   // Load forwarding logic from the store queue
+   assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = end_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
+
+   assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
+
+   always_comb begin: GenLdFwd   // Match the M-stage lo/hi compare addresses against every store buffer entry
+      stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0]   = '0;
+      stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0]   = '0;
+      // Both byte-enable vectors start at zero; the loops below OR in the contribution of each entry
+      for (int i=0; i<DEPTH; i++) begin
+         stbuf_match_hi[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
+         stbuf_match_lo[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
+
+         // Kill the store buffer entry if there is a dma store since it already updated the dccm
+         stbuf_dma_kill_en[i] = (stbuf_match_hi[i] | stbuf_match_lo[i]) & lsu_pkt_m.valid & lsu_pkt_m.dma & lsu_pkt_m.store;
+         // Per-byte forward enables: a byte is forwardable only when the matching entry has that byte enable set
+         for (int j=0; j<BYTE_WIDTH; j++) begin
+            stbuf_fwdbyteenvec_hi[i][j] = stbuf_match_hi[i] & stbuf_byteen[i][j] & stbuf_vld[i];
+            stbuf_fwdbyteen_hi_pre_m[j]  |= stbuf_fwdbyteenvec_hi[i][j];
+
+            stbuf_fwdbyteenvec_lo[i][j] = stbuf_match_lo[i] & stbuf_byteen[i][j] & stbuf_vld[i];
+            stbuf_fwdbyteen_lo_pre_m[j]  |= stbuf_fwdbyteenvec_lo[i][j];
+         end
+      end
+   end // block: GenLdFwd
+
+   always_comb begin: GenLdData   // Build the store-buffer forwarding data for the hi and lo compare addresses
+      stbuf_fwddata_hi_pre_m[31:0]   = '0;
+      stbuf_fwddata_lo_pre_m[31:0]   = '0;
+      // OR together the data words of all entries whose address matched; non-matching entries contribute zero
+      for (int i=0; i<DEPTH; i++) begin
+         stbuf_fwddata_hi_pre_m[31:0] |= {32{stbuf_match_hi[i]}} & stbuf_data[i][31:0];
+         stbuf_fwddata_lo_pre_m[31:0] |= {32{stbuf_match_lo[i]}} & stbuf_data[i][31:0];
+
+      end
+
+   end // block: GenLdData
+
+   // Create Hi/Lo signals - needed for the pipe forwarding
+   assign ldst_byteen_r[7:0] =  ({8{lsu_pkt_r.by}}    & 8'b0000_0001) |
+                                 ({8{lsu_pkt_r.half}}  & 8'b0000_0011) |
+                                 ({8{lsu_pkt_r.word}}  & 8'b0000_1111) |
+                                 ({8{lsu_pkt_r.dword}} & 8'b1111_1111);
+
+   assign ldst_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
+
+   assign ldst_byteen_hi_r[3:0]   = ldst_byteen_ext_r[7:4];
+   assign ldst_byteen_lo_r[3:0]   = ldst_byteen_ext_r[3:0];
+
+   assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
+   assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
+   assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
+   assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
+
+   for (genvar i=0; i<BYTE_WIDTH; i++) begin
+      assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i];
+      assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i];
+      assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i];
+      assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i];
+
+      assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
+      assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
+
+       assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
+                                                     ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
+
+       assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
+                                                     ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
+
+      assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
+      assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
+
+      assign stbuf_fwdbyteen_hi_m[i] = ld_byte_hit_hi[i] | stbuf_fwdbyteen_hi_pre_m[i];
+      assign stbuf_fwdbyteen_lo_m[i] = ld_byte_hit_lo[i] | stbuf_fwdbyteen_lo_pre_m[i];
+      // // Pipe vs Store Queue priority
+      assign stbuf_fwddata_lo_m[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i]    ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : stbuf_fwddata_lo_pre_m[(8*i)+7:(8*i)];
+      // // Pipe vs Store Queue priority
+      assign stbuf_fwddata_hi_m[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i]    ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : stbuf_fwddata_hi_pre_m[(8*i)+7:(8*i)];
+   end
+
+   // Flops
+   rvdffs #(.WIDTH(DEPTH_LOG2)) WrPtrff (.din(NxtWrPtr[DEPTH_LOG2-1:0]), .dout(WrPtr[DEPTH_LOG2-1:0]), .en(WrPtrEn), .clk(lsu_stbuf_c1_clk), .*);
+   rvdffs #(.WIDTH(DEPTH_LOG2)) RdPtrff (.din(NxtRdPtr[DEPTH_LOG2-1:0]), .dout(RdPtr[DEPTH_LOG2-1:0]), .en(RdPtrEn), .clk(lsu_stbuf_c1_clk), .*);
+
+`ifdef RV_ASSERT_ON
+   // Speculative occupancy must never exceed DEPTH. Compare the full 4-bit count: with only [2:0] a count >= 8 aliases to a small value and passes.
+   assert_stbuf_overflow: assert #0 (stbuf_specvld_any[3:0] <= DEPTH);
+   property stbuf_wren_store_dccm;
+      @(posedge clk)  disable iff(~rst_l) (|stbuf_wr_en[DEPTH-1:0]) |-> (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r);
+   endproperty
+   assert_stbuf_wren_store_dccm: assert property (stbuf_wren_store_dccm) else
+      $display("Illegal store buffer write");
+   // stbuf_wren_store_dccm: any store buffer write enable implies a valid store to the DCCM in the R stage
+`endif
+
+endmodule
+
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_lsu_trigger.sv b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_trigger.sv
new file mode 100644
index 0000000..d3c5058
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_lsu_trigger.sv
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: LSU Trigger logic
+// Comments:
+//
+//********************************************************************************
+module eb1_lsu_trigger   // Compares the M-stage load/store address or store data against four trigger packets
+import eb1_pkg::*;
+#(
+`include "eb1_param.vh"
+ )(
+   input eb1_trigger_pkt_t [3:0] trigger_pkt_any,            // trigger packet from dec
+   input eb1_lsu_pkt_t           lsu_pkt_m,                  // lsu packet
+   input logic [31:0]             lsu_addr_m,                 // address
+   input logic [31:0]             store_data_m,               // store data
+
+   output logic [3:0]             lsu_trigger_match_m         // match result
+);
+   // Bit/row i of the vectors below belongs to trigger_pkt_any[i]
+   logic               trigger_enable;
+   logic [3:0][31:0]  lsu_match_data;
+   logic [3:0]        lsu_trigger_data_match;
+   logic [31:0]       store_data_trigger_m;
+   logic [31:0]       ldst_addr_trigger_m;
+
+   // Generate the trigger enable (This is for power)
+   always_comb begin
+      trigger_enable = 1'b0;
+      for (int i=0; i<4; i++) begin
+         trigger_enable |= trigger_pkt_any[i].m;   // set when the .m field of any of the four packets is set
+      end
+   end
+   // Store data is masked to the access size (byte: [7:0], half: [15:0], word: [31:0]); both operands are forced to zero when trigger_enable is 0
+   assign store_data_trigger_m[31:0] = {({16{lsu_pkt_m.word}} & store_data_m[31:16]),({8{(lsu_pkt_m.half | lsu_pkt_m.word)}} & store_data_m[15:8]), store_data_m[7:0]} & {32{trigger_enable}};
+   assign ldst_addr_trigger_m[31:0]  = lsu_addr_m[31:0] & {32{trigger_enable}};
+
+   // Per trigger: select=0 compares the address; select=1 compares the store data, and only when the trigger's store bit is set
+   for (genvar i=0; i<4; i++) begin
+      assign lsu_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select}} & ldst_addr_trigger_m[31:0]) |
+                                       ({32{trigger_pkt_any[i].select & trigger_pkt_any[i].store}} & store_data_trigger_m[31:0]);
+      // NOTE(review): rvmaskandmatch is defined elsewhere; presumably compares data against tdata2, with masken enabling a masked compare - confirm
+      rvmaskandmatch trigger_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(lsu_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(lsu_trigger_data_match[i]));
+      // Final match: valid non-DMA access, a trigger enabled, access type matches (loads only match address triggers), and the compare hit
+      assign lsu_trigger_match_m[i] = lsu_pkt_m.valid & ~lsu_pkt_m.dma & trigger_enable &
+                                        ((trigger_pkt_any[i].store & lsu_pkt_m.store) | (trigger_pkt_any[i].load & lsu_pkt_m.load & ~trigger_pkt_any[i].select)) &
+                                        lsu_trigger_data_match[i];
+   end
+
+
+endmodule // eb1_lsu_trigger
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_param.vh b/verilog/rtl/BrqRV_EB1/design/eb1_param.vh
new file mode 100644
index 0000000..77adbc4
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_param.vh
@@ -0,0 +1,175 @@
+parameter eb1_param_t pt = '{
+	BHT_ADDR_HI            : 8'h08         ,
+	BHT_ADDR_LO            : 6'h02         ,
+	BHT_ARRAY_DEPTH        : 15'h0080       ,
+	BHT_GHR_HASH_1         : 5'h00         ,
+	BHT_GHR_SIZE           : 8'h07         ,
+	BHT_SIZE               : 16'h0100       ,
+	BITMANIP_ZBA           : 5'h00         ,
+	BITMANIP_ZBB           : 5'h00         ,
+	BITMANIP_ZBC           : 5'h00         ,
+	BITMANIP_ZBE           : 5'h00         ,
+	BITMANIP_ZBF           : 5'h00         ,
+	BITMANIP_ZBP           : 5'h00         ,
+	BITMANIP_ZBR           : 5'h00         ,
+	BITMANIP_ZBS           : 5'h00         ,
+	BTB_ADDR_HI            : 9'h008        ,
+	BTB_ADDR_LO            : 6'h02         ,
+	BTB_ARRAY_DEPTH        : 13'h0080       ,
+	BTB_BTAG_FOLD          : 5'h00         ,
+	BTB_BTAG_SIZE          : 9'h006        ,
+	BTB_ENABLE             : 5'h01         ,
+	BTB_FOLD2_INDEX_HASH   : 5'h00         ,
+	BTB_FULLYA             : 5'h00         ,
+	BTB_INDEX1_HI          : 9'h008        ,
+	BTB_INDEX1_LO          : 9'h002        ,
+	BTB_INDEX2_HI          : 9'h00F        ,
+	BTB_INDEX2_LO          : 9'h009        ,
+	BTB_INDEX3_HI          : 9'h016        ,
+	BTB_INDEX3_LO          : 9'h010        ,
+	BTB_SIZE               : 14'h0100       ,
+	BTB_TOFFSET_SIZE       : 9'h00C        ,
+	BUILD_AHB_LITE         : 4'h0          ,
+	BUILD_AXI4             : 5'h01         ,
+	BUILD_AXI_NATIVE       : 5'h01         ,
+	BUS_PRTY_DEFAULT       : 6'h03         ,
+	DATA_ACCESS_ADDR0      : 36'h000000000  ,
+	DATA_ACCESS_ADDR1      : 36'h000000000  ,
+	DATA_ACCESS_ADDR2      : 36'h000000000  ,
+	DATA_ACCESS_ADDR3      : 36'h000000000  ,
+	DATA_ACCESS_ADDR4      : 36'h000000000  ,
+	DATA_ACCESS_ADDR5      : 36'h000000000  ,
+	DATA_ACCESS_ADDR6      : 36'h000000000  ,
+	DATA_ACCESS_ADDR7      : 36'h000000000  ,
+	DATA_ACCESS_ENABLE0    : 5'h00         ,
+	DATA_ACCESS_ENABLE1    : 5'h00         ,
+	DATA_ACCESS_ENABLE2    : 5'h00         ,
+	DATA_ACCESS_ENABLE3    : 5'h00         ,
+	DATA_ACCESS_ENABLE4    : 5'h00         ,
+	DATA_ACCESS_ENABLE5    : 5'h00         ,
+	DATA_ACCESS_ENABLE6    : 5'h00         ,
+	DATA_ACCESS_ENABLE7    : 5'h00         ,
+	DATA_ACCESS_MASK0      : 36'h0FFFFFFFF  ,
+	DATA_ACCESS_MASK1      : 36'h0FFFFFFFF  ,
+	DATA_ACCESS_MASK2      : 36'h0FFFFFFFF  ,
+	DATA_ACCESS_MASK3      : 36'h0FFFFFFFF  ,
+	DATA_ACCESS_MASK4      : 36'h0FFFFFFFF  ,
+	DATA_ACCESS_MASK5      : 36'h0FFFFFFFF  ,
+	DATA_ACCESS_MASK6      : 36'h0FFFFFFFF  ,
+	DATA_ACCESS_MASK7      : 36'h0FFFFFFFF  ,
+	DCCM_BANK_BITS         : 7'h02         ,
+	DCCM_BITS              : 9'h00C        ,
+	DCCM_BYTE_WIDTH        : 7'h04         ,
+	DCCM_DATA_WIDTH        : 10'h020        ,
+	DCCM_ECC_WIDTH         : 7'h07         ,
+	DCCM_ENABLE            : 5'h01         ,
+	DCCM_FDATA_WIDTH       : 10'h027        ,
+	DCCM_INDEX_BITS        : 8'h08         ,
+	DCCM_NUM_BANKS         : 9'h004        ,
+	DCCM_REGION            : 8'h0F         ,
+	DCCM_SADR              : 36'h0F0040000  ,
+	DCCM_SIZE              : 14'h0004       ,
+	DCCM_WIDTH_BITS        : 6'h02         ,
+	DIV_BIT                : 7'h03         ,
+	DIV_NEW                : 5'h01         ,
+	DMA_BUF_DEPTH          : 7'h05         ,
+	DMA_BUS_ID             : 9'h001        ,
+	DMA_BUS_PRTY           : 6'h02         ,
+	DMA_BUS_TAG            : 8'h01         ,
+	FAST_INTERRUPT_REDIRECT : 5'h01         ,
+	ICACHE_2BANKS          : 5'h01         ,
+	ICACHE_BANK_BITS       : 7'h01         ,
+	ICACHE_BANK_HI         : 7'h03         ,
+	ICACHE_BANK_LO         : 6'h03         ,
+	ICACHE_BANK_WIDTH      : 8'h08         ,
+	ICACHE_BANKS_WAY       : 7'h02         ,
+	ICACHE_BEAT_ADDR_HI    : 8'h05         ,
+	ICACHE_BEAT_BITS       : 8'h03         ,
+	ICACHE_BYPASS_ENABLE   : 5'h01         ,
+	ICACHE_DATA_DEPTH      : 18'h00200      ,
+	ICACHE_DATA_INDEX_LO   : 7'h04         ,
+	ICACHE_DATA_WIDTH      : 11'h040        ,
+	ICACHE_ECC             : 5'h01         ,
+	ICACHE_ENABLE          : 5'h00         ,
+	ICACHE_FDATA_WIDTH     : 11'h047        ,
+	ICACHE_INDEX_HI        : 9'h00C        ,
+	ICACHE_LN_SZ           : 11'h040        ,
+	ICACHE_NUM_BEATS       : 8'h08         ,
+	ICACHE_NUM_BYPASS      : 8'h02         ,
+	ICACHE_NUM_BYPASS_WIDTH : 8'h02         ,
+	ICACHE_NUM_WAYS        : 7'h02         ,
+	ICACHE_ONLY            : 5'h00         ,
+	ICACHE_SCND_LAST       : 8'h06         ,
+	ICACHE_SIZE            : 13'h0010       ,
+	ICACHE_STATUS_BITS     : 7'h01         ,
+	ICACHE_TAG_BYPASS_ENABLE : 5'h01         ,
+	ICACHE_TAG_DEPTH       : 17'h00080      ,
+	ICACHE_TAG_INDEX_LO    : 7'h06         ,
+	ICACHE_TAG_LO          : 9'h00D        ,
+	ICACHE_TAG_NUM_BYPASS  : 8'h02         ,
+	ICACHE_TAG_NUM_BYPASS_WIDTH : 8'h02         ,
+	ICACHE_WAYPACK         : 5'h01         ,
+	ICCM_BANK_BITS         : 7'h02         ,
+	ICCM_BANK_HI           : 9'h003        ,
+	ICCM_BANK_INDEX_LO     : 9'h004        ,
+	ICCM_BITS              : 9'h00C        ,
+	ICCM_ENABLE            : 5'h01         ,
+	ICCM_ICACHE            : 5'h00         ,
+	ICCM_INDEX_BITS        : 8'h08         ,
+	ICCM_NUM_BANKS         : 9'h004        ,
+	ICCM_ONLY              : 5'h01         ,
+	ICCM_REGION            : 8'h0A         ,
+	ICCM_SADR              : 36'h0AFFFF000  ,
+	ICCM_SIZE              : 14'h0004       ,
+	IFU_BUS_ID             : 5'h01         ,
+	IFU_BUS_PRTY           : 6'h02         ,
+	IFU_BUS_TAG            : 8'h03         ,
+	INST_ACCESS_ADDR0      : 36'h000000000  ,
+	INST_ACCESS_ADDR1      : 36'h000000000  ,
+	INST_ACCESS_ADDR2      : 36'h000000000  ,
+	INST_ACCESS_ADDR3      : 36'h000000000  ,
+	INST_ACCESS_ADDR4      : 36'h000000000  ,
+	INST_ACCESS_ADDR5      : 36'h000000000  ,
+	INST_ACCESS_ADDR6      : 36'h000000000  ,
+	INST_ACCESS_ADDR7      : 36'h000000000  ,
+	INST_ACCESS_ENABLE0    : 5'h00         ,
+	INST_ACCESS_ENABLE1    : 5'h00         ,
+	INST_ACCESS_ENABLE2    : 5'h00         ,
+	INST_ACCESS_ENABLE3    : 5'h00         ,
+	INST_ACCESS_ENABLE4    : 5'h00         ,
+	INST_ACCESS_ENABLE5    : 5'h00         ,
+	INST_ACCESS_ENABLE6    : 5'h00         ,
+	INST_ACCESS_ENABLE7    : 5'h00         ,
+	INST_ACCESS_MASK0      : 36'h0FFFFFFFF  ,
+	INST_ACCESS_MASK1      : 36'h0FFFFFFFF  ,
+	INST_ACCESS_MASK2      : 36'h0FFFFFFFF  ,
+	INST_ACCESS_MASK3      : 36'h0FFFFFFFF  ,
+	INST_ACCESS_MASK4      : 36'h0FFFFFFFF  ,
+	INST_ACCESS_MASK5      : 36'h0FFFFFFFF  ,
+	INST_ACCESS_MASK6      : 36'h0FFFFFFFF  ,
+	INST_ACCESS_MASK7      : 36'h0FFFFFFFF  ,
+	LOAD_TO_USE_PLUS1      : 5'h00         ,
+	LSU2DMA                : 5'h00         ,
+	LSU_BUS_ID             : 5'h01         ,
+	LSU_BUS_PRTY           : 6'h02         ,
+	LSU_BUS_TAG            : 8'h03         ,
+	LSU_NUM_NBLOAD         : 9'h004        ,
+	LSU_NUM_NBLOAD_WIDTH   : 7'h02         ,
+	LSU_SB_BITS            : 9'h00C        ,
+	LSU_STBUF_DEPTH        : 8'h04         ,
+	NO_ICCM_NO_ICACHE      : 5'h00         ,
+	PIC_2CYCLE             : 5'h00         ,
+	PIC_BASE_ADDR          : 36'h0F00C0000  ,
+	PIC_BITS               : 9'h00F        ,
+	PIC_INT_WORDS          : 8'h01         ,
+	PIC_REGION             : 8'h0F         ,
+	PIC_SIZE               : 13'h0020       ,
+	PIC_TOTAL_INT          : 12'h01F        ,
+	PIC_TOTAL_INT_PLUS1    : 13'h0020       ,
+	RET_STACK_SIZE         : 8'h08         ,
+	SB_BUS_ID              : 5'h01         ,
+	SB_BUS_PRTY            : 6'h02         ,
+	SB_BUS_TAG             : 8'h01         ,
+	TIMER_LEGAL_EN         : 5'h01         
+}
+// parameter eb1_param_t pt = 2271'h0404020000E0200000000000008081000030400040081E090B040100060210C00000000000000000000000000000000000000000000000000000000000000000000000000000000003FFFFFFFC3FFFFFFFC3FFFFFFFC3FFFFFFFC3FFFFFFFC3FFFFFFFC3FFFFFFFC3FFFFFFFC103020401C213840103C3C01000000040818428042010840830C2010281840200081002008E0C0801004040800C01002100400606810104100C080C080200810A0AFFFF00000102101800000000000000000000000000000000000000000000000000000000000000000000000000000000007FFFFFFF87FFFFFFF87FFFFFFF87FFFFFFF87FFFFFFF87FFFFFFF87FFFFFFF87FFFFFFF8001080C080818080007806000003C043C04003E02008084021
diff --git a/verilog/rtl/BrqRV_EB1/design/eb1_pdef.vh b/verilog/rtl/BrqRV_EB1/design/eb1_pdef.vh
new file mode 100644
index 0000000..af6de1e
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/eb1_pdef.vh
@@ -0,0 +1,175 @@
+typedef struct packed {
+	bit [7:0]      BHT_ADDR_HI;
+	bit [5:0]      BHT_ADDR_LO;
+	bit [14:0]     BHT_ARRAY_DEPTH;
+	bit [4:0]      BHT_GHR_HASH_1;
+	bit [7:0]      BHT_GHR_SIZE;
+	bit [15:0]     BHT_SIZE;
+	bit [4:0]      BITMANIP_ZBA;
+	bit [4:0]      BITMANIP_ZBB;
+	bit [4:0]      BITMANIP_ZBC;
+	bit [4:0]      BITMANIP_ZBE;
+	bit [4:0]      BITMANIP_ZBF;
+	bit [4:0]      BITMANIP_ZBP;
+	bit [4:0]      BITMANIP_ZBR;
+	bit [4:0]      BITMANIP_ZBS;
+	bit [8:0]      BTB_ADDR_HI;
+	bit [5:0]      BTB_ADDR_LO;
+	bit [12:0]     BTB_ARRAY_DEPTH;
+	bit [4:0]      BTB_BTAG_FOLD;
+	bit [8:0]      BTB_BTAG_SIZE;
+	bit [4:0]      BTB_ENABLE;
+	bit [4:0]      BTB_FOLD2_INDEX_HASH;
+	bit [4:0]      BTB_FULLYA;
+	bit [8:0]      BTB_INDEX1_HI;
+	bit [8:0]      BTB_INDEX1_LO;
+	bit [8:0]      BTB_INDEX2_HI;
+	bit [8:0]      BTB_INDEX2_LO;
+	bit [8:0]      BTB_INDEX3_HI;
+	bit [8:0]      BTB_INDEX3_LO;
+	bit [13:0]     BTB_SIZE;
+	bit [8:0]      BTB_TOFFSET_SIZE;
+	bit            BUILD_AHB_LITE;
+	bit [4:0]      BUILD_AXI4;
+	bit [4:0]      BUILD_AXI_NATIVE;
+	bit [5:0]      BUS_PRTY_DEFAULT;
+	bit [35:0]     DATA_ACCESS_ADDR0;
+	bit [35:0]     DATA_ACCESS_ADDR1;
+	bit [35:0]     DATA_ACCESS_ADDR2;
+	bit [35:0]     DATA_ACCESS_ADDR3;
+	bit [35:0]     DATA_ACCESS_ADDR4;
+	bit [35:0]     DATA_ACCESS_ADDR5;
+	bit [35:0]     DATA_ACCESS_ADDR6;
+	bit [35:0]     DATA_ACCESS_ADDR7;
+	bit [4:0]      DATA_ACCESS_ENABLE0;
+	bit [4:0]      DATA_ACCESS_ENABLE1;
+	bit [4:0]      DATA_ACCESS_ENABLE2;
+	bit [4:0]      DATA_ACCESS_ENABLE3;
+	bit [4:0]      DATA_ACCESS_ENABLE4;
+	bit [4:0]      DATA_ACCESS_ENABLE5;
+	bit [4:0]      DATA_ACCESS_ENABLE6;
+	bit [4:0]      DATA_ACCESS_ENABLE7;
+	bit [35:0]     DATA_ACCESS_MASK0;
+	bit [35:0]     DATA_ACCESS_MASK1;
+	bit [35:0]     DATA_ACCESS_MASK2;
+	bit [35:0]     DATA_ACCESS_MASK3;
+	bit [35:0]     DATA_ACCESS_MASK4;
+	bit [35:0]     DATA_ACCESS_MASK5;
+	bit [35:0]     DATA_ACCESS_MASK6;
+	bit [35:0]     DATA_ACCESS_MASK7;
+	bit [6:0]      DCCM_BANK_BITS;
+	bit [8:0]      DCCM_BITS;
+	bit [6:0]      DCCM_BYTE_WIDTH;
+	bit [9:0]      DCCM_DATA_WIDTH;
+	bit [6:0]      DCCM_ECC_WIDTH;
+	bit [4:0]      DCCM_ENABLE;
+	bit [9:0]      DCCM_FDATA_WIDTH;
+	bit [7:0]      DCCM_INDEX_BITS;
+	bit [8:0]      DCCM_NUM_BANKS;
+	bit [7:0]      DCCM_REGION;
+	bit [35:0]     DCCM_SADR;
+	bit [13:0]     DCCM_SIZE;
+	bit [5:0]      DCCM_WIDTH_BITS;
+	bit [6:0]      DIV_BIT;
+	bit [4:0]      DIV_NEW;
+	bit [6:0]      DMA_BUF_DEPTH;
+	bit [8:0]      DMA_BUS_ID;
+	bit [5:0]      DMA_BUS_PRTY;
+	bit [7:0]      DMA_BUS_TAG;
+	bit [4:0]      FAST_INTERRUPT_REDIRECT;
+	bit [4:0]      ICACHE_2BANKS;
+	bit [6:0]      ICACHE_BANK_BITS;
+	bit [6:0]      ICACHE_BANK_HI;
+	bit [5:0]      ICACHE_BANK_LO;
+	bit [7:0]      ICACHE_BANK_WIDTH;
+	bit [6:0]      ICACHE_BANKS_WAY;
+	bit [7:0]      ICACHE_BEAT_ADDR_HI;
+	bit [7:0]      ICACHE_BEAT_BITS;
+	bit [4:0]      ICACHE_BYPASS_ENABLE;
+	bit [17:0]     ICACHE_DATA_DEPTH;
+	bit [6:0]      ICACHE_DATA_INDEX_LO;
+	bit [10:0]     ICACHE_DATA_WIDTH;
+	bit [4:0]      ICACHE_ECC;
+	bit [4:0]      ICACHE_ENABLE;
+	bit [10:0]     ICACHE_FDATA_WIDTH;
+	bit [8:0]      ICACHE_INDEX_HI;
+	bit [10:0]     ICACHE_LN_SZ;
+	bit [7:0]      ICACHE_NUM_BEATS;
+	bit [7:0]      ICACHE_NUM_BYPASS;
+	bit [7:0]      ICACHE_NUM_BYPASS_WIDTH;
+	bit [6:0]      ICACHE_NUM_WAYS;
+	bit [4:0]      ICACHE_ONLY;
+	bit [7:0]      ICACHE_SCND_LAST;
+	bit [12:0]     ICACHE_SIZE;
+	bit [6:0]      ICACHE_STATUS_BITS;
+	bit [4:0]      ICACHE_TAG_BYPASS_ENABLE;
+	bit [16:0]     ICACHE_TAG_DEPTH;
+	bit [6:0]      ICACHE_TAG_INDEX_LO;
+	bit [8:0]      ICACHE_TAG_LO;
+	bit [7:0]      ICACHE_TAG_NUM_BYPASS;
+	bit [7:0]      ICACHE_TAG_NUM_BYPASS_WIDTH;
+	bit [4:0]      ICACHE_WAYPACK;
+	bit [6:0]      ICCM_BANK_BITS;
+	bit [8:0]      ICCM_BANK_HI;
+	bit [8:0]      ICCM_BANK_INDEX_LO;
+	bit [8:0]      ICCM_BITS;
+	bit [4:0]      ICCM_ENABLE;
+	bit [4:0]      ICCM_ICACHE;
+	bit [7:0]      ICCM_INDEX_BITS;
+	bit [8:0]      ICCM_NUM_BANKS;
+	bit [4:0]      ICCM_ONLY;
+	bit [7:0]      ICCM_REGION;
+	bit [35:0]     ICCM_SADR;
+	bit [13:0]     ICCM_SIZE;
+	bit [4:0]      IFU_BUS_ID;
+	bit [5:0]      IFU_BUS_PRTY;
+	bit [7:0]      IFU_BUS_TAG;
+	bit [35:0]     INST_ACCESS_ADDR0;
+	bit [35:0]     INST_ACCESS_ADDR1;
+	bit [35:0]     INST_ACCESS_ADDR2;
+	bit [35:0]     INST_ACCESS_ADDR3;
+	bit [35:0]     INST_ACCESS_ADDR4;
+	bit [35:0]     INST_ACCESS_ADDR5;
+	bit [35:0]     INST_ACCESS_ADDR6;
+	bit [35:0]     INST_ACCESS_ADDR7;
+	bit [4:0]      INST_ACCESS_ENABLE0;
+	bit [4:0]      INST_ACCESS_ENABLE1;
+	bit [4:0]      INST_ACCESS_ENABLE2;
+	bit [4:0]      INST_ACCESS_ENABLE3;
+	bit [4:0]      INST_ACCESS_ENABLE4;
+	bit [4:0]      INST_ACCESS_ENABLE5;
+	bit [4:0]      INST_ACCESS_ENABLE6;
+	bit [4:0]      INST_ACCESS_ENABLE7;
+	bit [35:0]     INST_ACCESS_MASK0;
+	bit [35:0]     INST_ACCESS_MASK1;
+	bit [35:0]     INST_ACCESS_MASK2;
+	bit [35:0]     INST_ACCESS_MASK3;
+	bit [35:0]     INST_ACCESS_MASK4;
+	bit [35:0]     INST_ACCESS_MASK5;
+	bit [35:0]     INST_ACCESS_MASK6;
+	bit [35:0]     INST_ACCESS_MASK7;
+	bit [4:0]      LOAD_TO_USE_PLUS1;
+	bit [4:0]      LSU2DMA;
+	bit [4:0]      LSU_BUS_ID;
+	bit [5:0]      LSU_BUS_PRTY;
+	bit [7:0]      LSU_BUS_TAG;
+	bit [8:0]      LSU_NUM_NBLOAD;
+	bit [6:0]      LSU_NUM_NBLOAD_WIDTH;
+	bit [8:0]      LSU_SB_BITS;
+	bit [7:0]      LSU_STBUF_DEPTH;
+	bit [4:0]      NO_ICCM_NO_ICACHE;
+	bit [4:0]      PIC_2CYCLE;
+	bit [35:0]     PIC_BASE_ADDR;
+	bit [8:0]      PIC_BITS;
+	bit [7:0]      PIC_INT_WORDS;
+	bit [7:0]      PIC_REGION;
+	bit [12:0]     PIC_SIZE;
+	bit [11:0]     PIC_TOTAL_INT;
+	bit [12:0]     PIC_TOTAL_INT_PLUS1;
+	bit [7:0]      RET_STACK_SIZE;
+	bit [4:0]      SB_BUS_ID;
+	bit [5:0]      SB_BUS_PRTY;
+	bit [7:0]      SB_BUS_TAG;
+	bit [4:0]      TIMER_LEGAL_EN;
+} eb1_param_t;
+
diff --git a/verilog/rtl/BrqRV_EB1/design/iccm_controller.v b/verilog/rtl/BrqRV_EB1/design/iccm_controller.v
new file mode 100644
index 0000000..c28bc9d
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/iccm_controller.v
@@ -0,0 +1,120 @@
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Byte-stream ICCM programmer: packs four received bytes (first byte = MSB) into one 32-bit word,
+// pulses we_o for one cycle per word, and latches reset_o once the end marker 32'h00000fff is seen.
+module eb1_iccm_controller (
+	clk_i,
+	rst_ni,
+	rx_dv_i,
+	rx_byte_i,
+	we_o,
+	addr_o,
+	wdata_o,
+	reset_o
+);
+	input wire clk_i;
+	input wire rst_ni; // asynchronous reset, active low
+	input wire rx_dv_i; // requests a LOAD cycle, which captures rx_byte_i on the following clock edge
+	input wire [7:0] rx_byte_i;
+	output wire we_o;
+	output wire [13:0] addr_o;
+	output wire [31:0] wdata_o;
+	output wire reset_o;
+	reg [1:0] ctrl_fsm_cs;
+	reg [1:0] ctrl_fsm_ns;
+	wire [7:0] rx_byte_d;
+	reg [7:0] rx_byte_q0; // first byte of a word, drives wdata_o[31:24]
+	reg [7:0] rx_byte_q1;
+	reg [7:0] rx_byte_q2;
+	reg [7:0] rx_byte_q3; // fourth byte of a word, drives wdata_o[7:0]
+	reg we_q;
+	reg we_d;
+	reg [13:0] addr_q;
+	reg [13:0] addr_d;
+	reg reset_q;
+	reg reset_d;
+	reg [1:0] byte_count; // selects which byte register the next LOAD cycle fills
+	localparam [1:0] DONE = 3;
+	localparam [1:0] LOAD = 1;
+	localparam [1:0] PROG = 2;
+	localparam [1:0] RESET = 0;
+	always @(*) begin
+		we_d = we_q; // defaults: hold every registered value and stay in the current state
+		addr_d = addr_q;
+		reset_d = reset_q;
+		ctrl_fsm_ns = ctrl_fsm_cs;
+		case (ctrl_fsm_cs)
+			RESET: begin
+				we_d = 1'b0;
+				reset_d = 1'b0;
+				if (rx_dv_i) // otherwise the default keeps the FSM in RESET
+					ctrl_fsm_ns = LOAD;
+			end
+			LOAD: // write on the 4th byte of every word except the end marker; compare the whole word so data containing 8'h0f or 8'hff bytes is not dropped
+				if ((byte_count == 2'b11) && ({rx_byte_q0, rx_byte_q1, rx_byte_q2, rx_byte_d} != 32'h00000fff)) begin
+					we_d = 1'b1;
+					ctrl_fsm_ns = PROG;
+				end
+				else
+					ctrl_fsm_ns = DONE;
+			PROG: begin // we_o is high during this state; drop it again for the next cycle
+				we_d = 1'b0;
+				ctrl_fsm_ns = DONE;
+			end
+			DONE:
+				if (wdata_o == 32'h00000fff) begin // end marker assembled: stay here and raise reset_o
+					ctrl_fsm_ns = DONE;
+					reset_d = 1'b1;
+				end
+				else if (rx_dv_i)
+					ctrl_fsm_ns = LOAD;
+				else
+					ctrl_fsm_ns = DONE;
+			default: ctrl_fsm_ns = RESET;
+		endcase
+	end
+	assign rx_byte_d = rx_byte_i;
+	assign we_o = we_q;
+	assign addr_o = addr_q;
+	assign wdata_o = {rx_byte_q0, rx_byte_q1, rx_byte_q2, rx_byte_q3};
+	assign reset_o = reset_q;
+	always @(posedge clk_i or negedge rst_ni)
+		if (!rst_ni) begin
+			we_q <= 1'b0;
+			addr_q <= 14'b00000000000000;
+			rx_byte_q0 <= 8'b00000000;
+			rx_byte_q1 <= 8'b00000000;
+			rx_byte_q2 <= 8'b00000000;
+			rx_byte_q3 <= 8'b00000000;
+			reset_q <= 1'b0;
+			byte_count <= 2'b00;
+			ctrl_fsm_cs <= RESET;
+		end
+		else begin
+			we_q <= we_d;
+			if (ctrl_fsm_cs == LOAD) begin // each LOAD cycle stores one byte and advances byte_count (wraps 3 -> 0)
+				if (byte_count == 2'b00) begin
+					rx_byte_q0 <= rx_byte_d;
+					byte_count <= 2'b01;
+				end
+				else if (byte_count == 2'b01) begin
+					rx_byte_q1 <= rx_byte_d;
+					byte_count <= 2'b10;
+				end
+				else if (byte_count == 2'b10) begin
+					rx_byte_q2 <= rx_byte_d;
+					byte_count <= 2'b11;
+				end
+				else begin
+					rx_byte_q3 <= rx_byte_d;
+					byte_count <= 2'b00;
+				end
+				addr_q <= addr_d;
+			end
+			if (ctrl_fsm_cs == PROG)
+				addr_q <= addr_d + 2'h2; // advance the write address by 2 after each written word
+			reset_q <= reset_d;
+			ctrl_fsm_cs <= ctrl_fsm_ns;
+		end
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/pd_defines.vh b/verilog/rtl/BrqRV_EB1/design/pd_defines.vh
new file mode 100644
index 0000000..0b9763c
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/pd_defines.vh
@@ -0,0 +1,11 @@
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+// This is an automatically generated file by hshabbir on 08 June 2021, 08:16:54 PKT
+//
+// cmd:    brqrv -target=default -set build_axi4 
+//
+
+`include "common_defines.vh"
+`undef RV_ASSERT_ON
+`undef TEC_RV_ICG
+`define TEC_RV_ICG sky130_fd_sc_hd__dlclkp_1
+`define RV_PHYSICAL 1
diff --git a/verilog/rtl/BrqRV_EB1/design/rvjtag_tap.v b/verilog/rtl/BrqRV_EB1/design/rvjtag_tap.v
new file mode 100644
index 0000000..d4969b3
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/rvjtag_tap.v
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2019 MERL Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License
+
+module rvjtag_tap #(  // JTAG TAP controller with IR/DR shift logic; the updated DR is unpacked into wr_addr/wr_data/wr_en/rd_en
+parameter AWIDTH = 7  // width of wr_addr
+)
+(
+input               trst,  // asynchronous reset, active low
+input               tck,  // JTAG clock
+input               tms,  // selects the next TAP state
+input               tdi,  // serial data shifted into sr
+output   reg        tdo,  // sr[0], registered on the falling edge of tck
+output              tdoEnable,  // high in Shift-DR and Shift-IR
+
+output [31:0]       wr_data,  // dr[33:2]
+output [AWIDTH-1:0] wr_addr,  // dr[AWIDTH+33:34]
+output              wr_en,  // dr[1]
+output              rd_en,  // dr[0]
+
+input   [31:0]      rd_data,  // captured into sr[33:2] when IR = 5'h11
+input   [1:0]       rd_status,  // captured into sr[1:0] when IR = 5'h11
+
+output  reg         dmi_reset,  // loads sr[16] on Update-DR with IR = 5'h10, otherwise returns to 0
+output  reg         dmi_hard_reset,  // loads sr[17] on Update-DR with IR = 5'h10, otherwise returns to 0
+
+input   [2:0]       idle,  // captured into sr[14:12] when IR = 5'h10
+input   [1:0]       dmi_stat,  // captured into sr[11:10] when IR = 5'h10
+/*
+--  revisionCode        : 4'h0;
+--  manufacturersIdCode : 11'h45;
+--  deviceIdCode        : 16'h0001;
+--  order MSB .. LSB -> [4 bit version or revision] [16 bit part number] [11 bit manufacturer id] [value of 1'b1 in LSB]
+*/
+input   [31:1]      jtag_id,  // captured as {jtag_id, 1'b1} when IR = 5'h01
+input   [3:0]       version  // captured into sr[3:0] when IR = 5'h10
+);
+
+localparam USER_DR_LENGTH = AWIDTH + 34;  // address + 32 data bits + 2 low bits (wr_en/rd_en on update, rd_status on capture)
+
+
+reg [USER_DR_LENGTH-1:0] sr, nsr, dr;  // sr: shift register, nsr: its next value, dr: copy taken on Update-DR
+
+///////////////////////////////////////////////////////
+//                      Tap controller
+///////////////////////////////////////////////////////
+logic[3:0] state, nstate;
+logic [4:0] ir;
+wire jtag_reset;
+wire shift_dr;
+wire pause_dr;
+wire update_dr;
+wire capture_dr;
+wire shift_ir;
+wire pause_ir ;
+wire update_ir ;
+wire capture_ir;
+wire[1:0] dr_en;  // [0]: IR = 5'h10, [1]: IR = 5'h11
+wire devid_sel;  // IR = 5'h01
+wire [5:0] abits;  // low 6 bits of AWIDTH
+
+assign abits = AWIDTH[5:0];
+
+// TAP state encodings; state is 4 bits wide, so the 16 values below cover every encoding
+localparam TEST_LOGIC_RESET_STATE = 0;
+localparam RUN_TEST_IDLE_STATE    = 1;
+localparam SELECT_DR_SCAN_STATE   = 2;
+localparam CAPTURE_DR_STATE       = 3;
+localparam SHIFT_DR_STATE         = 4;
+localparam EXIT1_DR_STATE         = 5;
+localparam PAUSE_DR_STATE         = 6;
+localparam EXIT2_DR_STATE         = 7;
+localparam UPDATE_DR_STATE        = 8;
+localparam SELECT_IR_SCAN_STATE   = 9;
+localparam CAPTURE_IR_STATE       = 10;
+localparam SHIFT_IR_STATE         = 11;
+localparam EXIT1_IR_STATE         = 12;
+localparam PAUSE_IR_STATE         = 13;
+localparam EXIT2_IR_STATE         = 14;
+localparam UPDATE_IR_STATE        = 15;
+// Next-state logic: each state picks its successor from tms
+always_comb  begin
+    nstate = state;
+    case(state)
+    TEST_LOGIC_RESET_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : RUN_TEST_IDLE_STATE;
+    RUN_TEST_IDLE_STATE:    nstate = tms ? SELECT_DR_SCAN_STATE   : RUN_TEST_IDLE_STATE;
+    SELECT_DR_SCAN_STATE:   nstate = tms ? SELECT_IR_SCAN_STATE   : CAPTURE_DR_STATE;
+    CAPTURE_DR_STATE:       nstate = tms ? EXIT1_DR_STATE         : SHIFT_DR_STATE;
+    SHIFT_DR_STATE:         nstate = tms ? EXIT1_DR_STATE         : SHIFT_DR_STATE;
+    EXIT1_DR_STATE:         nstate = tms ? UPDATE_DR_STATE        : PAUSE_DR_STATE;
+    PAUSE_DR_STATE:         nstate = tms ? EXIT2_DR_STATE         : PAUSE_DR_STATE;
+    EXIT2_DR_STATE:         nstate = tms ? UPDATE_DR_STATE        : SHIFT_DR_STATE;
+    UPDATE_DR_STATE:        nstate = tms ? SELECT_DR_SCAN_STATE   : RUN_TEST_IDLE_STATE;
+    SELECT_IR_SCAN_STATE:   nstate = tms ? TEST_LOGIC_RESET_STATE : CAPTURE_IR_STATE;
+    CAPTURE_IR_STATE:       nstate = tms ? EXIT1_IR_STATE         : SHIFT_IR_STATE;
+    SHIFT_IR_STATE:         nstate = tms ? EXIT1_IR_STATE         : SHIFT_IR_STATE;
+    EXIT1_IR_STATE:         nstate = tms ? UPDATE_IR_STATE        : PAUSE_IR_STATE;
+    PAUSE_IR_STATE:         nstate = tms ? EXIT2_IR_STATE         : PAUSE_IR_STATE;
+    EXIT2_IR_STATE:         nstate = tms ? UPDATE_IR_STATE        : SHIFT_IR_STATE;
+    UPDATE_IR_STATE:        nstate = tms ? SELECT_DR_SCAN_STATE   : RUN_TEST_IDLE_STATE;
+    default:                nstate = TEST_LOGIC_RESET_STATE;
+    endcase
+end
+// State register: asynchronous reset to Test-Logic-Reset, updated on the rising edge of tck
+always @ (posedge tck or negedge trst) begin
+    if(!trst) state <= TEST_LOGIC_RESET_STATE;
+    else state <= nstate;
+end
+// Per-state decodes of the current TAP state
+assign jtag_reset = state == TEST_LOGIC_RESET_STATE;
+assign shift_dr   = state == SHIFT_DR_STATE;
+assign pause_dr   = state == PAUSE_DR_STATE;
+assign update_dr  = state == UPDATE_DR_STATE;
+assign capture_dr = state == CAPTURE_DR_STATE;
+assign shift_ir   = state == SHIFT_IR_STATE;
+assign pause_ir   = state == PAUSE_IR_STATE;
+assign update_ir  = state == UPDATE_IR_STATE;
+assign capture_ir = state == CAPTURE_IR_STATE;
+
+assign tdoEnable = shift_dr | shift_ir;
+
+///////////////////////////////////////////////////////
+//                      IR register
+///////////////////////////////////////////////////////
+// Updated on the falling edge of tck; an all-zero value shifted into sr[4:0] is stored as 5'h1f
+always @ (negedge tck or negedge trst) begin
+   if (!trst) ir <= 5'b1;
+   else begin
+      if (jtag_reset) ir <= 5'b1;
+      else if (update_ir) ir <= (sr[4:0] == '0) ? 5'h1f :sr[4:0];
+   end
+end
+
+// Decode of the instruction values this TAP reacts to; anything else shifts as bypass
+assign devid_sel  = ir == 5'b00001;
+assign dr_en[0]   = ir == 5'b10000;
+assign dr_en[1]   = ir == 5'b10001;
+
+///////////////////////////////////////////////////////
+//                      Shift register
+///////////////////////////////////////////////////////
+always @ (posedge tck or negedge trst) begin
+    if(!trst)begin
+        sr <= '0;
+    end
+    else begin
+        sr <= nsr;
+    end
+end
+
+// SR next value: holds sr by default; each case(1) takes the first label that evaluates to 1
+always_comb begin
+    nsr = sr;
+    case(1)
+    shift_dr:   begin
+                    case(1)
+                    dr_en[1]:   nsr = {tdi, sr[USER_DR_LENGTH-1:1]};
+
+                    dr_en[0],
+                    devid_sel:  nsr = {{USER_DR_LENGTH-32{1'b0}},tdi, sr[31:1]};
+                    default:    nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass
+                    endcase
+                end
+    capture_dr: begin
+                    nsr[0] = 1'b0;  // survives only when none of the labels below match, leaving nsr = sr with bit 0 cleared
+                    case(1)
+                    dr_en[0]:   nsr = {{USER_DR_LENGTH-15{1'b0}}, idle, dmi_stat, abits, version};
+                    dr_en[1]:   nsr = {{AWIDTH{1'b0}}, rd_data, rd_status};
+                    devid_sel:  nsr = {{USER_DR_LENGTH-32{1'b0}}, jtag_id, 1'b1};
+                    endcase
+                end
+    shift_ir:   nsr = {{USER_DR_LENGTH-5{1'b0}},tdi, sr[4:1]};  // IR shifts through sr[4:0]
+    capture_ir: nsr = {{USER_DR_LENGTH-1{1'b0}},1'b1};  // IR capture value is 5'b00001
+    endcase
+end
+
+// TDO retiming: sr[0] is re-registered on the falling edge of tck (no reset)
+always @ (negedge tck ) tdo <= sr[0];
+
+// DMI CS register: loads sr[17:16] on Update-DR with IR = 5'h10 and returns to 0 on any other clock
+always @ (posedge tck or negedge trst) begin
+    if(!trst) begin
+        dmi_hard_reset <= 1'b0;
+        dmi_reset      <= 1'b0;
+    end
+    else if (update_dr & dr_en[0]) begin
+        dmi_hard_reset <= sr[17];
+        dmi_reset      <= sr[16];
+    end
+    else begin
+        dmi_hard_reset <= 1'b0;
+        dmi_reset      <= 1'b0;
+    end
+end
+
+// DR register: loaded from sr on Update-DR with IR = 5'h11; on any other clock the two LSBs are cleared
+always @ (posedge tck or negedge trst) begin
+    if(!trst)
+        dr <=  '0;
+    else begin
+        if (update_dr & dr_en[1])
+            dr <= sr;
+        else
+            dr <= {dr[USER_DR_LENGTH-1:2],2'b0};
+    end
+end
+
+assign {wr_addr, wr_data, wr_en, rd_en} = dr;  // MSB-first unpack: address, data, then the wr_en and rd_en bits
+
+
+
+
+endmodule
diff --git a/verilog/rtl/BrqRV_EB1/design/uart_rx_prog.v b/verilog/rtl/BrqRV_EB1/design/uart_rx_prog.v
new file mode 100644
index 0000000..71135fa
--- /dev/null
+++ b/verilog/rtl/BrqRV_EB1/design/uart_rx_prog.v
@@ -0,0 +1,146 @@
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+  
+module eb1_uart_rx_prog (
+   input         i_Clock,
+   input         rst_ni,
+   input         i_Rx_Serial,
+   input  [15:0] CLKS_PER_BIT,
+   output        o_Rx_DV,
+   output  [7:0] o_Rx_Byte
+   );
+
+  // ---------------------------------------------------------------------
+  // UART receiver: 1 start bit, 8 data bits (bit 0 first), 1 stop bit.
+  //
+  //   i_Clock       sampling clock
+  //   rst_ni        asynchronous reset, active low
+  //   i_Rx_Serial   serial input, idle high
+  //   CLKS_PER_BIT  number of i_Clock cycles per serial bit
+  //   o_Rx_DV       high for one clock after the stop-bit wait has elapsed
+  //   o_Rx_Byte     received byte
+  //
+  // The start bit is re-checked once the counter reaches (CLKS_PER_BIT-1)>>1;
+  // each later sample is taken once the counter reaches CLKS_PER_BIT-1.
+  // The level of the stop bit itself is not checked.
+  // ---------------------------------------------------------------------
+
+  // State encodings
+  parameter s_IDLE         = 3'b000;
+  parameter s_RX_START_BIT = 3'b001;
+  parameter s_RX_DATA_BITS = 3'b010;
+  parameter s_RX_STOP_BIT  = 3'b011;
+  parameter s_CLEANUP      = 3'b100;
+
+  // Two-stage synchroniser for the serial input (no reset, initialised high)
+  reg           r_Rx_Data_R = 1'b1;
+  reg           r_Rx_Data   = 1'b1;
+
+  // Receiver working registers
+  reg [15:0]    r_Clock_Count = 0;  // clocks counted within the current bit
+  reg [2:0]     r_Bit_Index   = 0;  // index of the data bit being received
+  reg [7:0]     r_Rx_Byte     = 0;  // byte under assembly
+  reg           r_Rx_DV       = 0;  // data-valid strobe
+  reg [2:0]     r_SM_Main     = 0;  // current state
+
+  // Synchronised line is low (start-bit level)
+  wire w_Line_Low    = (r_Rx_Data == 1'b0);
+  // Counter has reached the middle of the start bit
+  wire w_Mid_Bit     = (r_Clock_Count == ((CLKS_PER_BIT-1)>>1));
+  // Current bit period has not finished yet
+  wire w_Bit_Pending = (r_Clock_Count < CLKS_PER_BIT-1);
+  // At least one more data bit follows the current one
+  wire w_More_Bits   = (r_Bit_Index < 7);
+
+  // Input synchroniser
+  always @(posedge i_Clock) begin
+    r_Rx_Data_R <= i_Rx_Serial;
+    r_Rx_Data   <= r_Rx_Data_R;
+  end
+
+  // Receive state machine, asynchronously reset by rst_ni.
+  // r_SM_Main keeps its value in any branch that
+  // does not assign it.
+  always @(posedge i_Clock or negedge rst_ni) begin
+    if (!rst_ni) begin
+      // r_Rx_Byte is not part of the reset
+      r_SM_Main     <= s_IDLE;
+      r_Rx_DV       <= 1'b0;
+      r_Clock_Count <= 0;
+      r_Bit_Index   <= 0;
+    end
+    else begin
+      case (r_SM_Main)
+
+        // Wait for the line to go low
+        s_IDLE : begin
+          r_Rx_DV       <= 1'b0;
+          r_Clock_Count <= 0;
+          r_Bit_Index   <= 0;
+          if (w_Line_Low)
+            r_SM_Main <= s_RX_START_BIT;
+        end
+
+        // Re-check the line in the middle of the start bit;
+        // go back to idle if it is no longer low
+        s_RX_START_BIT : begin
+          if (w_Mid_Bit) begin
+            if (w_Line_Low) begin
+              r_Clock_Count <= 0;
+              r_SM_Main     <= s_RX_DATA_BITS;
+            end
+            else
+              r_SM_Main <= s_IDLE;
+          end
+          else
+            r_Clock_Count <= r_Clock_Count + 1;
+        end
+
+        // Store one sample per bit period into r_Rx_Byte[r_Bit_Index];
+        // leave after the sample for index 7
+        s_RX_DATA_BITS : begin
+          if (w_Bit_Pending)
+            r_Clock_Count <= r_Clock_Count + 1;
+          else begin
+            r_Clock_Count          <= 0;
+            r_Rx_Byte[r_Bit_Index] <= r_Rx_Data;
+            if (w_More_Bits)
+              r_Bit_Index <= r_Bit_Index + 1;
+            else begin
+              r_Bit_Index <= 0;
+              r_SM_Main   <= s_RX_STOP_BIT;
+            end
+          end
+        end
+
+        // Wait one more bit period, then raise the data-valid strobe
+        s_RX_STOP_BIT : begin
+          if (w_Bit_Pending)
+            r_Clock_Count <= r_Clock_Count + 1;
+          else begin
+            r_Rx_DV       <= 1'b1;
+            r_Clock_Count <= 0;
+            r_SM_Main     <= s_CLEANUP;
+          end
+        end
+
+        // Drop the strobe after one clock and return to idle
+        s_CLEANUP : begin
+          r_Rx_DV   <= 1'b0;
+          r_SM_Main <= s_IDLE;
+        end
+
+        // Any other encoding returns to idle
+        default :
+          r_SM_Main <= s_IDLE;
+
+      endcase
+    end
+  end
+
+  // Outputs
+  assign o_Rx_DV   = r_Rx_DV;
+  assign o_Rx_Byte = r_Rx_Byte;
+
+endmodule // uart_rx
+
diff --git a/verilog/rtl/uprj_netlists.v b/verilog/rtl/uprj_netlists.v
index 3537de8..d161757 100644
--- a/verilog/rtl/uprj_netlists.v
+++ b/verilog/rtl/uprj_netlists.v
@@ -15,6 +15,10 @@
 
 // Include caravel global defines for the number of the user project IO pads 
 `include "defines.v"
+//`include "BrqRV_EB1/design/Defines/eb1_pdef.vh"
+//`include "BrqRV_EB1/design/Defines/eb1_param.vh"
+//`include "BrqRV_EB1/design/Defines/common_defines.vh"
+
 `define USE_POWER_PINS
 
 `ifdef GL
@@ -23,6 +27,54 @@
     `include "gl/user_project_wrapper.v"
     `include "gl/user_proj_example.v"
 `else
+    `include "BrqRV_EB1/design/Defines/eb1_pdef.vh"
+    `include "BrqRV_EB1/design/Defines/eb1_param.vh" 
+    `include "BrqRV_EB1/design/Defines/common_defines.vh"
     `include "user_project_wrapper.v"
     `include "user_proj_example.v"
-`endif
\ No newline at end of file
+    `include "BrqRV_EB1/design/eb1_brqrv_wrapper.sv"
+    `include "BrqRV_EB1/design/eb1_mem.sv"
+    `include "BrqRV_EB1/design/eb1_pic_ctrl.sv"
+    `include "BrqRV_EB1/design/eb1_brqrv.sv"
+    `include "BrqRV_EB1/design/eb1_dma_ctrl.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu_aln_ctl.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu_compress_ctl.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu_ifc_ctl.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu_bp_ctl.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu_ic_mem.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu_mem_ctl.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu_iccm_mem.sv"
+    `include "BrqRV_EB1/design/ifu/eb1_ifu.sv"
+    `include "BrqRV_EB1/design/dec/eb1_dec_decode_ctl.sv"
+    `include "BrqRV_EB1/design/dec/eb1_dec_gpr_ctl.sv"
+    `include "BrqRV_EB1/design/dec/eb1_dec_ib_ctl.sv"
+    `include "BrqRV_EB1/design/dec/eb1_dec_tlu_ctl.sv"
+    `include "BrqRV_EB1/design/dec/eb1_dec_trigger.sv"
+    `include "BrqRV_EB1/design/dec/eb1_dec.sv"
+    `include "BrqRV_EB1/design/exu/eb1_exu_alu_ctl.sv"
+    `include "BrqRV_EB1/design/exu/eb1_exu_mul_ctl.sv"
+    `include "BrqRV_EB1/design/exu/eb1_exu_div_ctl.sv"
+    `include "BrqRV_EB1/design/exu/eb1_exu.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_clkdomain.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_addrcheck.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_lsc_ctl.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_stbuf.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_bus_buffer.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_bus_intf.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_ecc.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_dccm_mem.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_dccm_ctl.sv"
+    `include "BrqRV_EB1/design/lsu/eb1_lsu_trigger.sv"
+    `include "BrqRV_EB1/design/dbg/eb1_dbg.sv"
+    `include "BrqRV_EB1/design/dmi/dmi_wrapper.v"
+    `include "BrqRV_EB1/design/dmi/dmi_jtag_to_core_sync.v"
+    `include "BrqRV_EB1/design/dmi/rvjtag_tap.v"
+    `include "BrqRV_EB1/design/soc_files/uart_rx_prog.v"
+    `include "BrqRV_EB1/design/soc_files/iccm_controller.v"
+    `include "BrqRV_EB1/design/sky130_sram_1kbyte_1rw1r_32x256_8.v"
+    `include "BrqRV_EB1/design/lib/eb1_lib.sv"
+    `include "BrqRV_EB1/design/lib/beh_lib.sv"
+    `include "BrqRV_EB1/design/lib/mem_lib.sv"
+    //`include "BrqRV_EB1/design/lib"  // disabled: this path is a directory (see the lib/*.sv includes above), not a file, so it cannot be included
+`endif