| // Copyright 2019 ETH Zurich and University of Bologna. |
| // |
| // Copyright and related rights are licensed under the Solderpad Hardware |
| // License, Version 0.51 (the "License"); you may not use this file except in |
| // compliance with the License. You may obtain a copy of the License at |
| // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law |
| // or agreed to in writing, software, hardware and materials distributed under |
| // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR |
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations under the License. |
| |
| // Author: Stefan Mach <smach@iis.ee.ethz.ch> |
| |
| module fpnew_fma_multi #( |
| parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, |
| parameter int unsigned NumPipeRegs = 0, |
| parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, |
| parameter type TagType = logic, |
| parameter type AuxType = logic, |
| // Do not change |
| localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), |
| localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS |
| ) ( |
| input logic clk_i, |
| input logic rst_ni, |
| // Input signals |
| input logic [2:0][WIDTH-1:0] operands_i, // 3 operands |
| input logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands |
| input fpnew_pkg::roundmode_e rnd_mode_i, |
| input fpnew_pkg::operation_e op_i, |
| input logic op_mod_i, |
| input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands |
| input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result |
| input TagType tag_i, |
| input AuxType aux_i, |
| // Input Handshake |
| input logic in_valid_i, |
| output logic in_ready_o, |
| input logic flush_i, |
| // Output signals |
| output logic [WIDTH-1:0] result_o, |
| output fpnew_pkg::status_t status_o, |
| output logic extension_bit_o, |
| output TagType tag_o, |
| output AuxType aux_o, |
| // Output handshake |
| output logic out_valid_o, |
| input logic out_ready_i, |
| // Indication of valid data in flight |
| output logic busy_o |
| ); |
| |
| // ---------- |
| // Constants |
| // ---------- |
| // The super-format that can hold all formats |
| localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); |
| |
| localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; |
| localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; |
| |
| // Precision bits 'p' include the implicit bit |
| localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1; |
| // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection |
| localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3; |
| localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH); |
| // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid |
| // datapath leakage. This is either given by the exponent bits or the width of the LZC result. |
| // In most reasonable FP formats the internal exponent will be wider than the LZC result. |
| localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); |
| // Shift amount width: the largest shift distance used in this module is 3p+4 (the saturated |
| // addend shift as well as the maximum normalization shift of p+2 plus up to 2p+2 leading zeroes). |
| // The shift amount must therefore be able to represent the values 0..3p+4. Sizing it with |
| // $clog2(3p+3) lets the value 3p+4 wrap to 0 whenever 3p+4 is a power of two (e.g. p = 4). |
| localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); |
| // Pipelines |
| localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE |
| ? NumPipeRegs |
| : (PipeConfig == fpnew_pkg::DISTRIBUTED |
| ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs |
| : 0); // no regs here otherwise |
| localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE |
| ? NumPipeRegs |
| : (PipeConfig == fpnew_pkg::DISTRIBUTED |
| ? ((NumPipeRegs + 2) / 3) // First to get distributed regs |
| : 0); // no regs here otherwise |
| localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER |
| ? NumPipeRegs |
| : (PipeConfig == fpnew_pkg::DISTRIBUTED |
| ? (NumPipeRegs / 3) // Last to get distributed regs |
| : 0); // no regs here otherwise |
| |
| // ---------------- |
| // Type definition |
| // ---------------- |
| typedef struct packed { |
| logic sign; |
| logic [SUPER_EXP_BITS-1:0] exponent; |
| logic [SUPER_MAN_BITS-1:0] mantissa; |
| } fp_t; |
| |
| // --------------- |
| // Input pipeline |
| // --------------- |
| // Selected pipeline output signals as non-arrays |
| logic [2:0][WIDTH-1:0] operands_q; |
| fpnew_pkg::fp_format_e src_fmt_q; |
| fpnew_pkg::fp_format_e dst_fmt_q; |
| |
| // Input pipeline signals, index i holds signal after i register stages |
| logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; |
| logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q; |
| fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; |
| fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; |
| logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; |
| fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; |
| fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; |
| TagType [0:NUM_INP_REGS] inp_pipe_tag_q; |
| AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; |
| logic [0:NUM_INP_REGS] inp_pipe_valid_q; |
| // Ready signal is combinatorial for all stages |
| logic [0:NUM_INP_REGS] inp_pipe_ready; |
| |
| // Input stage: First element of pipeline is taken from inputs |
| assign inp_pipe_operands_q[0] = operands_i; |
| assign inp_pipe_is_boxed_q[0] = is_boxed_i; |
| assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; |
| assign inp_pipe_op_q[0] = op_i; |
| assign inp_pipe_op_mod_q[0] = op_mod_i; |
| assign inp_pipe_src_fmt_q[0] = src_fmt_i; |
| assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; |
| assign inp_pipe_tag_q[0] = tag_i; |
| assign inp_pipe_aux_q[0] = aux_i; |
| assign inp_pipe_valid_q[0] = in_valid_i; |
| // Input stage: Propagate pipeline ready signal to upstream circuitry |
| assign in_ready_o = inp_pipe_ready[0]; |
| // Generate the register stages |
| for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline |
| // One iteration per input register stage: stage i feeds the registers holding index i+1. |
| // With NUM_INP_REGS == 0 no registers are generated and index 0 is used directly downstream. |
| // Internal register enable for this stage |
| logic reg_ena; |
| // Determine the ready signal of the current stage - advance the pipeline: |
| // 1. if the next stage is ready for our data |
| // 2. if the next stage only holds a bubble (not valid) -> we can pop it |
| assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; |
| // Valid: enabled by ready signal, synchronous clear with the flush signal |
| // NOTE(review): FFLARNC/FFL are macros defined outside this file; the argument meaning |
| // (q, d, load-enable, [clear, clear value,] reset value) is presumed from usage - confirm. |
| `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) |
| // Enable register if pipeline ready and a valid data item is present |
| assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; |
| // Generate the pipeline registers within the stages, use enable-registers |
| // Data registers only load when a valid item advances, so bubbles do not toggle them. |
| `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) |
| `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) |
| `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) |
| `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) |
| `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) |
| `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) |
| `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) |
| `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) |
| `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) |
| end |
| // Output stage: assign selected pipe outputs to signals for later use |
| assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; |
| assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; |
| assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; |
| |
| // ----------------- |
| // Input processing |
| // ----------------- |
| logic [NUM_FORMATS-1:0][2:0] fmt_sign; |
| logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent; |
| logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa; |
| |
| fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q; |
| |
| // FP Input initialization |
| for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs |
| // Per-format unpacking of the three operands into sign / exponent / mantissa fields that are |
| // sized for the super-format, plus classification of each operand in that format. |
| // Set up some constants |
| localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); |
| localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); |
| localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); |
| |
| if (FpFmtConfig[fmt]) begin : active_format |
| // Operands cut down to the lower FP_WIDTH bits of this format |
| logic [2:0][FP_WIDTH-1:0] trimmed_ops; |
| |
| // Classify input |
| fpnew_classifier #( |
| .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), |
| .NumOperands ( 3 ) |
| ) i_fpnew_classifier ( |
| .operands_i ( trimmed_ops ), |
| .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ), |
| .info_o ( info_q[fmt] ) |
| ); |
| for (genvar op = 0; op < 3; op++) begin : gen_operands |
| // Sign is the MSB of the format, the exponent field sits directly above the mantissa field |
| assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0]; |
| assign fmt_sign[fmt][op] = operands_q[op][FP_WIDTH-1]; |
| assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]}); |
| // NOTE(review): the prepended is_normal bit lands at index SUPER_MAN_BITS after the shift and |
| // therefore appears to be dropped by the SUPER_MAN_BITS-wide target; the implicit bit is |
| // re-attached later when the internal mantissae are formed - confirm this is intended. |
| assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} << |
| (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa |
| end |
| end else begin : inactive_format |
| // Formats that are not enabled in FpFmtConfig drive don't-care values only |
| assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled |
| assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled |
| assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled |
| assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled |
| end |
| end |
| |
| fp_t operand_a, operand_b, operand_c; |
| fpnew_pkg::fp_info_t info_a, info_b, info_c; |
| |
| // Operation selection and operand adjustment |
| // | \c op_q | \c op_mod_q | Operation Adjustment |
| // |:--------:|:-----------:|--------------------- |
| // | FMADD | \c 0 | FMADD: none |
| // | FMADD | \c 1 | FMSUB: Invert sign of operand C |
| // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A |
| // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C |
| // | ADD | \c 0 | ADD: Set operand A to +1.0 |
| // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C |
| // | MUL | \c 0 | MUL: Set operand C to -0.0 |
| // | *others* | \c - | *invalid* |
| // \note \c op_mod_q always inverts the sign of the addend. |
| always_comb begin : op_select |
| // Selects the three internal operands and their classification for the requested operation. |
| // Multiplicands (A, B) are taken from the source format, the addend (C) from the destination |
| // format. The operation-specific overrides below are applied on top of these defaults. |
| |
| // Default assignments - packing-order-agnostic |
| operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]}; |
| operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]}; |
| operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]}; |
| info_a = info_q[src_fmt_q][0]; |
| info_b = info_q[src_fmt_q][1]; |
| info_c = info_q[dst_fmt_q][2]; |
| |
| // op_mod_q inverts sign of operand C |
| // (for MUL this has no effect, as operand C is overwritten completely below) |
| operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; |
| |
| unique case (inp_pipe_op_q[NUM_INP_REGS]) |
| fpnew_pkg::FMADD: ; // do nothing |
| fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product |
| fpnew_pkg::ADD: begin // Set multiplicand to +1 |
| // +1.0 is encoded with the bias of the source format as exponent and an all-zero mantissa |
| operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0}; |
| info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. |
| end |
| fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN) |
| operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; |
| info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. |
| end |
| default: begin // propagate don't cares |
| operand_a = '{default: fpnew_pkg::DONT_CARE}; |
| operand_b = '{default: fpnew_pkg::DONT_CARE}; |
| operand_c = '{default: fpnew_pkg::DONT_CARE}; |
| info_a = '{default: fpnew_pkg::DONT_CARE}; |
| info_b = '{default: fpnew_pkg::DONT_CARE}; |
| info_c = '{default: fpnew_pkg::DONT_CARE}; |
| end |
| endcase |
| end |
| |
| // --------------------- |
| // Input classification |
| // --------------------- |
| logic any_operand_inf; |
| logic any_operand_nan; |
| logic signalling_nan; |
| logic effective_subtraction; |
| logic tentative_sign; |
| |
| // Reduction for special case handling |
| assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf}); |
| assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan}); |
| assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling}); |
| // Effective subtraction in FMA occurs when product and addend signs differ |
| assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign; |
| // The tentative sign of the FMA shall be the sign of the product |
| assign tentative_sign = operand_a.sign ^ operand_b.sign; |
| |
| // ---------------------- |
| // Special case handling |
| // ---------------------- |
| logic [WIDTH-1:0] special_result; |
| fpnew_pkg::status_t special_status; |
| logic result_is_special; |
| |
| logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result; |
| fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status; |
| logic [NUM_FORMATS-1:0] fmt_result_is_special; |
| |
| |
| for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results |
| // Per-format generation of the bypass result (canonical qNaN or infinity), its status flags |
| // and the flag indicating that the regular FMA result must be replaced by the bypass result. |
| // Set up some constants |
| localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); |
| localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); |
| localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); |
| |
| // All-ones exponent is shared by NaN and infinity; the qNaN mantissa has only its MSB set |
| localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; |
| localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); |
| localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0; |
| |
| if (FpFmtConfig[fmt]) begin : active_format |
| always_comb begin : special_results |
| logic [FP_WIDTH-1:0] special_res; |
| |
| // Default assignment |
| special_res = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN |
| fmt_special_status[fmt] = '0; |
| fmt_result_is_special[fmt] = 1'b0; |
| |
| // Handle potentially mixed nan & infinity input => important for the case where infinity and |
| // zero are multiplied and added to a qnan. |
| // RISC-V mandates raising the NV exception in these cases: |
| // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs) |
| if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin |
| fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN |
| fmt_special_status[fmt].NV = 1'b1; // invalid operation |
| // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP |
| end else if (any_operand_nan) begin |
| fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN |
| fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling |
| // Special cases involving infinity |
| end else if (any_operand_inf) begin |
| fmt_result_is_special[fmt] = 1'b1; // bypass FMA |
| // Effective addition of opposite infinities (±inf - ±inf) is invalid! |
| // (the result stays at the default canonical qNaN in this case) |
| if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction) |
| fmt_special_status[fmt].NV = 1'b1; // invalid operation |
| // Handle cases where output will be inf because of inf product input |
| else if (info_a.is_inf || info_b.is_inf) begin |
| // Result is infinity with the sign of the product |
| special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA}; |
| // Handle cases where the addend is inf |
| end else if (info_c.is_inf) begin |
| // Result is infinity with sign of the addend (= operand_c) |
| special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA}; |
| end |
| end |
| // Initialize special result with ones (NaN-box) |
| // then place the format-wide result into the lower FP_WIDTH bits |
| fmt_special_result[fmt] = '1; |
| fmt_special_result[fmt][FP_WIDTH-1:0] = special_res; |
| end |
| end else begin : inactive_format |
| // Disabled formats never flag a special result and raise no status flags |
| assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; |
| assign fmt_special_status[fmt] = '0; |
| assign fmt_result_is_special[fmt] = 1'b0; |
| end |
| end |
| |
| // Special-case flag, selected by the destination format |
| assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same |
| // Signalling input NaNs raise invalid flag, otherwise no flags set |
| assign special_status = fmt_special_status[dst_fmt_q]; |
| // Assemble result according to destination format |
| assign special_result = fmt_special_result[dst_fmt_q]; // destination format |
| |
| // --------------------------- |
| // Initial exponent data path |
| // --------------------------- |
| logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c; |
| logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference; |
| logic signed [EXP_WIDTH-1:0] tentative_exponent; |
| |
| // Zero-extend exponents into signed container - implicit width extension |
| assign exponent_a = signed'({1'b0, operand_a.exponent}); |
| assign exponent_b = signed'({1'b0, operand_b.exponent}); |
| assign exponent_c = signed'({1'b0, operand_c.exponent}); |
| |
| // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx) |
| // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt. |
| assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm |
| // Biased product exponent is the sum of encoded exponents minus the bias. |
| assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp. |
| ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q)) |
| : signed'(exponent_a + info_a.is_subnormal |
| + exponent_b + info_b.is_subnormal |
| - 2*signed'(fpnew_pkg::bias(src_fmt_q)) |
| + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt |
| // Exponent difference is the addend exponent minus the product exponent |
| assign exponent_difference = exponent_addend - exponent_product; |
| // The tentative exponent will be the larger of the product or addend exponent |
| assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product; |
| |
| // Shift amount for addend based on exponents (unsigned as only right shifts) |
| logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt; |
| |
| always_comb begin : addend_shift_amount |
| // Fallback is the addend-anchored case: the addend stays in place (no shift) and the |
| // product only contributes to the sticky bit |
| addend_shamt = 0; |
| // Product-anchored case: saturate the shift so that the addend only ends up in the sticky bit |
| if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) begin |
| addend_shamt = 3 * PRECISION_BITS + 4; |
| // Overlapping case: addend and product share bit positions, shift by the exact distance |
| end else if (exponent_difference <= signed'(PRECISION_BITS + 2)) begin |
| addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference); |
| end |
| end |
| |
| // ------------------ |
| // Product data path |
| // ------------------ |
| logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c; |
| logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide |
| logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R) |
| |
| // Add implicit bits to mantissae |
| assign mantissa_a = {info_a.is_normal, operand_a.mantissa}; |
| assign mantissa_b = {info_b.is_normal, operand_b.mantissa}; |
| assign mantissa_c = {info_c.is_normal, operand_c.mantissa}; |
| |
| // Mantissa multiplier (a*b) |
| assign product = mantissa_a * mantissa_b; |
| |
| // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky: |
| // | 000...000 | product | RS | |
| // <- p+2 -> <- 2p -> < 2> |
| assign product_shifted = product << 2; // constant shift |
| |
| // ----------------- |
| // Addend data path |
| // ----------------- |
| logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on |
| logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky |
| logic sticky_before_add; // they are compressed into a single sticky bit |
| logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R) |
| logic inject_carry_in; // inject carry for subtractions if needed |
| |
| // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are |
| // shifted out and compressed into a sticky bit. |
| // BEFORE THE SHIFT: |
| // | mantissa_c | 000..000 | |
| // <- p -> <- 3p+4 -> |
| // AFTER THE SHIFT: |
| // | 000..........000 | mantissa_c | 000...............0GR | sticky bits | |
| // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p -> |
| assign {addend_after_shift, addend_sticky_bits} = |
| (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt; |
| |
| assign sticky_before_add = (| addend_sticky_bits); |
| |
| // In case of a subtraction, the addend is inverted |
| assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift; |
| assign inject_carry_in = effective_subtraction & ~sticky_before_add; |
| |
| // ------ |
| // Adder |
| // ------ |
| logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry |
| logic sum_carry; // observe carry bit from sum for sign fixing |
| logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow |
| logic final_sign; |
| |
| //Mantissa adder (ab+c). In normal addition, it cannot overflow. |
| assign sum_raw = product_shifted + addend_shifted + inject_carry_in; |
| assign sum_carry = sum_raw[3*PRECISION_BITS+4]; |
| |
| // Complement negative sum (can only happen in subtraction -> overflows for positive results) |
| assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; |
| |
| // In case of a mispredicted subtraction result, do a sign flip |
| assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) |
| ? 1'b1 |
| : (effective_subtraction ? 1'b0 : tentative_sign); |
| |
| // --------------- |
| // Internal pipeline |
| // --------------- |
| // Pipeline output signals as non-arrays |
| logic effective_subtraction_q; |
| logic signed [EXP_WIDTH-1:0] exponent_product_q; |
| logic signed [EXP_WIDTH-1:0] exponent_difference_q; |
| logic signed [EXP_WIDTH-1:0] tentative_exponent_q; |
| logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; |
| logic sticky_before_add_q; |
| logic [3*PRECISION_BITS+3:0] sum_q; |
| logic final_sign_q; |
| fpnew_pkg::fp_format_e dst_fmt_q2; |
| fpnew_pkg::roundmode_e rnd_mode_q; |
| logic result_is_special_q; |
| fp_t special_result_q; |
| fpnew_pkg::status_t special_status_q; |
| // Internal pipeline signals, index i holds signal after i register stages |
| logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; |
| logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; |
| logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; |
| logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; |
| logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; |
| logic [0:NUM_MID_REGS] mid_pipe_sticky_q; |
| logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; |
| logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; |
| fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; |
| fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; |
| logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; |
| fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; |
| fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; |
| TagType [0:NUM_MID_REGS] mid_pipe_tag_q; |
| AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; |
| logic [0:NUM_MID_REGS] mid_pipe_valid_q; |
| // Ready signal is combinatorial for all stages |
| logic [0:NUM_MID_REGS] mid_pipe_ready; |
| |
| // Input stage: First element of pipeline is taken from upstream logic |
| assign mid_pipe_eff_sub_q[0] = effective_subtraction; |
| assign mid_pipe_exp_prod_q[0] = exponent_product; |
| assign mid_pipe_exp_diff_q[0] = exponent_difference; |
| assign mid_pipe_tent_exp_q[0] = tentative_exponent; |
| assign mid_pipe_add_shamt_q[0] = addend_shamt; |
| assign mid_pipe_sticky_q[0] = sticky_before_add; |
| assign mid_pipe_sum_q[0] = sum; |
| assign mid_pipe_final_sign_q[0] = final_sign; |
| assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; |
| assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; |
| assign mid_pipe_res_is_spec_q[0] = result_is_special; |
| assign mid_pipe_spec_res_q[0] = special_result; |
| assign mid_pipe_spec_stat_q[0] = special_status; |
| assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; |
| assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; |
| assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; |
| // Input stage: Propagate pipeline ready signal to input pipe |
| assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; |
| |
| // Generate the register stages |
| for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline |
| // One iteration per internal register stage: stage i feeds the registers holding index i+1. |
| // The stage carries the adder results, exponent data and the special-case bypass information. |
| // Internal register enable for this stage |
| logic reg_ena; |
| // Determine the ready signal of the current stage - advance the pipeline: |
| // 1. if the next stage is ready for our data |
| // 2. if the next stage only holds a bubble (not valid) -> we can pop it |
| assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; |
| // Valid: enabled by ready signal, synchronous clear with the flush signal |
| // NOTE(review): FFLARNC/FFL are macros defined outside this file; the argument meaning |
| // (q, d, load-enable, [clear, clear value,] reset value) is presumed from usage - confirm. |
| `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) |
| // Enable register if pipeline ready and a valid data item is present |
| assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; |
| // Generate the pipeline registers within the stages, use enable-registers |
| `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) |
| `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) |
| `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) |
| `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) |
| `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) |
| `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) |
| `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) |
| `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) |
| `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) |
| `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) |
| `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) |
| `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) |
| `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) |
| `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) |
| `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) |
| end |
| // Output stage: assign selected pipe outputs to signals for later use |
| assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; |
| assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; |
| assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; |
| assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; |
| assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS]; |
| assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; |
| assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; |
| assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; |
| assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; |
| assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; |
| assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; |
| assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; |
| assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; |
| |
| // -------------- |
| // Normalization |
| // -------------- |
| logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched |
| logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes |
| logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count |
| logic lzc_zeroes; // in case only zeroes found |
| |
| logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount |
| logic signed [EXP_WIDTH-1:0] normalized_exponent; |
| |
| logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift |
| logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit |
| logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization |
| logic sticky_after_norm; // sticky bit after normalization |
| |
| logic signed [EXP_WIDTH-1:0] final_exponent; |
| |
| assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0]; |
| |
| // Leading zero counter for cancellations |
| // Operates on the lower 2p+3 bits of the registered sum only. |
| // NOTE(review): the lzc module is defined elsewhere; empty_o is presumed to flag an all-zero |
| // input, matching its use as lzc_zeroes below - confirm against the lzc implementation. |
| lzc #( |
| .WIDTH ( LOWER_SUM_WIDTH ), |
| .MODE ( 1 ) // MODE = 1 counts leading zeroes |
| ) i_lzc ( |
| .in_i ( sum_lower ), |
| .cnt_o ( leading_zero_count ), |
| .empty_o ( lzc_zeroes ) |
| ); |
| |
| assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); |
| |
| // Normalization shift amount based on exponents and LZC (unsigned as only left shifts) |
| always_comb begin : norm_shift_amount |
| // Fallback is the addend-anchored case: undo the initial addend shift and keep the |
| // tentative exponent |
| norm_shamt = addend_shamt_q; |
| normalized_exponent = tentative_exponent_q; |
| // Product-anchored results and cancellations are normalized with the help of the LZC |
| if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin |
| if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin |
| // Normal result (biased exponent >= 0, sum not all zero): undo the initial product |
| // shift, strip the counted zeroes and correct the exponent for that shift |
| norm_shamt = PRECISION_BITS + 2 + leading_zero_count; |
| normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; |
| end else begin |
| // Subnormal result: limit the shift so the mantissa lines up with the minimum |
| // exponent; subnormals are encoded with exponent 0 |
| norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q)); |
| normalized_exponent = 0; |
| end |
| end |
| end |
| |
| // Do the large normalization shift |
| assign sum_shifted = sum_q << norm_shamt; |
| |
| // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left |
| // or right of the (non-carry) MSB of the sum. |
| always_comb begin : small_norm |
| // Final 1-bit correction after the large normalization shift: splits the shifted sum into the |
| // p+1 bit mantissa (including round bit) and the remaining 2p+3 sticky bits, and adjusts the |
| // exponent when the leading one is one position left or right of the expected MSB. |
| // Default assignment, discarding carry bit |
| {final_mantissa, sum_sticky_bits} = sum_shifted; |
| final_exponent = normalized_exponent; |
| |
| // The normalized sum has overflown, align right and fix exponent |
| if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit |
| {final_mantissa, sum_sticky_bits} = sum_shifted >> 1; |
| final_exponent = normalized_exponent + 1; |
| // The normalized sum is normal, nothing to do |
| end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB |
| // do nothing |
| // The normalized sum is still denormal, align left - unless the result is already subnormal |
| end else if (normalized_exponent > 1) begin |
| {final_mantissa, sum_sticky_bits} = sum_shifted << 1; |
| final_exponent = normalized_exponent - 1; |
| // Otherwise we're denormal |
| end else begin |
| final_exponent = '0; |
| end |
| end |
| |
| // Update the sticky bit with the shifted-out bits |
| assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q; |
| |
| // ---------------------------- |
| // Rounding and classification |
| // ---------------------------- |
| // Rounder inputs: sign, absolute value {exponent, mantissa} and the {round, sticky} bit pair |
| logic                                     pre_round_sign; |
| logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; |
| logic [1:0]                               round_sticky_bits; |
| |
| // Per-format versions of the rounder inputs, one entry per format |
| logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; |
| logic [NUM_FORMATS-1:0][1:0]                               fmt_round_sticky_bits; |
| |
| // Rounder outputs: sign, absolute value after rounding and the exact-zero indication |
| logic                                     rounded_sign; |
| logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; |
| logic                                     result_zero; |
| |
| // Overflow / underflow classification, before and after rounding |
| logic of_before_round; |
| logic of_after_round; |
| logic uf_before_round; |
| logic uf_after_round; |
| |
| // Per-format post-rounding classification |
| logic [NUM_FORMATS-1:0] fmt_of_after_round; |
| logic [NUM_FORMATS-1:0] fmt_uf_after_round; |
| |
| // Classification before rounding. RISC-V mandates checking underflow AFTER rounding! |
| // Subnormal results have their exponent capped to 0 |
| assign uf_before_round = final_exponent == 0; |
| // The infinity exponent of the destination format is all ones |
| assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; |
| |
| // For every format: pack exponent and mantissa as {exponent, mantissa} and derive the {round, sticky} bits for rounding |
| for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble |
| // Exponent and mantissa widths of this format |
| localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); |
| localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); |
| |
| logic [EXP_BITS-1:0] pre_round_exponent; |
| logic [MAN_BITS-1:0] pre_round_mantissa; |
| |
| if (FpFmtConfig[fmt]) begin : active_format |
| |
| assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0]; |
| assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS]; |
| // Assemble result before rounding. In case of overflow, the largest normal value is set (exponent 2**EXP_BITS-2, mantissa all ones). |
| assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // zero-extended to SUPER_EXP_BITS+SUPER_MAN_BITS bits |
| |
| // Round bit is the mantissa bit directly below this format's mantissa (forced to 1 in case of overflow for rounding) |
| assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] | |
| of_before_round; |
| |
| // Sticky bit: remaining lower mantissa bits (if any) ORed with sticky_after_norm (forced to 1 in case of overflow for rounding) |
| if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky |
| assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) | |
| sticky_after_norm | of_before_round; |
| end else begin : normal_sticky |
| assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round; |
| end |
| end else begin : inactive_format |
| assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; |
| assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE}; |
| end |
| end |
| |
| // Select the rounder inputs for the destination format. In case of overflow the per-format |
| // value is the largest normal number and the round and sticky bits are set for proper rounding. |
| assign pre_round_abs     = fmt_pre_round_abs[dst_fmt_q2]; |
| assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2]; |
| assign pre_round_sign    = final_sign_q; |
| |
| // Rounding unit, sized to SUPER_EXP_BITS + SUPER_MAN_BITS bits of absolute value |
| fpnew_rounding #( |
|   .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS ) |
| ) i_fpnew_rounding ( |
|   // Value to be rounded |
|   .sign_i                  ( pre_round_sign          ), |
|   .abs_value_i             ( pre_round_abs           ), |
|   .round_sticky_bits_i     ( round_sticky_bits       ), |
|   // Rounding control |
|   .rnd_mode_i              ( rnd_mode_q              ), |
|   .effective_subtraction_i ( effective_subtraction_q ), |
|   // Rounded value |
|   .sign_o                  ( rounded_sign            ), |
|   .abs_rounded_o           ( rounded_abs             ), |
|   .exact_zero_o            ( result_zero             ) |
| ); |
| |
| // Per-format final result: rounded value with sign injected, upper bits filled with ones |
| logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; |
| |
| for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject |
|   // Field widths and total width of this format |
|   localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); |
|   localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); |
|   localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); |
| |
|   if (FpFmtConfig[fmt]) begin : active_format |
|     always_comb begin : post_process |
|       // NaN-box short formats: start from all ones, then place {sign, exponent, mantissa} |
|       fmt_result[fmt]               = '1; |
|       fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]}; |
| |
|       // Classify the rounded value by its exponent field |
|       fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. |
|       fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal |
|     end |
|   end else begin : inactive_format |
|     // Disabled formats only drive don't-care values |
|     assign fmt_result[fmt]         = '{default: fpnew_pkg::DONT_CARE}; |
|     assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE; |
|     assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE; |
|   end |
| end |
| |
| // Post-rounding classification of the destination format |
| assign of_after_round = fmt_of_after_round[dst_fmt_q2]; |
| assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; |
| |
| |
| // ----------------- |
| // Result selection |
| // ----------------- |
| // Result and exception flags of the regular (non-special) path |
| fpnew_pkg::status_t regular_status; |
| logic [WIDTH-1:0]   regular_result; |
| |
| // Regular result is the per-format result of the destination format |
| assign regular_result = fmt_result[dst_fmt_q2]; |
| |
| // Flags that are never raised here |
| assign regular_status.DZ = 1'b0; // no divisions |
| assign regular_status.NV = 1'b0; // only valid cases are handled in regular path |
| // Overflow is either present before rounding or introduced by rounding |
| assign regular_status.OF = of_before_round | of_after_round; |
| // Inexact if any round/sticky bit is set or on any overflow |
| assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; |
| // Only inexact results raise underflow |
| assign regular_status.UF = uf_after_round & regular_status.NX; |
| |
| // Final results for the output pipeline |
| fpnew_pkg::status_t status_d; |
| logic [WIDTH-1:0]   result_d; |
| |
| // Special-case detection overrides both result and status of the regular path |
| assign status_d = result_is_special_q ? special_status_q : regular_status; |
| assign result_d = result_is_special_q ? special_result_q : regular_result; |
| |
| // ---------------- |
| // Output Pipeline |
| // ---------------- |
| // Output pipeline signals, index i holds the signal after i register stages (index 0 is the unregistered input) |
| logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; |
| fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; |
| TagType [0:NUM_OUT_REGS] out_pipe_tag_q; |
| AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; |
| logic [0:NUM_OUT_REGS] out_pipe_valid_q; |
| // Ready signal is combinatorial for all stages |
| logic [0:NUM_OUT_REGS] out_pipe_ready; |
| |
| // Input stage: first element of the pipeline is fed by result_d/status_d and the last mid-pipeline stage |
| assign out_pipe_result_q[0] = result_d; |
| assign out_pipe_status_q[0] = status_d; |
| assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; |
| assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; |
| assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; |
| // Input stage: propagate the ready signal of this pipeline back to the last mid-pipeline stage |
| assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; |
| // Generate the NUM_OUT_REGS register stages of the output pipeline |
| for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline |
| // Internal register enable for this stage |
| logic reg_ena; |
| // Determine the ready signal of the current stage - advance the pipeline: |
| // 1. if the next stage is ready for our data |
| // 2. if the next stage only holds a bubble (not valid) -> we can pop it |
| assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; |
| // Valid: enabled by ready signal, synchronous clear with the flush signal |
| `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) |
| // Enable register if pipeline ready and a valid data item is present |
| assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; |
| // Generate the pipeline registers within the stages, use enable-registers |
| `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) |
| `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) |
| `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) |
| `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) |
| end |
| // Output stage: the downstream ready enters at the last stage and travels backwards |
| assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; |
| // Output stage: module outputs are taken from the last pipeline stage |
| assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS]; |
| assign result_o        = out_pipe_result_q[NUM_OUT_REGS]; |
| assign status_o        = out_pipe_status_q[NUM_OUT_REGS]; |
| assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS]; |
| assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS]; |
| assign extension_bit_o = 1'b1; // always NaN-Box result |
| // Busy as long as any valid bit of the input, middle or output pipeline is set |
| assign busy_o = (| inp_pipe_valid_q) | (| mid_pipe_valid_q) | (| out_pipe_valid_q); |
| endmodule |