| |
| `define OP_L 15:0 |
| `define OP_H 31:16 |
| |
| /** |
| * Fast Multiplier and Division |
| * |
| * 16x16 kernel multiplier and Long Division |
| */ |
| |
| |
| |
| module brq_exu_multdiv_fast #( |
| parameter brq_pkg::rv32m_e RV32M = brq_pkg::RV32MFast |
| ) ( |
| input logic clk_i, |
| input logic rst_ni, |
| input logic mult_en_i, // dynamic enable signal, for FSM control |
| input logic div_en_i, // dynamic enable signal, for FSM control |
| input logic mult_sel_i, // static decoder output, for data muxes |
| input logic div_sel_i, // static decoder output, for data muxes |
| input brq_pkg::md_op_e operator_i, |
| input logic [1:0] signed_mode_i, |
| input logic [31:0] op_a_i, |
| input logic [31:0] op_b_i, |
| input logic [33:0] alu_adder_ext_i, |
| input logic [31:0] alu_adder_i, |
| input logic equal_to_zero_i, |
| input logic data_ind_timing_i, |
| |
| output logic [32:0] alu_operand_a_o, |
| output logic [32:0] alu_operand_b_o, |
| |
| input logic [33:0] imd_val_q_i[2], |
| output logic [33:0] imd_val_d_o[2], |
| output logic [1:0] imd_val_we_o, |
| |
| input logic multdiv_ready_id_i, |
| |
| output logic [31:0] multdiv_result_o, |
| output logic valid_o |
| ); |
| |
| import brq_pkg::*; |
| |
| // Both multiplier variants |
| logic signed [34:0] mac_res_signed; |
| logic [34:0] mac_res_ext; |
| logic [33:0] accum; |
| logic sign_a, sign_b; |
| logic mult_valid; |
| logic signed_mult; |
| |
| // Results that become intermediate value depending on whether mul or div is being calculated |
| logic [33:0] mac_res_d, op_remainder_d; |
| // Raw output of MAC calculation |
| logic [33:0] mac_res; |
| |
| // Divider signals |
| logic div_sign_a, div_sign_b; |
| logic is_greater_equal; |
| logic div_change_sign, rem_change_sign; |
| logic [31:0] one_shift; |
| logic [31:0] op_denominator_q; |
| logic [31:0] op_numerator_q; |
| logic [31:0] op_quotient_q; |
| logic [31:0] op_denominator_d; |
| logic [31:0] op_numerator_d; |
| logic [31:0] op_quotient_d; |
| logic [31:0] next_remainder; |
| logic [32:0] next_quotient; |
| logic [31:0] res_adder_h; |
| logic div_valid; |
| logic [ 4:0] div_counter_q, div_counter_d; |
| logic multdiv_en; |
| logic mult_hold; |
| logic div_hold; |
| logic div_by_zero_d, div_by_zero_q; |
| |
| logic mult_en_internal; |
| logic div_en_internal; |
| |
| typedef enum logic [2:0] { |
| MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH |
| } md_fsm_e; |
| md_fsm_e md_state_q, md_state_d; |
| |
| logic unused_mult_sel_i; |
| assign unused_mult_sel_i = mult_sel_i; |
| |
| assign mult_en_internal = mult_en_i & ~mult_hold; |
| assign div_en_internal = div_en_i & ~div_hold; |
| |
| always_ff @(posedge clk_i or negedge rst_ni) begin |
| if (!rst_ni) begin |
| div_counter_q <= '0; |
| md_state_q <= MD_IDLE; |
| op_numerator_q <= '0; |
| op_quotient_q <= '0; |
| div_by_zero_q <= '0; |
| end else if (div_en_internal) begin |
| div_counter_q <= div_counter_d; |
| op_numerator_q <= op_numerator_d; |
| op_quotient_q <= op_quotient_d; |
| md_state_q <= md_state_d; |
| div_by_zero_q <= div_by_zero_d; |
| end |
| end |
| |
| |
| assign multdiv_en = mult_en_internal | div_en_internal; |
| |
| // Intermediate value register shared with ALU |
| assign imd_val_d_o[0] = div_sel_i ? op_remainder_d : mac_res_d; |
| assign imd_val_we_o[0] = multdiv_en; |
| |
| assign imd_val_d_o[1] = {2'b0, op_denominator_d}; |
| assign imd_val_we_o[1] = div_en_internal; |
| assign op_denominator_q = imd_val_q_i[1][31:0]; |
| logic [1:0] unused_imd_val; |
| assign unused_imd_val = imd_val_q_i[1][33:32]; |
| logic unused_mac_res_ext; |
| assign unused_mac_res_ext = mac_res_ext[34]; |
| |
| assign signed_mult = (signed_mode_i != 2'b00); |
| assign multdiv_result_o = div_sel_i ? imd_val_q_i[0][31:0] : mac_res_d[31:0]; |
| |
| // The single cycle multiplier uses three 17 bit multipliers to compute MUL instructions in a |
| // single cycle and MULH instructions in two cycles. |
| if (RV32M == RV32MSingleCycle) begin : gen_mult_single_cycle |
| |
| typedef enum logic { |
| MULL, MULH |
| } mult_fsm_e; |
| mult_fsm_e mult_state_q, mult_state_d; |
| |
| logic signed [33:0] mult1_res, mult2_res, mult3_res; |
| logic [33:0] mult1_res_uns; |
| logic [33:32] unused_mult1_res_uns; |
| logic [15:0] mult1_op_a, mult1_op_b; |
| logic [15:0] mult2_op_a, mult2_op_b; |
| logic [15:0] mult3_op_a, mult3_op_b; |
| logic mult1_sign_a, mult1_sign_b; |
| logic mult2_sign_a, mult2_sign_b; |
| logic mult3_sign_a, mult3_sign_b; |
| logic [33:0] summand1, summand2, summand3; |
| |
| assign mult1_res = $signed({mult1_sign_a, mult1_op_a}) * $signed({mult1_sign_b, mult1_op_b}); |
| assign mult2_res = $signed({mult2_sign_a, mult2_op_a}) * $signed({mult2_sign_b, mult2_op_b}); |
| assign mult3_res = $signed({mult3_sign_a, mult3_op_a}) * $signed({mult3_sign_b, mult3_op_b}); |
| |
| assign mac_res_signed = $signed(summand1) + $signed(summand2) + $signed(summand3); |
| |
| assign mult1_res_uns = $unsigned(mult1_res); |
| assign mac_res_ext = $unsigned(mac_res_signed); |
| assign mac_res = mac_res_ext[33:0]; |
| |
| assign sign_a = signed_mode_i[0] & op_a_i[31]; |
| assign sign_b = signed_mode_i[1] & op_b_i[31]; |
| |
| // The first two multipliers are only used in state 1 (MULL). We can assign them statically. |
| // al*bl |
| assign mult1_sign_a = 1'b0; |
| assign mult1_sign_b = 1'b0; |
| assign mult1_op_a = op_a_i[`OP_L]; |
| assign mult1_op_b = op_b_i[`OP_L]; |
| |
| // al*bh |
| assign mult2_sign_a = 1'b0; |
| assign mult2_sign_b = sign_b; |
| assign mult2_op_a = op_a_i[`OP_L]; |
| assign mult2_op_b = op_b_i[`OP_H]; |
| |
| // used in MULH |
| assign accum[17:0] = imd_val_q_i[0][33:16]; |
| assign accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}}; |
| |
| always_comb begin |
| // Default values == MULL |
| |
| // ah*bl |
| mult3_sign_a = sign_a; |
| mult3_sign_b = 1'b0; |
| mult3_op_a = op_a_i[`OP_H]; |
| mult3_op_b = op_b_i[`OP_L]; |
| |
| summand1 = {18'h0, mult1_res_uns[`OP_H]}; |
| summand2 = $unsigned(mult2_res); |
| summand3 = $unsigned(mult3_res); |
| |
| // mac_res = A*B[47:16], mult1_res = A*B[15:0] |
| mac_res_d = {2'b0, mac_res[`OP_L], mult1_res_uns[`OP_L]}; |
| mult_valid = mult_en_i; |
| mult_state_d = MULL; |
| |
| mult_hold = 1'b0; |
| |
| unique case (mult_state_q) |
| |
| MULL: begin |
| if (operator_i != MD_OP_MULL) begin |
| mac_res_d = mac_res; |
| mult_valid = 1'b0; |
| mult_state_d = MULH; |
| end else begin |
| mult_hold = ~multdiv_ready_id_i; |
| end |
| end |
| |
| MULH: begin |
| // ah*bh |
| mult3_sign_a = sign_a; |
| mult3_sign_b = sign_b; |
| mult3_op_a = op_a_i[`OP_H]; |
| mult3_op_b = op_b_i[`OP_H]; |
| mac_res_d = mac_res; |
| |
| summand1 = '0; |
| summand2 = accum; |
| summand3 = mult3_res; |
| |
| mult_state_d = MULL; |
| mult_valid = 1'b1; |
| |
| mult_hold = ~multdiv_ready_id_i; |
| end |
| |
| default: begin |
| mult_state_d = MULL; |
| end |
| |
| endcase // mult_state_q |
| end |
| |
| always_ff @(posedge clk_i or negedge rst_ni) begin |
| if (!rst_ni) begin |
| mult_state_q <= MULL; |
| end else begin |
| if (mult_en_internal) begin |
| mult_state_q <= mult_state_d; |
| end |
| end |
| end |
| |
| assign unused_mult1_res_uns = mult1_res_uns[33:32]; |
| |
| // States must be knwon/valid. |
| |
| |
| // The fast multiplier uses one 17 bit multiplier to compute MUL instructions in 3 cycles |
| // and MULH instructions in 4 cycles. |
| end else begin : gen_mult_fast |
| logic [15:0] mult_op_a; |
| logic [15:0] mult_op_b; |
| |
| typedef enum logic [1:0] { |
| ALBL, ALBH, AHBL, AHBH |
| } mult_fsm_e; |
| mult_fsm_e mult_state_q, mult_state_d; |
| |
| // The 2 MSBs of mac_res_ext (mac_res_ext[34:33]) are always equal since: |
| // 1. The 2 MSBs of the multiplicants are always equal, and |
| // 2. The 16 MSBs of the addend (accum[33:18]) are always equal. |
| // Thus, it is safe to ignore mac_res_ext[34]. |
| assign mac_res_signed = |
| $signed({sign_a, mult_op_a}) * $signed({sign_b, mult_op_b}) + $signed(accum); |
| assign mac_res_ext = $unsigned(mac_res_signed); |
| assign mac_res = mac_res_ext[33:0]; |
| |
| always_comb begin |
| mult_op_a = op_a_i[`OP_L]; |
| mult_op_b = op_b_i[`OP_L]; |
| sign_a = 1'b0; |
| sign_b = 1'b0; |
| accum = imd_val_q_i[0]; |
| mac_res_d = mac_res; |
| mult_state_d = mult_state_q; |
| mult_valid = 1'b0; |
| mult_hold = 1'b0; |
| |
| unique case (mult_state_q) |
| |
| ALBL: begin |
| // al*bl |
| mult_op_a = op_a_i[`OP_L]; |
| mult_op_b = op_b_i[`OP_L]; |
| sign_a = 1'b0; |
| sign_b = 1'b0; |
| accum = '0; |
| mac_res_d = mac_res; |
| mult_state_d = ALBH; |
| end |
| |
| ALBH: begin |
| // al*bh<<16 |
| mult_op_a = op_a_i[`OP_L]; |
| mult_op_b = op_b_i[`OP_H]; |
| sign_a = 1'b0; |
| sign_b = signed_mode_i[1] & op_b_i[31]; |
| // result of AL*BL (in imd_val_q_i[0]) always unsigned with no carry |
| accum = {18'b0, imd_val_q_i[0][31:16]}; |
| if (operator_i == MD_OP_MULL) begin |
| mac_res_d = {2'b0, mac_res[`OP_L], imd_val_q_i[0][`OP_L]}; |
| end else begin |
| // MD_OP_MULH |
| mac_res_d = mac_res; |
| end |
| mult_state_d = AHBL; |
| end |
| |
| AHBL: begin |
| // ah*bl<<16 |
| mult_op_a = op_a_i[`OP_H]; |
| mult_op_b = op_b_i[`OP_L]; |
| sign_a = signed_mode_i[0] & op_a_i[31]; |
| sign_b = 1'b0; |
| if (operator_i == MD_OP_MULL) begin |
| accum = {18'b0, imd_val_q_i[0][31:16]}; |
| mac_res_d = {2'b0, mac_res[15:0], imd_val_q_i[0][15:0]}; |
| mult_valid = 1'b1; |
| |
| // Note no state transition will occur if mult_hold is set |
| mult_state_d = ALBL; |
| mult_hold = ~multdiv_ready_id_i; |
| end else begin |
| accum = imd_val_q_i[0]; |
| mac_res_d = mac_res; |
| mult_state_d = AHBH; |
| end |
| end |
| |
| AHBH: begin |
| // only MD_OP_MULH here |
| // ah*bh |
| mult_op_a = op_a_i[`OP_H]; |
| mult_op_b = op_b_i[`OP_H]; |
| sign_a = signed_mode_i[0] & op_a_i[31]; |
| sign_b = signed_mode_i[1] & op_b_i[31]; |
| accum[17: 0] = imd_val_q_i[0][33:16]; |
| accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}}; |
| // result of AH*BL is not signed only if signed_mode_i == 2'b00 |
| mac_res_d = mac_res; |
| mult_valid = 1'b1; |
| |
| // Note no state transition will occur if mult_hold is set |
| mult_state_d = ALBL; |
| mult_hold = ~multdiv_ready_id_i; |
| end |
| default: begin |
| mult_state_d = ALBL; |
| end |
| endcase // mult_state_q |
| end |
| |
| always_ff @(posedge clk_i or negedge rst_ni) begin |
| if (!rst_ni) begin |
| mult_state_q <= ALBL; |
| end else begin |
| if (mult_en_internal) begin |
| mult_state_q <= mult_state_d; |
| end |
| end |
| end |
| |
| // States must be knwon/valid. |
| |
| end // gen_mult_fast |
| |
| // Divider |
| assign res_adder_h = alu_adder_ext_i[32:1]; |
| logic [1:0] unused_alu_adder_ext; |
| assign unused_alu_adder_ext = {alu_adder_ext_i[33],alu_adder_ext_i[0]}; |
| |
| assign next_remainder = is_greater_equal ? res_adder_h[31:0] : imd_val_q_i[0][31:0]; |
| assign next_quotient = is_greater_equal ? {1'b0, op_quotient_q} | {1'b0, one_shift} : |
| {1'b0, op_quotient_q}; |
| |
| assign one_shift = {31'b0, 1'b1} << div_counter_q; |
| |
| // The adder in the ALU computes alu_operand_a_o + alu_operand_b_o which means |
| // Remainder - Divisor. If Remainder - Divisor >= 0, is_greater_equal is equal to 1, |
| // the next Remainder is Remainder - Divisor contained in res_adder_h and the |
| always_comb begin |
| if ((imd_val_q_i[0][31] ^ op_denominator_q[31]) == 1'b0) begin |
| is_greater_equal = (res_adder_h[31] == 1'b0); |
| end else begin |
| is_greater_equal = imd_val_q_i[0][31]; |
| end |
| end |
| |
| assign div_sign_a = op_a_i[31] & signed_mode_i[0]; |
| assign div_sign_b = op_b_i[31] & signed_mode_i[1]; |
| assign div_change_sign = (div_sign_a ^ div_sign_b) & ~div_by_zero_q; |
| assign rem_change_sign = div_sign_a; |
| |
| |
| always_comb begin |
| div_counter_d = div_counter_q - 5'h1; |
| op_remainder_d = imd_val_q_i[0]; |
| op_quotient_d = op_quotient_q; |
| md_state_d = md_state_q; |
| op_numerator_d = op_numerator_q; |
| op_denominator_d = op_denominator_q; |
| alu_operand_a_o = {32'h0 , 1'b1}; |
| alu_operand_b_o = {~op_b_i, 1'b1}; |
| div_valid = 1'b0; |
| div_hold = 1'b0; |
| div_by_zero_d = div_by_zero_q; |
| |
| unique case(md_state_q) |
| MD_IDLE: begin |
| if (operator_i == MD_OP_DIV) begin |
| // Check if the Denominator is 0 |
| // quotient for division by 0 is specified to be -1 |
| // Note with data-independent time option, the full divide operation will proceed as |
| // normal and will naturally return -1 |
| op_remainder_d = '1; |
| md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A; |
| // Record that this is a div by zero to stop the sign change at the end of the |
| // division (in data_ind_timing mode). |
| div_by_zero_d = equal_to_zero_i; |
| end else begin |
| // Check if the Denominator is 0 |
| // remainder for division by 0 is specified to be the numerator (operand a) |
| // Note with data-independent time option, the full divide operation will proceed as |
| // normal and will naturally return operand a |
| op_remainder_d = {2'b0, op_a_i}; |
| md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A; |
| end |
| // 0 - B = 0 iff B == 0 |
| alu_operand_a_o = {32'h0 , 1'b1}; |
| alu_operand_b_o = {~op_b_i, 1'b1}; |
| div_counter_d = 5'd31; |
| end |
| |
| MD_ABS_A: begin |
| // quotient |
| op_quotient_d = '0; |
| // A abs value |
| op_numerator_d = div_sign_a ? alu_adder_i : op_a_i; |
| md_state_d = MD_ABS_B; |
| div_counter_d = 5'd31; |
| // ABS(A) = 0 - A |
| alu_operand_a_o = {32'h0 , 1'b1}; |
| alu_operand_b_o = {~op_a_i, 1'b1}; |
| end |
| |
| MD_ABS_B: begin |
| // remainder |
| op_remainder_d = { 33'h0, op_numerator_q[31]}; |
| // B abs value |
| op_denominator_d = div_sign_b ? alu_adder_i : op_b_i; |
| md_state_d = MD_COMP; |
| div_counter_d = 5'd31; |
| // ABS(B) = 0 - B |
| alu_operand_a_o = {32'h0 , 1'b1}; |
| alu_operand_b_o = {~op_b_i, 1'b1}; |
| end |
| |
| MD_COMP: begin |
| op_remainder_d = {1'b0, next_remainder[31:0], op_numerator_q[div_counter_d]}; |
| op_quotient_d = next_quotient[31:0]; |
| md_state_d = (div_counter_q == 5'd1) ? MD_LAST : MD_COMP; |
| // Division |
| alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder |
| alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's compliment |
| end |
| |
| MD_LAST: begin |
| if (operator_i == MD_OP_DIV) begin |
| // this time we save the quotient in op_remainder_d (i.e. imd_val_q_i[0]) since |
| // we do not need anymore the remainder |
| op_remainder_d = {1'b0, next_quotient}; |
| end else begin |
| // this time we do not save the quotient anymore since we need only the remainder |
| op_remainder_d = {2'b0, next_remainder[31:0]}; |
| end |
| // Division |
| alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder |
| alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's compliment |
| |
| md_state_d = MD_CHANGE_SIGN; |
| end |
| |
| MD_CHANGE_SIGN: begin |
| md_state_d = MD_FINISH; |
| if (operator_i == MD_OP_DIV) begin |
| op_remainder_d = (div_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0]; |
| end else begin |
| op_remainder_d = (rem_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0]; |
| end |
| // ABS(Quotient) = 0 - Quotient (or Remainder) |
| alu_operand_a_o = {32'h0 , 1'b1}; |
| alu_operand_b_o = {~imd_val_q_i[0][31:0], 1'b1}; |
| end |
| |
| MD_FINISH: begin |
| // Hold result until ID stage is ready to accept it |
| // Note no state transition will occur if div_hold is set |
| md_state_d = MD_IDLE; |
| div_hold = ~multdiv_ready_id_i; |
| div_valid = 1'b1; |
| end |
| |
| default: begin |
| md_state_d = MD_IDLE; |
| end |
| endcase // md_state_q |
| end |
| |
| assign valid_o = mult_valid | div_valid; |
| |
| |
| endmodule // brq_mult |