// blob: c4ee8492a88a435d5adb09159e166c1589b9c18c [file] [log] [blame]
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
`include "registers.svh"
// Multi-format slice of one operation group.
//
// Depending on OpGroup, every generated lane instantiates a multi-format FMA,
// DIVSQRT or cast unit. Lane 0 is always generated, further lanes only if
// EnableVectors is set. The per-lane results are packed once per format, and
// the module output is selected with the format information that travels
// through the units as auxiliary data alongside the operation.
module fpnew_opgroup_multifmt_slice #(
// Operation group served by this slice; selects the unit instantiated per lane
parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV,
// Width in bits of each entry of operands_i and of result_o
parameter int unsigned Width = 64,
// FPU configuration
// Format configurations, handed to the fpnew_pkg helper functions that derive
// the number of lanes and the formats handled by each lane
parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1,
// When 0, only lane 0 is generated and vectorial_op_i is masked off
parameter logic EnableVectors = 1'b1,
// Forwarded to the lane units; NumPipeRegs also sets the number of register
// stages of the CONV target bypass pipeline
parameter int unsigned NumPipeRegs = 0,
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
// Type of the tag passed from tag_i to tag_o through the lane units
parameter type TagType = logic,
// Do not change
localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
// Boxing information, one flag per format and operand
input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input fpnew_pkg::fp_format_e src_fmt_i,
input fpnew_pkg::fp_format_e dst_fmt_i,
input fpnew_pkg::int_format_e int_fmt_i,
input logic vectorial_op_i,
input TagType tag_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
// Forwarded to the lane units; also clears the valid bits of the CONV bypass pipeline
input logic flush_i,
// Output signals
output logic [Width-1:0] result_o,
// OR-combination of the status flags of all lanes
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig);
// NOTE(review): MAX_INT_WIDTH is not referenced anywhere else in this module
localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig);
// The lane count is always computed with vectors enabled (constant 1'b1), so
// NUM_LANES does not depend on EnableVectors; unused lanes are tied off below
localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
// We will send the format information along with the data
// The format field must be able to hold either an FP or an integer format index
localparam int unsigned FMT_BITS =
fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags
logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
logic vectorial_op;
logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation
logic [AUX_BITS-1:0] aux_data;
// additional flags for CONV
logic dst_fmt_is_int, dst_is_cpk;
logic [1:0] dst_vec_op; // info for vectorial results (for packing)
// {dst_vec_op, dst_is_cpk}; the _q name is declared but not used in this module
logic [2:0] target_aux_d, target_aux_q;
// is_down_cast is assigned but never read in this module
logic is_up_cast, is_down_cast;
// Packed results, one full-width candidate per FP / integer format
logic [NUM_FORMATS-1:0][Width-1:0] fmt_slice_result;
logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
// NOTE(review): conv_slice_result is declared but never driven or read in this module
logic [Width-1:0] conv_slice_result;
logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register
fpnew_pkg::status_t [NUM_LANES-1:0] lane_status;
logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used
logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
logic [NUM_LANES-1:0] lane_busy; // ditto
// Fields decoded from the aux data returned by lane 0
logic result_is_vector;
logic [FMT_BITS-1:0] result_fmt;
// result_is_cpk and result_vec_op are decoded from the CONV bypass pipeline;
// they are not read anywhere else in this module
logic result_fmt_is_int, result_is_cpk;
logic [1:0] result_vec_op; // info for vectorial results (for packing)
// -----------
// Input Side
// -----------
// Decodes the incoming operation into the side-band information that travels
// with it (destination format, vectorial/integer flags, cast-and-pack info)
// and prepares reduced boxing vectors for the 1- and 2-operand units.
assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled

// Cast-and-Pack ops are encoded in operation and modifier
assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
assign dst_is_cpk     = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
                                                        op_i == fpnew_pkg::CPKCD);
// Use a conditional select instead of a bitwise AND: the 1-bit comparison would
// be zero-extended to 2'b01 before the AND and thereby always clear bit 1 (the
// CPKCD flag) of the 2-bit result.
assign dst_vec_op     = (OpGroup == fpnew_pkg::CONV)
                        ? {(op_i == fpnew_pkg::CPKCD), op_mod_i}
                        : 2'b00;

// Cast direction, derived from the widths of source and destination format
assign is_up_cast   = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));

// The destination format is the int format for F2I casts
assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;

// The data sent along consists of the vectorial flag and format bits
assign aux_data     = {dst_fmt_is_int, vectorial_op, dst_fmt};
assign target_aux_d = {dst_vec_op, dst_is_cpk};

// CONV passes one operand for assembly after the unit: opC for cpk, opB for others
if (OpGroup == fpnew_pkg::CONV) begin : conv_target
  assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
end

// For 2-operand units, prepare boxing info
// is_boxed_1op keeps only operand 0, is_boxed_2op only operands 1:0 of each format
logic [NUM_FORMATS-1:0]      is_boxed_1op;
logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;

always_comb begin : boxed_2op
  for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin
    is_boxed_1op[fmt] = is_boxed_i[fmt][0];
    is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
  end
end
// ---------------
// Generate Lanes
// ---------------
// One iteration per lane. Each iteration derives the formats handled by the
// lane, instantiates the unit selected by OpGroup (or ties the lane off when it
// is not generated) and packs the lane-local result into the per-format result
// vectors fmt_slice_result / ifmt_slice_result.
for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
// Get a mask of active formats for this lane
localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);
// Cast-specific parameters
localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);
// Lane parameters from Opgroup
// CONV lanes use the cast-specific format set and width, all others the regular ones
localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
? CONV_FORMATS : ACTIVE_FORMATS;
localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;
logic [LANE_WIDTH-1:0] local_result; // lane-local results
// Generate instances only if needed, lane 0 always generated
if ((lane == 0) || EnableVectors) begin : active_lane
logic in_valid, out_valid, out_ready; // lane-local handshake
logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local operands
logic [LANE_WIDTH-1:0] op_result; // lane-local results
fpnew_pkg::status_t op_status;
assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
// Slice out the operands for this lane, upper bits are ignored in the unit
always_comb begin : prepare_input
// Default: shift the lane's element down to bit 0, using the source format width
for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
end
// override operand 0 for some conversions
if (OpGroup == fpnew_pkg::CONV) begin
// Source is an integer
if (op_i == fpnew_pkg::I2F) begin
local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
// vectorial F2F up casts
end else if (op_i == fpnew_pkg::F2F) begin
if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
// '+' binds tighter than '>>': the shift amount is the lane offset plus
// MAX_FP_WIDTH/2
local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) +
MAX_FP_WIDTH/2;
end
// CPK
end else if (dst_is_cpk) begin
if (lane == 1) begin
local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
end
end
end
end
// Instantiate the operation from the selected opgroup
if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
// ADDMUL: receives all lane operands and the full boxing information
fpnew_fma_multi #(
.FpFmtConfig ( LANE_FORMATS ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_fma_multi (
.clk_i,
.rst_ni,
.operands_i ( local_operands ),
.is_boxed_i,
.rnd_mode_i,
.op_i,
.op_mod_i,
.src_fmt_i,
.dst_fmt_i,
.tag_i,
.aux_i ( aux_data ),
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] )
);
end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
// DIVSQRT: only operands 1:0 and their boxing flags are connected; op_mod_i and
// src_fmt_i are not connected to this unit
fpnew_divsqrt_multi #(
.FpFmtConfig ( LANE_FORMATS ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_divsqrt_multi (
.clk_i,
.rst_ni,
.operands_i ( local_operands[1:0] ), // 2 operands
.is_boxed_i ( is_boxed_2op ), // 2 operands
.rnd_mode_i,
.op_i,
.dst_fmt_i,
.tag_i,
.aux_i ( aux_data ),
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] )
);
// NOTE(review): no unit is instantiated for NONCOMP in this branch, so the lane
// handshake, result, status, tag and aux signals stay undriven for that opgroup
// -- confirm NONCOMP is never configured for this slice
end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
// CONV: single operand (operand 0) with its boxing flags, plus the integer format
fpnew_cast_multi #(
.FpFmtConfig ( LANE_FORMATS ),
.IntFmtConfig ( CONV_INT_FORMATS ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_cast_multi (
.clk_i,
.rst_ni,
.operands_i ( local_operands[0] ),
.is_boxed_i ( is_boxed_1op ),
.rnd_mode_i,
.op_i,
.op_mod_i,
.src_fmt_i,
.dst_fmt_i,
.int_fmt_i,
.tag_i,
.aux_i ( aux_data ),
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] )
);
end // ADD OTHER OPTIONS HERE
// Handshakes are only done if the lane is actually used
// result_is_vector is decoded from the aux data returned by lane 0
assign out_ready = out_ready_i & ((lane == 0) | result_is_vector);
assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
// Properly NaN-box or sign-extend the slice result if not in use
// The fill value is the extension bit of lane 0, replicated over the lane width
assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
// Otherwise generate constant sign-extension
end else begin : inactive_lane
// Only the signals below are driven for an inactive lane; lane_ext_bit, lane_tags
// and lane_aux of this lane index are left undriven
assign lane_out_valid[lane] = 1'b0; // unused lane
assign lane_in_ready[lane] = 1'b0; // unused lane
assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box
assign lane_status[lane] = '0;
assign lane_busy[lane] = 1'b0;
end
// Generate result packing depending on float format
for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
// only for active formats within the lane
if (ACTIVE_FORMATS[fmt]) begin
assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
local_result[FP_WIDTH-1:0];
// Format not active in this lane, but the lane's slot still fits completely
// into the result: fill it with this lane's extension bit
// NOTE(review): unlike local_result, these fills use lane_ext_bit[LANE], which is
// undriven for inactive lanes -- confirm these branches cannot be reached then
end else if ((LANE+1)*FP_WIDTH <= Width) begin
assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
'{default: lane_ext_bit[LANE]};
// Slot only partially fits: fill the remaining bits up to the result width
end else if (LANE*FP_WIDTH < Width) begin
assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] =
'{default: lane_ext_bit[LANE]};
end
end
// Generate result packing depending on integer format
if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
// Set up some constants
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
// Same slot scheme as for the FP formats, but unused slots are zero-filled
if (ACTIVE_INT_FORMATS[ifmt]) begin
assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
local_result[INT_WIDTH-1:0];
end else if ((LANE+1)*INT_WIDTH <= Width) begin
assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0;
end else if (LANE*INT_WIDTH < Width) begin
assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0;
end
end
end
end
// Fill the part of each FP result that is not covered by any lane: if all
// lanes of a format together are narrower than the datapath, the remaining
// upper bits are driven with the extension bit of lane 0.
for (genvar f = 0; f < NUM_FORMATS; f++) begin : extend_fp_result
  // Width of one element of this format and number of bits covered by all lanes
  localparam int unsigned FP_WIDTH  = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(f));
  localparam int unsigned USED_BITS = NUM_LANES * FP_WIDTH;
  if (USED_BITS < Width) begin
    assign fmt_slice_result[f][Width-1:USED_BITS] = '{default: lane_ext_bit[0]};
  end
end

// Opgroups other than CONV never produce integer results: tie every integer
// result candidate to zero.
for (genvar k = 0; k < NUM_INT_FORMATS; k++) begin : int_results_disabled
  if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
    assign ifmt_slice_result[k] = '0;
  end
end
// For CONV, the target operand and the cast-and-pack information bypass the
// lanes through a pipeline of NumPipeRegs register stages.
if (OpGroup == fpnew_pkg::CONV) begin : target_regs
  // Pipeline storage; index i is the value after i register stages
  logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
  logic [0:NumPipeRegs][2:0]       byp_pipe_aux_q;
  logic [0:NumPipeRegs]            byp_pipe_valid_q;
  // Ready is purely combinational and travels from the output towards the input
  logic [0:NumPipeRegs]            byp_pipe_ready;

  // Stage 0 is fed directly from the module inputs
  assign byp_pipe_valid_q[0]  = in_valid_i & vectorial_op;
  assign byp_pipe_aux_q[0]    = target_aux_d;
  assign byp_pipe_target_q[0] = conv_target_d;

  // The last stage is released by the downstream ready, for vectorial results only
  assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;

  // Register stages
  for (genvar stage = 0; stage < NumPipeRegs; stage++) begin : gen_bypass_pipeline
    // Load enable of the data registers of this stage
    logic stage_ena;
    // A stage may advance when its successor holds a bubble or is itself ready
    assign byp_pipe_ready[stage] = ~byp_pipe_valid_q[stage+1] | byp_pipe_ready[stage+1];
    // Data is only captured when the stage advances and carries a valid item
    assign stage_ena = byp_pipe_valid_q[stage] & byp_pipe_ready[stage];
    // Valid bit: loaded when the stage is ready, synchronously cleared by flush_i
    `FFLARNC(byp_pipe_valid_q[stage+1], byp_pipe_valid_q[stage], byp_pipe_ready[stage], flush_i, 1'b0, clk_i, rst_ni)
    // Payload registers with load enable
    `FFL(byp_pipe_target_q[stage+1], byp_pipe_target_q[stage], stage_ena, '0)
    `FFL(byp_pipe_aux_q[stage+1], byp_pipe_aux_q[stage], stage_ena, '0)
  end

  // Pipeline outputs: target operand and the decoded {vec_op, is_cpk} aux fields
  assign conv_target_q = byp_pipe_target_q[NumPipeRegs];
  assign result_vec_op = byp_pipe_aux_q[NumPipeRegs][2:1];
  assign result_is_cpk = byp_pipe_aux_q[NumPipeRegs][0];
end else begin : no_conv
  // Without CONV there is no cast-and-pack information
  assign result_vec_op = 2'b00;
  assign result_is_cpk = 1'b0;
end
// ------------
// Output Side
// ------------
// Lane 0 provides the side-band information for the whole slice; the values
// of the upper lanes are ignored.
assign out_valid_o     = lane_out_valid[0];
assign tag_o           = lane_tags[0];
assign extension_bit_o = lane_ext_bit[0];

// The slice is busy as long as any lane is busy
assign busy_o = (| lane_busy);

// Unpack the aux data returned with the result of lane 0
assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];

// Pick the packed result matching the format of the finished operation
assign result_o = result_fmt_is_int
                  ? ifmt_slice_result[result_fmt]
                  : fmt_slice_result[result_fmt];

// Collapse the status: OR the flags of all lanes together
always_comb begin : output_processing
  status_o = '0;
  for (int unsigned lane_idx = 0; lane_idx < NUM_LANES; lane_idx++) begin
    status_o |= lane_status[lane_idx];
  end
end
endmodule