blob: 9013a642803b0023395e0432138cc27baf2c639c [file] [log] [blame]
// Copyright 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>, ETH Zurich
// Date: 02.04.2019
// Description: logarithmic arbitration tree with round robin arbitration scheme.
/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
/// rotate each cycle.
///
/// ## Fair vs. unfair Arbitration
///
/// This refers to fair throughput distribution when not all inputs have active requests.
/// This module has an internal state `rr_q` which defines the highest priority input. (When
/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
/// choose the input with the same index as currently defined by the state if it has an active
/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
/// to distinguish between two methods of calculating the next state.
/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
/// state being calculated without the context of the active request. Leading to an
/// unfair throughput distribution if not all inputs have active requests.
/// * `1'b1`: The next state jumps to the next unserved request with higher index.
/// This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
/// `req_i` signal with all indices which will have a higher priority in the next state.
/// The trailing zero count defines the input index with the next highest priority after
/// the current one is served. When the upper is empty the lower `lzc` provides the
/// wrapped index if there are outstanding requests with lower or same priority.
/// The implication of throughput fairness on the module timing are:
/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
/// that in this module the input to register path scales with Log(Log(`NumIn`)).
/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
/// timing path of this module also scales scales with Log(`NumIn`).
/// This implies that in this module the input to output path is always longer than the input to
/// register path. As the output data usually also terminates in a register the parameter `FairArb`
/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
/// However these are small in respect of the data multiplexers needed, as the width of the `req_i`
/// signal is usually less as than `DataWidth`.
module rr_arb_tree #(
/// Number of inputs to be arbitrated.
parameter int unsigned NumIn = 64,
/// Data width of the payload in bits. Not needed if `DataType` is overwritten.
parameter int unsigned DataWidth = 32,
/// Data type of the payload, can be overwritten with custom type. Only use of `DataWidth`.
parameter type DataType = logic [DataWidth-1:0],
/// The `ExtPrio` option allows to override the internal round robin counter via the
/// `rr_i` signal. This can be useful in case multiple arbiters need to have
/// rotating priorities that are operating in lock-step. If static priority arbitration
/// is needed, just connect `rr_i` to '0.
///
/// Set to 1'b1 to enable.
parameter bit ExtPrio = 1'b0,
/// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
/// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
/// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
/// delay and area.
///
/// Set to `1'b1` to treat req/gnt as vld/rdy.
parameter bit AxiVldRdy = 1'b0,
/// The `LockIn` option prevents the arbiter from changing the arbitration
/// decision when the arbiter is disabled. I.e., the index of the first request
/// that wins the arbitration will be locked in case the destination is not
/// able to grant the request in the same cycle.
///
/// Set to `1'b1` to enable.
parameter bit LockIn = 1'b0,
/// When set, ensures that throughput gets distributed evenly between all inputs.
///
/// Set to `1'b0` to disable.
parameter bit FairArb = 1'b1,
/// Dependent parameter, do **not** overwrite.
/// Width of the arbitration priority signal and the arbitrated index.
parameter int unsigned IdxWidth = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
/// Dependent parameter, do **not** overwrite.
/// Type for defining the arbitration priority and arbitrated index signal.
parameter type idx_t = logic [IdxWidth-1:0]
) (
/// clk_i, positive edge triggered.
input logic clk_i,
/// Asynchronous rst_ni, active low.
input logic rst_ni,
/// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
input logic flush_i,
/// External round-robin priority. Only used if `ExtPrio` is `1'b1.`
input idx_t rr_i,
/// Input requests arbitration.
input logic [NumIn-1:0] req_i,
/* verilator lint_off UNOPTFLAT */
/// Input request is granted.
output logic [NumIn-1:0] gnt_o,
/* verilator lint_on UNOPTFLAT */
/// Input data for arbitration.
input DataType [NumIn-1:0] data_i,
/// Output request is valid.
output logic req_o,
/// Output request is granted.
input logic gnt_i,
/// Output data.
output DataType data_o,
/// Index from which input the data came from.
output idx_t idx_o
);
// pragma translate_off
`ifndef VERILATOR
// Default SVA rst_ni
default disable iff (!rst_ni || flush_i);
`endif
// pragma translate_on
// just pass through in this corner case
if (NumIn == unsigned'(1)) begin : gen_pass_through
assign req_o = req_i[0];
assign gnt_o[0] = gnt_i;
assign data_o = data_i[0];
assign idx_o = '0;
// non-degenerate cases
end else begin : gen_arbiter
localparam int unsigned NumLevels = unsigned'($clog2(NumIn));
/* verilator lint_off UNOPTFLAT */
idx_t [2**NumLevels-2:0] index_nodes; // used to propagate the indices
DataType [2**NumLevels-2:0] data_nodes; // used to propagate the data
logic [2**NumLevels-2:0] gnt_nodes; // used to propagate the grant to masters
logic [2**NumLevels-2:0] req_nodes; // used to propagate the requests to slave
/* lint_off */
idx_t rr_q;
logic [NumIn-1:0] req_d;
// the final arbitration decision can be taken from the root of the tree
assign req_o = req_nodes[0];
assign data_o = data_nodes[0];
assign idx_o = index_nodes[0];
if (ExtPrio) begin : gen_ext_rr
assign rr_q = rr_i;
assign req_d = req_i;
end else begin : gen_int_rr
idx_t rr_d;
// lock arbiter decision in case we got at least one req and no acknowledge
if (LockIn) begin : gen_lock
logic lock_d, lock_q;
logic [NumIn-1:0] req_q;
assign lock_d = req_o & ~gnt_i;
assign req_d = (lock_q) ? req_q : req_i;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
if (!rst_ni) begin
lock_q <= '0;
end else begin
if (flush_i) begin
lock_q <= '0;
end else begin
lock_q <= lock_d;
end
end
end
// pragma translate_off
`ifndef VERILATOR
lock: assert property(
@(posedge clk_i) LockIn |-> req_o && !gnt_i |=> idx_o == $past(idx_o)) else
$fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
ready.");
logic [NumIn-1:0] req_tmp;
assign req_tmp = req_q & req_i;
lock_req: assume property(
@(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
$fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
enabled.");
`endif
// pragma translate_on
always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
if (!rst_ni) begin
req_q <= '0;
end else begin
if (flush_i) begin
req_q <= '0;
end else begin
req_q <= req_d;
end
end
end
end else begin : gen_no_lock
assign req_d = req_i;
end
if (FairArb) begin : gen_fair_arb
logic [NumIn-1:0] upper_mask, lower_mask;
idx_t upper_idx, lower_idx, next_idx;
logic upper_empty, lower_empty;
for (genvar i = 0; i < NumIn; i++) begin : gen_mask
assign upper_mask[i] = (i > rr_q) ? req_d[i] : 1'b0;
assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0;
end
lzc #(
.WIDTH ( NumIn ),
.MODE ( 1'b0 )
) i_lzc_upper (
.in_i ( upper_mask ),
.cnt_o ( upper_idx ),
.empty_o ( upper_empty )
);
lzc #(
.WIDTH ( NumIn ),
.MODE ( 1'b0 )
) i_lzc_lower (
.in_i ( lower_mask ),
.cnt_o ( lower_idx ),
.empty_o ( /*unused*/ )
);
assign next_idx = upper_empty ? lower_idx : upper_idx;
assign rr_d = (gnt_i && req_o) ? next_idx : rr_q;
end else begin : gen_unfair_arb
assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
end
// this holds the highest priority
always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
if (!rst_ni) begin
rr_q <= '0;
end else begin
if (flush_i) begin
rr_q <= '0;
end else begin
rr_q <= rr_d;
end
end
end
end
assign gnt_nodes[0] = gnt_i;
// arbiter tree
for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
for (genvar l = 0; l < 2**level; l++) begin : gen_level
// local select signal
logic sel;
// index calcs
localparam int unsigned Idx0 = 2**level-1+l;// current node
localparam int unsigned Idx1 = 2**(level+1)-1+l*2;
//////////////////////////////////////////////////////////////
// uppermost level where data is fed in from the inputs
if (unsigned'(level) == NumLevels-1) begin : gen_first_level
// if two successive indices are still in the vector...
if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
assign req_nodes[Idx0] = req_d[l*2] | req_d[l*2+1];
// arbitration: round robin
assign sel = ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
assign index_nodes[Idx0] = idx_t'(sel);
assign data_nodes[Idx0] = (sel) ? data_i[l*2+1] : data_i[l*2];
assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]) & ~sel;
assign gnt_o[l*2+1] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
end
// if only the first index is still in the vector...
if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
assign req_nodes[Idx0] = req_d[l*2];
assign index_nodes[Idx0] = '0;// always zero in this case
assign data_nodes[Idx0] = data_i[l*2];
assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
end
// if index is out of range, fill up with zeros (will get pruned)
if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
assign req_nodes[Idx0] = 1'b0;
assign index_nodes[Idx0] = idx_t'('0);
assign data_nodes[Idx0] = DataType'('0);
end
//////////////////////////////////////////////////////////////
// general case for other levels within the tree
end else begin : gen_other_levels
assign req_nodes[Idx0] = req_nodes[Idx1] | req_nodes[Idx1+1];
// arbitration: round robin
assign sel = ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
assign index_nodes[Idx0] = (sel) ?
idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
assign data_nodes[Idx0] = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
assign gnt_nodes[Idx1] = gnt_nodes[Idx0] & ~sel;
assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
end
//////////////////////////////////////////////////////////////
end
end
// pragma translate_off
`ifndef VERILATOR
initial begin : p_assert
assert(NumIn)
else $fatal(1, "Input must be at least one element wide.");
assert(!(LockIn && ExtPrio))
else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
end
hot_one : assert property(
@(posedge clk_i) $onehot0(gnt_o))
else $fatal (1, "Grant signal must be hot1 or zero.");
gnt0 : assert property(
@(posedge clk_i) |gnt_o |-> gnt_i)
else $fatal (1, "Grant out implies grant in.");
gnt1 : assert property(
@(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
else $fatal (1, "Req out and grant in implies grant out.");
gnt_idx : assert property(
@(posedge clk_i) req_o |-> gnt_i |-> gnt_o[idx_o])
else $fatal (1, "Idx_o / gnt_o do not match.");
req0 : assert property(
@(posedge clk_i) |req_i |-> req_o)
else $fatal (1, "Req in implies req out.");
req1 : assert property(
@(posedge clk_i) req_o |-> |req_i)
else $fatal (1, "Req out implies req in.");
`endif
// pragma translate_on
end
endmodule : rr_arb_tree