// blob: 0afa424566c9e616220a5ab921f6e91783f5fa0b [file] [log] [blame]
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2021 Tamas Hubai
`default_nettype none
/*
Generates a DFF RAM block for each core with a tree-like interconnect mesh between them
Parameters:
CORES = number of cpu cores, also specifies the number of ram blocks
DEPTH = number of words per ram block
DATA_WIDTH = word size, number of bits per memory cell
ADDR_WIDTH = address bus width, should be clog2(DEPTH)
SPREAD_LAYERS = number of spread layers, should be clog2(CORES)
SPREAD_WIDTH = spread bus width, should be clog2(2+SPREAD_LAYERS)
USE_IO = nonzero to connect memory cells IO_FIRST .. IO_FIRST+IO_PORTS-1 to the io bus
IO_PORTS = number of io ports, should be <= DEPTH
IO_FIRST = memory cell mapped to the first io port, should be <= DEPTH - IO_PORTS
A value of wspread > 0 on a write operation specifies that the same address should also be written in some
other memory blocks. In particular, the blocks whose numbers differ only in the lowest wspread bits are affected.
If several simultaneous write operations affect the same memory cell, writes with higher wspread have
priority. For writes having equal wspread the core with the lowest number wins.
If an address in the range IO_FIRST <= address < IO_FIRST + IO_PORTS is written with wspread > SPREAD_LAYERS
(and USE_IO is nonzero), wdata is also sent to the corresponding io bus port.
Incoming data on the io bus is written to the respective cells with maximal spread (affecting all cores).
*/
module mem_mesh #(parameter CORES=8, DEPTH=256, DATA_WIDTH=16, ADDR_WIDTH=8, SPREAD_LAYERS=3, SPREAD_WIDTH=3, USE_IO=1, IO_PORTS=16, IO_FIRST=240) (
    input clk,                                      // clock signal
    input rst_n,                                    // reset, active low (sampled on the rising clock edge)
    input [CORES-1:0] we,                           // write enable, one bit per core
    input [CORES*ADDR_WIDTH-1:0] waddr,             // write address, core 0 in the lowest bits
    input [CORES*SPREAD_WIDTH-1:0] wspread,         // write spread, core 0 in the lowest bits
    input [CORES*DATA_WIDTH-1:0] wdata,             // write data, core 0 in the lowest bits
    input [CORES*ADDR_WIDTH-1:0] raddr,             // read address, core 0 in the lowest bits
    output [CORES*DATA_WIDTH-1:0] rdata,            // read data, core 0 in the lowest bits
    input [IO_PORTS-1:0] io_active_in,              // is receiving data on io bus
    output [IO_PORTS-1:0] io_active_out,            // is sending data on io bus
    input [IO_PORTS*DATA_WIDTH-1:0] io_data_in,     // io bus input
    output [IO_PORTS*DATA_WIDTH-1:0] io_data_out    // io bus output
);

    // Overview of the write path, generated once per memory address:
    //   1. presel    - one-hot decode of every core's write address
    //   2. spread[k] - pairwise merge of write requests going up the tree
    //   3. io mix-in - at the root, optionally tap / override with the io bus
    //   4. collect[k]- broadcast of merged requests back down to the cores
    //   5. postsel / postdata drive the registers in mem

    reg [DATA_WIDTH-1:0] mem[CORES-1:0][DEPTH-1:0];              // memory cells
    wire presel[CORES-1:0][DEPTH-1:0];                           // is address selected before spreading
    wire uspread[CORES-1:0][SPREAD_LAYERS+1-1:0];                // is spreading to layer
    wire postsel[CORES-1:0][DEPTH-1:0];                          // is address selected after spreading
    wire [DATA_WIDTH-1:0] postdata[CORES-1:0][DEPTH-1:0];        // data to be written after spreading

    generate
        genvar core, addr, layer, group, spl, port;

        // convert spread to unary: bit `layer` is set when the core is writing and
        // its wspread field is greater than `layer`
        for (core=0; core<CORES; core=core+1) begin:g_core
            for(layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:g_layer
                assign uspread[core][layer] = we[core] & (wspread[core*SPREAD_WIDTH +: SPREAD_WIDTH] > layer);
            end
        end

        for (addr=0; addr<DEPTH; addr=addr+1) begin:g_cell
            // convert write address to one-hot encoding
            for (core=0; core<CORES; core=core+1) begin:g_core_m
                assign presel[core][addr] = we[core] & (waddr[core*ADDR_WIDTH +: ADDR_WIDTH] == addr);
            end

            // calculate spreading from individual cores to groups of cores
            // layer 0 has one group per core; each higher layer halves the group count
            for (layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:spread
                localparam GROUPS = CORES >> layer;
                wire gsel[GROUPS-1:0];                                  // group has a write for this address
                wire [DATA_WIDTH-1:0] gdata[GROUPS-1:0];                // data of that write
                wire gspread[GROUPS-1:0][SPREAD_LAYERS+1-layer-1:0];    // remaining unary spread of that write
                if (layer == 0) begin:i_layerz
                    // leaf layer: copy each core's own request
                    for (group=0; group<GROUPS; group=group+1) begin:g_group
                        assign gsel[group] = presel[group][addr];
                        // data is forced to zero while the core is not writing
                        assign gdata[group] = {(DATA_WIDTH){we[group]}} & wdata[group*DATA_WIDTH +: DATA_WIDTH];
                        for (spl=0; spl<=SPREAD_LAYERS; spl=spl+1) begin:cspread
                            assign gspread[group][spl] = uspread[group][spl];
                        end
                    end
                end else begin:i_layernz
                    // inner layer: merge the two child groups of the layer below
                    for (group=0; group<GROUPS; group=group+1) begin:g_group
                        // a child only propagates upwards if it is selected and still has spread left
                        wire gs1 = spread[layer-1].gsel[group*2] & spread[layer-1].gspread[group*2][0];
                        wire gs2 = spread[layer-1].gsel[group*2+1] & spread[layer-1].gspread[group*2+1][0];
                        wire [DATA_WIDTH-1:0] gd1 = spread[layer-1].gdata[group*2];
                        wire [DATA_WIDTH-1:0] gd2 = spread[layer-1].gdata[group*2+1];
                        assign gsel[group] = gs1 | gs2;
                        // the lower-numbered child wins when both propagate
                        // NOTE(review): the winner's remaining spread is forwarded even when the other
                        // child has a larger spread, so that wider write stops here. Confirm this is
                        // intended given the "higher wspread has priority" rule in the file header.
                        assign gdata[group] = gs1 ? gd1 : gd2;
                        // consume one spread level: bit spl+1 of the child becomes bit spl here
                        for (spl=0; spl<=SPREAD_LAYERS-layer; spl=spl+1) begin:g_spread
                            wire gsp1 = spread[layer-1].gspread[group*2][spl+1];
                            wire gsp2 = spread[layer-1].gspread[group*2+1][spl+1];
                            assign gspread[group][spl] = gs1 ? gsp1 : gsp2;
                        end
                    end
                end
            end

            // mix in io logic at the highest spreading level
            wire gs_i;                      // root selection after io override
            wire [DATA_WIDTH-1:0] gd_i;     // root data after io override
            if (USE_IO && IO_FIRST <= addr && addr < IO_FIRST + IO_PORTS) begin:i_io
                localparam io = addr - IO_FIRST;    // io port index mapped to this cell
                // outgoing: a write that reached the root and still has spread remaining
                wire gs_o = spread[SPREAD_LAYERS].gsel[0] & spread[SPREAD_LAYERS].gspread[0][0];
                wire [DATA_WIDTH-1:0] gd_o = {(DATA_WIDTH){gs_o}} & spread[SPREAD_LAYERS].gdata[0];
                assign io_active_out[io] = gs_o;
                assign io_data_out[io*DATA_WIDTH +: DATA_WIDTH] = gd_o;
                // incoming: an active io port forces the selection and replaces the data
                assign gs_i = io_active_in[io] ? 1'b1 : spread[SPREAD_LAYERS].gsel[0];
                assign gd_i = io_active_in[io] ? io_data_in[io*DATA_WIDTH +: DATA_WIDTH] : spread[SPREAD_LAYERS].gdata[0];
            end else begin:i_nio
                assign gs_i = spread[SPREAD_LAYERS].gsel[0];
                assign gd_i = spread[SPREAD_LAYERS].gdata[0];
            end

            // calculate spreading back from groups of cores to individual cores
            // the loop counts down so that collect[layer+1] is generated before it is referenced
            for (layer=SPREAD_LAYERS; layer>=0; layer=layer-1) begin:collect
                localparam GROUPS = CORES >> layer;
                wire pgsel[GROUPS-1:0];                         // group receives a write for this address
                wire [DATA_WIDTH-1:0] pgdata[GROUPS-1:0];       // data of that write
                if (layer == SPREAD_LAYERS) begin:i_layerl
                    // top layer: group 0 takes the io-mixed result, any further groups pass through
                    assign pgsel[0] = gs_i;
                    assign pgdata[0] = gd_i;
                    for (group=1; group<GROUPS; group=group+1) begin:g_group
                        assign pgsel[group] = spread[layer].gsel[group];
                        assign pgdata[group] = spread[layer].gdata[group];
                    end
                end else begin:i_layernl
                    for (group=0; group<GROUPS; group=group+1) begin:g_group
                        wire gs = spread[layer].gsel[group];
                        wire [DATA_WIDTH-1:0] gd = spread[layer].gdata[group];
                        wire cgs = collect[layer+1].pgsel[group/2];
                        wire [DATA_WIDTH-1:0] cgd = collect[layer+1].pgdata[group/2];
                        assign pgsel[group] = cgs | gs;
                        // a write coming down from the parent group overrides the local one
                        assign pgdata[group] = cgs ? cgd : gd;
                    end
                end
            end
            for (core=0; core<CORES; core=core+1) begin:g_core_c
                assign postsel[core][addr] = collect[0].pgsel[core];
                assign postdata[core][addr] = collect[0].pgdata[core];
            end

            // sequential write logic: synchronous clear on reset, otherwise store when selected
            for (core=0; core<CORES; core=core+1) begin:g_core_w
                always @(posedge clk) begin
                    if (!rst_n) begin
                        mem[core][addr] <= 0;
                    end else begin
                        if (postsel[core][addr]) begin
                            mem[core][addr] <= postdata[core][addr];
                        end
                    end
                end
            end
        end

        // read logic: each core reads its own block through a continuous assignment
        for (core=0; core<CORES; core=core+1) begin:g_core_r
            wire [ADDR_WIDTH-1:0] craddr = raddr[core*ADDR_WIDTH +: ADDR_WIDTH];
            assign rdata[core*DATA_WIDTH +: DATA_WIDTH] = mem[core][craddr];
        end

        // tie off io outputs that no memory cell drives, so they never float:
        // either io is disabled or the port's cell (IO_FIRST+port) lies outside the memory.
        // This condition is the exact complement of the i_io condition above,
        // so no output bit ever gets two drivers.
        for (port=0; port<IO_PORTS; port=port+1) begin:g_io_tie
            if (!USE_IO || IO_FIRST + port >= DEPTH) begin:i_unmapped
                assign io_active_out[port] = 1'b0;
                assign io_data_out[port*DATA_WIDTH +: DATA_WIDTH] = {(DATA_WIDTH){1'b0}};
            end
        end
    endgenerate
endmodule
`default_nettype wire