// verilog/rtl/mem_mesh.v - third_party/shuttle/sky130/mpw-003/slot-026 - Git at Google

 // SPDX-License-Identifier: MIT
 // SPDX-FileCopyrightText: 2021 Tamas Hubai

 `default_nettype none

 /*
 Generates a DFF RAM block for each core with a tree-like interconnect mesh between them

 Parameters:
 CORES = number of cpu cores, also specifies the number of ram blocks
 DEPTH = number of words per ram block
 DATA_WIDTH = word size, number of bits per memory cell
 ADDR_WIDTH = address bus width, should be clog2(DEPTH)
 SPREAD_LAYERS = number of spread layers, should be clog2(CORES)
 SPREAD_WIDTH = spread bus width, should be clog2(2+SPREAD_LAYERS)
 IO_PORTS = number of io ports, should be <= DEPTH
 IO_FIRST = memory cell mapped to the first io port, should be <= DEPTH - IO_PORTS

 A value of wspread > 0 on write operations specifies that the same address should also be written in some
 other memory blocks. In particular, blocks whose number only differ in the lowest wspread bits are affected.
 If several simultaneous write operations affect the same memory cell, writes with higher wspread have
 priority. For writes having equal wspread the core with the lowest number wins.

 If USE_IO is nonzero and an address in the range IO_FIRST .. IO_FIRST+IO_PORTS-1 is written with
 wspread > SPREAD_LAYERS, wdata is also sent to the corresponding port of the io bus.
 Incoming data on the io bus is written to the respective cells with maximal spread (affecting all cores).
 */

 // mem_mesh: one register-based memory block per core, plus the combinational
 // network that lets a single write land in several blocks in the same cycle.
 //
 // Behaviour visible in this module:
 // - USE_IO: when nonzero, cells IO_FIRST .. IO_FIRST+IO_PORTS-1 are tied to the io bus.
 // - Writes are registered on the rising edge of clk.
 // - While rst_n is low, every cell is cleared to 0 on the rising edge of clk.
 // - Reads are continuous assignments from the memory array (no read register).
 // - Per-core buses (waddr, wspread, wdata, raddr, rdata) are packed with core 0
 //   in the least significant slice.
 module mem_mesh #(parameter CORES=8, DEPTH=256, DATA_WIDTH=16, ADDR_WIDTH=8, SPREAD_LAYERS=3, SPREAD_WIDTH=3, USE_IO=1, IO_PORTS=16, IO_FIRST=240) (
    input clk,                                   // clock signal
    input rst_n,                                 // reset, active low
    input [CORES-1:0] we,                        // write enable
    input [CORES*ADDR_WIDTH-1:0] waddr,          // write address
    input [CORES*SPREAD_WIDTH-1:0] wspread,      // write spread
    input [CORES*DATA_WIDTH-1:0] wdata,          // write data
    input [CORES*ADDR_WIDTH-1:0] raddr,          // read address
    output [CORES*DATA_WIDTH-1:0] rdata,         // read data
    input [IO_PORTS-1:0] io_active_in,           // is receiving data on io bus
    output [IO_PORTS-1:0] io_active_out,         // is sending data on io bus
    input [IO_PORTS*DATA_WIDTH-1:0] io_data_in,  // io bus input
    output [IO_PORTS*DATA_WIDTH-1:0] io_data_out // io bus output
 );

 reg [DATA_WIDTH-1:0] mem[CORES-1:0][DEPTH-1:0];       // memory cells
 wire presel[CORES-1:0][DEPTH-1:0];                    // is address selected before spreading
 wire uspread[CORES-1:0][SPREAD_LAYERS+1-1:0];         // is spreading to layer
 wire postsel[CORES-1:0][DEPTH-1:0];                   // is address selected after spreading
 wire [DATA_WIDTH-1:0] postdata[CORES-1:0][DEPTH-1:0]; // data to be written after spreading

 generate genvar core, addr, layer, group, spl;

 // convert spread to unary
 // uspread[core][layer] is 1 when the core is writing and its wspread value is
 // greater than layer. Relational '>' binds tighter than bitwise '&', so the
 // expression reads as we & (wspread > layer).
 for (core=0; core<CORES; core=core+1) begin:g_core
    for(layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:g_layer
       assign uspread[core][layer] = we[core] & wspread[core*SPREAD_WIDTH +: SPREAD_WIDTH] > layer;
    end
 end

 // Everything inside g_cell is instantiated once per memory address.
 for (addr=0; addr<DEPTH; addr=addr+1) begin:g_cell

    // convert write address to one-hot encoding
    for (core=0; core<CORES; core=core+1) begin:g_core_m
       assign presel[core][addr] = we[core] & (waddr[core*ADDR_WIDTH +: ADDR_WIDTH] == addr);
    end

    // calculate spreading from individual cores to groups of cores
    // Layer 0 has one group per core; every further layer has half as many
    // groups (CORES >> layer), each fed by groups 2*group and 2*group+1 of the
    // layer below. The arrays of each layer are read by the next layer through
    // the hierarchical name spread[layer-1].
    for (layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:spread
       localparam GROUPS = CORES >> layer;
       wire gsel[GROUPS-1:0];                               // group has a write for this address
       wire [DATA_WIDTH-1:0] gdata[GROUPS-1:0];             // data carried by the group
       wire gspread[GROUPS-1:0][SPREAD_LAYERS+1-layer-1:0]; // remaining unary spread, bit 0 = "go one layer up"
       if (layer == 0) begin:i_layerz
          // Seed the tree directly from the per-core write ports.
          for (group=0; group<GROUPS; group=group+1) begin:g_group
             assign gsel[group] = presel[group][addr];
             // data is forced to 0 when the core is not writing
             assign gdata[group] = {(DATA_WIDTH){we[group]}} & wdata[group*DATA_WIDTH +: DATA_WIDTH];
             for (spl=0; spl<=SPREAD_LAYERS; spl=spl+1) begin:cspread
                assign gspread[group][spl] = uspread[group][spl];
             end
          end
       end else begin:i_layernz
          for (group=0; group<GROUPS; group=group+1) begin:g_group
             // A child only propagates upward if it is selected AND its lowest
             // remaining spread bit is set.
             wire gs1 = spread[layer-1].gsel[group*2] & spread[layer-1].gspread[group*2][0];
             wire gs2 = spread[layer-1].gsel[group*2+1] & spread[layer-1].gspread[group*2+1][0];
             wire [DATA_WIDTH-1:0] gd1 = spread[layer-1].gdata[group*2];
             wire [DATA_WIDTH-1:0] gd2 = spread[layer-1].gdata[group*2+1];
             assign gsel[group] = gs1 | gs2;
             // the even-numbered child wins when both propagate
             assign gdata[group] = gs1 ? gd1 : gd2;
             // Shift the winning child's unary spread down by one position so
             // that bit 0 again means "propagate to the next layer".
             for (spl=0; spl<=SPREAD_LAYERS-layer; spl=spl+1) begin:g_spread
                wire gsp1 = spread[layer-1].gspread[group*2][spl+1];
                wire gsp2 = spread[layer-1].gspread[group*2+1][spl+1];
                assign gspread[group][spl] = gs1 ? gsp1 : gsp2;
             end
          end
       end
    end

    // mix in io logic at the highest spreading level
    wire gs_i; // select fed into the top of the collect tree
    wire [DATA_WIDTH-1:0] gd_i; // data fed into the top of the collect tree
    if (USE_IO && IO_FIRST <= addr && addr < IO_FIRST + IO_PORTS) begin:i_io
       localparam io = addr - IO_FIRST; // io port index of this cell
       // Outgoing: the top group is selected and still has a spread bit left.
       wire gs_o = spread[SPREAD_LAYERS].gsel[0] & spread[SPREAD_LAYERS].gspread[0][0];
       wire [DATA_WIDTH-1:0] gd_o = {(DATA_WIDTH){gs_o}} & spread[SPREAD_LAYERS].gdata[0];
       assign io_active_out[io] = gs_o;
       assign io_data_out[io*DATA_WIDTH +: DATA_WIDTH] = gd_o;
       // Incoming: an active io input overrides whatever the cores produced.
       assign gs_i = io_active_in[io] ? 1'b1 : spread[SPREAD_LAYERS].gsel[0];
       assign gd_i = io_active_in[io] ? io_data_in[io*DATA_WIDTH +: DATA_WIDTH] : spread[SPREAD_LAYERS].gdata[0];
    end else begin:i_nio
       // Cell is not io-mapped: pass the top group through unchanged.
       assign gs_i = spread[SPREAD_LAYERS].gsel[0];
       assign gd_i = spread[SPREAD_LAYERS].gdata[0];
    end

    // calculate spreading back from groups of cores to individual cores
    // Walks from the top layer down to layer 0. A group is written if its
    // parent group (group/2 in the layer above) is written, in which case the
    // parent's data takes precedence, or if the group itself was selected on
    // the way up.
    for (layer=SPREAD_LAYERS; layer>=0; layer=layer-1) begin:collect
       localparam GROUPS = CORES >> layer;
       wire pgsel[GROUPS-1:0];                   // group is written after collecting
       wire [DATA_WIDTH-1:0] pgdata[GROUPS-1:0]; // data written to the group
       if (layer == SPREAD_LAYERS) begin:i_layerl
          // Group 0 of the top layer takes the (possibly io-overridden) values.
          assign pgsel[0] = gs_i;
          assign pgdata[0] = gd_i;
          // Any further top-layer groups are passed through unchanged.
          for (group=1; group<GROUPS; group=group+1) begin:g_group
             assign pgsel[group] = spread[layer].gsel[group];
             assign pgdata[group] = spread[layer].gdata[group];
          end
       end else begin:i_layernl
          for (group=0; group<GROUPS; group=group+1) begin:g_group
             wire gs = spread[layer].gsel[group];
             wire [DATA_WIDTH-1:0] gd = spread[layer].gdata[group];
             wire cgs = collect[layer+1].pgsel[group/2];
             wire [DATA_WIDTH-1:0] cgd = collect[layer+1].pgdata[group/2];
             assign pgsel[group] = cgs | gs;
             assign pgdata[group] = cgs ? cgd : gd;
          end
       end
    end
    // Layer 0 of the collect tree has one group per core: export it.
    for (core=0; core<CORES; core=core+1) begin:g_core_c
       assign postsel[core][addr] = collect[0].pgsel[core];
       assign postdata[core][addr] = collect[0].pgdata[core];
    end

    // sequential write logic
    // One flip-flop word per (core, addr): cleared by the synchronous
    // active-low reset, otherwise loaded only when postsel is set.
    for (core=0; core<CORES; core=core+1) begin:g_core_w
       always @(posedge clk) begin
          if (!rst_n) begin
             mem[core][addr] <= 0;
          end else begin
             if (postsel[core][addr]) begin
                mem[core][addr] <= postdata[core][addr];
             end
          end
       end
    end

 end

 // read logic
 // Each core reads its own block at its own read address, unregistered.
 for (core=0; core<CORES; core=core+1) begin:g_core_r
    wire [ADDR_WIDTH-1:0] craddr = raddr[core*ADDR_WIDTH +: ADDR_WIDTH];
    assign rdata[core*DATA_WIDTH +: DATA_WIDTH] = mem[core][craddr];
 end

 endgenerate

 endmodule

 `default_nettype wire
	// SPDX-License-Identifier: MIT
	// SPDX-FileCopyrightText: 2021 Tamas Hubai

	`default_nettype none

	/*
	Generates a DFF RAM block for each core with a tree-like interconnect mesh between them

	Parameters:
	CORES = number of cpu cores, also specifies the number of ram blocks
	DEPTH = number of words per ram block
	DATA_WIDTH = word size, number of bits per memory cell
	ADDR_WIDTH = address bus width, should be clog2(DEPTH)
	SPREAD_LAYERS = number of spread layers, should be clog2(CORES)
	SPREAD_WIDTH = spread bus width, should be clog2(2+SPREAD_LAYERS)
	IO_PORTS = number of io ports, should be <= DEPTH
	IO_FIRST = memory cell mapped to the first io port, should be <= DEPTH - IO_PORTS

	A value of wspread > 0 on write operations specifies that the same address should also be written in some
	other memory blocks. In particular, blocks whose number only differ in the lowest wspread bits are affected.
	If several simultaneous write operations affect the same memory cell, writes with higher wspread have
	priority. For writes having equal wspread the core with the lowest number wins.

	If USE_IO is nonzero and an address in the range IO_FIRST .. IO_FIRST+IO_PORTS-1 is written with
	wspread > SPREAD_LAYERS, wdata is also sent to the corresponding port of the io bus.
	Incoming data on the io bus is written to the respective cells with maximal spread (affecting all cores).
	*/

	// mem_mesh: one register-based memory block per core, plus the combinational
	// network that lets a single write land in several blocks in the same cycle.
	//
	// - USE_IO: when nonzero, cells IO_FIRST .. IO_FIRST+IO_PORTS-1 are tied to the io bus.
	// - Writes are registered on the rising edge of clk; while rst_n is low every
	//   cell is cleared to 0 on that edge.
	// - Reads are continuous assignments from the memory array.
	// - Per-core buses are packed with core 0 in the least significant slice.
	module mem_mesh #(parameter CORES=8, DEPTH=256, DATA_WIDTH=16, ADDR_WIDTH=8, SPREAD_LAYERS=3, SPREAD_WIDTH=3, USE_IO=1, IO_PORTS=16, IO_FIRST=240) (
	   input clk,                                   // clock signal
	   input rst_n,                                 // reset, active low
	   input [CORES-1:0] we,                        // write enable
	   input [CORES*ADDR_WIDTH-1:0] waddr,          // write address
	   input [CORES*SPREAD_WIDTH-1:0] wspread,      // write spread
	   input [CORES*DATA_WIDTH-1:0] wdata,          // write data
	   input [CORES*ADDR_WIDTH-1:0] raddr,          // read address
	   output [CORES*DATA_WIDTH-1:0] rdata,         // read data
	   input [IO_PORTS-1:0] io_active_in,           // is receiving data on io bus
	   output [IO_PORTS-1:0] io_active_out,         // is sending data on io bus
	   input [IO_PORTS*DATA_WIDTH-1:0] io_data_in,  // io bus input
	   output [IO_PORTS*DATA_WIDTH-1:0] io_data_out // io bus output
	);

	reg [DATA_WIDTH-1:0] mem[CORES-1:0][DEPTH-1:0];       // memory cells
	wire presel[CORES-1:0][DEPTH-1:0];                    // is address selected before spreading
	wire uspread[CORES-1:0][SPREAD_LAYERS+1-1:0];         // is spreading to layer
	wire postsel[CORES-1:0][DEPTH-1:0];                   // is address selected after spreading
	wire [DATA_WIDTH-1:0] postdata[CORES-1:0][DEPTH-1:0]; // data to be written after spreading

	generate genvar core, addr, layer, group, spl;

	// convert spread to unary: uspread[core][layer] is 1 when the core is writing
	// and its wspread value is greater than layer ('>' binds tighter than '&')
	for (core=0; core<CORES; core=core+1) begin:g_core
	   for(layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:g_layer
	      assign uspread[core][layer] = we[core] & wspread[core*SPREAD_WIDTH +: SPREAD_WIDTH] > layer;
	   end
	end

	// everything inside g_cell is instantiated once per memory address
	for (addr=0; addr<DEPTH; addr=addr+1) begin:g_cell

	   // convert write address to one-hot encoding
	   for (core=0; core<CORES; core=core+1) begin:g_core_m
	      assign presel[core][addr] = we[core] & (waddr[core*ADDR_WIDTH +: ADDR_WIDTH] == addr);
	   end

	   // calculate spreading from individual cores to groups of cores
	   // layer 0 has one group per core; each further layer has CORES >> layer
	   // groups, each fed by groups 2*group and 2*group+1 of the layer below
	   for (layer=0; layer<=SPREAD_LAYERS; layer=layer+1) begin:spread
	      localparam GROUPS = CORES >> layer;
	      wire gsel[GROUPS-1:0];                               // group has a write for this address
	      wire [DATA_WIDTH-1:0] gdata[GROUPS-1:0];             // data carried by the group
	      wire gspread[GROUPS-1:0][SPREAD_LAYERS+1-layer-1:0]; // remaining unary spread, bit 0 = "go one layer up"
	      if (layer == 0) begin:i_layerz
	         // seed the tree directly from the per-core write ports
	         for (group=0; group<GROUPS; group=group+1) begin:g_group
	            assign gsel[group] = presel[group][addr];
	            // data is forced to 0 when the core is not writing
	            assign gdata[group] = {(DATA_WIDTH){we[group]}} & wdata[group*DATA_WIDTH +: DATA_WIDTH];
	            for (spl=0; spl<=SPREAD_LAYERS; spl=spl+1) begin:cspread
	               assign gspread[group][spl] = uspread[group][spl];
	            end
	         end
	      end else begin:i_layernz
	         for (group=0; group<GROUPS; group=group+1) begin:g_group
	            // a child only propagates upward if it is selected AND its lowest
	            // remaining spread bit is set; children are groups 2*group and 2*group+1
	            wire gs1 = spread[layer-1].gsel[group*2] & spread[layer-1].gspread[group*2][0];
	            wire gs2 = spread[layer-1].gsel[group*2+1] & spread[layer-1].gspread[group*2+1][0];
	            wire [DATA_WIDTH-1:0] gd1 = spread[layer-1].gdata[group*2];
	            wire [DATA_WIDTH-1:0] gd2 = spread[layer-1].gdata[group*2+1];
	            assign gsel[group] = gs1 | gs2;
	            // the even-numbered child wins when both propagate
	            assign gdata[group] = gs1 ? gd1 : gd2;
	            // shift the winning child's unary spread down by one position
	            for (spl=0; spl<=SPREAD_LAYERS-layer; spl=spl+1) begin:g_spread
	               wire gsp1 = spread[layer-1].gspread[group*2][spl+1];
	               wire gsp2 = spread[layer-1].gspread[group*2+1][spl+1];
	               assign gspread[group][spl] = gs1 ? gsp1 : gsp2;
	            end
	         end
	      end
	   end

	   // mix in io logic at the highest spreading level
	   wire gs_i;                  // select fed into the top of the collect tree
	   wire [DATA_WIDTH-1:0] gd_i; // data fed into the top of the collect tree
	   if (USE_IO && IO_FIRST <= addr && addr < IO_FIRST + IO_PORTS) begin:i_io
	      localparam io = addr - IO_FIRST; // io port index of this cell
	      // outgoing: the top group is selected and still has a spread bit left
	      wire gs_o = spread[SPREAD_LAYERS].gsel[0] & spread[SPREAD_LAYERS].gspread[0][0];
	      wire [DATA_WIDTH-1:0] gd_o = {(DATA_WIDTH){gs_o}} & spread[SPREAD_LAYERS].gdata[0];
	      assign io_active_out[io] = gs_o;
	      assign io_data_out[io*DATA_WIDTH +: DATA_WIDTH] = gd_o;
	      // incoming: an active io input overrides whatever the cores produced
	      assign gs_i = io_active_in[io] ? 1'b1 : spread[SPREAD_LAYERS].gsel[0];
	      assign gd_i = io_active_in[io] ? io_data_in[io*DATA_WIDTH +: DATA_WIDTH] : spread[SPREAD_LAYERS].gdata[0];
	   end else begin:i_nio
	      // cell is not io-mapped: pass the top group through unchanged
	      assign gs_i = spread[SPREAD_LAYERS].gsel[0];
	      assign gd_i = spread[SPREAD_LAYERS].gdata[0];
	   end

	   // calculate spreading back from groups of cores to individual cores
	   // a group is written if its parent group (group/2 in the layer above) is
	   // written, in which case the parent's data takes precedence, or if the
	   // group itself was selected on the way up
	   for (layer=SPREAD_LAYERS; layer>=0; layer=layer-1) begin:collect
	      localparam GROUPS = CORES >> layer;
	      wire pgsel[GROUPS-1:0];                   // group is written after collecting
	      wire [DATA_WIDTH-1:0] pgdata[GROUPS-1:0]; // data written to the group
	      if (layer == SPREAD_LAYERS) begin:i_layerl
	         // group 0 of the top layer takes the (possibly io-overridden) values
	         assign pgsel[0] = gs_i;
	         assign pgdata[0] = gd_i;
	         for (group=1; group<GROUPS; group=group+1) begin:g_group
	            assign pgsel[group] = spread[layer].gsel[group];
	            assign pgdata[group] = spread[layer].gdata[group];
	         end
	      end else begin:i_layernl
	         for (group=0; group<GROUPS; group=group+1) begin:g_group
	            wire gs = spread[layer].gsel[group];
	            wire [DATA_WIDTH-1:0] gd = spread[layer].gdata[group];
	            wire cgs = collect[layer+1].pgsel[group/2];
	            wire [DATA_WIDTH-1:0] cgd = collect[layer+1].pgdata[group/2];
	            assign pgsel[group] = cgs | gs;
	            assign pgdata[group] = cgs ? cgd : gd;
	         end
	      end
	   end
	   // layer 0 of the collect tree has one group per core: export it
	   for (core=0; core<CORES; core=core+1) begin:g_core_c
	      assign postsel[core][addr] = collect[0].pgsel[core];
	      assign postdata[core][addr] = collect[0].pgdata[core];
	   end

	   // sequential write logic: synchronous active-low reset clears the cell,
	   // otherwise it is loaded only when postsel is set
	   for (core=0; core<CORES; core=core+1) begin:g_core_w
	      always @(posedge clk) begin
	         if (!rst_n) begin
	            mem[core][addr] <= 0;
	         end else begin
	            if (postsel[core][addr]) begin
	               mem[core][addr] <= postdata[core][addr];
	            end
	         end
	      end
	   end

	end

	// read logic: each core reads its own block at its own read address, unregistered
	for (core=0; core<CORES; core=core+1) begin:g_core_r
	   wire [ADDR_WIDTH-1:0] craddr = raddr[core*ADDR_WIDTH +: ADDR_WIDTH];
	   assign rdata[core*DATA_WIDTH +: DATA_WIDTH] = mem[core][craddr];
	end

	endgenerate

	endmodule

	`default_nettype wire