// blob: 03c7afbc3684680348233b2bd2c3eb79bbc4c987 [file] [log] [blame]
// SPDX-FileCopyrightText: 2020 Efabless Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// SPDX-License-Identifier: Apache-2.0
`default_nettype none
/*
*-------------------------------------------------------------
*
* user_proj_example
*
* This is an example of a (trivially simple) user project.
* It wraps a small systolic array (modules "main" and
* "systolic_module" below) and connects it to the logic
* analyzer only.
*
* The low byte of the logic analyzer input (la_data_in[7:0])
* is fed to the array, and the five 8-bit array results are
* returned on la_data_out[47:8]. The wishbone clock
* (wb_clk_i) clocks the array; the rest of the wishbone bus,
* the I/O pads and the IRQ lines are not used by this design.
*
* NOTE: the testbenches in directory "mprj_counter"
* ("io_ports", "la_test1", and "la_test2") appear to target
* the template counter project rather than this design;
* confirm whether they still apply.
*
*-------------------------------------------------------------
*/
// user_proj_example
//
// Top-level user-project wrapper. Only the logic analyzer is functionally
// used: la_data_in[7:0] is the stimulus byte for the systolic array and the
// five 8-bit results are returned on la_data_out[47:8]. wb_clk_i clocks the
// array.
//
// Every other output is tied to a constant so that no output of this module
// is left floating:
//   - wbs_ack_o / wbs_dat_o : this design never responds on the wishbone bus.
//   - la_data_out[7:0], la_data_out[127:48] : unused probe bits, driven low.
//   - io_out : driven low; io_oeb : driven high.
//     NOTE(review): io_oeb high is assumed to mean "pad output disabled"
//     (active-low output enable) -- confirm against the harness documentation.
//   - irq : no interrupts are raised.
//
// Parameter BITS is unused by this design and kept only so that existing
// instantiations that override it keep compiling.
module user_proj_example #(
    parameter BITS = 32
)(
`ifdef USE_POWER_PINS
    inout vccd1, // User area 1 1.8V supply
    inout vssd1, // User area 1 digital ground
`endif
    // Wishbone Slave ports (WB MI A)
    input wb_clk_i,
    input wb_rst_i,
    input wbs_stb_i,
    input wbs_cyc_i,
    input wbs_we_i,
    input [3:0] wbs_sel_i,
    input [31:0] wbs_dat_i,
    input [31:0] wbs_adr_i,
    output wbs_ack_o,
    output [31:0] wbs_dat_o,
    // Logic Analyzer Signals
    input [127:0] la_data_in,
    output [127:0] la_data_out,
    input [127:0] la_oenb,
    // IOs
    input [`MPRJ_IO_PADS-1:0] io_in,
    output [`MPRJ_IO_PADS-1:0] io_out,
    output [`MPRJ_IO_PADS-1:0] io_oeb,
    // IRQ
    output [2:0] irq
);
    wire clk;
    wire [7:0] in_data;
    wire [7:0] out1, out2, out3, out4, out5;

    assign clk     = wb_clk_i;
    assign in_data = la_data_in[7:0];

    // Array results on logic-analyzer probes [47:8].
    assign la_data_out[15:8]  = out1;
    assign la_data_out[23:16] = out2;
    assign la_data_out[31:24] = out3;
    assign la_data_out[39:32] = out4;
    assign la_data_out[47:40] = out5;

    // Tie off every output the design does not use.
    assign la_data_out[7:0]    = 8'h00;
    assign la_data_out[127:48] = {80{1'b0}};
    assign wbs_ack_o           = 1'b0;
    assign wbs_dat_o           = 32'h0000_0000;
    assign io_out              = {`MPRJ_IO_PADS{1'b0}};
    assign io_oeb              = {`MPRJ_IO_PADS{1'b1}};
    assign irq                 = 3'b000;

    main uut (
        .clk (clk),
        .in  (in_data),
        .out1(out1),
        .out2(out2),
        .out3(out3),
        .out4(out4),
        .out5(out5)
    );
endmodule
// main
//
// Thin wrapper around systolic_module: the single 8-bit input `in` is
// broadcast to all fifteen data inputs of the array (five top, five left,
// three diagonal-top, two diagonal-left), and the array's five result ports
// are passed straight through as out1..out5.
module main (
    input  wire       clk,
    input  wire [7:0] in,
    output wire [7:0] out1,
    output wire [7:0] out2,
    output wire [7:0] out3,
    output wire [7:0] out4,
    output wire [7:0] out5
);
    systolic_module dut (
        .clk              (clk),
        // operands entering from the top edge
        .input_top_1      (in),
        .input_top_2      (in),
        .input_top_3      (in),
        .input_top_4      (in),
        .input_top_5      (in),
        // operands entering from the left edge
        .input_left_1     (in),
        .input_left_2     (in),
        .input_left_3     (in),
        .input_left_4     (in),
        .input_left_5     (in),
        // accumulator seeds
        .input_diag_top_0 (in),
        .input_diag_top_1 (in),
        .input_diag_top_2 (in),
        .input_diag_left_1(in),
        .input_diag_left_2(in),
        // results
        .output_bottom_5  (out1),
        .output_bottom_4  (out2),
        .output_bottom_3  (out3),
        .output_right_4   (out4),
        .output_right_3   (out5)
    );
endmodule
// systolic_module
//
// ARRAY_SIZE x ARRAY_SIZE grid of clocked cells (ARRAY_SIZE = 2*MATRIX_SIZE-1).
//
// Every cell registers an "a" value travelling left-to-right and a "b" value
// travelling top-to-bottom. Cells inside the diagonal band
//     |row - column| < MATRIX_SIZE
// are PE instances, which additionally take an accumulator from their
// upper-left neighbour and forward a new one to their lower-right neighbour.
// Cells outside the band are delay_element instances (a/b registers only).
//
// Boundary handling (top/bottom takes preference over left/right):
//   a input   : column 0 takes array_input_left[row], otherwise the left
//               neighbour's a output.
//   b input   : row 0 takes array_input_top[column], otherwise the upper
//               neighbour's b output.
//   c input   : row 0 takes c_in_top[column]; else column 0 takes
//               c_in_left[row]; else the upper-left neighbour's c output.
//   a output  : last column drives array_output_right[row].
//   b output  : last row drives array_output_bottom[column].
//   c output  : last row drives c_out_bottom[column]; else last column drives
//               c_out_right[row]; else diagonal_wires[row][column].
//
// The port list is hard-wired for the default MATRIX_SIZE = 3 (five top and
// left inputs, indices up to 4); overriding MATRIX_SIZE / ARRAY_SIZE without
// also changing the port hookup below is not supported.
//
// Changes relative to the previous revision:
//   - The non-bottom left-edge delay cells (row 3, column 0 at the default
//     size) drove array_output_bottom[0] instead of vertical_wires[row][0].
//     That put two drivers on array_output_bottom[0] and left the b input of
//     the cell below undriven. The b output is now selected purely by
//     "is this the last row", so only bottom-row cells drive
//     array_output_bottom.
//   - REG_WIDTH is forwarded to the PE / delay_element instances.
//   - Generate blocks are named; hierarchical paths to the cells are now
//     g_row[r].g_col[c].g_pe.element / g_row[r].g_col[c].g_delay.element.
module systolic_module(clk,
    input_top_1,
    input_top_2,
    input_top_3,
    input_top_4,
    input_top_5,
    input_left_1,
    input_left_2,
    input_left_3,
    input_left_4,
    input_left_5,
    input_diag_top_0,
    input_diag_top_1,
    input_diag_top_2,
    input_diag_left_1,
    input_diag_left_2,
    output_bottom_5,
    output_bottom_4,
    output_bottom_3,
    output_right_4,
    output_right_3);

    input clk;

    parameter MATRIX_SIZE = 3;
    parameter ARRAY_SIZE = 2 * MATRIX_SIZE - 1;
    parameter REG_WIDTH = 8; // width of every data path in the array

    // Boundary buses of the array. Several entries of the c_* and
    // array_output_* buses are never driven or never read ("dummy").
    wire [REG_WIDTH-1:0] array_input_top [ARRAY_SIZE-1:0]; // TODO: review the type after adding streaming memory
    wire [REG_WIDTH-1:0] array_input_left [ARRAY_SIZE-1:0];
    wire [REG_WIDTH-1:0] c_in_left [ARRAY_SIZE-1:0];
    wire [REG_WIDTH-1:0] c_in_top [ARRAY_SIZE-1:0];
    wire [REG_WIDTH-1:0] c_out_right [ARRAY_SIZE-1:0];
    wire [REG_WIDTH-1:0] c_out_bottom [ARRAY_SIZE-1:0];
    wire [REG_WIDTH-1:0] array_output_bottom [ARRAY_SIZE-1:0];
    wire [REG_WIDTH-1:0] array_output_right [ARRAY_SIZE-1:0];

    // Cell-to-cell connections, indexed [row][column] of the driving cell.
    wire [REG_WIDTH-1:0] horizontal_wires [ARRAY_SIZE-1:0][ARRAY_SIZE-2:0];
    wire [REG_WIDTH-1:0] vertical_wires [ARRAY_SIZE-2:0][ARRAY_SIZE-1:0];
    wire [REG_WIDTH-1:0] diagonal_wires [ARRAY_SIZE-2:0][ARRAY_SIZE-2:0];

    // Module interface
    input [REG_WIDTH-1:0] input_top_1;
    input [REG_WIDTH-1:0] input_top_2;
    input [REG_WIDTH-1:0] input_top_3;
    input [REG_WIDTH-1:0] input_top_4;
    input [REG_WIDTH-1:0] input_top_5;
    input [REG_WIDTH-1:0] input_left_1;
    input [REG_WIDTH-1:0] input_left_2;
    input [REG_WIDTH-1:0] input_left_3;
    input [REG_WIDTH-1:0] input_left_4;
    input [REG_WIDTH-1:0] input_left_5;
    input [REG_WIDTH-1:0] input_diag_top_0;
    input [REG_WIDTH-1:0] input_diag_top_1;
    input [REG_WIDTH-1:0] input_diag_top_2;
    input [REG_WIDTH-1:0] input_diag_left_1;
    input [REG_WIDTH-1:0] input_diag_left_2;
    output [REG_WIDTH-1:0] output_bottom_5;
    output [REG_WIDTH-1:0] output_bottom_4;
    output [REG_WIDTH-1:0] output_bottom_3;
    output [REG_WIDTH-1:0] output_right_4;
    output [REG_WIDTH-1:0] output_right_3;

    // Port <-> boundary bus hookup (ports are numbered from 1, buses from 0).
    assign array_input_top[0] = input_top_1;
    assign array_input_top[1] = input_top_2;
    assign array_input_top[2] = input_top_3;
    assign array_input_top[3] = input_top_4;
    assign array_input_top[4] = input_top_5;
    assign array_input_left[0] = input_left_1;
    assign array_input_left[1] = input_left_2;
    assign array_input_left[2] = input_left_3;
    assign array_input_left[3] = input_left_4;
    assign array_input_left[4] = input_left_5;
    assign c_in_top[0] = input_diag_top_0;
    assign c_in_top[1] = input_diag_top_1;
    assign c_in_top[2] = input_diag_top_2;
    assign c_in_left[1] = input_diag_left_1;
    assign c_in_left[2] = input_diag_left_2;
    assign output_bottom_5 = c_out_bottom[4];
    assign output_bottom_4 = c_out_bottom[3];
    assign output_bottom_3 = c_out_bottom[2];
    assign output_right_4 = c_out_right[3];
    assign output_right_3 = c_out_right[2];

    // Build the array, one cell per (row, column).
    generate
        genvar row_index;
        genvar column_index;
        for (row_index = 0; row_index < ARRAY_SIZE; row_index = row_index + 1) begin : g_row
            for (column_index = 0; column_index < ARRAY_SIZE; column_index = column_index + 1) begin : g_col
                // Per-cell view of the a/b streams.
                wire [REG_WIDTH-1:0] a_in;
                wire [REG_WIDTH-1:0] b_in;
                wire [REG_WIDTH-1:0] a_out;
                wire [REG_WIDTH-1:0] b_out;

                // a stream: enters at the left edge, leaves at the right edge.
                if (column_index == 0) begin : g_a_in_edge
                    assign a_in = array_input_left[row_index];
                end
                else begin : g_a_in_chain
                    assign a_in = horizontal_wires[row_index][column_index-1];
                end

                if (column_index == ARRAY_SIZE-1) begin : g_a_out_edge
                    assign array_output_right[row_index] = a_out;
                end
                else begin : g_a_out_chain
                    assign horizontal_wires[row_index][column_index] = a_out;
                end

                // b stream: enters at the top edge, leaves at the bottom edge.
                if (row_index == 0) begin : g_b_in_edge
                    assign b_in = array_input_top[column_index];
                end
                else begin : g_b_in_chain
                    assign b_in = vertical_wires[row_index-1][column_index];
                end

                if (row_index == ARRAY_SIZE-1) begin : g_b_out_edge
                    assign array_output_bottom[column_index] = b_out;
                end
                else begin : g_b_out_chain
                    assign vertical_wires[row_index][column_index] = b_out;
                end

                // Cells with |row - column| < MATRIX_SIZE are processing
                // elements. The comparison is written without subtraction so
                // that it never relies on signed genvar arithmetic.
                if ((column_index < MATRIX_SIZE + row_index) &&
                    (row_index < MATRIX_SIZE + column_index)) begin : g_pe
                    wire [REG_WIDTH-1:0] c_in;
                    wire [REG_WIDTH-1:0] c_out;

                    // Accumulator input: top edge first, then left edge,
                    // otherwise the upper-left neighbour.
                    if (row_index == 0) begin : g_c_in_top
                        assign c_in = c_in_top[column_index];
                    end
                    else if (column_index == 0) begin : g_c_in_left
                        assign c_in = c_in_left[row_index];
                    end
                    else begin : g_c_in_chain
                        assign c_in = diagonal_wires[row_index-1][column_index-1];
                    end

                    // Accumulator output: bottom edge first, then right edge,
                    // otherwise on to the lower-right neighbour.
                    if (row_index == ARRAY_SIZE-1) begin : g_c_out_bottom
                        assign c_out_bottom[column_index] = c_out;
                    end
                    else if (column_index == ARRAY_SIZE-1) begin : g_c_out_right
                        assign c_out_right[row_index] = c_out;
                    end
                    else begin : g_c_out_chain
                        assign diagonal_wires[row_index][column_index] = c_out;
                    end

                    PE #(.REG_WIDTH(REG_WIDTH)) element (
                        .clk  (clk),
                        .a_n_1(a_in),
                        .b_n_1(b_in),
                        .c_n_1(c_in),
                        .a_n  (a_out),
                        .b_n  (b_out),
                        .c_ab (c_out)
                    );
                end
                else begin : g_delay
                    // Outside the band: only forward a and b by one cycle.
                    delay_element #(.REG_WIDTH(REG_WIDTH)) element (
                        .clk  (clk),
                        .a_n_1(a_in),
                        .b_n_1(b_in),
                        .a_n  (a_out),
                        .b_n  (b_out)
                    );
                end
            end
        end
    endgenerate
endmodule
// PE -- processing element of the systolic array.
//
// On every rising edge of clk:
//   a_n  takes the value of a_n_1 (a operand forwarded unchanged),
//   b_n  takes the value of b_n_1 (b operand forwarded unchanged),
//   c_ab takes a_n_1 * b_n_1 + c_n_1, kept to REG_WIDTH bits.
//
// There is no reset; the outputs hold whatever was last clocked in.
// The a_n / b_n outputs are registers here for simplicity only.
module PE #(
    parameter REG_WIDTH = 8
) (
    input  wire                 clk,
    input  wire [REG_WIDTH-1:0] a_n_1,
    input  wire [REG_WIDTH-1:0] b_n_1,
    input  wire [REG_WIDTH-1:0] c_n_1,
    output reg  [REG_WIDTH-1:0] a_n,
    output reg  [REG_WIDTH-1:0] b_n,
    output reg  [REG_WIDTH-1:0] c_ab
);
    always @(posedge clk) begin
        // Multiply-accumulate; result wraps at REG_WIDTH bits.
        c_ab <= c_n_1 + a_n_1 * b_n_1;
        // Pass the operands on to the neighbouring cells.
        b_n  <= b_n_1;
        a_n  <= a_n_1;
    end
endmodule
// delay_element -- pass-through cell of the systolic array.
//
// On every rising edge of clk, a_n takes the value of a_n_1 and b_n takes
// the value of b_n_1. There is no reset and no arithmetic.
//
// The port list previously ended in a stray comma, which declared an extra
// unnamed (null) sixth port; it has been removed. The five real ports keep
// their names and order, so existing instantiations are unaffected.
// The outputs are registers here for simplicity only.
module delay_element #(
    parameter REG_WIDTH = 8
) (
    input  wire                 clk,
    input  wire [REG_WIDTH-1:0] a_n_1,
    input  wire [REG_WIDTH-1:0] b_n_1,
    output reg  [REG_WIDTH-1:0] a_n,
    output reg  [REG_WIDTH-1:0] b_n
);
    always @(posedge clk) begin
        a_n <= a_n_1;
        b_n <= b_n_1;
    end
endmodule
`default_nettype wire