Add verilog sources
diff --git a/verilog/rtl/actfn.v b/verilog/rtl/actfn.v new file mode 100644 index 0000000..f7e5214 --- /dev/null +++ b/verilog/rtl/actfn.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// activation functions used in the neural network
//
// All values are NUM_WIDTH-bit two's complement fixed-point numbers whose
// lowest FRAC_WIDTH bits hold the fractional part.


// leaky ReLU
// res = x for x >= 0, res = x * 2^-LEAK_SHIFT for x < 0

module leaky_relu_comb (
    input [`NUM_WIDTH-1:0] x,
    output [`NUM_WIDTH-1:0] res
);

wire sig_x = x[`NUM_WIDTH-1];

// negative inputs are shifted right by LEAK_SHIFT bits; the vacated upper
// bits are filled with ones so the result stays negative
assign res = sig_x ? {{(`LEAK_SHIFT){1'b1}}, x[`NUM_WIDTH-1:`LEAK_SHIFT]} : x;

endmodule


// derivative of leaky ReLU
// res = 1 for x >= 0, res = 2^-LEAK_SHIFT for x < 0

module leaky_relu_diff_comb (
    input [`NUM_WIDTH-1:0] x,
    output [`NUM_WIDTH-1:0] res
);

wire sig_x = x[`NUM_WIDTH-1];

// both constants have a single set bit:
// bit FRAC_WIDTH-LEAK_SHIFT for the negative side, bit FRAC_WIDTH for 1.0
assign res = sig_x ?
    {{(`INT_WIDTH+`LEAK_SHIFT-1){1'b0}}, 1'b1, {(`FRAC_WIDTH-`LEAK_SHIFT){1'b0}}} :
    {{(`INT_WIDTH-1){1'b0}}, 1'b1, {(`FRAC_WIDTH){1'b0}}};

endmodule


// very rough approximation of 2^x, used in softmax
// only the integer part of x is used: the result has a single bit set at
// position (integer part of x) + FRAC_WIDTH, or all non-sign bits set when
// x is positive and too large to represent

module approx_exp_comb (
    input [`NUM_WIDTH-1:0] x,
    output [`NUM_WIDTH-1:0] res
);

// x is non-negative and its integer part exceeds INT_WIDTH-2
wire saturated = ~x[`NUM_WIDTH-1] & (x[`NUM_WIDTH-1:`FRAC_WIDTH] > `INT_WIDTH - 2);

// the result is never negative
assign res[`NUM_WIDTH-1] = 1'b0;
generate genvar g;
for (g=0; g<`NUM_WIDTH-1; g=g+1) begin:g_exp
    // one-hot decode of the integer part of x (compared as an INT_WIDTH-bit pattern)
    assign res[g] = saturated | (x[`NUM_WIDTH-1:`FRAC_WIDTH] == g - `FRAC_WIDTH);
end
endgenerate

endmodule


// piecewise linear approximation of 1/x, used in softmax
// x is normalised to a mantissa in [1, 2), the reciprocal of the mantissa is
// approximated by one of four line segments, and the result is shifted back
// according to the position of the leading one of x

module approx_inv_comb (
    input [`NUM_WIDTH-1:0] x, // assuming x > 0
    output [`NUM_WIDTH-1:0] res
);

wire [`NUM_WIDTH:0] bnd;        // bnd[g] = some bit of x at position >= g is set
wire [`NUM_WIDTH-1:0] msb;      // one-hot position of the leading one of x
wire [`FRAC_WIDTH-1:0] m;       // fractional part of the normalised mantissa

assign bnd[`NUM_WIDTH] = 0;

generate genvar g;

// The generated assigns are independent of the iteration order, so the loop
// counts upwards: a descending loop only terminates once the genvar becomes
// negative, which stricter tools may not accept.
for (g=0; g<`NUM_WIDTH; g=g+1) begin:g_msb
    assign bnd[g] = bnd[g+1] | x[g];
    assign msb[g] = bnd[g] & ~bnd[g+1];
end
for (g=0; g<`NUM_WIDTH; g=g+1) begin:g_mant
    // candidate mantissa if the leading one is at position g:
    // the bits below the leading one, left-aligned in FRAC_WIDTH bits
    wire [`FRAC_WIDTH-1:0] mc = msb[g] ? ({x, {(`FRAC_WIDTH){1'b0}}} >> g) : {(`FRAC_WIDTH){1'b0}};
    // running OR over the candidates (at most one of them is non-zero)
    wire [`FRAC_WIDTH-1:0] ms;
    if (g==0) begin:i_mantz
        assign ms = mc;
    end else begin:i_mantnz
        assign ms = g_mant[g-1].ms | mc;
    end
end
assign m = g_mant[`NUM_WIDTH-1].ms;

// m contains the input bit-shifted to within [1, 2), with its integer part (i.e. 1) removed
// for 1 <= x < 1.25 we use 1/x ~= 115/64 - 51/64 x
wire [`FRAC_WIDTH:0] minv_a = {7'd115, {(`FRAC_WIDTH-6){1'b0}}} - (({{(`FRAC_WIDTH){1'b0}}, 7'd51} * {7'd1, m}) >> 6);
// for 1.25 <= x < 1.5 we use 1/x ~= 95/64 - 35/64 x
wire [`FRAC_WIDTH:0] minv_b = {7'd95, {(`FRAC_WIDTH-6){1'b0}}} - (({{(`FRAC_WIDTH){1'b0}}, 7'd35} * {7'd1, m}) >> 6);
// for 1.5 <= x < 1.75 we use 1/x ~= 157/128 - 3/8 x
wire [`FRAC_WIDTH:0] minv_c = {8'd157, {(`FRAC_WIDTH-7){1'b0}}} - (({{(`FRAC_WIDTH){1'b0}}, 3'd3} * {3'd1, m}) >> 3);
// for 1.75 <= x < 2 we use 1/x ~= 17/16 - 9/32 x
wire [`FRAC_WIDTH:0] minv_d = {5'd17, {(`FRAC_WIDTH-4){1'b0}}} - (({{(`FRAC_WIDTH){1'b0}}, 5'd9} * {5'd1, m}) >> 5);
// the two top mantissa bits select the segment
wire [`FRAC_WIDTH:0] minv = m[`FRAC_WIDTH-1] ? (m[`FRAC_WIDTH-2] ? minv_d : minv_c) : (m[`FRAC_WIDTH-2] ? minv_b : minv_a);

for (g=0; g<`NUM_WIDTH; g=g+1) begin:g_mrec
    // candidate result if the leading one of x is at position g:
    // the mantissa reciprocal shifted by the opposite amount
    wire [`NUM_WIDTH-1:0] mrc = msb[g] ? ({{(`INT_WIDTH){1'b0}}, minv, {(`FRAC_WIDTH){1'b0}}} >> g) : {(`NUM_WIDTH){1'b0}};
    // running OR over the candidates (at most one of them is non-zero)
    wire [`NUM_WIDTH-1:0] mrs;
    if (g==0) begin:i_mrecz
        assign mrs = mrc;
    end else begin:i_mrecnz
        assign mrs = g_mrec[g-1].mrs | mrc;
    end
end
assign res = g_mrec[`NUM_WIDTH-1].mrs;

endgenerate

endmodule


// softmax using approximated 2^x and 1/x
// res[i] = 2^(x[i] - max(x)) / sum_j 2^(x[j] - max(x))

module approx_softmax_comb (
    input [`OUTPUT_SIZE*`NUM_WIDTH-1:0] x_pk,
    output [`OUTPUT_SIZE*`NUM_WIDTH-1:0] res_pk
);

wire [`NUM_WIDTH-1:0] x[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] res[`OUTPUT_SIZE-1:0];

`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, x, x_pk)
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, res, res_pk)

wire [`NUM_WIDTH-1:0] xmax;
wire [`INDEX_WIDTH-1:0] _ignore;    // argmax is not needed here

max_comb i_max (
    .x_pk,
    .res_val(xmax),
    .res_pos(_ignore)
);

wire [`NUM_WIDTH-1:0] dexp[`OUTPUT_SIZE-1:0];   // 2^(x[i] - xmax)
wire [`NUM_WIDTH-1:0] esum;                     // sum of all dexp

generate genvar g;

for (g=0; g<`OUTPUT_SIZE; g=g+1) begin:g_expsum
    // subtracting the maximum keeps every exponent at or below zero
    wire [`NUM_WIDTH-1:0] diff;
    sub_sat_comb i_sub (
        .a(x[g]),
        .b(xmax),
        .res(diff)
    );
    approx_exp_comb i_exp (
        .x(diff),
        .res(dexp[g])
    );
    // running (non-saturating) sum of the exponentials
    wire [`NUM_WIDTH-1:0] psum;
    if (g==0) begin
        assign psum = dexp[g];
    end else begin
        assign psum = g_expsum[g-1].psum + dexp[g];
    end
end
assign esum = g_expsum[`OUTPUT_SIZE-1].psum;

endgenerate

wire [`NUM_WIDTH-1:0] isum;     // approximately 1 / esum
approx_inv_comb i_inv (
    .x(esum),
    .res(isum)
);

generate

// division is implemented as multiplication by the reciprocal of the sum
for (g=0; g<`OUTPUT_SIZE; g=g+1) begin:g_div
    mul_sat_comb i_mul (
        .a(dexp[g]),
        .b(isum),
        .res(res[g])
    );
end

endgenerate

endmodule


// derivative of softmax
// res[i] = sm[i] - sm[i]^2 where sm = approx_softmax_comb(x)

module approx_softmax_diff_comb (
    input [`OUTPUT_SIZE*`NUM_WIDTH-1:0] x_pk,
    output [`OUTPUT_SIZE*`NUM_WIDTH-1:0] res_pk
);

wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] sm_pk;

approx_softmax_comb i_sm (
    .x_pk,
    .res_pk(sm_pk)
);

wire [`NUM_WIDTH-1:0] sm[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] res[`OUTPUT_SIZE-1:0];

`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, sm, sm_pk)
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, res, res_pk)

generate genvar g;

for (g=0; g<`OUTPUT_SIZE; g=g+1) begin
    wire [`NUM_WIDTH-1:0] sqr;      // sm[g]^2
    mul_sat_comb i_mul (
        .a(sm[g]),
        .b(sm[g]),
        .res(sqr)
    );
    sub_sat_comb i_sub (
        .a(sm[g]),
        .b(sqr),
        .res(res[g])
    );
end

endgenerate

endmodule
diff --git a/verilog/rtl/actfn_tb.v b/verilog/rtl/actfn_tb.v new file mode 100644 index 0000000..ba2c805 --- /dev/null +++ b/verilog/rtl/actfn_tb.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// testbenches for actfn.v
//
// Every testbench applies a sequence of stimuli 10 time units apart and
// prints the signals with $monitor.  Each sequence ends with "#10 $finish"
// so that the last stimulus is applied and printed before the simulation
// stops (a $finish in the same time step as a non-blocking assignment would
// end the run before that assignment takes effect).


// leaky ReLU

module leaky_relu_comb_tb ();

reg [`NUM_WIDTH-1:0] x;
wire [`NUM_WIDTH-1:0] res;

leaky_relu_comb dut (
    .x,
    .res
);

initial begin
    $monitor("time %4t x %64b res %64b", $time, x, res);
    x <= 3;
    #10
    x <= -3;
    #10
    x <= -3 << `LEAK_SHIFT;
    #10
    $finish;
end

endmodule


// derivative of leaky ReLU

module leaky_relu_diff_comb_tb ();

reg [`NUM_WIDTH-1:0] x;
wire [`NUM_WIDTH-1:0] res;

leaky_relu_diff_comb dut (
    .x,
    .res
);

initial begin
    $monitor("time %4t x %64b res %64b", $time, x, res);
    x <= 3;
    #10
    x <= -3;
    #10
    x <= -3 << `LEAK_SHIFT;
    #10
    $finish;
end

endmodule


// very rough approximation of 2^x, used in softmax

module approx_exp_comb_tb ();

reg [`NUM_WIDTH-1:0] x;
wire [`NUM_WIDTH-1:0] res;

approx_exp_comb dut (
    .x,
    .res
);

initial begin
    $monitor("time %4t x %64b res %64b", $time, x, res);
    // purely fractional input
    x <= 3;
    #10
    x <= 3 << `FRAC_WIDTH;
    #10
    x <= -3 << `FRAC_WIDTH;
    // largest exponent that is not saturated, then the first saturated one
    #10
    x <= (`INT_WIDTH-2) << `FRAC_WIDTH;
    #10
    x <= (`INT_WIDTH-1) << `FRAC_WIDTH;
    // smallest exponent that still sets a result bit, then one below it
    #10
    x <= (-`FRAC_WIDTH) << `FRAC_WIDTH;
    #10
    x <= (-`FRAC_WIDTH-1) << `FRAC_WIDTH;
    #10
    $finish;
end

endmodule


// piecewise linear approximation of 1/x, used in softmax

module approx_inv_comb_tb ();

reg [`NUM_WIDTH-1:0] x;
wire [`NUM_WIDTH-1:0] res;

approx_inv_comb dut (
    .x,
    .res
);

initial begin
    // also prints the internal mantissa and its reciprocal
    $monitor("time %4t x %64b res %64b m 1%24b minv %25b", $time, x, res, dut.m, dut.minv);
    x <= 1;
    #10
    x <= 1 << `FRAC_WIDTH;
    #10
    x <= 1 << (`FRAC_WIDTH + 2);
    #10
    x <= 1 << (`FRAC_WIDTH - 3);
    // 8 .. 15: two inputs in each of the four linear segments
    #10
    x <= 4'b1000 << `FRAC_WIDTH;
    #10
    x <= 4'b1001 << `FRAC_WIDTH;
    #10
    x <= 4'b1010 << `FRAC_WIDTH;
    #10
    x <= 4'b1011 << `FRAC_WIDTH;
    #10
    x <= 4'b1100 << `FRAC_WIDTH;
    #10
    x <= 4'b1101 << `FRAC_WIDTH;
    #10
    x <= 4'b1110 << `FRAC_WIDTH;
    #10
    x <= 4'b1111 << `FRAC_WIDTH;
    #10
    $finish;
end

endmodule


// softmax using approximated 2^x and 1/x
// NOTE: this testbench accesses elements 0..2, i.e. it expects OUTPUT_SIZE >= 3

module approx_softmax_comb_tb ();

reg [`NUM_WIDTH-1:0] x[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] res[`OUTPUT_SIZE-1:0];

wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] x_pk;
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, x, x_pk)

wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] res_pk;
`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, res, res_pk)

approx_softmax_comb dut (
    .x_pk,
    .res_pk
);

wire [`NUM_WIDTH-1:0] res0 = res[0]; // workaround for segfault in vvp
wire [`NUM_WIDTH-1:0] res1 = res[1];
wire [`NUM_WIDTH-1:0] res2 = res[2];

reg [`INDEX_WIDTH-1:0] i;
// marks the bit position of 1.0 in the printed header line
wire [31:0] hint = 32'b1 << `FRAC_WIDTH;

initial begin
    $display("TIME vvvv X[0] %32b X[1] %32b X[2] %32b RES[0] %32b RES[1] %32b RES[2] %32b", hint, hint, hint, hint, hint, hint);
    $monitor("time %4t x[0] %32b x[1] %32b x[2] %32b res[0] %32b res[1] %32b res[2] %32b", $time, x[0][31:0], x[1][31:0], x[2][31:0], res0[31:0], res1[31:0], res2[31:0]);
    for(i=0; i<`OUTPUT_SIZE; i=i+1) begin
        x[i] <= 0;
    end
    x[0] <= 0;
    x[1] <= 0;
    x[2] <= 0;
    #10
    x[0] <= 1 << (`FRAC_WIDTH-4);
    x[1] <= 2 << (`FRAC_WIDTH-4);
    x[2] <= 3 << (`FRAC_WIDTH-4);
    #10
    x[0] <= 1 << `FRAC_WIDTH;
    #10
    x[1] <= 1 << `FRAC_WIDTH;
    #10
    x[2] <= 2 << `FRAC_WIDTH;
    #10
    x[1] <= 2 << `FRAC_WIDTH;
    #10
    x[0] <= 2 << `FRAC_WIDTH;
    #10
    x[0] <= 1 << (`FRAC_WIDTH+4);
    #10
    x[1] <= 2 << (`FRAC_WIDTH+4);
    #10
    x[2] <= 3 << (`FRAC_WIDTH+4);
    #10
    $finish;
end

endmodule


// derivative of softmax
// NOTE: this testbench accesses elements 0..2, i.e. it expects OUTPUT_SIZE >= 3

module approx_softmax_diff_comb_tb ();

reg [`NUM_WIDTH-1:0] x[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] res[`OUTPUT_SIZE-1:0];

wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] x_pk;
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, x, x_pk)
wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] res_pk;
`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, res, res_pk)

approx_softmax_diff_comb dut (
    .x_pk,
    .res_pk
);

wire [`NUM_WIDTH-1:0] res0 = res[0]; // workaround for segfault in vvp
wire [`NUM_WIDTH-1:0] res1 = res[1];
wire [`NUM_WIDTH-1:0] res2 = res[2];

reg [`INDEX_WIDTH-1:0] i;
// marks the bit position of 1.0 in the printed header line
wire [31:0] hint = 32'b1 << `FRAC_WIDTH;

initial begin
    $display("TIME vvvv X[0] %32b X[1] %32b X[2] %32b RES[0] %32b RES[1] %32b RES[2] %32b", hint, hint, hint, hint, hint, hint);
    $monitor("time %4t x[0] %32b x[1] %32b x[2] %32b res[0] %32b res[1] %32b res[2] %32b", $time, x[0][31:0], x[1][31:0], x[2][31:0], res0[31:0], res1[31:0], res2[31:0]);
    for(i=0; i<`OUTPUT_SIZE; i=i+1) begin
        x[i] <= 0;
    end
    x[0] <= 0;
    x[1] <= 0;
    x[2] <= 0;
    #10
    x[0] <= 1 << (`FRAC_WIDTH-4);
    x[1] <= 2 << (`FRAC_WIDTH-4);
    x[2] <= 3 << (`FRAC_WIDTH-4);
    #10
    x[0] <= 1 << `FRAC_WIDTH;
    #10
    x[1] <= 1 << `FRAC_WIDTH;
    #10
    x[2] <= 2 << `FRAC_WIDTH;
    #10
    x[1] <= 2 << `FRAC_WIDTH;
    #10
    x[0] <= 2 << `FRAC_WIDTH;
    #10
    x[0] <= 1 << (`FRAC_WIDTH+4);
    #10
    x[1] <= 2 << (`FRAC_WIDTH+4);
    #10
    x[2] <= 3 << (`FRAC_WIDTH+4);
    #10
    $finish;
end

endmodule
diff --git a/verilog/rtl/config.v b/verilog/rtl/config.v new file mode 100644 index 0000000..e2c80d1 --- /dev/null +++ b/verilog/rtl/config.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// numbers are represented as fixed-point fractions
// with an integral part of INT_WIDTH bits
// and a fractional part of FRAC_WIDTH bits
// (64 bits are overkill here, but simulations were run on a 64-bit platform)
`define INT_WIDTH 40
`define FRAC_WIDTH 24
`define NUM_WIDTH (`INT_WIDTH + `FRAC_WIDTH)

// multiplication is the main bottleneck in the circuit complexity
// so we reduce integer & fractional widths for multiplications
// (this didn't significantly affect learning speed in our tests)
//
// MUL_INT_WIDTH: integer bits kept of each operand and of the product
// PRE_MUL_FRAC_WIDTH: fractional bits kept of each operand
// POST_MUL_FRAC_WIDTH: fractional bits kept of the product

`define MUL_INT_WIDTH 6
`define PRE_MUL_FRAC_WIDTH 12
`define POST_MUL_FRAC_WIDTH 16

// number of neurons in input, hidden 1, hidden 2 & output layers
`define INPUT_SIZE 1
`define HIDDEN1_SIZE 1
`define HIDDEN2_SIZE 1
`define OUTPUT_SIZE 1

// bits required to describe the sizes above
`define INDEX_WIDTH 10

// power of 1/2 used as the slope of leaky ReLU's negative part
`define LEAK_SHIFT 7

// power of 1/2 used as the learning rate
`define LEARN_SHIFT 7
diff --git a/verilog/rtl/interface.v b/verilog/rtl/interface.v new file mode 100644 index 0000000..4b4abab --- /dev/null +++ b/verilog/rtl/interface.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// Memory-mapped front end of neural_network.
//
// Everything is accessed through one synchronous port (addr, data_in, we,
// data_out); data_out is registered, so a read is answered one clock later.
// The address is split as addr[23:20] = region, addr[19:10] = high index,
// addr[9:0] = low index:
//
//   region 0..3     synapse weights; region = layer, high index = i, low index = j
//                   (the weight lookup itself goes through registered
//                   w_layer/w_i/w_j, which adds another clock of latency)
//   region 4        input activations a0[low index]          (read / write)
//   region 5        output activations a3[low index]         (read only)
//   region 6        ground truth g3[low index]               (read / write)
//   region 7        control & status, selected by the low index:
//                     0  forward propagation: a non-zero write starts it, a zero
//                        write clears the "finished" flag, a read returns the flag
//                        replicated over all bits
//                     1  the same for backward propagation
//                     2  argmax of a3, as a fixed-point number  (read only)
//                     3  argmax of g3, as a fixed-point number  (read only)
//
// Any other address reads as zero.

module neural_interface (
    input clk,
    input [23:0] addr,
    input [`NUM_WIDTH-1:0] data_in,
    input we,
    output reg [`NUM_WIDTH-1:0] data_out
);

// signals of the wrapped network
reg fp;
wire fp_out;
reg [`NUM_WIDTH-1:0] a0[`INPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] a3[`OUTPUT_SIZE-1:0];
reg bp;
wire bp_out;
reg [`NUM_WIDTH-1:0] g3[`OUTPUT_SIZE-1:0];
reg wu;
reg [1:0] w_layer;
reg [`INDEX_WIDTH-1:0] w_i;
reg [`INDEX_WIDTH-1:0] w_j;
reg [`NUM_WIDTH-1:0] w_in;
wire [`NUM_WIDTH-1:0] w_out;

// flattened versions of the arrays, as required by the module ports
wire [`INPUT_SIZE*`NUM_WIDTH-1:0] a0_pk;
`PACK_ARRAY(`NUM_WIDTH, `INPUT_SIZE, a0, a0_pk)
wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] a3_pk;
`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, a3, a3_pk)
wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] g3_pk;
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, g3, g3_pk)

neural_network i_nn (
    .clk(clk),
    .fp(fp),
    .fp_out(fp_out),
    .a0_pk(a0_pk),
    .a3_pk(a3_pk),
    .bp(bp),
    .bp_out(bp_out),
    .g3_pk(g3_pk),
    .wu(wu),
    .w_layer(w_layer),
    .w_i(w_i),
    .w_j(w_j),
    .w_in(w_in),
    .w_out(w_out)
);

// index of the largest output activation (the maximum itself is unused)
wire [`NUM_WIDTH-1:0] unused_max_a3;
wire [`INDEX_WIDTH-1:0] argmax_a3;
max_comb i_max_a3 (
    .x_pk(a3_pk),
    .res_val(unused_max_a3),
    .res_pos(argmax_a3)
);

// index of the largest ground truth entry (the maximum itself is unused)
wire [`NUM_WIDTH-1:0] unused_max_g3;
wire [`INDEX_WIDTH-1:0] argmax_g3;
max_comb i_max_g3 (
    .x_pk(g3_pk),
    .res_val(unused_max_g3),
    .res_pos(argmax_g3)
);

// sticky "finished" flags: set by fp_out / bp_out of the network,
// held until explicitly cleared through region 7
reg fp_out_hold;
reg bp_out_hold;
wire fp_done = fp_out_hold | fp_out;
wire bp_done = bp_out_hold | bp_out;

// address fields
wire [3:0] region = addr[23:20];
wire [9:0] idx_hi = addr[19:10];
wire [9:0] idx_lo = addr[9:0];

always @(posedge clk) begin
    // defaults: strobes last a single clock, flags are held, reads return zero
    fp <= 0;
    bp <= 0;
    wu <= 0;
    fp_out_hold <= fp_done;
    bp_out_hold <= bp_done;
    data_out <= {(`NUM_WIDTH){1'b0}};

    if (region[3:2] == 2'b00) begin
        // synapse weights
        w_layer <= region[1:0];
        w_i <= idx_hi;
        w_j <= idx_lo;
        data_out <= w_out;
        if (we) begin
            w_in <= data_in;
            wu <= 1;
        end
    end else begin
        case (region)
            4'd4: begin
                // input activations
                data_out <= a0[idx_lo];
                if (we) begin
                    a0[idx_lo] <= data_in;
                end
            end
            4'd5: begin
                // output activations
                data_out <= a3[idx_lo];
            end
            4'd6: begin
                // ground truth
                data_out <= g3[idx_lo];
                if (we) begin
                    g3[idx_lo] <= data_in;
                end
            end
            4'd7: begin
                // control & status
                case (idx_lo)
                    10'd0: begin
                        data_out <= {(`NUM_WIDTH){fp_done}};
                        if (we) begin
                            if (|data_in) begin
                                fp <= 1;
                            end else begin
                                fp_out_hold <= 0;
                            end
                        end
                    end
                    10'd1: begin
                        data_out <= {(`NUM_WIDTH){bp_done}};
                        if (we) begin
                            if (|data_in) begin
                                bp <= 1;
                            end else begin
                                bp_out_hold <= 0;
                            end
                        end
                    end
                    10'd2: begin
                        data_out <= argmax_a3 << `FRAC_WIDTH;
                    end
                    10'd3: begin
                        data_out <= argmax_g3 << `FRAC_WIDTH;
                    end
                    default: begin
                        // unmapped: keep the defaults
                    end
                endcase
            end
            default: begin
                // unmapped: keep the defaults
            end
        endcase
    end
end

endmodule
diff --git a/verilog/rtl/interface_tb.v b/verilog/rtl/interface_tb.v new file mode 100644 index 0000000..db60498 --- /dev/null +++ b/verilog/rtl/interface_tb.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// testbench for the single neural_interface module in interface.v
//
// Exercises every address region of the interface: weight access, input
// activations, output activations (including a write attempt), ground truth,
// starting / polling / clearing forward and backward propagation, and the two
// argmax registers.

module neural_interface_tb ();

reg clk;
reg [23:0] addr;
reg [`NUM_WIDTH-1:0] data_in;
reg we;
wire [`NUM_WIDTH-1:0] data_out;

neural_interface dut (
    .clk(clk),
    .addr(addr),
    .data_in(data_in),
    .we(we),
    .data_out(data_out)
);

// one full clock period: rising edge after 5 time units, falling edge after 10
task tick;
    begin
        #5 clk <= 1;
        #5 clk <= 0;
    end
endtask

// preload the internal state of the design through hierarchical references
generate genvar i; genvar j;

// all weights start at zero
for (i=0; i<`INPUT_SIZE; i=i+1) begin
    for (j=0; j<`HIDDEN1_SIZE; j=j+1) begin
        initial begin
            dut.i_nn.g_syn01_o[i].g_syn01_i[j].i_syn01.w <= 0;
        end
    end
end
for (i=0; i<`HIDDEN1_SIZE; i=i+1) begin
    for (j=0; j<`HIDDEN2_SIZE; j=j+1) begin
        initial begin
            dut.i_nn.g_syn12_o[i].g_syn12_i[j].i_syn12.w <= 0;
        end
    end
end
for (i=0; i<`HIDDEN2_SIZE; i=i+1) begin
    for (j=0; j<`OUTPUT_SIZE; j=j+1) begin
        initial begin
            dut.i_nn.g_syn23_o[i].g_syn23_i[j].i_syn23.w <= 0;
        end
    end
end
// distinct marker values for inputs, outputs and ground truth
for (i=0; i<`INPUT_SIZE; i=i+1) begin
    initial begin
        dut.a0[i] <= 1;
    end
end
for (i=0; i<`OUTPUT_SIZE; i=i+1) begin
    initial begin
        dut.i_nn.g_layer3[i].i_neu_3.a <= 2;
        dut.g3[i] <= 3;
    end
end

endgenerate

initial begin
    clk <= 0;
    we <= 0;
    $monitor("time %4t addr %24b data_in %64b we %1b data_out %64b", $time, addr, data_in, we, data_out);
    tick;

    // weight region: read, write 15, read back
    addr <= 24'b0010_0000000111_0000000011;
    tick;
    data_in <= 15;
    we <= 1;
    tick;
    we <= 0;
    repeat (2) tick;

    // input activations: write 33, read back
    addr <= 24'b0100_0000000000_0000000101;
    data_in <= 33;
    we <= 1;
    tick;
    we <= 0;
    tick;

    // output activations: write attempt with 17, read back
    addr <= 24'b0101_0000000000_0000000101;
    data_in <= 17;
    we <= 1;
    tick;
    we <= 0;
    tick;

    // ground truth: write 9, read back
    addr <= 24'b0110_0000000000_0000000101;
    data_in <= 9;
    we <= 1;
    tick;
    we <= 0;
    tick;

    // forward propagation: start, poll, then clear the flag
    dut.fp_out_hold <= 0;
    addr <= 24'b0111_0000000000_0000000000;
    data_in <= 1;
    we <= 1;
    tick;
    we <= 0;
    repeat (6) tick;
    data_in <= 0;
    we <= 1;
    tick;
    we <= 0;
    tick;

    // backward propagation: start, poll, then clear the flag
    dut.bp_out_hold <= 0;
    addr <= 24'b0111_0000000000_0000000001;
    data_in <= 1;
    we <= 1;
    tick;
    we <= 0;
    repeat (6) tick;
    data_in <= 0;
    we <= 1;
    tick;
    we <= 0;
    tick;

    // argmax of the outputs, then argmax of the ground truth
    addr <= 24'b0111_0000000000_0000000010;
    tick;
    addr <= 24'b0111_0000000000_0000000011;
    repeat (2) tick;
    $finish;
end

endmodule
diff --git a/verilog/rtl/macros.v b/verilog/rtl/macros.v new file mode 100644 index 0000000..26d3132 --- /dev/null +++ b/verilog/rtl/macros.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// macros for passing bus arrays to modules
//
// Module ports cannot be arrays here, so an array of LEN words of WIDTH bits
// each is passed as a single flat bus of LEN*WIDTH bits; element k occupies
// bits [WIDTH*k +: WIDTH].
//
// PACK_ARRAY(WIDTH, LEN, SRC, DEST)
//     drives the flat bus DEST from the array SRC
// UNPACK_ARRAY(WIDTH, LEN, DEST, SRC)
//     drives the array DEST from the flat bus SRC
//
// Both expand to a generate loop whose genvar is named after the array
// argument (pa_<array> / ua_<array>), so an array can be packed at most once
// and unpacked at most once per module.  Since the expansion contains its own
// generate ... endgenerate, the macros have to be used outside of generate
// regions.  Every use of WIDTH and LEN is parenthesised so that expressions
// can be passed as arguments.

`define PACK_ARRAY_INTERNAL(WIDTH,LEN,SRC,DEST,VAR) \
    generate genvar VAR; \
    for (VAR=0; VAR<(LEN); VAR=VAR+1) begin \
        assign DEST[((WIDTH)*VAR+((WIDTH)-1)):((WIDTH)*VAR)] = SRC[VAR][((WIDTH)-1):0]; \
    end \
    endgenerate
`define PACK_ARRAY(WIDTH,LEN,SRC,DEST) `PACK_ARRAY_INTERNAL(WIDTH,LEN,SRC,DEST,pa_``SRC)
`define UNPACK_ARRAY_INTERNAL(WIDTH,LEN,DEST,SRC,VAR) \
    generate genvar VAR; \
    for (VAR=0; VAR<(LEN); VAR=VAR+1) begin \
        assign DEST[VAR][((WIDTH)-1):0] = SRC[((WIDTH)*VAR+((WIDTH)-1)):((WIDTH)*VAR)]; \
    end \
    endgenerate
`define UNPACK_ARRAY(WIDTH,LEN,DEST,SRC) `UNPACK_ARRAY_INTERNAL(WIDTH,LEN,DEST,SRC,ua_``DEST)
diff --git a/verilog/rtl/math.v b/verilog/rtl/math.v new file mode 100644 index 0000000..a34db07 --- /dev/null +++ b/verilog/rtl/math.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// basic arithmetics with saturation
//
// Operands and results are NUM_WIDTH-bit two's complement numbers.  A result
// that does not fit is replaced by the most positive value (0111...1) or the
// most negative value (1000...0), depending on the sign of the true result.


// addition
// res = a + b

module add_sat_comb (
    input [`NUM_WIDTH-1:0] a,
    input [`NUM_WIDTH-1:0] b,
    output [`NUM_WIDTH-1:0] res
);

// add with one extra sign bit so that the true sign of the sum is known
wire [`NUM_WIDTH:0] wide = {a[`NUM_WIDTH-1], a} + {b[`NUM_WIDTH-1], b};
wire true_sign = wide[`NUM_WIDTH];
wire [`NUM_WIDTH-1:0] narrow = wide[`NUM_WIDTH-1:0];

// overflow: the sign of the truncated sum differs from the true sign
wire overflow = true_sign != narrow[`NUM_WIDTH-1];
wire [`NUM_WIDTH-1:0] limit = {true_sign, {(`NUM_WIDTH-1){~true_sign}}};

assign res = overflow ? limit : narrow;

endmodule


// subtraction
// res = a - b

module sub_sat_comb (
    input [`NUM_WIDTH-1:0] a,
    input [`NUM_WIDTH-1:0] b,
    output [`NUM_WIDTH-1:0] res
);

// subtract with one extra sign bit so that the true sign of the difference is known
wire [`NUM_WIDTH:0] wide = {a[`NUM_WIDTH-1], a} - {b[`NUM_WIDTH-1], b};
wire true_sign = wide[`NUM_WIDTH];
wire [`NUM_WIDTH-1:0] narrow = wide[`NUM_WIDTH-1:0];

// overflow: the sign of the truncated difference differs from the true sign
wire overflow = true_sign != narrow[`NUM_WIDTH-1];
wire [`NUM_WIDTH-1:0] limit = {true_sign, {(`NUM_WIDTH-1){~true_sign}}};

assign res = overflow ? limit : narrow;

endmodule


// multiplication
// res = a * b
//
// To keep the multiplier small, both operands are first reduced to
// MUL_INT_WIDTH integer bits and PRE_MUL_FRAC_WIDTH fractional bits
// (saturating if they are out of range), and only MUL_INT_WIDTH integer bits
// and POST_MUL_FRAC_WIDTH fractional bits of the product are kept.

module mul_sat_comb (
    input [`NUM_WIDTH-1:0] a,
    input [`NUM_WIDTH-1:0] b,
    output [`NUM_WIDTH-1:0] res
);

// width of a reduced operand
localparam SHORT_W = `MUL_INT_WIDTH + `PRE_MUL_FRAC_WIDTH;
// number of product bits dropped at the bottom
localparam LO_W = 2*`PRE_MUL_FRAC_WIDTH - `POST_MUL_FRAC_WIDTH;
// number of product bits kept
localparam MD_W = `MUL_INT_WIDTH + `POST_MUL_FRAC_WIDTH - 1;

wire neg_a = a[`NUM_WIDTH-1];
wire neg_b = b[`NUM_WIDTH-1];

// an operand fits into the reduced range iff all of its bits from the
// reduced sign position upwards are equal
wire [`INT_WIDTH-`MUL_INT_WIDTH:0] top_a = a[`NUM_WIDTH-1:`MUL_INT_WIDTH+`FRAC_WIDTH-1];
wire [`INT_WIDTH-`MUL_INT_WIDTH:0] top_b = b[`NUM_WIDTH-1:`MUL_INT_WIDTH+`FRAC_WIDTH-1];
wire clip_a = |top_a & ~&top_a;
wire clip_b = |top_b & ~&top_b;

// reduced operands, saturated if necessary
wire [SHORT_W-1:0] cut_a = a[`MUL_INT_WIDTH+`FRAC_WIDTH-1:`FRAC_WIDTH-`PRE_MUL_FRAC_WIDTH];
wire [SHORT_W-1:0] cut_b = b[`MUL_INT_WIDTH+`FRAC_WIDTH-1:`FRAC_WIDTH-`PRE_MUL_FRAC_WIDTH];
wire [SHORT_W-1:0] short_a = clip_a ? {neg_a, {(SHORT_W-1){~neg_a}}} : cut_a;
wire [SHORT_W-1:0] short_b = clip_b ? {neg_b, {(SHORT_W-1){~neg_b}}} : cut_b;

// signed product, computed on operands sign-extended to the product width
wire [2*SHORT_W-1:0] prod = {{(SHORT_W){neg_a}}, short_a} * {{(SHORT_W){neg_b}}, short_b};

// split the product: sign and excess integer bits / kept bits / dropped bits
wire neg_prod = prod[2*SHORT_W-1];
wire [`MUL_INT_WIDTH:0] top_prod = prod[2*SHORT_W-1:LO_W+MD_W];
wire [MD_W-1:0] keep_prod = prod[LO_W+MD_W-1:LO_W];

// the product fits iff the sign and the excess integer bits are all equal
wire clip_prod = |top_prod & ~&top_prod;

assign res = clip_prod ?
    {neg_prod, {(`NUM_WIDTH-1){~neg_prod}}} :
    {{(`INT_WIDTH-`MUL_INT_WIDTH+1){neg_prod}}, keep_prod, {(`FRAC_WIDTH-`POST_MUL_FRAC_WIDTH){1'b0}}};

endmodule


// maximum & argmax
// res_val = max(x)
// res_pos = argmax(x)
//
// Elements are compared as signed numbers.  If the maximum occurs more than
// once, the highest index is reported.

module max_comb (
    input [`OUTPUT_SIZE*`NUM_WIDTH-1:0] x_pk,
    output [`NUM_WIDTH-1:0] res_val,
    output [`INDEX_WIDTH-1:0] res_pos
);

wire [`NUM_WIDTH-1:0] x[`OUTPUT_SIZE-1:0];
`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, x, x_pk)

generate genvar g; genvar h;

for (g=0; g<`OUTPUT_SIZE; g=g+1) begin:g_max_o
    // x[g] is a maximum iff it is not smaller than any element (itself included)
    wire [`OUTPUT_SIZE-1:0] not_less;
    for (h=0; h<`OUTPUT_SIZE; h=h+1) begin:g_max_i
        assign not_less[h] = $signed(x[g]) >= $signed(x[h]);
    end
    wire is_max = &not_less;

    // running results over elements 0..g: maxima are OR-ed together (all of
    // them carry the same value), the position is overridden by later maxima
    wire [`NUM_WIDTH-1:0] max_val;
    wire [`INDEX_WIDTH-1:0] pos;
    wire [`NUM_WIDTH-1:0] cur_val = is_max ? x[g] : {(`NUM_WIDTH){1'b0}};
    if (g==0) begin
        assign pos = 0;
        assign max_val = cur_val;
    end else begin
        assign pos = is_max ? g : g_max_o[g-1].pos;
        assign max_val = g_max_o[g-1].max_val | cur_val;
    end
end
assign res_val = g_max_o[`OUTPUT_SIZE-1].max_val;
assign res_pos = g_max_o[`OUTPUT_SIZE-1].pos;

endgenerate

endmodule
diff --git a/verilog/rtl/math_tb.v b/verilog/rtl/math_tb.v new file mode 100644 index 0000000..242274a --- /dev/null +++ b/verilog/rtl/math_tb.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// testbenches for math.v
//
// Every testbench applies a sequence of stimuli 10 time units apart and
// prints the signals with $monitor.  Each sequence ends with "#10 $finish"
// so that the last stimulus is applied and printed before the simulation
// stops (a $finish in the same time step as a non-blocking assignment would
// end the run before that assignment takes effect).


// addition

module add_sat_comb_tb ();

reg [`NUM_WIDTH-1:0] a;
reg [`NUM_WIDTH-1:0] b;
wire [`NUM_WIDTH-1:0] res;

add_sat_comb dut (
    .a,
    .b,
    .res
);

initial begin
    $monitor("time %4t a %64b b %64b res %64b", $time, a, b, res);
    // small positive operands
    a <= 1;
    b <= 2;
    // small negative operands
    #10
    a <= -1;
    b <= -2;
    // clearing the sign bits yields two huge positive operands
    #10
    a[`NUM_WIDTH-1] <= 0;
    b[`NUM_WIDTH-1] <= 0;
    // huge positive a, huge negative b
    #10
    b = 2;
    b[`NUM_WIDTH-1] <= 1;
    // two huge negative operands
    #10
    a = 1;
    a[`NUM_WIDTH-1] <= 1;
    #10
    $finish;
end

endmodule


// subtraction

module sub_sat_comb_tb ();

reg [`NUM_WIDTH-1:0] a;
reg [`NUM_WIDTH-1:0] b;
wire [`NUM_WIDTH-1:0] res;

sub_sat_comb dut (
    .a,
    .b,
    .res
);

initial begin
    $monitor("time %4t a %64b b %64b res %64b", $time, a, b, res);
    // small positive operands
    a <= 1;
    b <= 2;
    // small negative operands
    #10
    a <= -1;
    b <= -2;
    // clearing the sign bits yields two huge positive operands
    #10
    a[`NUM_WIDTH-1] <= 0;
    b[`NUM_WIDTH-1] <= 0;
    // huge positive a, huge negative b
    #10
    b = 2;
    b[`NUM_WIDTH-1] <= 1;
    // two huge negative operands
    #10
    a = 1;
    a[`NUM_WIDTH-1] <= 1;
    #10
    $finish;
end

endmodule


// multiplication

module mul_sat_comb_tb ();

reg [`NUM_WIDTH-1:0] a;
reg [`NUM_WIDTH-1:0] b;
wire [`NUM_WIDTH-1:0] res;

mul_sat_comb dut (
    .a,
    .b,
    .res
);

initial begin
    $monitor("time %4t a %64b b %64b res %64b", $time, a, b, res);
    // 1 * 2
    a <= 1 << `FRAC_WIDTH;
    b <= 2 << `FRAC_WIDTH;
    // -1 * -2
    #10
    a <= -1 << `FRAC_WIDTH;
    b <= -2 << `FRAC_WIDTH;
    // clearing the sign bits yields two huge positive operands
    #10
    a[`NUM_WIDTH-1] <= 0;
    b[`NUM_WIDTH-1] <= 0;
    // huge positive a, huge negative b
    #10
    b <= 2 << `FRAC_WIDTH;
    b[`NUM_WIDTH-1] <= 1;
    // two huge negative operands
    #10
    a <= 1 << `FRAC_WIDTH;
    a[`NUM_WIDTH-1] <= 1;
    #10
    $finish;
end

endmodule


// maximum & argmax
// NOTE: this testbench accesses elements 0..2, i.e. it expects OUTPUT_SIZE >= 3

module max_comb_tb ();

reg [`NUM_WIDTH-1:0] x[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] res_val;
wire [`INDEX_WIDTH-1:0] res_pos;

wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] x_pk;
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, x, x_pk)

max_comb dut (
    .x_pk,
    .res_val,
    .res_pos
);

reg [`INDEX_WIDTH-1:0] i;

initial begin
    $monitor("time %4t x[0] %32b x[1] %32b x[2] %32b res_val %32b res_pos %10b", $time, x[0][31:0], x[1][31:0], x[2][31:0], res_val[31:0], res_pos);
    // elements that are not driven explicitly below stay at -20
    for(i=0; i<`OUTPUT_SIZE; i=i+1) begin
        x[i] <= -20;
    end
    x[0] <= 0;
    x[1] <= 0;
    x[2] <= 0;
    #10
    x[0] <= 1;
    x[1] <= 2;
    x[2] <= 3;
    #10
    x[0] <= 10;
    // ties between several elements
    #10
    x[1] <= 10;
    #10
    x[2] <= 20;
    #10
    x[1] <= 20;
    #10
    x[0] <= 20;
    // negative values
    #10
    x[0] <= -1;
    x[1] <= -2;
    x[2] <= -3;
    #10
    x[0] <= 1;
    #10
    $finish;
end

endmodule
diff --git a/verilog/rtl/network.v b/verilog/rtl/network.v new file mode 100644 index 0000000..6fec60c --- /dev/null +++ b/verilog/rtl/network.v
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2022 Tamas Hubai

`default_nettype none

// neural network composed of an input layer,
// two fully connected hidden layers with a ReLU activation function
// and a fully connected output layer with a softmax activation function
//
// Ports:
//   clk               clock
//   fp / fp_out       start strobe of forward propagation / strobe returned by
//                     the output layer
//   a0_pk             packed input activations
//   a3_pk             packed output activations
//   bp / bp_out       start strobe of backward propagation / strobe returned by
//                     the first synapse layer
//   g3_pk             packed ground truth, compared against a3 during
//                     backward propagation
//   wu                write strobe for the weight selected by w_layer, w_i, w_j
//   w_layer           synapse layer of the selected weight (0: input-hidden1,
//                     1: hidden1-hidden2, 2: hidden2-output)
//   w_i, w_j          source / destination neuron index of the selected weight
//   w_in / w_out      new value / current value of the selected weight
//
// Naming of the internal buses (k = layer number): zk = weighted input sums,
// ak = activations, tk = feedback arriving from the following layer,
// ek = feedback passed on to the preceding synapses.
//
// The strobes travel through the stages in order; since all elements of a
// stage work in lockstep, the strobe output of element 0 stands in for the
// whole stage.

module neural_network (
    input clk,
    input fp,
    output fp_out,
    input [`INPUT_SIZE*`NUM_WIDTH-1:0] a0_pk,
    output [`OUTPUT_SIZE*`NUM_WIDTH-1:0] a3_pk,
    input bp,
    output bp_out,
    input [`OUTPUT_SIZE*`NUM_WIDTH-1:0] g3_pk, // ground truth
    input wu,
    input [1:0] w_layer,
    input [`INDEX_WIDTH-1:0] w_i,
    input [`INDEX_WIDTH-1:0] w_j,
    input [`NUM_WIDTH-1:0] w_in,
    output [`NUM_WIDTH-1:0] w_out
);

wire [`NUM_WIDTH-1:0] a0[`INPUT_SIZE-1:0];
`UNPACK_ARRAY(`NUM_WIDTH, `INPUT_SIZE, a0, a0_pk)
wire [`NUM_WIDTH-1:0] a3[`OUTPUT_SIZE-1:0];
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, a3, a3_pk)
wire [`NUM_WIDTH-1:0] g3[`OUTPUT_SIZE-1:0];
`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, g3, g3_pk)

generate genvar g; genvar h;

// synapses between input layer & hidden layer 1
wire bp_s01;
wire [`NUM_WIDTH-1:0] e1[`HIDDEN1_SIZE-1:0];
for (g=0; g<`INPUT_SIZE; g=g+1) begin:g_syn01_o
    for (h=0; h<`HIDDEN1_SIZE; h=h+1) begin:g_syn01_i
        // these local strobes shadow the module ports of the same name
        wire fp_out;
        wire bp_out;
        wire [`NUM_WIDTH-1:0] zc;
        wire [`NUM_WIDTH-1:0] tc; // ignored for input layer
        // this synapse is the one addressed by the weight access port
        wire wu_sel = (w_layer == 0) && (w_i == g) && (w_j == h);
        wire [`NUM_WIDTH-1:0] w_out_r;
        synapse i_syn01 (
            .clk,
            .fp,
            .fp_out,
            .a(a0[g]),
            .zc,
            .bp(bp_s01),
            .bp_out,
            .e(e1[h]),
            .tc,
            .wu(wu && wu_sel),
            .w_in(w_in),
            .w_out(w_out_r)
        );
        // weight readback: OR together the (masked) weights of this row
        wire [`NUM_WIDTH-1:0] w_out_s = wu_sel ? w_out_r : {(`NUM_WIDTH){1'b0}};
        wire [`NUM_WIDTH-1:0] w_out_a;
        if (h==0) begin
            assign w_out_a = w_out_s;
        end else begin
            assign w_out_a = g_syn01_i[h-1].w_out_a | w_out_s;
        end
    end
    // weight readback: OR together the rows
    wire [`NUM_WIDTH-1:0] w_out_b;
    if (g==0) begin
        assign w_out_b = g_syn01_i[`HIDDEN1_SIZE-1].w_out_a;
    end else begin
        assign w_out_b = g_syn01_o[g-1].w_out_b | g_syn01_i[`HIDDEN1_SIZE-1].w_out_a;
    end
end
wire [`NUM_WIDTH-1:0] w_out_c01 = g_syn01_o[`INPUT_SIZE-1].w_out_b;

// z1[g] = saturating sum of the contributions of all synapses leading to neuron g
wire [`NUM_WIDTH-1:0] z1[`HIDDEN1_SIZE-1:0];
for (g=0; g<`HIDDEN1_SIZE; g=g+1) begin:g_z1_o
    for (h=0; h<`INPUT_SIZE; h=h+1) begin:g_z1_i
        wire [`NUM_WIDTH-1:0] zc = g_syn01_o[h].g_syn01_i[g].zc;
        wire [`NUM_WIDTH-1:0] z1s;
        if (h==0) begin
            assign z1s = zc;
        end else begin
            add_sat_comb i_add_z1 (
                .a(g_z1_i[h-1].z1s),
                .b(zc),
                .res(z1s)
            );
        end
    end
    assign z1[g] = g_z1_i[`INPUT_SIZE-1].z1s;
end

wire fp_h1 = g_syn01_o[0].g_syn01_i[0].fp_out;
assign bp_out = g_syn01_o[0].g_syn01_i[0].bp_out;

// hidden layer 1
wire bp_h1;
wire [`NUM_WIDTH-1:0] a1[`HIDDEN1_SIZE-1:0];
wire [`NUM_WIDTH-1:0] t1[`HIDDEN1_SIZE-1:0];
for (g=0; g<`HIDDEN1_SIZE; g=g+1) begin:g_layer1
    wire fp_out;
    wire bp_out;
    // the activation function and its derivative are evaluated outside the neuron
    wire [`NUM_WIDTH-1:0] to_act;
    wire [`NUM_WIDTH-1:0] from_act;
    wire [`NUM_WIDTH-1:0] from_act_diff;
    neuron i_neu_1 (
        .clk,
        .fp(fp_h1),
        .fp_out,
        .z(z1[g]),
        .a(a1[g]),
        .bp(bp_h1),
        .bp_out,
        .t(t1[g]),
        .e(e1[g]),
        .to_act,
        .from_act,
        .from_act_diff
    );
    leaky_relu_comb i_act_1 (
        .x(to_act),
        .res(from_act)
    );
    leaky_relu_diff_comb i_act_diff_1 (
        .x(to_act),
        .res(from_act_diff)
    );
end

wire fp_s12 = g_layer1[0].fp_out;
assign bp_s01 = g_layer1[0].bp_out;

// synapses between hidden layers 1 & 2
wire bp_s12;
wire [`NUM_WIDTH-1:0] e2[`HIDDEN2_SIZE-1:0];
for (g=0; g<`HIDDEN1_SIZE; g=g+1) begin:g_syn12_o
    for (h=0; h<`HIDDEN2_SIZE; h=h+1) begin:g_syn12_i
        wire fp_out;
        wire bp_out;
        wire [`NUM_WIDTH-1:0] zc;
        wire [`NUM_WIDTH-1:0] tc;
        wire wu_sel = (w_layer == 1) && (w_i == g) && (w_j == h);
        wire [`NUM_WIDTH-1:0] w_out_r;
        synapse i_syn12 (
            .clk,
            .fp(fp_s12),
            .fp_out,
            .a(a1[g]),
            .zc,
            .bp(bp_s12),
            .bp_out,
            .e(e2[h]),
            .tc,
            .wu(wu && wu_sel),
            .w_in(w_in),
            .w_out(w_out_r)
        );
        wire [`NUM_WIDTH-1:0] w_out_s = wu_sel ? w_out_r : {(`NUM_WIDTH){1'b0}};
        wire [`NUM_WIDTH-1:0] w_out_a;
        if (h==0) begin
            assign w_out_a = w_out_s;
        end else begin
            assign w_out_a = g_syn12_i[h-1].w_out_a | w_out_s;
        end
    end
    wire [`NUM_WIDTH-1:0] w_out_b;
    if (g==0) begin
        assign w_out_b = g_syn12_i[`HIDDEN2_SIZE-1].w_out_a;
    end else begin
        assign w_out_b = g_syn12_o[g-1].w_out_b | g_syn12_i[`HIDDEN2_SIZE-1].w_out_a;
    end
end
wire [`NUM_WIDTH-1:0] w_out_c12 = g_syn12_o[`HIDDEN1_SIZE-1].w_out_b;

// z2[g] = saturating sum of the contributions of all synapses leading to neuron g
wire [`NUM_WIDTH-1:0] z2[`HIDDEN2_SIZE-1:0];
for (g=0; g<`HIDDEN2_SIZE; g=g+1) begin:g_z2_o
    for (h=0; h<`HIDDEN1_SIZE; h=h+1) begin:g_z2_i
        wire [`NUM_WIDTH-1:0] zc = g_syn12_o[h].g_syn12_i[g].zc;
        wire [`NUM_WIDTH-1:0] z2s;
        if (h==0) begin
            assign z2s = zc;
        end else begin
            add_sat_comb i_add_z2 (
                .a(g_z2_i[h-1].z2s),
                .b(zc),
                .res(z2s)
            );
        end
    end
    assign z2[g] = g_z2_i[`HIDDEN1_SIZE-1].z2s;
end

// t1[g] = saturating sum of the feedback of all synapses leaving neuron g
for (g=0; g<`HIDDEN1_SIZE; g=g+1) begin:g_t1_o
    for(h=0; h<`HIDDEN2_SIZE; h=h+1) begin:g_t1_i
        wire [`NUM_WIDTH-1:0] tc = g_syn12_o[g].g_syn12_i[h].tc;
        wire [`NUM_WIDTH-1:0] t1s;
        if (h==0) begin
            assign t1s = tc;
        end else begin
            add_sat_comb i_add_t1 (
                .a(g_t1_i[h-1].t1s),
                .b(tc),
                .res(t1s)
            );
        end
    end
    assign t1[g] = g_t1_i[`HIDDEN2_SIZE-1].t1s;
end

wire fp_h2 = g_syn12_o[0].g_syn12_i[0].fp_out;
assign bp_h1 = g_syn12_o[0].g_syn12_i[0].bp_out;

// hidden layer 2
wire bp_h2;
wire [`NUM_WIDTH-1:0] a2[`HIDDEN2_SIZE-1:0];
wire [`NUM_WIDTH-1:0] t2[`HIDDEN2_SIZE-1:0];
for (g=0; g<`HIDDEN2_SIZE; g=g+1) begin:g_layer2
    wire fp_out;
    wire bp_out;
    wire [`NUM_WIDTH-1:0] to_act;
    wire [`NUM_WIDTH-1:0] from_act;
    wire [`NUM_WIDTH-1:0] from_act_diff;
    neuron i_neu_2 (
        .clk,
        .fp(fp_h2),
        .fp_out,
        .z(z2[g]),
        .a(a2[g]),
        .bp(bp_h2),
        .bp_out,
        .t(t2[g]),
        .e(e2[g]),
        .to_act,
        .from_act,
        .from_act_diff
    );
    leaky_relu_comb i_act_2 (
        .x(to_act),
        .res(from_act)
    );
    leaky_relu_diff_comb i_act_diff_2 (
        .x(to_act),
        .res(from_act_diff)
    );
end

wire fp_s23 = g_layer2[0].fp_out;
assign bp_s12 = g_layer2[0].bp_out;

// synapses between hidden layer 2 & output layer
wire bp_s23;
wire [`NUM_WIDTH-1:0] e3[`OUTPUT_SIZE-1:0];
for (g=0; g<`HIDDEN2_SIZE; g=g+1) begin:g_syn23_o
    for (h=0; h<`OUTPUT_SIZE; h=h+1) begin:g_syn23_i
        wire fp_out;
        wire bp_out;
        wire [`NUM_WIDTH-1:0] zc;
        wire [`NUM_WIDTH-1:0] tc;
        wire wu_sel = (w_layer == 2) && (w_i == g) && (w_j == h);
        wire [`NUM_WIDTH-1:0] w_out_r;
        synapse i_syn23 (
            .clk,
            .fp(fp_s23),
            .fp_out,
            .a(a2[g]),
            .zc,
            .bp(bp_s23),
            .bp_out,
            .e(e3[h]),
            .tc,
            .wu(wu && wu_sel),
            .w_in(w_in),
            .w_out(w_out_r)
        );
        wire [`NUM_WIDTH-1:0] w_out_s = wu_sel ? w_out_r : {(`NUM_WIDTH){1'b0}};
        wire [`NUM_WIDTH-1:0] w_out_a;
        if (h==0) begin
            assign w_out_a = w_out_s;
        end else begin
            assign w_out_a = g_syn23_i[h-1].w_out_a | w_out_s;
        end
    end
    wire [`NUM_WIDTH-1:0] w_out_b;
    if (g==0) begin
        assign w_out_b = g_syn23_i[`OUTPUT_SIZE-1].w_out_a;
    end else begin
        assign w_out_b = g_syn23_o[g-1].w_out_b | g_syn23_i[`OUTPUT_SIZE-1].w_out_a;
    end
end
wire [`NUM_WIDTH-1:0] w_out_c23 = g_syn23_o[`HIDDEN2_SIZE-1].w_out_b;

// z3[g] = saturating sum of the contributions of all synapses leading to neuron g
wire [`NUM_WIDTH-1:0] z3[`OUTPUT_SIZE-1:0];
for (g=0; g<`OUTPUT_SIZE; g=g+1) begin:g_z3_o
    for (h=0; h<`HIDDEN2_SIZE; h=h+1) begin:g_z3_i
        wire [`NUM_WIDTH-1:0] zc = g_syn23_o[h].g_syn23_i[g].zc;
        wire [`NUM_WIDTH-1:0] z3s;
        if (h==0) begin
            assign z3s = zc;
        end else begin
            add_sat_comb i_add_z3 (
                .a(g_z3_i[h-1].z3s),
                .b(zc),
                .res(z3s)
            );
        end
    end
    assign z3[g] = g_z3_i[`HIDDEN2_SIZE-1].z3s;
end

// t2[g] = saturating sum of the feedback of all synapses leaving neuron g
for (g=0; g<`HIDDEN2_SIZE; g=g+1) begin:g_t2_o
    for(h=0; h<`OUTPUT_SIZE; h=h+1) begin:g_t2_i
        wire [`NUM_WIDTH-1:0] tc = g_syn23_o[g].g_syn23_i[h].tc;
        wire [`NUM_WIDTH-1:0] t2s;
        if (h==0) begin
            assign t2s = tc;
        end else begin
            add_sat_comb i_add_t2 (
                .a(g_t2_i[h-1].t2s),
                .b(tc),
                .res(t2s)
            );
        end
    end
    assign t2[g] = g_t2_i[`OUTPUT_SIZE-1].t2s;
end

wire fp_h3 = g_syn23_o[0].g_syn23_i[0].fp_out;
assign bp_h2 = g_syn23_o[0].g_syn23_i[0].bp_out;

// output layer
// softmax needs all neuron values at once, so the activation function is
// connected through arrays and instantiated once below the loop
wire [`NUM_WIDTH-1:0] t3[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] to_softmax[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] from_softmax[`OUTPUT_SIZE-1:0];
wire [`NUM_WIDTH-1:0] from_softmax_diff[`OUTPUT_SIZE-1:0];
for (g=0; g<`OUTPUT_SIZE; g=g+1) begin:g_layer3
    wire fp_out;
    wire bp_out;
    neuron i_neu_3 (
        .clk,
        .fp(fp_h3),
        .fp_out,
        .z(z3[g]),
        .a(a3[g]),
        .bp,
        .bp_out,
        .t(t3[g]),
        .e(e3[g]),
        .to_act(to_softmax[g]),
        .from_act(from_softmax[g]),
        .from_act_diff(from_softmax_diff[g])
    );
    // feedback using ground truth
    sub_sat_comb i_sub_fb (
        .a(a3[g]),
        .b(g3[g]),
        .res(t3[g])
    );
end

endgenerate

// The generate region is closed above because PACK_ARRAY / UNPACK_ARRAY
// expand to generate regions of their own, and generate regions must not be
// nested.

wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] to_softmax_pk;
`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, to_softmax, to_softmax_pk)
wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] from_softmax_pk;
`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, from_softmax, from_softmax_pk)
wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] from_softmax_diff_pk;
`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, from_softmax_diff, from_softmax_diff_pk)

approx_softmax_comb i_act_3 (
    .x_pk(to_softmax_pk),
    .res_pk(from_softmax_pk)
);
approx_softmax_diff_comb i_act_diff_3 (
    .x_pk(to_softmax_pk),
    .res_pk(from_softmax_diff_pk)
);

assign fp_out = g_layer3[0].fp_out;
assign bp_s23 = g_layer3[0].bp_out;
// at most one synapse is selected, so OR-ing the three layers acts as a multiplexer
assign w_out = w_out_c01 | w_out_c12 | w_out_c23;

endmodule
diff --git a/verilog/rtl/network_tb.v b/verilog/rtl/network_tb.v new file mode 100644 index 0000000..09fc949 --- /dev/null +++ b/verilog/rtl/network_tb.v
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2022 Tamas Hubai
+
+`default_nettype none
+
+// testbench for the single neural_network module in network.v
+// (preloads every synapse weight, then pulses fp, bp and wu once each while $monitor prints the watched signals)
+module neural_network_tb ();
+
+reg clk; // toggled by hand in the stimulus block, one period is 10 time units
+reg fp; // forward propagation strobe
+wire fp_out;
+reg [`NUM_WIDTH-1:0] a0[`INPUT_SIZE-1:0]; // network inputs
+wire [`NUM_WIDTH-1:0] a3[`OUTPUT_SIZE-1:0]; // network outputs
+reg bp; // backward propagation strobe
+wire bp_out;
+reg [`NUM_WIDTH-1:0] g3[`OUTPUT_SIZE-1:0]; // ground truth, subtracted from a3 inside the network
+reg wu; // weight update strobe
+reg [1:0] w_layer; // presumably selects the layer of the weight seen on w_in / w_out - confirm against network.v
+reg [`INDEX_WIDTH-1:0] w_i; // presumably the first index of the selected weight - confirm
+reg [`INDEX_WIDTH-1:0] w_j; // presumably the second index of the selected weight - confirm
+reg [`NUM_WIDTH-1:0] w_in; // weight value written while wu is high
+wire [`NUM_WIDTH-1:0] w_out; // weight read-back, printed by $monitor
+
+wire [`INPUT_SIZE*`NUM_WIDTH-1:0] a0_pk; // flat vector counterpart of a0 for the dut port
+`PACK_ARRAY(`NUM_WIDTH, `INPUT_SIZE, a0, a0_pk)
+wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] a3_pk; // flat vector counterpart of a3 for the dut port
+`UNPACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, a3, a3_pk)
+wire [`OUTPUT_SIZE*`NUM_WIDTH-1:0] g3_pk; // flat vector counterpart of g3 for the dut port
+`PACK_ARRAY(`NUM_WIDTH, `OUTPUT_SIZE, g3, g3_pk)
+
+neural_network dut (
+    .clk,
+    .fp,
+    .fp_out,
+    .a0_pk,
+    .a3_pk,
+    .bp,
+    .bp_out,
+    .g3_pk,
+    .wu,
+    .w_layer,
+    .w_i,
+    .w_j,
+    .w_in,
+    .w_out
+);
+
+wire [`NUM_WIDTH-1:0] a_test = a3[4]; // the one output that is printed; index 4 is also the nonzero entry of g3
+
+generate genvar i; genvar j;
+
+for (i=0; i<`INPUT_SIZE; i=i+1) begin // preload the syn01 weights through hierarchical references
+    for (j=0; j<`HIDDEN1_SIZE; j=j+1) begin
+        initial begin
+            dut.g_syn01_o[i].g_syn01_i[j].i_syn01.w <= (1 << 22) + (i << 10) + (j << 12); // value depends on both indices
+        end
+    end
+end
+for (i=0; i<`HIDDEN1_SIZE; i=i+1) begin // same weight pattern for the syn12 weights
+    for (j=0; j<`HIDDEN2_SIZE; j=j+1) begin
+        initial begin
+            dut.g_syn12_o[i].g_syn12_i[j].i_syn12.w <= (1 << 22) + (i << 10) + (j << 12);
+        end
+    end
+end
+for (i=0; i<`HIDDEN2_SIZE; i=i+1) begin // same weight pattern for the syn23 weights
+    for (j=0; j<`OUTPUT_SIZE; j=j+1) begin
+        initial begin
+            dut.g_syn23_o[i].g_syn23_i[j].i_syn23.w <= (1 << 22) + (i << 10) + (j << 12);
+        end
+    end
+end
+for (i=0; i<`INPUT_SIZE; i=i+1) begin // input vector: every fourth element is 1 << 24, the others are 0
+    initial begin
+        a0[i] <= (i % 4 == 0) << 24; // presumably 1.0 in the fixed-point format - confirm FRAC_WIDTH
+    end
+end
+for (i=0; i<`OUTPUT_SIZE; i=i+1) begin // ground truth: only element 4 is nonzero
+    initial begin
+        g3[i] <= (i == 4) << 24;
+    end
+end
+
+endgenerate
+
+initial begin
+    clk <= 0;
+    fp <= 0;
+    bp <= 0;
+    wu <= 0;
+    w_layer <= 0; // NOTE(review): the $monitor label below reads w1[1][2] while w_layer is 0 - confirm the layer numbering
+    w_i <= 1;
+    w_j <= 2;
+    $monitor("time %4t fp %1b fp_out %1b a3[4] %24b bp %1b bp_out %1b w1[1][2] %24b", $time, fp, fp_out, a_test[23:0], bp, bp_out, w_out[23:0]);
+    #5 clk<=1; #5 clk<=0;
+    fp <= 1; // forward strobe, high for exactly one clock
+    #5 clk<=1; #5 clk<=0;
+    fp <= 0;
+    #5 clk<=1; #5 clk<=0; // six idle clocks after the forward strobe
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    bp <= 1; // backward strobe, high for exactly one clock
+    #5 clk<=1; #5 clk<=0;
+    bp <= 0;
+    #5 clk<=1; #5 clk<=0; // six idle clocks after the backward strobe
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    wu <= 1; // write a recognizable bit pattern into the selected weight
+    w_in <= 24'b111100001100110010101010;
+    #5 clk<=1; #5 clk<=0;
+    wu <= 0;
+    #5 clk<=1; #5 clk<=0; // one more clock so that the written weight is printed
+    $finish;
+end
+
+endmodule
+
diff --git a/verilog/rtl/neuron.v b/verilog/rtl/neuron.v new file mode 100644 index 0000000..b84feb2 --- /dev/null +++ b/verilog/rtl/neuron.v
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2022 Tamas Hubai
+
+`default_nettype none
+
+// synapse and neuron primitives for building up the neural network layers
+// NOTE(review): mul_sat_comb / sub_sat_comb come from another file; presumably saturating multiply / subtract - confirm
+
+// synapse is an edge between two neurons with two-way propagation
+// and an updatable weight
+// (the registers have no reset; w is written only by the bp and wu strobes)
+module synapse (
+    input clk, // every register below updates on the rising edge
+    input fp, // forward propagation strobe: capture zc on this clock
+    output reg fp_out, // fp delayed by one clock
+    input [`NUM_WIDTH-1:0] a, // forward operand, multiplied by the weight
+    output reg [`NUM_WIDTH-1:0] zc, // product of a and w as captured at the last fp
+    input bp, // backward propagation strobe: capture tc and adjust w
+    output reg bp_out, // bp delayed by one clock
+    input [`NUM_WIDTH-1:0] e, // backward operand, multiplied by the weight
+    output reg [`NUM_WIDTH-1:0] tc, // product of e and w as captured at the last bp
+    input wu, // weight update strobe: load w from w_in
+    input [`NUM_WIDTH-1:0] w_in, // weight value loaded while wu is high
+    output [`NUM_WIDTH-1:0] w_out // current weight, continuously driven from w
+);
+
+reg [`NUM_WIDTH-1:0] w; // stored weight; network_tb.v writes it through a hierarchical reference
+assign w_out = w;
+
+wire [`NUM_WIDTH-1:0] zn; // product of a and w, next value of zc
+mul_sat_comb i_mul_z (
+    .a(a),
+    .b(w),
+    .res(zn)
+);
+
+wire [`NUM_WIDTH-1:0] tn; // product of e and w, next value of tc
+mul_sat_comb i_mul_t (
+    .a(e),
+    .b(w),
+    .res(tn)
+);
+
+wire [`NUM_WIDTH-1:0] cn; // product of a and e, the raw weight correction
+mul_sat_comb i_mul_c (
+    .a(a),
+    .b(e),
+    .res(cn)
+);
+
+wire [`NUM_WIDTH-1:0] wn; // w minus the scaled correction, next value of w on a bp clock
+sub_sat_comb i_sub_w (
+    .a(w),
+    .b($signed(cn) >>> `LEARN_SHIFT), // arithmetic right shift scales the correction down by LEARN_SHIFT bits
+    .res(wn)
+);
+
+always @(posedge clk) begin
+    if (fp) begin // forward step
+        zc <= zn;
+    end
+    fp_out <= fp; // hand the strobe on with one clock of delay
+    if (bp) begin // backward step: tn and wn are both computed from the weight before this edge
+        tc <= tn;
+        w <= wn;
+    end
+    bp_out <= bp; // hand the strobe on with one clock of delay
+    if (wu) begin // written last on purpose: when bp and wu coincide, the value from w_in wins
+        w <= w_in;
+    end
+end
+
+endmodule
+
+
+// generic neuron with two-way propagation that needs to be connected to
+// the respective activation function and its derivative to make
+// either a ReLU or a softmax neuron
+// (the activation logic itself lives outside; this module only provides the registers and the error product)
+module neuron (
+    input clk, // every register below updates on the rising edge
+    input fp, // forward propagation strobe: capture a on this clock
+    output reg fp_out, // fp delayed by one clock
+    input [`NUM_WIDTH-1:0] z, // value handed to the activation function
+    output reg [`NUM_WIDTH-1:0] a, // from_act as captured at the last fp
+    input bp, // backward propagation strobe: capture e on this clock
+    output reg bp_out, // bp delayed by one clock
+    input [`NUM_WIDTH-1:0] t, // backward operand, multiplied by from_act_diff
+    output reg [`NUM_WIDTH-1:0] e, // product of t and from_act_diff as captured at the last bp
+    output [`NUM_WIDTH-1:0] to_act, // to activation function (z passed through unchanged)
+    input [`NUM_WIDTH-1:0] from_act, // result of the external activation function
+    input [`NUM_WIDTH-1:0] from_act_diff // result of the external activation derivative
+);
+
+assign to_act = z;
+
+wire [`NUM_WIDTH-1:0] en; // product of t and from_act_diff, next value of e
+mul_sat_comb i_mul_e (
+    .a(t),
+    .b(from_act_diff),
+    .res(en)
+);
+
+always @(posedge clk) begin
+    if (fp) begin // forward step
+        a <= from_act;
+    end
+    fp_out <= fp; // hand the strobe on with one clock of delay
+    if (bp) begin // backward step
+        e <= en;
+    end
+    bp_out <= bp; // hand the strobe on with one clock of delay
+end
+
+endmodule
+
diff --git a/verilog/rtl/neuron_tb.v b/verilog/rtl/neuron_tb.v new file mode 100644 index 0000000..620669a --- /dev/null +++ b/verilog/rtl/neuron_tb.v
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2022 Tamas Hubai
+
+`default_nettype none
+
+// testbenches for neuron.v
+
+
+// synapse is an edge between two neurons with two-way propagation
+// and an updatable weight
+// (this bench loads a weight, runs one forward strobe and three backward clocks, printing a 16-bit window of each bus)
+module synapse_tb ();
+
+reg clk; // toggled by hand in the stimulus block, one period is 10 time units
+reg fp; // forward propagation strobe
+wire fp_out;
+reg [`NUM_WIDTH-1:0] a; // forward operand
+wire [`NUM_WIDTH-1:0] zc; // forward product captured by the dut
+reg bp; // backward propagation strobe
+wire bp_out;
+reg [`NUM_WIDTH-1:0] e; // backward operand
+wire [`NUM_WIDTH-1:0] tc; // backward product captured by the dut
+reg wu; // weight update strobe
+reg [`NUM_WIDTH-1:0] w_in; // weight value offered to the dut
+wire [`NUM_WIDTH-1:0] w_out; // weight currently held by the dut
+
+synapse dut (
+    .clk,
+    .fp,
+    .fp_out,
+    .a,
+    .zc,
+    .bp,
+    .bp_out,
+    .e,
+    .tc,
+    .wu,
+    .w_in,
+    .w_out
+);
+
+initial begin
+    clk <= 0;
+    fp <= 0;
+    bp <= 0;
+    wu <= 0;
+    $monitor("time %4t fp %1b fp_out %1b a %16b zc %16b bp %1b bp_out %1b e %16b tc %16b wu %1b w_in %16b w_out %16b", $time, fp, fp_out, a[31:16], zc[31:16], bp, bp_out, e[31:16], tc[31:16], wu, w_in[31:16], w_out[31:16]);
+    #5 clk<=1; #5 clk<=0;
+    wu <= 1; // load the weight with 3 << FRAC_WIDTH
+    w_in <= 3 << `FRAC_WIDTH;
+    #5 clk<=1; #5 clk<=0;
+    wu <= 0; // w_in changes with wu low: the stored weight is not expected to follow
+    w_in <= 5 << `FRAC_WIDTH;
+    #5 clk<=1; #5 clk<=0;
+    a <= 7 << `FRAC_WIDTH; // forward operand is set one clock before the strobe
+    #5 clk<=1; #5 clk<=0;
+    fp <= 1; // forward strobe, high for exactly one clock
+    #5 clk<=1; #5 clk<=0;
+    fp <= 0; // a changes with fp low: zc is expected to hold its value
+    a <= 5 << `FRAC_WIDTH;
+    #5 clk<=1; #5 clk<=0;
+    e <= 7 << `FRAC_WIDTH; // backward operand is set one clock before the strobe
+    #5 clk<=1; #5 clk<=0;
+    bp <= 1; // bp stays high for three clocks, so the weight is adjusted on each of them
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    #5 clk<=1; #5 clk<=0;
+    bp <= 0;
+    #5 clk<=1; #5 clk<=0; // one more clock so that the final state is printed
+    $finish;
+end
+
+endmodule
+
+
+// generic neuron with two-way propagation that needs to be connected to
+// the respective activation function and its derivative to make
+// either a ReLU or a softmax neuron
+// (this bench closes the activation loop with plain multiplications instead of a real activation function)
+module neuron_tb ();
+
+reg clk; // toggled by hand in the stimulus block, one period is 10 time units
+reg fp; // forward propagation strobe
+wire fp_out;
+reg [`NUM_WIDTH-1:0] z; // value handed to the activation function
+wire [`NUM_WIDTH-1:0] a; // activation captured by the dut
+reg bp; // backward propagation strobe
+wire bp_out;
+reg [`NUM_WIDTH-1:0] t; // backward operand
+wire [`NUM_WIDTH-1:0] e; // backward product captured by the dut
+wire [`NUM_WIDTH-1:0] to_act; // dut output feeding the stand-in activation below
+wire [`NUM_WIDTH-1:0] from_act; // stand-in activation result
+wire [`NUM_WIDTH-1:0] from_act_diff; // stand-in derivative result
+
+neuron dut (
+    .clk,
+    .fp,
+    .fp_out,
+    .z,
+    .a,
+    .bp,
+    .bp_out,
+    .t,
+    .e,
+    .to_act,
+    .from_act,
+    .from_act_diff
+);
+
+assign from_act = to_act * 9; // stand-in for the activation function, easy to recognize in the printout
+assign from_act_diff = to_act * 17; // stand-in for the derivative, distinct from the one above
+
+initial begin
+    clk <= 0;
+    fp <= 0;
+    bp <= 0;
+    $monitor("time %4t fp %1b fp_out %1b z %16b a %16b bp %1b bp_out %1b t %16b e %16b ta %16b fa %16b fad %16b", $time, fp, fp_out, z[35:20], a[35:20], bp, bp_out, t[35:20], e[35:20], to_act[35:20], from_act[35:20], from_act_diff[35:20]); // NOTE(review): window is [35:20] here but [31:16] in synapse_tb - confirm against NUM_WIDTH
+    #5 clk<=1; #5 clk<=0;
+    z <= 3 << `FRAC_WIDTH; // input is set one clock before the strobe
+    #5 clk<=1; #5 clk<=0;
+    fp <= 1; // forward strobe, high for exactly one clock
+    #5 clk<=1; #5 clk<=0;
+    fp <= 0; // z changes with fp low: a is expected to hold its value
+    z <= 5 << `FRAC_WIDTH;
+    #5 clk<=1; #5 clk<=0;
+    t <= 7 << `FRAC_WIDTH; // backward operand is set one clock before the strobe
+    #5 clk<=1; #5 clk<=0;
+    bp <= 1; // backward strobe, high for exactly one clock
+    #5 clk<=1; #5 clk<=0;
+    bp <= 0; // t changes with bp low: e is expected to hold its value
+    t <= 9 << `FRAC_WIDTH;
+    #5 clk<=1; #5 clk<=0;
+    $finish;
+end
+
+endmodule
+
diff --git a/verilog/rtl/trainable_nn.v b/verilog/rtl/trainable_nn.v new file mode 100644 index 0000000..367a04e --- /dev/null +++ b/verilog/rtl/trainable_nn.v
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2022 Tamas Hubai
+
+`default_nettype none
+
+// top-level user project module: connects the neural_interface block to the
+// Wishbone slave port and to the logic analyzer probes
+//
+// logic analyzer probes (a probe is driven from outside when its la_oenb bit is 0):
+//   la_data_in[0]       replaces wb_clk_i as the clock
+//   la_data_in[1]       replaces wb_rst_i as the reset
+//   la_data_in[31:8]    replaces wbs_adr_i[23:0] as the address
+//   la_data_in[95:32]   write data, used together with the probe address
+//   la_data_out[95:32]  data_out of the neural_interface block
+//
+// Wishbone: wbs_dat_i and wbs_dat_o map to bits [43:12] of the 64-bit
+// data_in and data_out words; wbs_sel_i is ignored
+
+module trainable_nn (
+`ifdef USE_POWER_PINS
+    inout vccd1, // User area 1 1.8V supply
+    inout vssd1, // User area 1 digital ground
+`endif
+
+    // Wishbone Slave ports (WB MI A)
+    input wb_clk_i,
+    input wb_rst_i,
+    input wbs_stb_i,
+    input wbs_cyc_i,
+    input wbs_we_i,
+    input [3:0] wbs_sel_i,
+    input [31:0] wbs_dat_i,
+    input [31:0] wbs_adr_i,
+    // registered in the always block below, so it has to be a variable:
+    // a procedural assignment to a plain net output is illegal
+    output reg wbs_ack_o,
+    output [31:0] wbs_dat_o,
+
+    // Logic Analyzer Signals
+    input [127:0] la_data_in,
+    output [127:0] la_data_out,
+    input [127:0] la_oenb,
+
+    // IOs
+    input [`MPRJ_IO_PADS-1:0] io_in,
+    output [`MPRJ_IO_PADS-1:0] io_out,
+    output [`MPRJ_IO_PADS-1:0] io_oeb,
+
+    // IRQ
+    output [2:0] irq
+);
+
+    // the pads are not used: every io_out and io_oeb bit is tied to 0
+    assign io_out = {(`MPRJ_IO_PADS){1'b0}};
+    assign io_oeb = {(`MPRJ_IO_PADS){1'b0}};
+
+    assign irq = 3'b000; // Unused
+
+    // clock and reset can be taken over through logic analyzer probes 0 and 1
+    wire clk = (~la_oenb[0]) ? la_data_in[0]: wb_clk_i;
+    wire rst = (~la_oenb[1]) ? la_data_in[1]: wb_rst_i;
+
+    // a Wishbone access is in progress
+    wire use_wbs = wbs_cyc_i & wbs_stb_i;
+    // a probe group counts only when every probe of the group is enabled;
+    // the inversion is parenthesized because the standard grammar takes a
+    // primary, not another unary expression, as the reduction operand
+    wire use_la_addr = &(~la_oenb[31:8]);
+    wire use_la_data_in = &(~la_oenb[95:32]);
+
+    // the logic analyzer address takes precedence over the Wishbone address
+    wire [23:0] addr = use_la_addr ? la_data_in[31:8] : wbs_adr_i[23:0];
+    // Wishbone write data is placed at bits [43:12], the remaining bits are 0
+    wire [63:0] data_in = (use_la_addr & use_la_data_in) ? la_data_in[95:32] : {20'b0, wbs_dat_i, 12'b0};
+    // write whenever both probe groups are enabled, or on a Wishbone write
+    wire we = (use_la_addr & use_la_data_in) | (use_wbs & wbs_we_i);
+    wire [63:0] data_out;
+    assign la_data_out = {32'b0, data_out, 32'b0};
+    assign wbs_dat_o = data_out[43:12];
+
+    neural_interface i_ni (
+        .clk,
+        .addr,
+        .data_in,
+        .we,
+        .data_out
+    );
+
+    // acknowledge one clock after the access is seen, unless in reset
+    always @(posedge clk) begin
+        wbs_ack_o <= (~rst) & use_wbs;
+    end
+
+endmodule
+
diff --git a/verilog/rtl/uprj_netlists.v b/verilog/rtl/uprj_netlists.v index 3537de8..06a0b3a 100644 --- a/verilog/rtl/uprj_netlists.v +++ b/verilog/rtl/uprj_netlists.v
@@ -21,8 +21,8 @@ // Assume default net type to be wire because GL netlists don't have the wire definitions `default_nettype wire `include "gl/user_project_wrapper.v" - `include "gl/user_proj_example.v" + `include "gl/trainable_nn.v" `else `include "user_project_wrapper.v" - `include "user_proj_example.v" + `include "trainable_nn.v" `endif \ No newline at end of file
diff --git a/verilog/rtl/user_defines.v b/verilog/rtl/user_defines.v index ee44b08..66bc63d 100644 --- a/verilog/rtl/user_defines.v +++ b/verilog/rtl/user_defines.v
@@ -52,41 +52,41 @@ // up in a state that can be used immediately without depending on // the management SoC to run a startup program to configure the GPIOs. -`define USER_CONFIG_GPIO_5_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_6_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_7_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_8_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_9_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_10_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_11_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_12_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_13_INIT `GPIO_MODE_INVALID +`define USER_CONFIG_GPIO_5_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_6_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_7_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_8_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_9_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_10_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_11_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_12_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_13_INIT `GPIO_MODE_MGMT_STD_OUTPUT // Configurations of GPIO 14 to 24 are used on caravel but not caravan. 
-`define USER_CONFIG_GPIO_14_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_15_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_16_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_17_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_18_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_19_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_20_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_21_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_22_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_23_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_24_INIT `GPIO_MODE_INVALID +`define USER_CONFIG_GPIO_14_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_15_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_16_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_17_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_18_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_19_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_20_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_21_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_22_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_23_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_24_INIT `GPIO_MODE_MGMT_STD_OUTPUT -`define USER_CONFIG_GPIO_25_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_26_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_27_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_28_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_29_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_30_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_31_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_32_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_33_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_34_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_35_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_36_INIT `GPIO_MODE_INVALID -`define USER_CONFIG_GPIO_37_INIT `GPIO_MODE_INVALID +`define USER_CONFIG_GPIO_25_INIT 
`GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_26_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_27_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_28_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_29_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_30_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_31_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_32_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_33_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_34_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_35_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_36_INIT `GPIO_MODE_MGMT_STD_OUTPUT +`define USER_CONFIG_GPIO_37_INIT `GPIO_MODE_MGMT_STD_OUTPUT `endif // __USER_DEFINES_H
diff --git a/verilog/rtl/user_proj_example.v b/verilog/rtl/user_proj_example.v deleted file mode 100644 index 26081e9..0000000 --- a/verilog/rtl/user_proj_example.v +++ /dev/null
@@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: 2020 Efabless Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// SPDX-License-Identifier: Apache-2.0 - -`default_nettype none -/* - *------------------------------------------------------------- - * - * user_proj_example - * - * This is an example of a (trivially simple) user project, - * showing how the user project can connect to the logic - * analyzer, the wishbone bus, and the I/O pads. - * - * This project generates an integer count, which is output - * on the user area GPIO pads (digital output only). The - * wishbone connection allows the project to be controlled - * (start and stop) from the management SoC program. - * - * See the testbenches in directory "mprj_counter" for the - * example programs that drive this user project. The three - * testbenches are "io_ports", "la_test1", and "la_test2". 
- * - *------------------------------------------------------------- - */ - -module user_proj_example #( - parameter BITS = 32 -)( -`ifdef USE_POWER_PINS - inout vccd1, // User area 1 1.8V supply - inout vssd1, // User area 1 digital ground -`endif - - // Wishbone Slave ports (WB MI A) - input wb_clk_i, - input wb_rst_i, - input wbs_stb_i, - input wbs_cyc_i, - input wbs_we_i, - input [3:0] wbs_sel_i, - input [31:0] wbs_dat_i, - input [31:0] wbs_adr_i, - output wbs_ack_o, - output [31:0] wbs_dat_o, - - // Logic Analyzer Signals - input [127:0] la_data_in, - output [127:0] la_data_out, - input [127:0] la_oenb, - - // IOs - input [`MPRJ_IO_PADS-1:0] io_in, - output [`MPRJ_IO_PADS-1:0] io_out, - output [`MPRJ_IO_PADS-1:0] io_oeb, - - // IRQ - output [2:0] irq -); - wire clk; - wire rst; - - wire [`MPRJ_IO_PADS-1:0] io_in; - wire [`MPRJ_IO_PADS-1:0] io_out; - wire [`MPRJ_IO_PADS-1:0] io_oeb; - - wire [31:0] rdata; - wire [31:0] wdata; - wire [BITS-1:0] count; - - wire valid; - wire [3:0] wstrb; - wire [31:0] la_write; - - // WB MI A - assign valid = wbs_cyc_i && wbs_stb_i; - assign wstrb = wbs_sel_i & {4{wbs_we_i}}; - assign wbs_dat_o = rdata; - assign wdata = wbs_dat_i; - - // IO - assign io_out = count; - assign io_oeb = {(`MPRJ_IO_PADS-1){rst}}; - - // IRQ - assign irq = 3'b000; // Unused - - // LA - assign la_data_out = {{(127-BITS){1'b0}}, count}; - // Assuming LA probes [63:32] are for controlling the count register - assign la_write = ~la_oenb[63:32] & ~{BITS{valid}}; - // Assuming LA probes [65:64] are for controlling the count clk & reset - assign clk = (~la_oenb[64]) ? la_data_in[64]: wb_clk_i; - assign rst = (~la_oenb[65]) ? 
la_data_in[65]: wb_rst_i; - - counter #( - .BITS(BITS) - ) counter( - .clk(clk), - .reset(rst), - .ready(wbs_ack_o), - .valid(valid), - .rdata(rdata), - .wdata(wbs_dat_i), - .wstrb(wstrb), - .la_write(la_write), - .la_input(la_data_in[63:32]), - .count(count) - ); - -endmodule - -module counter #( - parameter BITS = 32 -)( - input clk, - input reset, - input valid, - input [3:0] wstrb, - input [BITS-1:0] wdata, - input [BITS-1:0] la_write, - input [BITS-1:0] la_input, - output ready, - output [BITS-1:0] rdata, - output [BITS-1:0] count -); - reg ready; - reg [BITS-1:0] count; - reg [BITS-1:0] rdata; - - always @(posedge clk) begin - if (reset) begin - count <= 0; - ready <= 0; - end else begin - ready <= 1'b0; - if (~|la_write) begin - count <= count + 1; - end - if (valid && !ready) begin - ready <= 1'b1; - rdata <= count; - if (wstrb[0]) count[7:0] <= wdata[7:0]; - if (wstrb[1]) count[15:8] <= wdata[15:8]; - if (wstrb[2]) count[23:16] <= wdata[23:16]; - if (wstrb[3]) count[31:24] <= wdata[31:24]; - end else if (|la_write) begin - count <= la_write & la_input; - end - end - end - -endmodule -`default_nettype wire
diff --git a/verilog/rtl/user_project_wrapper.v b/verilog/rtl/user_project_wrapper.v index 5ee1cee..a54beaa 100644 --- a/verilog/rtl/user_project_wrapper.v +++ b/verilog/rtl/user_project_wrapper.v
@@ -82,7 +82,7 @@ /* User project is instantiated here */ /*--------------------------------------*/ -user_proj_example mprj ( +trainable_nn mprj ( `ifdef USE_POWER_PINS .vccd1(vccd1), // User area 1 1.8V power .vssd1(vssd1), // User area 1 digital ground