blob: a0ecd5c2527441edad4991591427503a2ac5fab0 [file] [log] [blame]
// SPDX-FileCopyrightText: 2020 Efabless Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// SPDX-License-Identifier: Apache-2.0
`default_nettype none
/*
*-------------------------------------------------------------
*
* chaos_automaton
*
* This chip is a pure asynchronous cellular automaton. Each cell has
* four inputs from N, S, E, W and generates four outputs to N, S, E, W.
* Each output can be configured for any boolean function of the four
* inputs (16 bits each).
*
* Outputs on the periphery (or some selection thereof) are passed to the
* chip GPIO. Inputs may also come from the chip periphery; choice of
* input or output is programmable like the cell boolean function.
*
* All periphery inputs and outputs may be channeled through the logic
* analyzer to apply input to or monitor output from the array.
*
* The wishbone bus may be used to program the cell functions.
*
* This can be used in a loop with an evolutionary algorithm to tune the
* chip functions to achieve a specific behavior.
*
* Most of the core circuitry is straightforward. The total number of
* cells is parameterized, so that the largest number of cells that will
* fit in the caravel user project space can be determined.
*
* Version v1: To avoid massive amounts of wiring (e.g., 16 or 32
* data wires + 10 address wires to every single cell), all of the
* LUT configuration memory is stored in a (very long) serial chain
* in a full loop. The scan chain is 64 bits longer than the number
* of cells and allows 64 bits to be transferred to and from the
* wishbone bus independently of the cells. Every cell has 64 latches
* in addition to the 64 flops so that the scan chain can be cycled
* without affecting ongoing operation of the automaton.
*
* Memory mapped address space:
*
* BASE_ADR + 7 to BASE_ADR + 0: Data to read or write
* BASE_ADR + 15 to BASE_ADR + 8: Core cell address for read/write
* BASE_ADR + 16: Triggers
*
* Trigger bits:
* bit 0: Shift by (address) cells (64 bits).
* bit 1: Finish cycle. Return shift register to run state, toggle "hold"
*
* Both trigger bits are self-resetting. The trigger bit (as read) remains
* high until the transfer has completed. The trigger bit can be polled to
* determine when the cycle has completed.
*
* The shift cycle bit can be used to load the configuration of the array
* cell by cell. The typical case is to set address = 1 and apply or read
* each cell's configuration in turn. However, it can also be used piecemeal,
* for example, to read out a block of configurations, without having
* to loop a full cycle for each one. The counter tracks what the
* current offset is, and can return to the run-state position on
* application of bit 1, "Finish cycle". At the end of "Finish cycle"
* the hold bit is toggled to latch and apply any new configuration
* data.
*
* Reading and writing a single cell's configuration can be accomplished
* by a sequence of shift cycles and reads/writes. To change the
* configuration of a single cell: (1) Write the cell address, (2) Apply
* the shift cycle, (3) Write the configuration data, (4) Apply the
* finish cycle. To read the configuration of a single cell: (1) Write
* the cell address, (2) Apply the shift cycle, (3) Read the configuration
* data, (4) Apply the finish cycle.
*
*
* This version, chaos_automaton_full.v, is an update to the original
* code (now chaos_automaton_orig.v), implementing changes that were
* made in chaos_subarray.v to make the array programming robust to
* hold violations even when the timing of individual cells is not
* known. It is an intermediate development stage between the
* original and final design, although it can be used if it works
* better to synthesize, place, and route the entire design in one
* synthesis run instead of pre-hardening the chaos_subarray module
* as a macro.
*-------------------------------------------------------------
*/
/* Remove the following for synthesis */
// `define MPRJ_IO_PADS 38
`include "chaos_subarray.v"
/*
*-----------------------------------------------------------------
* User project top level
*-----------------------------------------------------------------
*/
module chaos_automaton_full #(
parameter XSIZE = 10, // Number of cells left to right
parameter YSIZE = 10, // Number of cells top to bottom
parameter ASIZE = 9, // Enough bits to count XSIZE * YSIZE
parameter BASE_ADR = 32'h 3000_0000 // Wishbone base address
)(
`ifdef USE_POWER_PINS
inout vdda1, // User area 1 3.3V supply
inout vdda2, // User area 2 3.3V supply
inout vssa1, // User area 1 analog ground
inout vssa2, // User area 2 analog ground
inout vccd1, // User area 1 1.8V supply
inout vccd2, // User area 2 1.8v supply
inout vssd1, // User area 1 digital ground
inout vssd2, // User area 2 digital ground
`endif
// Wishbone Slave ports (WB MI A)
input wb_clk_i,
input wb_rst_i,
input wbs_stb_i,
input wbs_cyc_i,
input wbs_we_i,
input [3:0] wbs_sel_i,
input [31:0] wbs_dat_i,
input [31:0] wbs_adr_i,
output wbs_ack_o,
output [31:0] wbs_dat_o,
// Logic Analyzer Signals
input [127:0] la_data_in,
output [127:0] la_data_out,
input [127:0] la_oenb,
// IOs
input [`MPRJ_IO_PADS-1:0] io_in,
output [`MPRJ_IO_PADS-1:0] io_out,
output [`MPRJ_IO_PADS-1:0] io_oeb,
// IRQ
output [2:0] irq
);
`define IDLE 3'b000
`define START 3'b001
`define FINISH 3'b010
`define XDATAS 3'b011
`define XDATAF 3'b100
`define LOAD 3'b101
`define CONFIGL 8'h00 /* Address offset of configuration data low word */
`define CONFIGH 8'h04 /* Address offset of configuration data high word */
`define ADDRESS 8'h08 /* Address offset of cell address value */
`define XFER 8'h0c /* Address offset of transfer bits */
`define MAXADDR (XSIZE * YSIZE) /* Highest cell address plus one */
reg clk; /* serial clock to transfer data */
reg hold; /* trigger to hold transferred data */
reg [2:0] xfer_state; /* state of the data transfer */
reg [1:0] xfer_ctrl; /* Transfer trigger bits */
reg [63:0] config_data; /* 64 bits to read or write */
reg [ASIZE - 1:0] cell_addr; /* Core cell to address */
reg [ASIZE - 1:0] cell_offset; /* Current offset of shift register */
reg [ASIZE + 6:0] bit_count; /* Full count (cell address + bits) */
wire [`MPRJ_IO_PADS-1:0] io_in;
wire [`MPRJ_IO_PADS-1:0] io_out;
wire [`MPRJ_IO_PADS-1:0] io_oeb;
wire [1:0] config_sel;
wire address_sel;
wire xfer_sel;
wire valid;
reg ready;
wire [3:0] iomem_we;
wire selected;
wire [1:0] busy;
reg [31:0] rdata_pre;
wire [63:0] rdata;
reg [31:0] wbs_dat_o;
reg [63:0] wdata;
reg write;
wire [2*XSIZE + 2*YSIZE - 1: 0] data_in;
// Wishbone address select indicators
assign config_sel[0] = (wbs_adr_i[7:0] == `CONFIGL);
assign config_sel[1] = (wbs_adr_i[7:0] == `CONFIGH);
assign address_sel = (wbs_adr_i[7:0] == `ADDRESS);
assign xfer_sel = (wbs_adr_i[7:0] == `XFER);
assign selected = config_sel[1] || config_sel[0] || address_sel || xfer_sel;
assign valid = wbs_cyc_i && wbs_stb_i;
assign wbs_ack_o = ready;
assign iomem_we = wbs_sel_i & {4{wbs_we_i}};
// Chip pin output (Connects to a subset of la_data_in;
// 9 signals each N and S, 10 signals each W and E)
assign io_out = {la_data_out[2*YSIZE + XSIZE + 8: 2*YSIZE + XSIZE], // north
la_data_out[2*YSIZE + 8: 2*YSIZE], // south
la_data_out[YSIZE + 9: YSIZE], // east
la_data_out[9:0]}; // west
// Chip pin direction is assigned to la_data sub-array
assign io_oeb = la_data_in[127:127-38] & ~la_oenb[127:127-38];
// IRQ
assign irq = 3'b000; // Unused
// Instantiate the chaos cell array
chaos_array #(
.XSIZE(XSIZE),
.YSIZE(YSIZE),
.BASE_ADR(BASE_ADR)
) chaos_array_inst (
`ifdef USE_POWER_PINS
.vccd1(vccd1),
.vssd1(vssd1),
`endif
.clk(clk),
.reset(wb_rst_i),
.hold(hold),
.rdata(rdata),
.wdata(wdata),
.write(write),
.data_in(data_in),
.data_out(la_data_out[2*XSIZE + 2*YSIZE - 1: 0])
);
/* Hook up io_in (multiplexed with la_data_int based on value of la_oenb,
* using the same subsets as used for io_out). The expressions are more
* complicated because the signals that are connected to the GPIO pins
* have to be multiplexed with the logic analyzer inputs.
*/
genvar i;
generate
for (i = 2*YSIZE + XSIZE + 9; i < 2*YSIZE + 2*XSIZE; i=i+1) begin
assign data_in[i] = la_data_in[i];
end
for (i = 2 * YSIZE + XSIZE; i < 2*YSIZE + XSIZE + 9; i=i+1) begin
assign data_in[i] = la_oenb[i] ? io_in[i - 2*YSIZE + XSIZE + 29] : la_data_in[i];
end
for (i = 2 * YSIZE + 9; i < 2 * YSIZE + XSIZE; i=i+1) begin
assign data_in[i] = la_data_in[i];
end
for (i = 2 * YSIZE; i < 2 * YSIZE + 9; i=i+1) begin
assign data_in[i] = la_oenb[i] ? io_in[i - 2*YSIZE + 20] : la_data_in[i];
end
for (i = YSIZE + 10; i < 2 * YSIZE; i=i+1) begin
assign data_in[i] = la_data_in[i];
end
for (i = YSIZE; i < YSIZE + 10; i=i+1) begin
assign data_in[i] = la_oenb[i] ? io_in[i - YSIZE + 10] : la_data_in[i];
end
for (i = 10; i < YSIZE; i=i+1) begin
assign data_in[i] = la_data_in[i];
end
for (i = 0; i < 10; i=i+1) begin
assign data_in[i] = la_oenb[i] ? io_in[i]: la_data_in[i];
end
endgenerate
/* Read data (only rdata is something that was not written by the processor) */
always @* begin
rdata_pre = 'b0;
if (xfer_sel) begin
rdata_pre = {30'b0, busy};
end else if (config_sel[0]) begin
rdata_pre = rdata[31:0];
end else if (config_sel[1]) begin
rdata_pre = rdata[63:32];
end else if (address_sel) begin
/* When ADDRESS is selected, pass back the existing cell */
/* count rather than what was written into cell_addr. */
rdata_pre = bit_count[ASIZE + 6: 7];
end
end
/* Read data */
always @(posedge wb_clk_i or posedge wb_rst_i) begin
if (wb_rst_i) begin
wbs_dat_o <= 0;
ready <= 0;
end else begin
ready <= 0;
if (valid && !ready && wbs_adr_i[31:8] == BASE_ADR[31:8]) begin
ready <= 1'b1;
if (selected) begin
wbs_dat_o <= rdata_pre;
end
end
end
end
/* Write data */
always @(posedge wb_clk_i or posedge wb_rst_i) begin
if (wb_rst_i) begin
xfer_ctrl <= 0;
wdata <= 0;
write <= 1'b0;
end else begin
write <= 1'b0;
if (valid && !ready && wbs_adr_i[31:8] == BASE_ADR[31:8]) begin
if (xfer_sel) begin
if (iomem_we[0]) xfer_ctrl <= wbs_dat_i[1:0];
end else if (config_sel[0]) begin
if (iomem_we[0]) wdata[7:0] <= wbs_dat_i[7:0];
if (iomem_we[1]) wdata[15:8] <= wbs_dat_i[15:8];
if (iomem_we[2]) wdata[23:16] <= wbs_dat_i[23:16];
if (iomem_we[3]) wdata[31:24] <= wbs_dat_i[31:24];
if (|iomem_we) write <= 1'b1;
end else if (config_sel[1]) begin
if (iomem_we[0]) wdata[39:32] <= wbs_dat_i[7:0];
if (iomem_we[1]) wdata[47:40] <= wbs_dat_i[15:8];
if (iomem_we[2]) wdata[55:48] <= wbs_dat_i[23:16];
if (iomem_we[3]) wdata[63:56] <= wbs_dat_i[31:24];
if (|iomem_we) write <= 1'b1;
end else if (address_sel) begin
/* NOTE: If XSIZE * YSIZE > 256, this must be adjusted */
if (iomem_we[0]) cell_addr <= wbs_dat_i[7:0];
end
end else begin
xfer_ctrl <= 0; // Immediately self-resetting
end
end
end
/* Transfer status */
assign busy[0] = (xfer_state == `START || xfer_state == `XDATAS);
assign busy[1] = (xfer_state == `FINISH || xfer_state == `XDATAF ||
xfer_state == `LOAD);
/* Transfer cycles */
always @(posedge wb_clk_i or posedge wb_rst_i) begin
if (wb_rst_i == 1'b1) begin
xfer_state <= `IDLE;
bit_count <= 'd0;
cell_offset <= 'd0;
clk <= 1'b0;
hold <= 1'b1;
end else begin
clk <= 1'b0;
hold <= 1'b1;
if (xfer_state == `IDLE) begin
if (xfer_ctrl[0] == 1'b1) begin
xfer_state <= `START;
end else if (xfer_ctrl[1] == 1'b1) begin
xfer_state <= `FINISH;
end
end else if (xfer_state == `START) begin
bit_count[ASIZE + 6:7] <= cell_addr;
bit_count[6:0] <= 7'b1111110;
xfer_state <= `XDATAS;
end else if (xfer_state == `FINISH) begin
bit_count[ASIZE + 6:7] <= `MAXADDR - cell_offset;
bit_count[6:0] <= 7'b1111110;
xfer_state <= `XDATAF;
end else if (xfer_state == `XDATAS) begin
clk <= ~clk;
bit_count <= bit_count - 1;
if (bit_count[6:0] == 0) begin
cell_offset <= cell_offset + 1;
end
if (clk == 1'b0) begin
if (bit_count == 0) begin
xfer_state <= `IDLE;
end
end
end else if (xfer_state == `XDATAF) begin
clk <= ~clk;
bit_count <= bit_count - 1;
if (bit_count[6:0] == 0) begin
cell_offset <= cell_offset + 1;
end
if (clk == 1'b0) begin
if (bit_count == 0) begin
xfer_state <= `LOAD;
end
end
end else if (xfer_state == `LOAD) begin
hold <= 1'b0;
xfer_state <= `IDLE;
cell_offset <= 'd0;
end
end
end
endmodule
/*
*-----------------------------------------------------------------
* Chaos array (XSIZE * YSIZE)
*-----------------------------------------------------------------
*/
module chaos_array #(
parameter XSIZE = 10,
parameter YSIZE = 10,
parameter BASE_ADR = 32'h3000_0000
)(
`ifdef USE_POWER_PINS
inout vccd1, // User area 1 1.8V supply
inout vssd1, // User area 1 digital ground
`endif
input clk,
input reset,
input hold,
input write,
input [63:0] wdata,
output [63:0] rdata,
input [2*XSIZE + 2*YSIZE - 1:0] data_in,
output [2*XSIZE + 2*YSIZE - 1:0] data_out
);
wire [XSIZE - 1: 0] uconn [YSIZE: 0];
wire [XSIZE - 1: 0] dconn [YSIZE: 0];
wire [YSIZE - 1: 0] rconn [XSIZE: 0];
wire [YSIZE - 1: 0] lconn [XSIZE: 0];
wire [YSIZE - 1: 0] shiftreg [XSIZE: 0];
wire [YSIZE - 1: 0] clkarray [XSIZE: 0];
wire io_data_sel; // wishbone select data
wire xfer_sel; // wishbone select transfer
// NOTE: For viewing internal signals in gtkwave,
// some 2D arrays may need to be copied into 1D arrays.
// See the original verilog for examples.
/* The perimeter inputs and outputs connect to the logic analyzer */
/* (To do: multiplex inputs between the chip I/O and logic analyzer */
assign data_out = {uconn[YSIZE][XSIZE - 1:0], dconn[0][XSIZE - 1:0],
rconn[XSIZE][YSIZE - 1:0], lconn[0][YSIZE - 1:0]};
assign clkarray[0][0] = clk;
assign dconn[YSIZE][XSIZE - 1:0] = data_in[2*XSIZE+2*YSIZE - 1: 2*YSIZE + XSIZE];
assign uconn[0][XSIZE - 1:0] = data_in[2*YSIZE + XSIZE - 1:2*YSIZE];
assign rconn[0][YSIZE - 1:0] = data_in[2*YSIZE-1:YSIZE];
assign lconn[XSIZE][YSIZE - 1:0] = data_in[YSIZE-1:0];
genvar i, j;
/* NOTE: To see the internal cell values in gtkwave, it is necessary
* to split out a few individual instances from the 2D array. Loop
* from j = 1 in 2D generate loop, then add a 1D generate loop for
* i = N to XSIZE with j set to zero, then add individual instances for
* i = 0 to N - 1 with j set to zero.
*/
/* Connected array of subarrays */
generate
for (j = 0; j < YSIZE; j=j+1) begin: celly
for (i = 0; i < XSIZE; i=i+1) begin: cellx
chaos_cell chaos_cell_inst (
.inorth(dconn[j+1][i]),
.isouth(uconn[j][i]),
.ieast(lconn[i+1][j]),
.iwest(rconn[i][j]),
.onorth(uconn[j+1][i]),
.osouth(dconn[j][i]),
.oeast(rconn[i+1][j]),
.owest(lconn[i][j]),
.reset(reset),
.hold(hold),
.iclk(clkarray[i][j]),
.oclk(clkarray[i+1][j]),
.idata(shiftreg[i][j]),
.odata(shiftreg[i+1][j])
);
end
end
/* NOTE: This would work better topologically if each */
/* row switched the direction of the shift register. */
for (j = 0; j < YSIZE - 1; j=j+1) begin: shifty
assign shiftreg[0][j+1] = shiftreg[XSIZE][j];
assign clkarray[0][j+1] = clkarray[XSIZE][j];
end
endgenerate
/* Storage for data transfers to and from the processor. This is */
/* 64 bits, so can hold the configuration data for one core cell. */
reg [63:0] lutdata;
/* Wire up the lutdata registers as a shift register and connect the */
/* ends to the array's shift register to form a loop. */
always @(posedge clk or posedge write) begin
if (write) begin
/* Copy data from wdata to lutdata on write */
lutdata <= wdata;
end else begin
/* Shift data on clock when "write" is not raised */
lutdata[63:1] <= lutdata[62:0];
lutdata[0] <= shiftreg[XSIZE][YSIZE-1];
end
end
assign shiftreg[0][0] = lutdata[63];
endmodule
`default_nettype wire