| // Copyright (C) 2022, Gray Research LLC. |
| // Licensed under the Apache License, Version 2.0. |
| |
| `default_nettype none |
| |
| `define V(N) [(N)-1:0] |
| `define SEGS(N,M) (((N) + ((M)-1)) / (M)) |
| |
| // Receive a stream of LUTs' LUT config segments of SI_W bits per clock. |
| // When an entire LUT config is recevied, compute the next value of that LUT. |
| // |
| // LUT config: |
| // packed struct LUT_n_k { // N K-LUTs |
| // bit[$clog2(N)] input[K]; // indices of those LUT outputs which are this LUT's K inputs. |
| // bit[2**K] mask; // LUT mask |
| // }; |
| // Each input[] and mask field rounded up to next multiple of SI_W bits. |
| // |
| // For N=16, K=4, SI_W=4, this is 32 bits: |
| // packed struct LUT_n16_k4 { // N=16 K=4-LUTs |
| // bit[4] input[4]; // 4 4b indices |
| // bit[16] mask; // LUT mask |
| // }; |
| // |
| // For N=64, K=6, SI_W=4, this is 112 bits: |
| // struct LUT_n64_k6 { // N=64 K=4-LUTs |
| // bit[8] input[6]; // 6 6b indices padded to 6 8b indices |
| // bit[64] mask; // LUT mask |
| // }; |
| module s4ga #( |
| parameter N = 67, // # LUTs -- must not be multiple of LL (LUT latency) -- use a prime number |
| parameter K = 5, // # LUT inputs |
| parameter I = 2, // # FPGA inputs |
| parameter O = 7, // # FPGA outputs |
| parameter SI_W = 4 // SI width |
| ) ( |
| input wire `V(8) io_in, // [0]:clk [1]:rst [5:2]:si [7:6]:inputs |
| output reg `V(8) io_out // [6:0] outputs [7]:debug |
| ); |
| localparam N_W = $clog2(N); |
| localparam K_W = $clog2(K+1); // k in [0,K] |
| localparam MASK_W = 2**K; |
| localparam MAX_W = (MASK_W >= N_W) ? MASK_W : N_W; |
| localparam SR_W = MAX_W - SI_W; |
| localparam SEG_W = $clog2(`SEGS(MAX_W, SI_W)); |
| localparam MASK_SEGS= `SEGS(MASK_W, SI_W); |
| localparam IDX_SEGS = `SEGS(N_W, SI_W); |
| localparam LL = K*IDX_SEGS + MASK_SEGS; // LUT (transmission) latency |
| |
| wire clk; // clock input |
| wire rst; // sync reset input -- must assert rst for >N cycles |
| wire `V(SI_W) si; // LUTs' configuration segments input stream |
| reg `V(N) luts; // last N LUT outputs; shuffling circular shift register |
| |
| wire `V(I) inputs; // FPGA inputs |
| reg/*comb*/ debug; // debug output -- stream of evaluated LUT inputs and outputs |
| |
| assign {inputs,si,rst,clk} = io_in; |
| |
| reg `V(SR_W) sr; // input shift reg of LUT input index (k<K) or LUT mask (k==K) |
| wire `V(MASK_W) mask = {sr,si}; // current LUT mask |
| wire `V(MASK_W/2) half = {sr,si}; // current LUT half mask (LSBs) |
| wire `V(N_W) idx = {sr,si}; // current input index |
| reg `V(K) ins; // LUT input values; shift register |
| reg q; // previous half-LUT output register |
| |
| // control FSM |
| reg `V(N_W) n; // LUT counter; n in [0,N) |
| reg `V(K_W) k; // LUT input index counter; k in [0,K]: k<K => loading index; k==K => loading mask |
| reg `V(SEG_W) seg; // input segment counter |
| |
| reg/*comb*/ in; // a LUT input; valid when k<K && seg==IDX_SEGS-1 |
| reg/*comb*/ lut; // LUT output (when LUT frame received), else prior LUT output, else 0 during reset |
| reg/*comb*/`V(O) outputs; // last O LUT outputs |
| |
| integer i; |
| |
| always @* begin |
| if (&idx[N_W-1:2]) begin |
| // process special indices designated 'b11..11xx |
| case (idx[1:0]) |
| 2'b00: in = inputs[n[$clog2(I)-1:0]]; // n'th input pin |
| 2'b01: in = q; |
| 2'b10: in = 1'b0; |
| 2'b11: in = 1'b1; |
| endcase |
| end else begin |
| in = luts[idx]; // select an input bit from the various LUT outputs |
| end |
| |
| if (rst) begin |
| lut = 1'b0; |
| end else if (k == K && seg == MASK_SEGS-1) begin |
| lut = mask[ins];// select LUT mask bit indexed by the input bit vector |
| end else begin |
| lut = luts[N-1]; // LUT not yet received: recirculate current LUT output |
| // (shuffling circular shift register area optimization -- saves N-1 mux2s) |
| end |
| |
| // locate last O LUT outputs in the luts shuffling circular shift register (uses 0 gates) |
| outputs[0] = lut; |
| for (i = 1; i < O; i = i + 1) begin |
| outputs[i] = luts[(LL*i-1) % N]; |
| end |
| |
| // output evaluated LUT inputs, evaluated LUT outputs, or prior LUT shift register outputs |
| if (rst) |
| debug = 1'b0; |
| else if (k != K && seg == IDX_SEGS-1) |
| debug = in; // LUT input, valid this cycle |
| else if (k == K && seg == MASK_SEGS-1) |
| debug = lut; // LUT output, valid this cycle |
| else |
| debug = 1'b0; |
| end |
| |
| always @(posedge clk) begin |
| sr <= {sr,si}; // always collect input segments |
| luts <= {luts,lut}; // always recirculate LUTs / load LUT updates -- area optimization |
| |
| io_out[7] <= debug; |
| |
| if (rst) begin |
| ins <= '0; |
| n <= '0; |
| k <= '0; |
| seg <= '0; |
| q <= 1'b0; |
| // serial reset (eventually luts=='0 and thus outputs=='0) |
| io_out[O-1:0] <= outputs; |
| end else if (k != K) begin |
| // LUT input index segment |
| if (seg == IDX_SEGS-1) begin |
| // fetch and shift in the next LUT input |
| ins <= {ins,in}; |
| k <= k + 1'b1; |
| seg <= '0; |
| end else begin |
| seg <= seg + 1'b1; |
| end |
| end else begin |
| // mask segment |
| if (seg == MASK_SEGS-1) begin |
| // evaluate LUT and its half-LUT |
| // luts <= {luts,lut}; -- see "always recirculates" above |
| q <= half[ins[K-2:0]]; |
| |
| // all LUTs evaluated: update FPGA outputs |
| if (n == N-1) |
| io_out[O-1:0] <= outputs; |
| |
| n <= (n == N-1) ? '0 : (n + 1'b1); |
| k <= '0; |
| seg <= '0; |
| end else begin |
| seg <= seg + 1'b1; |
| end |
| end |
| end |
| endmodule |