openlane/chaos_automaton/runs/22_08_08_12_04/results/final/gds/chaos_automaton.gds
diff --git a/gds/chaos_subarray.gds b/gds/chaos_subarray.gds
index 80ca597..4fc82e1 100644
--- a/gds/chaos_subarray.gds
+++ b/gds/chaos_subarray.gds
Binary files differ
diff --git a/mag/chaos_subarray.mag b/mag/chaos_subarray.mag
index a8aa589..cc47162 100644
--- a/mag/chaos_subarray.mag
+++ b/mag/chaos_subarray.mag
@@ -1,7 +1,7 @@
 magic
 tech sky130B
 magscale 1 2
-timestamp 1659731836
+timestamp 1659988397
 << viali >>
 rect 1869 117249 1903 117283
 rect 5549 117249 5583 117283
diff --git a/maglef/chaos_subarray.mag b/maglef/chaos_subarray.mag
index ee689b4..00d3aba 100644
--- a/maglef/chaos_subarray.mag
+++ b/maglef/chaos_subarray.mag
@@ -1,7 +1,7 @@
 magic
 tech sky130B
 magscale 1 2
-timestamp 1659731860
+timestamp 1659988421
 << obsli1 >>
 rect 1104 2159 178848 117521
 << obsm1 >>
@@ -440,7 +440,7 @@
 string LEFclass BLOCK
 string LEFview TRUE
 string GDS_END 36487724
-string GDS_FILE /home/alex/chaos_automaton_Summer_2022/openlane/chaos_subarray/runs/22_08_05_16_33/results/signoff/chaos_subarray.magic.gds
+string GDS_FILE /home/alex/chaos_automaton_Summer_2022/openlane/chaos_subarray/runs/22_08_08_15_49/results/signoff/chaos_subarray.magic.gds
 string GDS_START 141920
 << end >>
 
diff --git a/openlane/chaos_subarray/common_pdn.tcl b/openlane/chaos_subarray/common_pdn.tcl
new file mode 100644
index 0000000..586bd4a
--- /dev/null
+++ b/openlane/chaos_subarray/common_pdn.tcl
@@ -0,0 +1,2 @@
+set ::env(DESIGN_IS_CORE) 1

+set ::env(FP_PDN_CORE_RING) 1
\ No newline at end of file
diff --git a/sdc/chaos_subarray.sdc b/sdc/chaos_subarray.sdc
index 54240c6..817fe4e 100644
--- a/sdc/chaos_subarray.sdc
+++ b/sdc/chaos_subarray.sdc
@@ -1,6 +1,6 @@
 ###############################################################################
 # Created by write_sdc
-# Fri Aug  5 20:34:37 2022
+# Mon Aug  8 19:50:43 2022
 ###############################################################################
 current_design chaos_subarray
 ###############################################################################
diff --git a/sdf/chaos_subarray.sdf b/sdf/chaos_subarray.sdf
index e39241a..305fe02 100644
--- a/sdf/chaos_subarray.sdf
+++ b/sdf/chaos_subarray.sdf
@@ -1,7 +1,7 @@
 (DELAYFILE
  (SDFVERSION "3.0")
  (DESIGN "chaos_subarray")
- (DATE "Fri Aug  5 20:36:50 2022")
+ (DATE "Mon Aug  8 19:52:52 2022")
  (VENDOR "Parallax")
  (PROGRAM "STA")
  (VERSION "2.3.1")
diff --git a/signoff/chaos_subarray/metrics.csv b/signoff/chaos_subarray/metrics.csv
index 16e5677..82f79f9 100644
--- a/signoff/chaos_subarray/metrics.csv
+++ b/signoff/chaos_subarray/metrics.csv
@@ -1,2 +1,2 @@
 design,design_name,config,flow_status,total_runtime,routed_runtime,(Cell/mm^2)/Core_Util,DIEAREA_mm^2,CellPer_mm^2,OpenDP_Util,Peak_Memory_Usage_MB,cell_count,tritonRoute_violations,Short_violations,MetSpc_violations,OffGrid_violations,MinHole_violations,Other_violations,Magic_violations,antenna_violations,lvs_total_errors,cvc_total_errors,klayout_violations,wire_length,vias,wns,pl_wns,optimized_wns,fastroute_wns,spef_wns,tns,pl_tns,optimized_tns,fastroute_tns,spef_tns,HPWL,routing_layer1_pct,routing_layer2_pct,routing_layer3_pct,routing_layer4_pct,routing_layer5_pct,routing_layer6_pct,wires_count,wire_bits,public_wires_count,public_wire_bits,memories_count,memory_bits,processes_count,cells_pre_abc,AND,DFF,NAND,NOR,OR,XOR,XNOR,MUX,inputs,outputs,level,EndCaps,TapCells,Diodes,Total_Physical_Cells,CoreArea_um^2,power_slowest_internal_uW,power_slowest_switching_uW,power_slowest_leakage_uW,power_typical_internal_uW,power_typical_switching_uW,power_typical_leakage_uW,power_fastest_internal_uW,power_fastest_switching_uW,power_fastest_leakage_uW,critical_path_ns,suggested_clock_period,suggested_clock_frequency,CLOCK_PERIOD,SYNTH_STRATEGY,SYNTH_MAX_FANOUT,FP_CORE_UTIL,FP_ASPECT_RATIO,FP_PDN_VPITCH,FP_PDN_HPITCH,PL_TARGET_DENSITY,GLB_RT_ADJUSTMENT,STD_CELL_LIBRARY,CELL_PAD,DIODE_INSERTION_STRATEGY

-/home/alex/chaos_automaton_Summer_2022/openlane/chaos_subarray,chaos_subarray,22_08_05_16_33,flow completed,0h9m55s0ms,0h2m47s0ms,-2.0,0.54,-1,39.46,3550.07,-1,0,0,0,0,0,0,0,6,0,-1,-1,339198,103321,-26.09,-26.09,0.0,0.0,-1,-26.09,-26.09,0.0,0.0,-1,132918716.0,0.0,31.47,19.28,0.8,0.26,-1,1040,7346,1040,7346,0,0,0,6500,0,100,0,0,0,0,0,0,2,6600,5,424,7276,0,7700,514032.2304,0.012,0.00293,0.000174,0.0154,0.00382,2.02e-07,0.0179,0.00456,3.87e-07,1.9400000000000004,26.0,38.46153846153846,25,AREA 0,12,50,1,153.6,153.18,0.48,0.3,sky130_fd_sc_hd,4,4

+/home/alex/chaos_automaton_Summer_2022/openlane/chaos_subarray,chaos_subarray,22_08_08_15_49,flow completed,0h9m51s0ms,0h2m40s0ms,-2.0,0.54,-1,39.46,3576.88,-1,0,0,0,0,0,0,0,6,0,-1,-1,339198,103321,-26.09,-26.09,0.0,0.0,-1,-26.09,-26.09,0.0,0.0,-1,132918716.0,0.0,31.47,19.28,0.8,0.26,-1,1040,7346,1040,7346,0,0,0,6500,0,100,0,0,0,0,0,0,2,6600,5,424,7276,0,7700,514032.2304,0.012,0.00293,0.000174,0.0154,0.00382,2.02e-07,0.0179,0.00456,3.87e-07,1.9400000000000004,26.0,38.46153846153846,25,AREA 0,12,50,1,153.6,153.18,0.48,0.3,sky130_fd_sc_hd,4,4

diff --git a/verilog/dv/verify.log b/verilog/dv/verify.log
deleted file mode 100644
index e69de29..0000000
--- a/verilog/dv/verify.log
+++ /dev/null
diff --git a/verilog/rtl/chaos_automaton.v b/verilog/rtl/chaos_automaton.v
index a37ac10..afe54ae 100644
--- a/verilog/rtl/chaos_automaton.v
+++ b/verilog/rtl/chaos_automaton.v
@@ -13,9 +13,6 @@
 // limitations under the License.
 // SPDX-License-Identifier: Apache-2.0
 
-// NOTE:  Remove the following line before synthesizing
-// `define MPRJ_IO_PADS 38
-
 `default_nettype none
 /*
  *-------------------------------------------------------------
@@ -52,17 +49,35 @@
  * in addition to the 64 flops so that the scan chain can be cycled
  * without affecting ongoing operation of the automaton.
  *
+ * Version v2:  The logic analyzer is replaced by a local version that
+ * has the same number of bits as periphery I/O.  There are two registers
+ * per signal, one for output, and one for input.  All registers update
+ * simultaneously.  Every periphery input is connected to three sources,
+ * XOR'd together:  A periphery output, a GPIO input, and a register.
+ * Every periphery output is connected to three sinks:  A periphery
+ * input, a GPIO output, and a register.  The periphery output-to-input
+ * connections can be a loop-back or neighbor loop-back.
+ *
  * Memory mapped address space:
  *
- *	BASE_ADR + 7 to BASE_ADR + 0:   Data to read or write
- *	BASE_ADR + 15 to BASE_ADR + 8:	Core cell address for read/write
- *	BASE_ADR + 16:			Triggers
+ *	BASE_ADR + 7 to BASE_ADR + 0:   Configuration data to read or write
+ *	BASE_ADR + 11 to BASE_ADR + 8:	Core cell address for read/write
+ *	BASE_ADR + 12:			Triggers
+ *	BASE_ADR + 17 to BASE_ADR + 16: Per-side input configuration
+ *	BASE_ADR + 18:			GPIO input and output slice selection
+ *	BASE_ADR + 19:			GPIO direction
+ *	BASE_ADR + ?? to BASE_ADR + 20: Operational data
+ *	(BASE_ADR + 39 for 50x30 array)
  *
  * Trigger bits:
  *	bit 0:  Shift by (address) cells (64 bits).
  *	bit 1:  Finish cycle.  Return shift register to run state, toggle "hold"
  *
- * Both trigger bits are self-resetting.  The trigger bit (as read) remains
+ * (to be done:)
+ *	bit 2:  Capture data
+ *	bit 3:  Apply data
+ *
+ * All trigger bits are self-resetting.  The trigger bit (as read) remains
  * high until the transfer has completed.  The trigger bit can be polled to
  * determine when the cycle has completed.
  *
@@ -90,7 +105,9 @@
  *-------------------------------------------------------------
  */
 
-//`include "chaos_subarray.v"
+// NOTE:  Uncomment the following lines for syntax checking
+// `define MPRJ_IO_PADS 38
+// `include "chaos_subarray.v"
 
 /*
  *-----------------------------------------------------------------
@@ -103,7 +120,7 @@
     parameter YSIZE = 50,	// Total number of cells top to bottom
     parameter XTOP = 3,		// Number of sub-arrays left to right
     parameter YTOP = 5,		// Number of sub-arrays top to bottom
-    parameter ASIZE = 10,	// Enough bits to count XSIZE * YSIZE
+    parameter ASIZE = 11,	// Enough bits to count XSIZE * YSIZE
     parameter BASE_ADR = 32'h 3000_0000 // Wishbone base address
 )(
 `ifdef USE_POWER_PINS
@@ -129,7 +146,7 @@
     output wbs_ack_o,
     output [31:0] wbs_dat_o,
 
-    // Logic Analyzer Signals
+    // Logic Analyzer Signals (unused)
     input  [127:0] la_data_in,
     output [127:0] la_data_out,
     input  [127:0] la_oenb,
@@ -151,17 +168,20 @@
 `define LOAD	3'b101
 
 `define CONFIGL	8'h00		/* Address offset of configuration data low word */
-`define CONFIGH	8'h04		/* Address offset of configuration data high word */
-`define ADDRESS	8'h08		/* Address offset of cell address value */
-`define XFER	8'h0c		/* Address offset of transfer bits */
+`define CONFIGH	8'h01		/* Address offset of configuration data high word */
+`define ADDRESS	8'h02		/* Address offset of cell address value */
+`define XFER	8'h03		/* Address offset of transfer bits */
+`define DIRECT  8'h04		/* Word offset (vs. wbs_adr_i[7:2]) of GPIO directions */
+`define SOURCE  8'h0a		/* Word offset of GPIO source selection; first free word after the data section, must not equal DIRECT */
+`define DATATOP	8'h05		/* Word offset of start of data section (5 words, 8'h05 to 8'h09) */
 
 `define MAXADDR (XSIZE * YSIZE)	/* Highest cell address plus one */
 
     reg clk;			/* serial clock to transfer data 	*/
     reg hold;			/* trigger to hold transferred data 	*/
     reg [2:0] xfer_state;	/* state of the data transfer		*/
-    reg [1:0] xfer_ctrl;	/* Transfer trigger bits		*/
-    reg [63:0] config_data;	/* 64 bits to read or write		*/
+    reg [1:0] xfer_ctrl;	/* Configuration transfer trigger bits	*/
+    reg [63:0] config_data;	/* 64 bits to read or write configuration */
 
     reg [ASIZE - 1:0] cell_addr;	/* Core cell to address	*/
     reg [ASIZE - 1:0] cell_offset;	/* Current offset of shift register */
@@ -174,6 +194,15 @@
     wire [1:0] config_sel;
     wire address_sel;
     wire xfer_sel;
+    wire direct_sel;
+    wire source_sel;
+
+    // NOTE:  This should be parameterized.
+    // For the 50x30 array, there are 50+50+30+30 = 160 periphery bits =
+    // 5 words of 32 bits.  This is hard-coded for convenience.  If the
+    // array size changes, this needs to be changed as well.  Needs to be
+    // converted to a "generate" block.
+    wire [4:0] data_sel;
 
     wire valid;
     reg ready;
@@ -186,30 +215,83 @@
     reg [63:0] wdata;
     reg write;
 
-    wire [2*XSIZE + 2*YSIZE - 1: 0] data_in;
+    // Direction for each GPIO (32 used)
+    reg [31:0] gpio_oeb;
+
+    // Data to and from array periphery I/O
+    wire [YSIZE-1: 0] data_in_east;
+    wire [YSIZE-1: 0] data_in_west;
+    wire [XSIZE-1: 0] data_in_north;
+    wire [XSIZE-1: 0] data_in_south;
+
+    wire [YSIZE-1: 0] data_out_east;
+    wire [YSIZE-1: 0] data_out_west;
+    wire [XSIZE-1: 0] data_out_north;
+    wire [XSIZE-1: 0] data_out_south;
+
+    // Latched output for wishbone read-back (to be done)
+    // TBD
+
+    // Latched input from wishbone (to do:  Make shadow register)
+    wire [YSIZE-1: 0] latched_in_east;
+    wire [YSIZE-1: 0] latched_in_west;
+    wire [XSIZE-1: 0] latched_in_north;
+    wire [XSIZE-1: 0] latched_in_south;
+
+    // Shadow registers for wishbone input (to be done)
+    // TBD
+
+    // Register array mapping latched data to 32-bit sections for data
+    // transfer through the wishbone
+    reg [XSIZE*2 + YSIZE*2 - 1:0] latched_in;
+
+    // Wire array mapping output data to 32-bit sections for data
+    // transfer through the wishbone
+    wire [XSIZE*2 + YSIZE*2 - 1:0] data_out;
+
+    // Periphery output-to-input loop-back selection
+    reg [2:0] north_loopback;
+    reg [2:0] east_loopback;
+    reg [2:0] south_loopback;
+    reg [2:0] west_loopback;
+
+// Loopback value definitions
+
+`define INPUT_LOW	3'b000
+`define INPUT_HIGH	3'b001
+`define LOOPBACK	3'b010
+`define NEIGHBOR_LEFT	3'b011
+`define NEIGHBOR_RIGHT  3'b100
+
+    // GPIO slicing (because there are many fewer GPIO than array outputs)
+    // GPIOs can be clustered on either end or in the center of the array
+    // side, or distributed along the side (1 GPIO per 5 array cells)
+    reg [1:0] gpio_output_slice;
+    reg [1:0] gpio_input_slice;
+
+    // Registered GPIO directions go directly to io_oeb[37:6].  Leave the
+    // lower 6 GPIO to the management processor (all six bits tied high).
+    assign io_oeb = {gpio_oeb, {6{1'b1}}};
 
     // Wishbone address select indicators
-    assign config_sel[0] = (wbs_adr_i[7:0] == `CONFIGL);
-    assign config_sel[1] = (wbs_adr_i[7:0] == `CONFIGH);
-    assign address_sel = (wbs_adr_i[7:0] == `ADDRESS);
-    assign xfer_sel = (wbs_adr_i[7:0] == `XFER);
+    assign config_sel[0] = (wbs_adr_i[7:2] == `CONFIGL);
+    assign config_sel[1] = (wbs_adr_i[7:2] == `CONFIGH);
+    assign address_sel = (wbs_adr_i[7:2] == `ADDRESS);
+    assign xfer_sel = (wbs_adr_i[7:2] == `XFER);
+    assign direct_sel = (wbs_adr_i[7:2] == `DIRECT);
+    assign source_sel = (wbs_adr_i[7:2] == `SOURCE);
 
-    assign selected = config_sel[1] || config_sel[0] || address_sel || xfer_sel;
-    
+    // Hard-coded to 5 words;  see note above
+    assign data_sel[0] = (wbs_adr_i[7:2] == (`DATATOP + 0));
+    assign data_sel[1] = (wbs_adr_i[7:2] == (`DATATOP + 1));
+    assign data_sel[2] = (wbs_adr_i[7:2] == (`DATATOP + 2));
+    assign data_sel[3] = (wbs_adr_i[7:2] == (`DATATOP + 3));
+    assign data_sel[4] = (wbs_adr_i[7:2] == (`DATATOP + 4));
+
     assign valid = wbs_cyc_i && wbs_stb_i; 
     assign wbs_ack_o = ready;
     assign iomem_we = wbs_sel_i & {4{wbs_we_i}};
 
-    // Chip pin output (Connects to a subset of la_data_in;
-    // 9 signals each N and S, 10 signals each W and E)
-    assign io_out = {la_data_out[2*YSIZE + XSIZE + 8: 2*YSIZE + XSIZE],	// north
-		     la_data_out[2*YSIZE + 8: 2*YSIZE],			// south
-		     la_data_out[YSIZE + 9: YSIZE],			// east
-		     la_data_out[9:0]};					// west
-
-    // Chip pin direction is assigned to la_data sub-array
-    assign io_oeb = la_data_in[127:127-38] & ~la_oenb[127:127-38];
-
     // IRQ
     assign irq = 3'b000;	// Unused
 
@@ -232,44 +314,298 @@
         .rdata(rdata),
         .wdata(wdata),
 	.write(write),
-        .data_in(data_in),
-        .data_out(la_data_out[2*XSIZE + 2*YSIZE - 1: 0])
+        .data_in_east(data_in_east),
+        .data_in_west(data_in_west),
+        .data_in_north(data_in_north),
+        .data_in_south(data_in_south),
+        .data_out_east(data_out_east),
+        .data_out_west(data_out_west),
+        .data_out_north(data_out_north),
+        .data_out_south(data_out_south)
     );
 
-    /* Hook up io_in (multiplexed with la_data_int based on value of la_oenb,
-     * using the same subsets as used for io_out).  The expressions are more
-     * complicated because the signals that are connected to the GPIO pins
-     * have to be multiplexed with the logic analyzer inputs.
-     */
+    // Wire definitions mapping the GPIO to the array periphery
+    wire [YSIZE-1:0] gpio_east, gpio_west;
+    wire [XSIZE-1:0] gpio_north, gpio_south;
 
-    genvar i;
+    // Wire definitions mapping the array periphery loop-back connections
+    wire [YSIZE-1:0] data_muxed_east, data_muxed_west;
+    wire [XSIZE-1:0] data_muxed_north, data_muxed_south;
 
-    generate
-	for (i = 2*YSIZE + XSIZE + 9; i < 2*YSIZE + 2*XSIZE; i=i+1) begin
-	    assign data_in[i] = la_data_in[i];
-	end
-	for (i = 2 * YSIZE + XSIZE; i < 2*YSIZE + XSIZE + 9; i=i+1) begin
-    	    assign data_in[i] = la_oenb[i] ? io_in[i - 2*YSIZE + XSIZE + 29] : la_data_in[i];
-	end
-	for (i = 2 * YSIZE + 9; i < 2 * YSIZE + XSIZE; i=i+1) begin
-	    assign data_in[i] = la_data_in[i];
-	end
-	for (i = 2 * YSIZE; i < 2 * YSIZE + 9; i=i+1) begin
-    	    assign data_in[i] = la_oenb[i] ? io_in[i - 2*YSIZE + 20] : la_data_in[i];
-	end
-	for (i = YSIZE + 10; i < 2 * YSIZE; i=i+1) begin
-	    assign data_in[i] = la_data_in[i];
-	end
-	for (i = YSIZE; i < YSIZE + 10; i=i+1) begin
-    	    assign data_in[i] = la_oenb[i] ? io_in[i - YSIZE + 10] : la_data_in[i];
-	end
-	for (i = 10; i < YSIZE; i=i+1) begin
-	    assign data_in[i] = la_data_in[i];
-	end
-	for (i = 0; i < 10; i=i+1) begin
-    	    assign data_in[i] = la_oenb[i] ? io_in[i]: la_data_in[i];
-	end
-    endgenerate
+    // Hook up array inputs (data_in_*) to an XOR'd combination of
+    // (1) array outputs (data_out_*, muxed into data_muxed_*),
+    // (2) the GPIO pads (muxed into gpio_*), and
+    // (3) data from the wishbone bus (latched_in_*).
+
+    assign data_in_west = latched_in_west ^ gpio_west ^ data_muxed_west;
+    assign data_in_east = latched_in_east ^ gpio_east ^ data_muxed_east;
+    assign data_in_south = latched_in_south ^ gpio_south ^ data_muxed_south;
+    assign data_in_north = latched_in_north ^ gpio_north ^ data_muxed_north;
+
+`define INPUT_LOW	3'b000
+`define INPUT_HIGH	3'b001
+`define LOOPBACK	3'b010
+`define NEIGHBOR_LEFT	3'b011
+`define NEIGHBOR_RIGHT  3'b100
+
+    // Define loop-back inputs (INPUT_HIGH/INPUT_LOW fill the whole side)
+    assign data_muxed_west =
+	(west_loopback == `NEIGHBOR_LEFT) ? {data_out_west[YSIZE-2:0], 1'b0} :
+	(west_loopback == `NEIGHBOR_RIGHT) ? {1'b0, data_out_west[YSIZE-1:1]} :
+	(west_loopback == `LOOPBACK) ?  data_out_west :
+	(west_loopback == `INPUT_HIGH) ? {YSIZE{1'b1}} : {YSIZE{1'b0}};
+
+    assign data_muxed_east =
+	(east_loopback == `NEIGHBOR_LEFT) ? {data_out_east[YSIZE-2:0], 1'b0} :
+	(east_loopback == `NEIGHBOR_RIGHT) ? {1'b0, data_out_east[YSIZE-1:1]} :
+	(east_loopback == `LOOPBACK) ?  data_out_east :
+	(east_loopback == `INPUT_HIGH) ? {YSIZE{1'b1}} : {YSIZE{1'b0}};
+
+    assign data_muxed_south =
+	(south_loopback == `NEIGHBOR_LEFT) ? {data_out_south[XSIZE-2:0], 1'b0} :
+	(south_loopback == `NEIGHBOR_RIGHT) ?  {1'b0, data_out_south[XSIZE-1:1]} :
+	(south_loopback == `LOOPBACK) ? data_out_south :
+	(south_loopback == `INPUT_HIGH) ? {XSIZE{1'b1}} : {XSIZE{1'b0}};
+
+    assign data_muxed_north =	// every arm is selected by north_loopback
+	(north_loopback == `NEIGHBOR_LEFT) ? {data_out_north[XSIZE-2:0], 1'b0} :
+	(north_loopback == `NEIGHBOR_RIGHT) ?  {1'b0, data_out_north[XSIZE-1:1]} :
+	(north_loopback == `LOOPBACK) ? data_out_north :
+	(north_loopback == `INPUT_HIGH) ? {XSIZE{1'b1}} : {XSIZE{1'b0}};
+
+    // Define I/O input slices
+    // NOTE:  This is hard-coded.  There are 38 GPIOs.  Assigning 32 of them
+    // (GPIO 37 to 6) to array inputs and outputs.  These are arranged as
+    // 10 on the sides and 6 on the top and bottom.  These are further sub-
+    // divided into 5 inputs and 5 outputs on the sides, and 3 inputs and
+    // 3 outputs on top and bottom.  Depending on the selection, these
+    // can be injected into various places around the array.
+
+    // Another note:  It probably makes more sense to define vectors for
+    // io_in_east, io_in_north, etc., and align them in the direction of
+    // the arrays (high to low index is top to bottom, or right to left).
+
+    assign gpio_east = 	// I/O 15 to 6
+	(gpio_input_slice == 0) ?	// Distributed
+		{2'b0, io_in[15], 4'b0, io_in[14], 4'b0, io_in[13],
+		 4'b0, io_in[12], 4'b0, io_in[11], 4'b0, io_in[10],
+		 4'b0, io_in[9],  4'b0, io_in[8],  4'b0, io_in[7],
+		 4'b0, io_in[6],  2'b0} :
+	(gpio_input_slice == 1) ? {40'b0, io_in[15:6]} :	// Bottom shifted
+	(gpio_input_slice == 2) ? {20'b0, io_in[15:6], 20'b0} : // Centered
+	{io_in[15:6], 40'b0};					// Top shifted
+
+    assign gpio_north = 	// I/O 21 to 16; shifted/centered padding is sized from XSIZE
+	(gpio_input_slice == 0) ?	// Distributed
+		{2'b0, io_in[16], 4'b0, io_in[17], 4'b0, io_in[18],
+		 4'b0, io_in[19], 4'b0, io_in[20], 4'b0, io_in[21], 2'b0} :
+	(gpio_input_slice == 1) ?	// Right shifted
+		{{(XSIZE-6){1'b0}}, io_in[16], io_in[17], io_in[18], io_in[19],
+		io_in[20], io_in[21]} :
+	(gpio_input_slice == 2) ?	// Centered
+		{{((XSIZE-6)/2){1'b0}}, io_in[16], io_in[17], io_in[18], io_in[19],
+		io_in[20], io_in[21], {((XSIZE-6)/2){1'b0}}} :
+	{io_in[16], io_in[17], io_in[18], io_in[19], io_in[20],
+		io_in[21], {(XSIZE-6){1'b0}}};	// Left shifted
+
+    assign gpio_west = 	// I/O 22 to 31 (each pad used exactly once per slice)
+	(gpio_input_slice == 0) ?	// Distributed
+		{2'b0, io_in[22], 4'b0, io_in[23], 4'b0, io_in[24],
+		 4'b0, io_in[25], 4'b0, io_in[26], 4'b0, io_in[27],
+		 4'b0, io_in[28], 4'b0, io_in[29], 4'b0, io_in[30],
+		 4'b0, io_in[31],  2'b0} :
+	(gpio_input_slice == 1) ?	// Bottom shifted
+		{40'b0, io_in[22], io_in[23], io_in[24], io_in[25],
+		io_in[26], io_in[27], io_in[28], io_in[29], io_in[30],
+		io_in[31]} :
+	(gpio_input_slice == 2) ?	// Centered
+		{20'b0, io_in[22], io_in[23], io_in[24], io_in[25],
+		io_in[26], io_in[27], io_in[28], io_in[29], io_in[30],
+		io_in[31], 20'b0} :
+	{io_in[22], io_in[23], io_in[24], io_in[25], io_in[26],
+		io_in[27], io_in[28], io_in[29], io_in[30], io_in[31],
+		40'b0};					// Top shifted
+
+    assign gpio_south = 	// I/O 32 to 37; shifted/centered padding is sized from XSIZE
+	(gpio_input_slice == 0) ?	// Distributed
+		{2'b0, io_in[37], 4'b0, io_in[36], 4'b0, io_in[35],
+		 4'b0, io_in[34], 4'b0, io_in[33], 4'b0, io_in[32], 2'b0} :
+	(gpio_input_slice == 1) ? {{(XSIZE-6){1'b0}}, io_in[37:32]} :	// Right shifted
+	(gpio_input_slice == 2) ? {{((XSIZE-6)/2){1'b0}}, io_in[37:32], {((XSIZE-6)/2){1'b0}}} :	// Centered
+	{io_in[37:32], {(XSIZE-6){1'b0}}};				// Left shifted
+
+    // East side
+    assign io_out[6] =
+	(gpio_output_slice == 0) ? data_out_east[2] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[20] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[40] :	// Top
+	data_out_east[0];				// Bottom
+    assign io_out[7] =
+	(gpio_output_slice == 0) ? data_out_east[7] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[21] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[41] :	// Top
+	data_out_east[1];				// Bottom
+    assign io_out[8] =
+	(gpio_output_slice == 0) ? data_out_east[12] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[22] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[42] :	// Top
+	data_out_east[2];				// Bottom
+    assign io_out[9] =
+	(gpio_output_slice == 0) ? data_out_east[17] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[23] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[43] :	// Top
+	data_out_east[3];				// Bottom
+    assign io_out[10] =
+	(gpio_output_slice == 0) ? data_out_east[22] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[24] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[44] :	// Top
+	data_out_east[4];				// Bottom
+    assign io_out[11] =
+	(gpio_output_slice == 0) ? data_out_east[27] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[25] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[45] :	// Top
+	data_out_east[5];				// Bottom
+    assign io_out[12] =
+	(gpio_output_slice == 0) ? data_out_east[32] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[26] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[46] :	// Top
+	data_out_east[6];				// Bottom
+    assign io_out[13] =
+	(gpio_output_slice == 0) ? data_out_east[37] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[27] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[47] :	// Top
+	data_out_east[7];				// Bottom
+    assign io_out[14] =
+	(gpio_output_slice == 0) ? data_out_east[42] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[28] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[48] :	// Top
+	data_out_east[8];				// Bottom
+    assign io_out[15] =
+	(gpio_output_slice == 0) ? data_out_east[47] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_east[29] :	// Center
+	(gpio_output_slice == 2) ? data_out_east[49] :	// Top
+	data_out_east[9];				// Bottom
+
+    // North side
+    assign io_out[16] =
+	(gpio_output_slice == 0) ? data_out_north[27] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_north[16] :	// Center
+	(gpio_output_slice == 2) ? data_out_north[29] :	// Right
+	data_out_north[5];				// Left
+    assign io_out[17] =
+	(gpio_output_slice == 0) ? data_out_north[22] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_north[15] :	// Center
+	(gpio_output_slice == 2) ? data_out_north[28] :	// Right
+	data_out_north[4];				// Left
+    assign io_out[18] =
+	(gpio_output_slice == 0) ? data_out_north[17] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_north[14] :	// Center
+	(gpio_output_slice == 2) ? data_out_north[27] :	// Right
+	data_out_north[3];				// Left
+    assign io_out[19] =
+	(gpio_output_slice == 0) ? data_out_north[12] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_north[13] :	// Center
+	(gpio_output_slice == 2) ? data_out_north[26] :	// Right
+	data_out_north[2];				// Left
+    assign io_out[20] =
+	(gpio_output_slice == 0) ? data_out_north[7] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_north[12] :	// Center
+	(gpio_output_slice == 2) ? data_out_north[25] :	// Right
+	data_out_north[1];				// Left
+    assign io_out[21] =
+	(gpio_output_slice == 0) ? data_out_north[2] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_north[11] :	// Center
+	(gpio_output_slice == 2) ? data_out_north[24] :	// Right
+	data_out_north[0];				// Left
+
+    // West side (every arm reads data_out_west)
+    assign io_out[22] =
+	(gpio_output_slice == 0) ? data_out_west[47] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[29] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[49] :	// Top
+	data_out_west[9];				// Bottom
+    assign io_out[23] =
+	(gpio_output_slice == 0) ? data_out_west[42] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[28] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[48] :	// Top
+	data_out_west[8];				// Bottom
+    assign io_out[24] =
+	(gpio_output_slice == 0) ? data_out_west[37] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[27] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[47] :	// Top
+	data_out_west[7];				// Bottom
+    assign io_out[25] =
+	(gpio_output_slice == 0) ? data_out_west[32] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[26] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[46] :	// Top
+	data_out_west[6];				// Bottom
+    assign io_out[26] =
+	(gpio_output_slice == 0) ? data_out_west[27] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[25] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[45] :	// Top
+	data_out_west[5];				// Bottom
+    assign io_out[27] =
+	(gpio_output_slice == 0) ? data_out_west[22] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[24] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[44] :	// Top
+	data_out_west[4];				// Bottom
+    assign io_out[28] =
+	(gpio_output_slice == 0) ? data_out_west[17] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[23] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[43] :	// Top
+	data_out_west[3];				// Bottom
+    assign io_out[29] =
+	(gpio_output_slice == 0) ? data_out_west[12] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[22] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[42] :	// Top
+	data_out_west[2];				// Bottom
+    assign io_out[30] =
+	(gpio_output_slice == 0) ? data_out_west[7] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[21] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[41] :	// Top
+	data_out_west[1];				// Bottom
+    assign io_out[31] =
+	(gpio_output_slice == 0) ? data_out_west[2] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_west[20] :	// Center
+	(gpio_output_slice == 2) ? data_out_west[40] :	// Top
+	data_out_west[0];				// Bottom
+
+    // South side (every arm reads data_out_south)
+    assign io_out[32] =
+	(gpio_output_slice == 0) ? data_out_south[2] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_south[11] :	// Center
+	(gpio_output_slice == 2) ? data_out_south[24] :	// Right
+	data_out_south[0];				// Left
+    assign io_out[33] =
+	(gpio_output_slice == 0) ? data_out_south[7] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_south[12] :	// Center
+	(gpio_output_slice == 2) ? data_out_south[25] :	// Right
+	data_out_south[1];				// Left
+    assign io_out[34] =
+	(gpio_output_slice == 0) ? data_out_south[12] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_south[13] :	// Center
+	(gpio_output_slice == 2) ? data_out_south[26] :	// Right
+	data_out_south[2];				// Left
+    assign io_out[35] =
+	(gpio_output_slice == 0) ? data_out_south[17] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_south[14] :	// Center
+	(gpio_output_slice == 2) ? data_out_south[27] :	// Right
+	data_out_south[3];				// Left
+    assign io_out[36] =
+	(gpio_output_slice == 0) ? data_out_south[22] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_south[15] :	// Center
+	(gpio_output_slice == 2) ? data_out_south[28] :	// Right
+	data_out_south[4];				// Left
+    assign io_out[37] =
+	(gpio_output_slice == 0) ? data_out_south[27] :	// Distributed
+	(gpio_output_slice == 1) ? data_out_south[16] :	// Center
+	(gpio_output_slice == 2) ? data_out_south[29] :	// Right
+	data_out_south[5];				// Left
+
+    // Map the output data from the sides to a single array that can be
+    // broken up into 32 bit segments for data transfer.  
+
+    assign data_out = {data_out_north, data_out_east, data_out_south, data_out_west};
 
     /* Read data (only rdata is something that was not written by the processor) */
 
@@ -285,6 +621,22 @@
 	    /* When ADDRESS is selected, pass back the existing cell	*/
 	    /* count rather than what was written into cell_addr.	*/
 	    rdata_pre = bit_count[ASIZE + 6: 7];
+	end else if (direct_sel) begin
+	    rdata_pre = gpio_oeb;
+	end else if (source_sel) begin
+	    rdata_pre = {10'b0, gpio_output_slice, 2'b0, gpio_input_slice,
+			1'b0, north_loopback, 1'b0, east_loopback,
+			1'b0, south_loopback, 1'b0, west_loopback};
+	end else if (data_sel[0]) begin
+	    rdata_pre = data_out[31:0];
+	end else if (data_sel[1]) begin
+	    rdata_pre = data_out[63:32];
+	end else if (data_sel[2]) begin
+	    rdata_pre = data_out[95:64];
+	end else if (data_sel[3]) begin
+	    rdata_pre = data_out[127:96];
+	end else if (data_sel[4]) begin
+	    rdata_pre = data_out[159:128];
 	end
     end
 
@@ -298,13 +650,19 @@
 	    ready <= 0;
             if (valid && !ready && wbs_adr_i[31:8] == BASE_ADR[31:8]) begin
 		ready <= 1'b1;
-		if (selected) begin
-		    wbs_dat_o <= rdata_pre;
-		end
+		wbs_dat_o <= rdata_pre;
 	    end
 	end
     end
 
+    // Split the latched wishbone data into per-side vectors.  The packing
+    // is {north, east, south, west}, i.e. widths XSIZE, YSIZE, XSIZE, YSIZE.
+
+    assign latched_in_north = latched_in[2*XSIZE+2*YSIZE-1:2*YSIZE+XSIZE];
+    assign latched_in_east = latched_in[2*YSIZE+XSIZE-1:YSIZE+XSIZE];
+    assign latched_in_south = latched_in[YSIZE+XSIZE-1:YSIZE];
+    assign latched_in_west = latched_in[YSIZE-1:0];
+
     /* Write data */
 
     always @(posedge wb_clk_i or posedge wb_rst_i) begin
@@ -332,6 +690,49 @@
 		end else if (address_sel) begin
 		    /* NOTE:  If XSIZE * YSIZE > 256, this must be adjusted */
                     if (iomem_we[0]) cell_addr <= wbs_dat_i[7:0];
+		end else if (direct_sel) begin
+                    if (iomem_we[0]) gpio_oeb[7:0] <= wbs_dat_i[7:0];
+                    if (iomem_we[1]) gpio_oeb[15:8] <= wbs_dat_i[15:8];
+                    if (iomem_we[2]) gpio_oeb[23:16] <= wbs_dat_i[23:16];
+                    if (iomem_we[3]) gpio_oeb[31:24] <= wbs_dat_i[31:24];
+		end else if (source_sel) begin
+                    if (iomem_we[0]) begin	// byte lane 0: bits 7:0
+			 west_loopback <= wbs_dat_i[2:0];
+			 south_loopback <= wbs_dat_i[6:4];
+		    end
+                    if (iomem_we[1]) begin	// byte lane 1: bits 15:8
+			 east_loopback <= wbs_dat_i[10:8];
+			 north_loopback <= wbs_dat_i[14:12];
+		    end
+                    if (iomem_we[2]) begin	// byte lane 2: bits 23:16
+			 gpio_input_slice <= wbs_dat_i[17:16];
+			 gpio_output_slice <= wbs_dat_i[21:20];
+		    end
+		end else if (data_sel[0]) begin
+                    if (iomem_we[0]) latched_in[7:0] <= wbs_dat_i[7:0];
+                    if (iomem_we[1]) latched_in[15:8] <= wbs_dat_i[15:8];
+                    if (iomem_we[2]) latched_in[23:16] <= wbs_dat_i[23:16];
+                    if (iomem_we[3]) latched_in[31:24] <= wbs_dat_i[31:24];
+		end else if (data_sel[1]) begin
+                    if (iomem_we[0]) latched_in[39:32] <= wbs_dat_i[7:0];
+                    if (iomem_we[1]) latched_in[47:40] <= wbs_dat_i[15:8];
+                    if (iomem_we[2]) latched_in[55:48] <= wbs_dat_i[23:16];
+                    if (iomem_we[3]) latched_in[63:56] <= wbs_dat_i[31:24];
+		end else if (data_sel[2]) begin
+                    if (iomem_we[0]) latched_in[71:64] <= wbs_dat_i[7:0];
+                    if (iomem_we[1]) latched_in[79:72] <= wbs_dat_i[15:8];
+                    if (iomem_we[2]) latched_in[87:80] <= wbs_dat_i[23:16];
+                    if (iomem_we[3]) latched_in[95:88] <= wbs_dat_i[31:24];
+		end else if (data_sel[3]) begin
+                    if (iomem_we[0]) latched_in[103:96] <= wbs_dat_i[7:0];
+                    if (iomem_we[1]) latched_in[111:104] <= wbs_dat_i[15:8];
+                    if (iomem_we[2]) latched_in[119:112] <= wbs_dat_i[23:16];
+                    if (iomem_we[3]) latched_in[127:120] <= wbs_dat_i[31:24];
+		end else if (data_sel[4]) begin
+                    if (iomem_we[0]) latched_in[135:128] <= wbs_dat_i[7:0];
+                    if (iomem_we[1]) latched_in[143:136] <= wbs_dat_i[15:8];
+                    if (iomem_we[2]) latched_in[151:144] <= wbs_dat_i[23:16];
+                    if (iomem_we[3]) latched_in[159:152] <= wbs_dat_i[31:24];
                 end
             end else begin
                 xfer_ctrl <= 0;      // Immediately self-resetting
@@ -426,8 +827,14 @@
     input write,
     input [63:0] wdata,
     output [63:0] rdata,
-    input  [2*XSIZE + 2*YSIZE - 1:0] data_in,	// Perimeter I/O
-    output [2*XSIZE + 2*YSIZE - 1:0] data_out	// Perimeter I/O
+    input [YSIZE-1:0] data_in_east,	// Perimeter input
+    input [YSIZE-1:0] data_in_west,
+    input [XSIZE-1:0] data_in_north,
+    input [XSIZE-1:0] data_in_south,
+    output [YSIZE-1:0] data_out_east,	// Perimeter output
+    output [YSIZE-1:0] data_out_west,
+    output [XSIZE-1:0] data_out_north,
+    output [XSIZE-1:0] data_out_south
 );
     wire [XSIZE - 1: 0] uconn [YTOP: 0];
     wire [XSIZE - 1: 0] dconn [YTOP: 0];
@@ -481,16 +888,21 @@
     // some 2D arrays may need to be copied into 1D arrays.
     // See the original verilog for examples.
 
-    /* The perimeter inputs and outputs connect to the logic analyzer */
-    /* (To do:  multiplex inputs between the chip I/O and logic analyzer */
+    /* The perimeter inputs and outputs connect the array to the
+     * parent module.  Note that this hides all the interior data,
+     * which could be an issue with understanding how the circuit
+     * works.
+     */
 
-    assign data_out = {uconn[YTOP][XSIZE - 1:0], dconn[0][XSIZE - 1:0],
-			  rconn[XTOP][YSIZE - 1:0], lconn[0][YSIZE - 1:0]};
+    assign data_out_north = uconn[YTOP][XSIZE - 1:0];
+    assign data_out_south = dconn[0][XSIZE - 1:0];
+    assign data_out_east = rconn[XTOP][YSIZE - 1:0];
+    assign data_out_west = lconn[0][YSIZE - 1:0];
 
-    assign dconn[YTOP][XSIZE - 1:0] = data_in[2*XSIZE+2*YSIZE - 1: 2*YSIZE + XSIZE];
-    assign uconn[0][XSIZE - 1:0] = data_in[2*YSIZE + XSIZE - 1:2*YSIZE];
-    assign rconn[0][YSIZE - 1:0] = data_in[2*YSIZE-1:YSIZE];
-    assign lconn[XTOP][YSIZE - 1:0] = data_in[YSIZE-1:0];
+    assign dconn[YTOP][XSIZE - 1:0] = data_in_south;
+    assign uconn[0][XSIZE - 1:0] = data_in_north;
+    assign rconn[0][YSIZE - 1:0] = data_in_east;
+    assign lconn[XTOP][YSIZE - 1:0] = data_in_west;
 
     genvar i, j;
 
diff --git a/verilog/rtl/chaos_automaton.v:Zone.Identifier b/verilog/rtl/chaos_automaton.v:Zone.Identifier
index cfea768..bb947fc 100644
--- a/verilog/rtl/chaos_automaton.v:Zone.Identifier
+++ b/verilog/rtl/chaos_automaton.v:Zone.Identifier
@@ -1,3 +1,3 @@
 [ZoneTransfer]

 ZoneId=3

-HostUrl=https://files.slack.com/files-pri/T0156RWKTRA-F03SKB0CMK5/download/chaos_automaton.v?origin_team=T0156RWKTRA

+HostUrl=https://files.slack.com/files-pri/T0156RWKTRA-F03SG99NPGE/download/chaos_automaton.v?origin_team=T0156RWKTRA