Made a handful of corrections to the array connectivity;  the
array now passes tests 2 and 3.
diff --git a/verilog/dv/README.local b/verilog/dv/README.local
index cb6c608..20c407b 100644
--- a/verilog/dv/README.local
+++ b/verilog/dv/README.local
@@ -12,7 +12,7 @@
 	make setup
 	
 
-Simulation environment setup:
+Simulation environment setup (also requires PDK_ROOT and PDK from above):
 
 	setenv CORE_VERILOG_PATH /home/tim/gits/chaos_automaton_final/mgmt_core_wrapper/verilog
 	setenv DESIGNS /home/tim/gits/chaos_automaton_final
diff --git a/verilog/dv/chaos_test1/chaos_test1_tb.v b/verilog/dv/chaos_test1/chaos_test1_tb.v
index cacb564..fd26ef6 100644
--- a/verilog/dv/chaos_test1/chaos_test1_tb.v
+++ b/verilog/dv/chaos_test1/chaos_test1_tb.v
@@ -53,11 +53,11 @@
 	assign mprj_io[3] = (CSB == 1'b1) ? 1'b1 : 1'bz;
 
 	initial begin
-		// $dumpfile("chaos_test1.vcd");
-		// $dumpvars(0, chaos_test1_tb);
+		$dumpfile("chaos_test1.vcd");
+		$dumpvars(0, chaos_test1_tb);
 
 		// Repeat cycles of 1000 clock edges as needed to complete testbench
-		repeat (200) begin
+		repeat (100) begin
 			repeat (1000) @(posedge clock);
 			// $display("+1000 cycles");
 		end
diff --git a/verilog/dv/chaos_test3/chaos_test3.c b/verilog/dv/chaos_test3/chaos_test3.c
index a80b495..f4445c3 100644
--- a/verilog/dv/chaos_test3/chaos_test3.c
+++ b/verilog/dv/chaos_test3/chaos_test3.c
@@ -115,6 +115,9 @@
 	// Apply input values first, so that the bits are valid
 	// when the load process toggles "hold" on all cells.
 
+	// Apply a reset to all cells (auto-clearing bit)
+	reg_user_transfer = 4;
+
 	// Apply zero bits to all inputs
  	reg_user_data0 = 0;
  	reg_user_data1 = 0;
diff --git a/verilog/dv/local-install.md b/verilog/dv/local-install.md
new file mode 100644
index 0000000..7cd54c1
--- /dev/null
+++ b/verilog/dv/local-install.md
@@ -0,0 +1,65 @@
+# Local Installation (Linux)
+
+Instructions to install the dv setup locally (dockerless install).
+
+## You will need to fulfill these dependencies:
+
+* Icarus Verilog (10.2+)
+* RV32I Toolchain
+
+Using apt, you can install Icarus Verilog:
+
+```bash
+sudo apt-get install iverilog
+```
+
+Next, you will need to build the RV32I toolchain. First, export the installation path for the RV32I toolchain:
+
+```bash
+export GCC_PATH=<gcc-installation-path>
+```
+
+Then, run the following: 
+
+```bash
+# packages needed:
+sudo apt-get install autoconf automake autotools-dev curl libmpc-dev \
+    libmpfr-dev libgmp-dev gawk build-essential bison flex texinfo \
+    gperf libtool patchutils bc zlib1g-dev git libexpat1-dev
+
+sudo mkdir $GCC_PATH
+sudo chown $USER $GCC_PATH
+
+git clone https://github.com/riscv/riscv-gnu-toolchain riscv-gnu-toolchain-rv32i
+cd riscv-gnu-toolchain-rv32i
+git checkout 411d134
+git submodule update --init --recursive
+
+mkdir build; cd build
+../configure --with-arch=rv32i --prefix=$GCC_PATH
+make -j$(nproc)
+```
+
+# Running Simulation
+
+You will need to export these environment variables: 
+
+```bash
+export GCC_PATH=<gcc-installation-path>
+export PDK_PATH=<pdk-location/sky130A>
+```
+
+To run any simulation, you need to be at the top level of caravel_user_project.
+
+To run an RTL simulation for one of the DV tests:
+
+```bash
+SIM=RTL make verify-<dv-test>
+```
+
+To run a gate-level simulation for one of the DV tests:
+
+```bash
+SIM=GL make verify-<dv-test>
+```
+
diff --git a/verilog/rtl/chaos_subarray.v b/verilog/rtl/chaos_subarray.v
index b9bf395..252007c 100755
--- a/verilog/rtl/chaos_subarray.v
+++ b/verilog/rtl/chaos_subarray.v
@@ -234,16 +234,16 @@
 
 	/* Connect the endpoints of the array to the inputs and outputs of the module */
 
-	for (j = 0; j < YSIZE; j=j+1) begin: connx
-	    assign rconn[XSIZE][j] = ieast[j];
-	    assign lconn[0][j] = iwest[j];
+	for (j = 0; j < YSIZE; j=j+1) begin: conny
+	    assign rconn[0][j] = iwest[j];
+	    assign lconn[XSIZE][j] = ieast[j];
 	    assign oeast[j] = rconn[XSIZE][j];
 	    assign owest[j] = lconn[0][j];
 	end
 
-	for (i = 0; i < XSIZE; i=i+1) begin: conny
-	    assign uconn[YSIZE][i] = inorth[i];
-	    assign dconn[0][i] = isouth[i];
+	for (i = 0; i < XSIZE; i=i+1) begin: connx
+	    assign uconn[0][i] = isouth[i];
+	    assign dconn[YSIZE][i] = inorth[i];
 	    assign onorth[i] = uconn[YSIZE][i];
 	    assign osouth[i] = dconn[0][i];
 	end
diff --git a/verilog/rtl/user_defines.v b/verilog/rtl/user_defines.v
new file mode 100644
index 0000000..43ed500
--- /dev/null
+++ b/verilog/rtl/user_defines.v
@@ -0,0 +1,87 @@
+// SPDX-FileCopyrightText: 2020 Efabless Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+
+`default_nettype none
+
+`ifndef __USER_DEFINES_H
+// User GPIO initial configuration parameters
+`define __USER_DEFINES_H
+
+// Useful GPIO mode values.  These match the names used in defs.h.
+`define GPIO_MODE_MGMT_STD_INPUT_NOPULL    13'h0403
+`define GPIO_MODE_MGMT_STD_INPUT_PULLDOWN  13'h0803
+`define GPIO_MODE_MGMT_STD_INPUT_PULLUP    13'h0c03
+`define GPIO_MODE_MGMT_STD_OUTPUT          13'h1809
+`define GPIO_MODE_MGMT_STD_BIDIRECTIONAL   13'h1801
+`define GPIO_MODE_MGMT_STD_ANALOG          13'h000b
+
+`define GPIO_MODE_USER_STD_INPUT_NOPULL    13'h0402
+`define GPIO_MODE_USER_STD_INPUT_PULLDOWN  13'h0802
+`define GPIO_MODE_USER_STD_INPUT_PULLUP    13'h0c02
+`define GPIO_MODE_USER_STD_OUTPUT          13'h1808
+`define GPIO_MODE_USER_STD_BIDIRECTIONAL   13'h1800
+`define GPIO_MODE_USER_STD_OUT_MONITORED   13'h1802
+`define GPIO_MODE_USER_STD_ANALOG          13'h000a
+
+// The power-on configuration for GPIO 0 to 4 is fixed and cannot be
+// modified (allowing the SPI and debug to always be accessible unless
+// overridden by a flash program).
+
+// The values below can be any of the standard types defined above,
+// or they can be any 13-bit value if the user wants a non-standard
+// startup state for the GPIO.  By default, every GPIO from 5 to 37
+// is set to power up as an input controlled by the management SoC.
+// Users may want to redefine these so that the user project powers
+// up in a state that can be used immediately without depending on
+// the management SoC to run a startup program to configure the GPIOs.
+
+`define USER_CONFIG_GPIO_5_INIT  `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_6_INIT  `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_7_INIT  `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_8_INIT  `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_9_INIT  `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_10_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_11_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_12_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_13_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_14_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+
+// Configurations of GPIO 15 to 25 are used on caravel but not caravan.
+`define USER_CONFIG_GPIO_15_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_16_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_17_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_18_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_19_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_20_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_21_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_22_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_23_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_24_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_25_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+
+`define USER_CONFIG_GPIO_26_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_27_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_28_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_29_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_30_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_31_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_32_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_33_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_34_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_35_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_36_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+`define USER_CONFIG_GPIO_37_INIT `GPIO_MODE_USER_STD_BIDIRECTIONAL
+
+`endif // __USER_DEFINES_H
diff --git a/verilog/rtl/user_project_wrapper.v b/verilog/rtl/user_project_wrapper.v
index c5e0dd8..09691b6 100644
--- a/verilog/rtl/user_project_wrapper.v
+++ b/verilog/rtl/user_project_wrapper.v
@@ -191,6 +191,7 @@
     reg [2:0] xfer_state;	/* state of the data transfer		*/
     reg [1:0] xfer_ctrl;	/* Configuration transfer trigger bits	*/
     reg [63:0] config_data;	/* 64 bits to read or write configuration */
+    reg local_reset;		/* Reset applied from a register	*/
 
     reg [ASIZE - 1:0] cell_addr;	/* Core cell to address	*/
     reg [ASIZE - 1:0] cell_offset;	/* Current offset of shift register */
@@ -223,6 +224,7 @@
     reg [31:0] wbs_dat_o;
     reg [63:0] wdata;
     reg write;
+    wire all_cell_reset;
 
     // Direction for each GPIO (32 used)
     reg [31:0] gpio_oeb;
@@ -276,7 +278,7 @@
     // GPIOs can be clustered on either end or in the center of the array
     // side, or distributed along the side (1 GPIO per 5 array cells)
     reg [1:0] gpio_output_slice;
-    reg [1:0] gpio_input_slice;
+    reg [2:0] gpio_input_slice;
 
     // Registered GPIO directions go directly to io_oeb[37:6].  Leave the
     // lower 6 GPIO to the management processor.
@@ -301,6 +303,8 @@
     assign wbs_ack_o = ready;
     assign iomem_we = wbs_sel_i & {4{wbs_we_i}};
 
+    assign all_cell_reset = wb_rst_i | local_reset;
+
     // IRQ
     assign user_irq = 3'b000;	// Unused
 
@@ -318,7 +322,7 @@
 	     .vssd1(vssd1),
 	`endif
         .clk(clk),
-        .reset(wb_rst_i),
+        .reset(all_cell_reset),
         .hold(hold),
         .rdata(rdata),
         .wdata(wdata),
@@ -395,39 +399,42 @@
     // the arrays (high to low index is top to bottom, or right to left).
 
     assign gpio_east = 	// I/O 15 to 6
-	(gpio_input_slice == 0) ?	// Distributed
+	(gpio_input_slice == 0) ? 50'b0 :	// No pad input
+	(gpio_input_slice == 1) ?	// Distributed
 		{2'b0, io_in[15], 4'b0, io_in[14], 4'b0, io_in[13],
 		 4'b0, io_in[12], 4'b0, io_in[11], 4'b0, io_in[10],
 		 4'b0, io_in[9],  4'b0, io_in[8],  4'b0, io_in[7],
 		 4'b0, io_in[6],  2'b0} :
-	(gpio_input_slice == 1) ? {40'b0, io_in[15:6]} :	// Bottom shifted
-	(gpio_input_slice == 2) ? {20'b0, io_in[15:6], 20'b0} : // Centered
+	(gpio_input_slice == 2) ? {40'b0, io_in[15:6]} :	// Bottom shifted
+	(gpio_input_slice == 3) ? {20'b0, io_in[15:6], 20'b0} : // Centered
 	{io_in[15:6], 40'b0};					// Top shifted
 
     assign gpio_north = 	// I/O 21 to 16
-	(gpio_input_slice == 0) ?	// Distributed
+	(gpio_input_slice == 0) ? 30'b0 :	// No pad input
+	(gpio_input_slice == 1) ?	// Distributed
 		{2'b0, io_in[16], 4'b0, io_in[17], 4'b0, io_in[18],
 		 4'b0, io_in[19], 4'b0, io_in[20], 4'b0, io_in[21], 2'b0} :
-	(gpio_input_slice == 1) ?	// Right shifted
+	(gpio_input_slice == 2) ?	// Right shifted
 		{14'b0, io_in[16], io_in[17], io_in[18], io_in[19],
 		io_in[20], io_in[21]} :
-	(gpio_input_slice == 2) ?	// Centered
+	(gpio_input_slice == 3) ?	// Centered
 		{7'b0, io_in[16], io_in[17], io_in[18], io_in[19],
 		io_in[20], io_in[21], 7'b0} :
 	{io_in[16], io_in[17], io_in[18], io_in[19], io_in[20],
 		io_in[21], 4'b0};	// Left shifted
 
     assign gpio_west = 	// I/O 22 to 31
-	(gpio_input_slice == 0) ?	// Distributed
+	(gpio_input_slice == 0) ? 50'b0 :	// No pad input
+	(gpio_input_slice == 1) ?	// Distributed
 		{2'b0, io_in[22], 4'b0, io_in[23], 4'b0, io_in[24],
 		 4'b0, io_in[25], 4'b0, io_in[26], 4'b0, io_in[27],
 		 4'b0, io_in[28], 4'b0, io_in[29], 4'b0, io_in[30],
 		 4'b0, io_in[31],  2'b0} :
-	(gpio_input_slice == 1) ?	// Bottom shifted
+	(gpio_input_slice == 2) ?	// Bottom shifted
 		{40'b0, io_in[22], io_in[23], io_in[24], io_in[25],
 		io_in[26], io_in[27], io_in[28], io_in[29], io_in[31],
 		io_in[31]} :
-	(gpio_input_slice == 2) ?	// Centered
+	(gpio_input_slice == 3) ?	// Centered
 		{20'b0, io_in[22], io_in[23], io_in[24], io_in[25],
 		io_in[26], io_in[27], io_in[28], io_in[29], io_in[31],
 		io_in[31], 20'b0} :
@@ -436,11 +443,12 @@
 		40'b0};					// Top shifted
 
     assign gpio_south = 	// I/O 32 to 37
-	(gpio_input_slice == 0) ?	// Distributed
+	(gpio_input_slice == 0) ? 30'b0 :	// No pad input
+	(gpio_input_slice == 1) ?	// Distributed
 		{2'b0, io_in[37], 4'b0, io_in[36], 4'b0, io_in[35],
 		 4'b0, io_in[34], 4'b0, io_in[33], 4'b0, io_in[32], 2'b0} :
-	(gpio_input_slice == 1) ? {14'b0, io_in[37:32]} :	// Right shifted
-	(gpio_input_slice == 2) ? {7'b0, io_in[37:32], 7'b0} :	// Centered
+	(gpio_input_slice == 2) ? {14'b0, io_in[37:32]} :	// Right shifted
+	(gpio_input_slice == 3) ? {7'b0, io_in[37:32], 7'b0} :	// Centered
 	{io_in[37:32], 14'b0};					// Left shifted
 
     // East side
@@ -633,7 +641,7 @@
 	end else if (direct_sel) begin
 	    rdata_pre = gpio_oeb;
 	end else if (source_sel) begin
-	    rdata_pre = {10'b0, gpio_output_slice, 2'b0, gpio_input_slice,
+	    rdata_pre = {9'b0, gpio_output_slice, 1'b0, gpio_input_slice,
 			1'b0, north_loopback, 1'b0, east_loopback,
 			1'b0, south_loopback, 1'b0, west_loopback};
 	end else if (data_sel[0]) begin
@@ -672,13 +680,12 @@
     assign latched_in_south = latched_in[YSIZE+XSIZE-1:YSIZE];
     assign latched_in_west = latched_in[YSIZE-1:0];
 
-    /* Write data */
-
     always @(posedge wb_clk_i or posedge wb_rst_i) begin
         if (wb_rst_i) begin
 	    cell_addr <= 0;
 	    gpio_oeb <= 0;
             xfer_ctrl <= 0;
+            local_reset <= 0;
 	    west_loopback <= 0;
 	    east_loopback <= 0;
 	    north_loopback <= 0;
@@ -692,7 +699,10 @@
 	    write <= 1'b0;
             if (valid && !ready && wbs_adr_i[31:8] == BASE_ADR[31:8]) begin
                 if (xfer_sel) begin
-                    if (iomem_we[0]) xfer_ctrl <= wbs_dat_i[1:0];
+                    if (iomem_we[0]) begin
+			xfer_ctrl <= wbs_dat_i[1:0];
+			local_reset <= wbs_dat_i[2];
+		    end
 		end else if (config_sel[0]) begin
                     if (iomem_we[0]) wdata[7:0] <= wbs_dat_i[7:0];
                     if (iomem_we[1]) wdata[15:8] <= wbs_dat_i[15:8];
@@ -724,8 +734,8 @@
 			 north_loopback <= wbs_dat_i[6:4];
 		    end
                     if (iomem_we[2]) begin
-			 gpio_input_slice <= wbs_dat_i[1:0];
-			 gpio_output_slice <= wbs_dat_i[5:4];
+			 gpio_input_slice <= wbs_dat_i[2:0];
+			 gpio_output_slice <= wbs_dat_i[6:4];
 		    end
 		end else if (data_sel[0]) begin
                     if (iomem_we[0]) latched_in[7:0] <= wbs_dat_i[7:0];
@@ -755,6 +765,7 @@
                 end
             end else begin
                 xfer_ctrl <= 0;      // Immediately self-resetting
+                local_reset <= 0;    // Immediately self-resetting
             end
         end
     end
@@ -855,10 +866,10 @@
     output [XSIZE-1:0] data_out_north,
     output [XSIZE-1:0] data_out_south
 );
-    wire [XSIZE - 1: 0] uconn [YTOP: 0];
-    wire [XSIZE - 1: 0] dconn [YTOP: 0];
-    wire [YSIZE - 1: 0] rconn [XTOP: 0];
-    wire [YSIZE - 1: 0] lconn [XTOP: 0];
+    wire [XSIZE - 1: 0] uconn [YTOP: 0];	// Upward moving data
+    wire [XSIZE - 1: 0] dconn [YTOP: 0];	// Downward moving data
+    wire [YSIZE - 1: 0] rconn [XTOP: 0];	// Rightward moving data
+    wire [YSIZE - 1: 0] lconn [XTOP: 0];	// Leftward moving data
 
     wire [YTOP - 1: 0] shiftreg [XTOP: 0];
     wire [YTOP - 1: 0] clkarray [XTOP: 0];
@@ -874,18 +885,18 @@
     //       |^|^      |^|^      |^|^   
     //       v|v|      v|v|      v|v|   
     //     +------+  +------+  +------+
-    //  l->|      |->|      |->|      |->l
-    //  r<-|      |<-|      |<-|      |<-r
-    //  l->|      |->|      |->|      |->l
-    //  r<-|      |<-|      |<-|      |<-r
+    //  r->|      |->|      |->|      |->r
+    //  l<-|      |<-|      |<-|      |<-l
+    //  r->|      |->|      |->|      |->r
+    //  l<-|      |<-|      |<-|      |<-l
     //     +------+  +------+  +------+
     //       |^|^      |^|^      |^|^   
     //       v|v|      v|v|      v|v|   
     //     +------+  +------+  +------+
-    //  l->|      |->|      |->|      |->l
-    //  r<-|      |<-|      |<-|      |<-r
-    //  l->|      |->|      |->|      |->l
-    //  r<-|      |<-|      |<-|      |<-r
+    //  r->|      |->|      |->|      |->r
+    //  l<-|      |<-|      |<-|      |<-l
+    //  r->|      |->|      |->|      |->r
+    //  l<-|      |<-|      |<-|      |<-l
     //     +------+  +------+  +------+
     //       |^|^      |^|^      |^|^   
     //       v|v|      v|v|      v|v|   
@@ -918,10 +929,10 @@
     assign data_out_east = rconn[XTOP][YSIZE - 1:0];
     assign data_out_west = lconn[0][YSIZE - 1:0];
 
-    assign dconn[YTOP][XSIZE - 1:0] = data_in_south;
-    assign uconn[0][XSIZE - 1:0] = data_in_north;
-    assign rconn[0][YSIZE - 1:0] = data_in_east;
-    assign lconn[XTOP][YSIZE - 1:0] = data_in_west;
+    assign dconn[YTOP][XSIZE - 1:0] = data_in_north;
+    assign uconn[0][XSIZE - 1:0] = data_in_south;
+    assign rconn[0][YSIZE - 1:0] = data_in_west;
+    assign lconn[XTOP][YSIZE - 1:0] = data_in_east;
 
     genvar i, j;