add a test of 4ft4 in caravel

the management processor gives 4ft4 a program to generate fibonacci
numbers, then waits for it to finish and checks the results.
diff --git a/verilog/dv/4ft4_fib/4ft4_fib.c b/verilog/dv/4ft4_fib/4ft4_fib.c
new file mode 100644
index 0000000..8336c2a
--- /dev/null
+++ b/verilog/dv/4ft4_fib/4ft4_fib.c
@@ -0,0 +1,250 @@
+/*
+ * SPDX-FileCopyrightText: 2020 Efabless Corporation
+ * SPDX-FileCopyrightText: 2022 Andrew Foote
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// This include is relative to $CARAVEL_PATH (see Makefile)
+#include <defs.h>
+#include <stub.c>
+
+/*
+    4FT4 Fibonacci test:
+        - Configures user IO pins 16 to 31 as management outputs (status/check bits)
+        - Does a basic test of reading and writing from the start of RAM 0 over wishbone
+        - Downloads a program to ROM 0 at offset 64 that generates fibonacci numbers
+          in RAM 0 words 0 to 31
+        - Waits for the final fibonacci number to arrive, then validates the contents
+          of RAM 0 words 0 to 15
+*/
+
+#define reg_mprj_wb_base ((volatile uint32_t*)0x30000000)  // wishbone window at 0x30000000, indexed in 32-bit words
+#define reg_mprj_wb_base_b ((volatile uint8_t*)0x30000000)  // same address, indexed in bytes
+
+#define ROM_BASE 0x0  // presumably the byte offset of 4ft4 ROM 0 in the window; not referenced by main()
+#define RAM_BASE 0x10000  // byte offset added to reg_mprj_wb_base_b to reach 4ft4 RAM 0
+// FAIL(n): never returns; loops writing 0xDEAD000 + ((n) << 16) to reg_mprj_datal. NOTE(review): 0xDEAD000 has 7 hex digits - confirm intent.
+#define FAIL(n) \
+    do { \
+        reg_mprj_datal = 0xDEAD000 + ((n) << 16); \
+    } while (1)
+// 4ft4 machine code, one instruction byte per element; main() loads it at ROM word 64 (0x40), so operand 0x5b = 64 + 27 is element 27.
+static uint32_t program[] = {
+  // CLC
+  0xf1,
+  // FIM 0P 0
+  0x20,
+  0x00,
+  // FIM 1P 0
+  0x22,
+  0x00,
+  // FIM 2P 1
+  0x24,
+  0x01,
+  // SRC 0P
+  0x21,
+  // LD 3
+  0xa3,
+  // WRM
+  0xe0,
+  // INC 1
+  0x61,
+  // SRC 0P
+  0x21,
+  // LD 2
+  0xa2,
+  // WRM
+  0xe0,
+  // INC 1
+  0x61,
+  // SRC 0P
+  0x21,
+  // LD 5
+  0xa5,
+  // WRM
+  0xe0,
+  // INC 1
+  0x61,
+  // SRC 0P
+  0x21,
+  // LD 4
+  0xa4,
+  // WRM
+  0xe0,
+  // INC 1
+  0x61,
+  // LDM 14
+  0xde,
+  // XCH 14
+  0xbe,
+  // LDM 10
+  0xda,
+  // XCH 15
+  0xbf,
+  // SRC 0P  (element 27: ROM address 0x5b when loaded at 64)
+  0x21,
+  // CLC
+  0xf1,
+  // LD 3
+  0xa3,
+  // ADD 5
+  0x85,
+  // WRM
+  0xe0,
+  // XCH 7
+  0xb7,
+  // INC 1
+  0x61,
+  // SRC 0P
+  0x21,
+  // LD 2
+  0xa2,
+  // ADD 4
+  0x84,
+  // WRM
+  0xe0,
+  // XCH 6
+  0xb6,
+  // INC 1
+  0x61,
+  // SRC 0P
+  0x21,
+  // LD 5
+  0xa5,
+  // XCH 3
+  0xb3,
+  // LD 4
+  0xa4,
+  // XCH 2
+  0xb2,
+  // LD 7
+  0xa7,
+  // XCH 5
+  0xb5,
+  // LD 6
+  0xa6,
+  // XCH 4
+  0xb4,
+  // ISZ 15  (second byte 0x5b: presumably the branch target, i.e. the SRC 0P at element 27)
+  0x7f,
+  0x5b,
+  // LDM 8
+  0xd8,
+  // XCH 15
+  0xbf,
+  // INC 0
+  0x60,
+  // ISZ 14  (second byte 0x5b: presumably the same branch target as the ISZ 15 above)
+  0x7e,
+  0x5b,
+};
+
+void main()
+{
+    /* Runs the 4ft4 Fibonacci test; progress and the verdict are reported through reg_mprj_datal.
+    IO Control Registers
+    | DM     | VTRIP | SLOW  | AN_POL | AN_SEL | AN_EN | MOD_SEL | INP_DIS | HOLDH | OEB_N | MGMT_EN |
+    | 3-bits | 1-bit | 1-bit | 1-bit  | 1-bit  | 1-bit | 1-bit   | 1-bit   | 1-bit | 1-bit | 1-bit   |
+    Output: 0000_0110_0000_1110  (0x1808) = GPIO_MODE_USER_STD_OUTPUT
+    | DM     | VTRIP | SLOW  | AN_POL | AN_SEL | AN_EN | MOD_SEL | INP_DIS | HOLDH | OEB_N | MGMT_EN |
+    | 110    | 0     | 0     | 0      | 0      | 0     | 0       | 1       | 0     | 0     | 0       |
+    NOTE: this function only applies GPIO_MODE_MGMT_STD_OUTPUT (IO 16-31, below); the
+    user-mode encodings listed in this comment are reference material, not used here.
+    Input: 0000_0001_0000_1111 (0x0402) = GPIO_MODE_USER_STD_INPUT_NOPULL
+    | DM     | VTRIP | SLOW  | AN_POL | AN_SEL | AN_EN | MOD_SEL | INP_DIS | HOLDH | OEB_N | MGMT_EN |
+    | 001    | 0     | 0     | 0      | 0      | 0     | 0       | 0       | 0     | 1     | 0       |
+    */
+
+    /* Set up the housekeeping SPI to be connected internally so    */
+    /* that external pin changes don't affect it.            */
+
+    reg_spi_enable = 1;
+    reg_wb_enable = 1;    // NOTE(review): presumably enables the wishbone accesses made below - confirm in defs.h
+    // reg_spimaster_config = 0xa002;    // Enable, prescaler = 2,
+                                        // connect to housekeeping SPI
+
+    // Hand IO 16..31 to the management core as outputs.  The testbench
+    // watches mprj_io[31:16] ("checkbits") for the 0xAB60 start marker and
+    // the 0x1337 pass marker that this function writes to reg_mprj_datal.
+
+    reg_mprj_io_31 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_30 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_29 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_28 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_27 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_26 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_25 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_24 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_23 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_22 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_21 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_20 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_19 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_18 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_17 = GPIO_MODE_MGMT_STD_OUTPUT;
+    reg_mprj_io_16 = GPIO_MODE_MGMT_STD_OUTPUT;
+
+    /* Apply configuration: start the transfer, then spin while reg_mprj_xfer still reads 1 */
+    reg_mprj_xfer = 1;
+    while (reg_mprj_xfer == 1);
+
+    reg_la2_oenb = reg_la2_iena = 0x00000000;    // [95:64]
+
+    // Flag start of the test (the testbench waits for 0xAB60 on the check bits)
+    reg_mprj_datal = 0xAB600000;
+    // RAM sanity check: 32-bit patterns are written, but only the low nibble is expected back
+    *((volatile uint32_t *)(reg_mprj_wb_base_b + RAM_BASE)) = 0xfefefefe;
+    if ((*((volatile uint32_t *)(reg_mprj_wb_base_b + RAM_BASE))) != 0xe) {
+        FAIL(1);
+    }
+
+    *((volatile uint32_t *)(reg_mprj_wb_base_b + RAM_BASE + 4)) = 0xeaeaeaea;
+    if ((*((volatile uint32_t *)(reg_mprj_wb_base_b + RAM_BASE + 4))) != 0xa) {
+        FAIL(2);
+    }
+
+    // Load the program into ROM: one program[] entry per 32-bit word, starting at word 64
+    for (int i = 0; i < sizeof(program)/sizeof(program[0]); i++) {
+        uint32_t w = program[i];
+
+        *(reg_mprj_wb_base + 64 + i) = w;
+    }
+
+    // Clear out the first 64 ROM locations (words 0..63, just below the program) to break the loop
+    for (int i = 0; i < 64; i++) {
+        *(reg_mprj_wb_base + i) = 0x0;
+    }
+
+    /* Expected RAM contents; "reg 0" matches expectation[] below and "reg 1" ends in the 6 polled for:
+    !expect ram 0 reg 0: 0 0 1 0 1 0 2 0 3 0 5 0 8 0 d 0
+    !expect ram 0 reg 1: 5 1 2 2 7 3 9 5 0 9 9 e 9 7 2 6
+    */
+
+    // wait for the last expected value (0x6 at byte offset 0x7c, i.e. word 31) to show up in 4ft4 ram
+    while ((*((volatile uint32_t *)(reg_mprj_wb_base_b + RAM_BASE + 0x7c))) != 0x6) {
+        reg_mprj_datal = 0xAB610000;    // marker rewritten on every polling iteration
+    }
+
+    const uint8_t expectation[] = { 0, 0, 1, 0, 1, 0, 2, 0, 3, 0, 5, 0, 8, 0, 0xd, 0};
+    for (int i = 0; i < 16; i++) {
+        uint8_t v = (*((volatile uint32_t *)(reg_mprj_wb_base_b + RAM_BASE + 4 * i)));
+        reg_mprj_datal = (((uint32_t)v) << 16);    // expose the value just read in bits 16 and up
+        if (v != expectation[i]) {
+            FAIL(0x100 + i);
+        }
+    }
+
+    reg_mprj_datal = 0x13370000;    // pass marker: the testbench waits for 0x1337 on the check bits
+}
diff --git a/verilog/dv/4ft4_fib/4ft4_fib_tb.v b/verilog/dv/4ft4_fib/4ft4_fib_tb.v
new file mode 100644
index 0000000..18b1256
--- /dev/null
+++ b/verilog/dv/4ft4_fib/4ft4_fib_tb.v
@@ -0,0 +1,159 @@
+// SPDX-FileCopyrightText: 2020 Efabless Corporation
+// SPDX-FileCopyrightText: 2022 Andrew Foote
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+
+`default_nettype none
+
+`timescale 1 ns / 1 ps
+// Testbench: boots caravel from 4ft4_fib.hex and watches mprj_io[31:16] for the start (AB60) and pass (1337) markers.
+module tb_4ft4_fib;
+    reg clock;
+    reg RSTB;
+    reg CSB;                // driven below but not connected to anything in this module
+    reg power1, power2;
+    reg power3, power4;     // never assigned in this module
+
+    wire gpio;
+    wire [37:0] mprj_io;
+    wire [7:0] mprj_io_0;   // not referenced anywhere else in this module
+    wire [15:0] checkbits;
+
+    assign checkbits = mprj_io[31:16];  // status word compared against 16'hAB60 / 16'h1337 below
+
+    assign mprj_io[3] = 1'b1;  // NOTE(review): presumably holds the housekeeping SPI CSB pin high - confirm
+
+    // External clock is used by default.  Make this artificially fast for the
+    // simulation.  Normally this would be a slow clock and the digital PLL
+    // would be the fast clock.
+
+    always #12.5 clock <= (clock === 1'b0);
+
+    initial begin
+        clock = 0;
+    end
+
+    integer i;
+
+    initial begin
+        $dumpfile("4ft4_fib.vcd");
+        $dumpvars(0, tb_4ft4_fib);
+
+        for (i = 0; i < 3; i++) begin
+            $dumpvars(0, uut.mprj.mprj.sys.cpu.pc_stack.program_counters[i]);
+        end
+
+        for (i = 0; i < 16; i++) begin
+            $dumpvars(0, uut.mprj.mprj.sys.cpu.datapath.registers[i]);
+        end
+
+        // Timeout watchdog: allow 40 x 1000 rising clock edges, then report failure and finish
+        repeat (40) begin
+            repeat (1000) @(posedge clock);
+            $display("+1000 cycles");
+            $fflush();
+        end
+        $display("%c[1;31m",27);
+        `ifdef GL
+            $display ("Monitor: Timeout, 4ft4 fibonacci test failed (GL)");
+        `else
+            $display ("Monitor: Timeout, 4ft4 fibonacci test failed (RTL)");
+        `endif
+        $display("%c[0m",27);
+        $finish;
+    end
+
+    initial begin        // Pass monitor: start marker first, then the pass marker ends the run
+       wait(checkbits == 16'hAB60);
+        $display("Monitor: 4ft4 fibonacci test started");
+        wait(checkbits == 16'h1337);
+        `ifdef GL
+            $display("Monitor: 4ft4 fibonacci test passed (GL)");
+        `else
+            $display("Monitor: 4ft4 fibonacci test passed (RTL)");
+        `endif
+        $finish;
+    end
+
+    initial begin
+        RSTB <= 1'b0;
+        CSB  <= 1'b1;        // Force CSB high
+        #2000;
+        RSTB <= 1'b1;            // Release reset
+        #100000;
+        CSB = 1'b0;        // CSB can be released
+    end
+
+    initial begin        // Power-up sequence
+        power1 <= 1'b0;
+        power2 <= 1'b0;
+        #200;
+        power1 <= 1'b1;
+        #200;
+        power2 <= 1'b1;
+    end
+
+    wire flash_csb;
+    wire flash_clk;
+    wire flash_io0;
+    wire flash_io1;
+
+    wire VDD3V3 = power1;
+    wire VDD1V8 = power2;
+    wire USER_VDD3V3 = power3;  // not connected to the caravel instance below
+    wire USER_VDD1V8 = power4;  // not connected to the caravel instance below
+    wire VSS = 1'b0;
+
+    caravel uut (
+        .vddio      (VDD3V3),
+        .vddio_2  (VDD3V3),
+        .vssio      (VSS),
+        .vssio_2  (VSS),
+        .vdda      (VDD3V3),
+        .vssa      (VSS),
+        .vccd      (VDD1V8),
+        .vssd      (VSS),
+        .vdda1    (VDD3V3),
+        .vdda1_2  (VDD3V3),
+        .vdda2    (VDD3V3),
+        .vssa1      (VSS),
+        .vssa1_2  (VSS),
+        .vssa2      (VSS),
+        .vccd1      (VDD1V8),
+        .vccd2      (VDD1V8),
+        .vssd1      (VSS),
+        .vssd2      (VSS),
+        .clock    (clock),
+        .gpio     (gpio),
+        .mprj_io  (mprj_io),
+        .flash_csb(flash_csb),
+        .flash_clk(flash_clk),
+        .flash_io0(flash_io0),
+        .flash_io1(flash_io1),
+        .resetb      (RSTB)
+    );
+
+    spiflash #(
+        .FILENAME("4ft4_fib.hex")
+    ) spiflash (
+        .csb(flash_csb),
+        .clk(flash_clk),
+        .io0(flash_io0),
+        .io1(flash_io1),
+        .io2(),            // not used
+        .io3()            // not used
+    );
+
+endmodule
+`default_nettype wire
diff --git a/verilog/dv/4ft4_fib/Makefile b/verilog/dv/4ft4_fib/Makefile
new file mode 100644
index 0000000..cee6518
--- /dev/null
+++ b/verilog/dv/4ft4_fib/Makefile
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2020 Efabless Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+PWDD := $(shell pwd)
+BLOCKS := $(shell basename $(PWDD))
+
+# ---- Include Partitioned Makefiles ----
+# -O2 is appended to CPUFLAGS after var.makefile and before cpu.makefile; keep that order (presumably cpu.makefile consumes CPUFLAGS - confirm).
+CONFIG = caravel_user_project
+
+include $(MCW_ROOT)/verilog/dv/make/env.makefile
+include $(MCW_ROOT)/verilog/dv/make/var.makefile
+CPUFLAGS += -O2
+include $(MCW_ROOT)/verilog/dv/make/cpu.makefile
+include $(MCW_ROOT)/verilog/dv/make/sim.makefile
diff --git a/verilog/dv/4ft4_fib/signals.tcl b/verilog/dv/4ft4_fib/signals.tcl
new file mode 100644
index 0000000..42554d2
--- /dev/null
+++ b/verilog/dv/4ft4_fib/signals.tcl
@@ -0,0 +1,16 @@
+set signals [list]
+# Signals added to the GTKWave view: the user project's wishbone port and the mprj_io[31:16] check bits.
+lappend signals "uut.mprj.mprj.wb_clock_i"
+lappend signals "uut.mprj.mprj.wb_reset_i"
+lappend signals "uut.mprj.mprj.wb_cyc_i"
+lappend signals "uut.mprj.mprj.wb_strobe_i"
+lappend signals "uut.mprj.mprj.wb_we_i"
+lappend signals "uut.mprj.mprj.wb_addr_i"
+lappend signals "uut.mprj.mprj.wb_data_i"
+lappend signals "uut.mprj.mprj.wb_data_o"
+lappend signals "uut.mprj.mprj.wb_ack_o"
+lappend signals "uut.mprj_io\[31:16\]"
+#lappend signals "tb_system.dut.cpu.datapath.\\registers\[13\]\[3:0\]"
+#lappend signals "tb_system.dut.cpu.datapath.\\registers\[14\]\[3:0\]"
+# NOTE(review): the two disabled entries above use a tb_system.dut hierarchy, unlike the uut.* paths - update or drop.
+set num_added [ gtkwave::addSignalsFromList $signals ]
diff --git a/verilog/dv/Makefile b/verilog/dv/Makefile
index 43a4149..89b2acb 100644
--- a/verilog/dv/Makefile
+++ b/verilog/dv/Makefile
@@ -14,13 +14,12 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-# ---- Test patterns for project striVe ----
-
 .SUFFIXES:
 .SILENT: clean all
 
 
 PATTERNS = io_ports la_test1 la_test2 wb_port mprj_stimulus
+PATTERNS += 4ft4_fib
 
 all:  ${PATTERNS}
 
diff --git a/verilog/includes/includes.rtl.caravel_user_project b/verilog/includes/includes.rtl.caravel_user_project
index 31ab09b..68b7669 100644
--- a/verilog/includes/includes.rtl.caravel_user_project
+++ b/verilog/includes/includes.rtl.caravel_user_project
@@ -1,5 +1,17 @@
 # Caravel user project includes
 -v $(USER_PROJECT_VERILOG)/rtl/user_project_wrapper.v	     
 -v $(USER_PROJECT_VERILOG)/rtl/user_proj_example.v
-
- 
\ No newline at end of file
+-v $(USER_PROJECT_VERILOG)/rtl/4ft4_top.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/wb_system.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/cpu.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/cpu_control.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/alu.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/datapath.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/pc_stack.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/rom.v
+-v $(USER_PROJECT_VERILOG)/../4ft4/rtl/ram.v
++define+NO_TRISTATE
+# https://github.com/steveicarus/iverilog/issues/187
++define+ROM_FILE_BASE="../../rtl/4ft4_rom"
++define+ROM_CAPACITY=256
++incdir+$(USER_PROJECT_VERILOG)/../4ft4/rtl