aes test added
diff --git a/verilog/dv/user_aes/Makefile b/verilog/dv/user_aes/Makefile
new file mode 100644
index 0000000..c674de1
--- /dev/null
+++ b/verilog/dv/user_aes/Makefile
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: 2020 Efabless Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+# ---- Include Partitioned Makefiles ----
+
+CONFIG = caravel_user_project
+ 
+#######################################################################
+## Caravel Verilog for Integration Tests
+#######################################################################
+
+DESIGNS?=../../..
+TOOLS?=/opt/riscv64i/
+
+export USER_PROJECT_VERILOG ?=  $(DESIGNS)/verilog
+## YIFIVE FIRMWARE
+YIFIVE_FIRMWARE_PATH = $(USER_PROJECT_VERILOG)/dv/firmware
+GCC64_PREFIX?=riscv64-unknown-elf
+
+
+## Simulation mode: RTL/GL
+SIM?=RTL
+DUMP?=OFF
+RISC_CORE?=0
+
+### To Enable IVERILOG FST DUMP
+export IVERILOG_DUMPER = fst
+
+
+.SUFFIXES:
+
+PATTERN = user_aes
+
+all:  ${PATTERN:=.vcd}
+
+
+vvp:  ${PATTERN:=.vvp}
+
+%.vvp: %_tb.v
+	${GCC64_PREFIX}-gcc -O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las  -D__RVC_EXT -static -std=gnu99 -fno-common -fno-builtin-printf -DTCM=0 -Wa,-march=rv32imc -march=rv32imc -mabi=ilp32 -DFLAGS_STR=\""-O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las "\"  -c -I./ -I$(YIFIVE_FIRMWARE_PATH)  user_aes.c -o user_aes.o
+	${GCC64_PREFIX}-gcc -O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las  -D__RVC_EXT -static -std=gnu99 -fno-common -fno-builtin-printf -DTCM=0 -Wa,-march=rv32imc -march=rv32imc -mabi=ilp32 -DFLAGS_STR=\""-O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las "\"  -c -I./ -I$(YIFIVE_FIRMWARE_PATH)  aes.c -o aes.o
+	${GCC64_PREFIX}-gcc -O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las  -D__RVC_EXT -static -std=gnu99 -fno-common -fno-builtin-printf -DTCM=0 -Wa,-march=rv32imc -march=rv32imc -mabi=ilp32 -DFLAGS_STR=\""-O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las "\"  -c -I./ -I$(YIFIVE_FIRMWARE_PATH)  $(YIFIVE_FIRMWARE_PATH)/sc_print.c -o sc_print.o
+	${GCC64_PREFIX}-gcc -O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las  -D__RVC_EXT -static -std=gnu99 -fno-common -fno-builtin-printf -DTCM=0 -Wa,-march=rv32imc -march=rv32imc -mabi=ilp32 -DFLAGS_STR=\""-O2 -funroll-loops -fpeel-loops -fgcse-sm -fgcse-las "\"  -D__ASSEMBLY__=1 -c -I./ -I$(YIFIVE_FIRMWARE_PATH)  $(YIFIVE_FIRMWARE_PATH)/crt.S -o crt.o
+	${GCC64_PREFIX}-gcc -o user_aes.elf -T $(YIFIVE_FIRMWARE_PATH)/link.ld user_aes.o aes.o sc_print.o crt.o -nostartfiles -lc -lgcc -march=rv32imc -mabi=ilp32 -N
+	${GCC64_PREFIX}-objcopy -O verilog user_aes.elf user_aes.hex
+	${GCC64_PREFIX}-objdump -D user_aes.elf > user_aes.dump
+	rm crt.o user_aes.o
+ifeq ($(SIM),RTL)
+   ifeq ($(DUMP),OFF)
+	iverilog -g2012 -DFUNCTIONAL -DSIM -I $(PDK_PATH) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.rtl.$(CONFIG) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.rtl.lib  \
+	$< -o $@ 
+    else  
+	iverilog -g2012 -DWFDUMP -DFUNCTIONAL -DSIM -I $(PDK_PATH) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.rtl.$(CONFIG) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.rtl.lib  \
+	$< -o $@ 
+   endif
+else  
+   ifeq ($(DUMP),OFF)
+	iverilog -g2012 -DFUNCTIONAL -DUSE_POWER_PINS -DGL -I $(PDK_PATH) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.gl.$(CONFIG) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.gl.lib \
+	$< -o $@ 
+    else  
+	iverilog -g2012 -DWFDUMP -DFUNCTIONAL -DUSE_POWER_PINS -DGL -I $(PDK_PATH) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.gl.$(CONFIG) \
+	-f$(USER_PROJECT_VERILOG)/includes/includes.gl.lib \
+	$< -o $@ 
+   endif
+endif
+
+%.vcd: %.vvp
+	vvp $< +risc_core_id=$(RISC_CORE)
+
+
+# ---- Clean ----
+
+clean:
+	rm -f *.elf *.hex *.bin *.vvp *.vcd *.log *.dump *.o
+
+.PHONY: clean hex all
diff --git a/verilog/dv/user_aes/aes.c b/verilog/dv/user_aes/aes.c
new file mode 100644
index 0000000..4481f7b
--- /dev/null
+++ b/verilog/dv/user_aes/aes.c
@@ -0,0 +1,572 @@
+/*
+
+This is an implementation of the AES algorithm, specifically ECB, CTR and CBC mode.
+Block size can be chosen in aes.h - available choices are AES128, AES192, AES256.
+
+The implementation is verified against the test vectors in:
+  National Institute of Standards and Technology Special Publication 800-38A 2001 ED
+
+ECB-AES128
+----------
+
+  plain-text:
+    6bc1bee22e409f96e93d7e117393172a
+    ae2d8a571e03ac9c9eb76fac45af8e51
+    30c81c46a35ce411e5fbc1191a0a52ef
+    f69f2445df4f9b17ad2b417be66c3710
+
+  key:
+    2b7e151628aed2a6abf7158809cf4f3c
+
+  resulting cipher
+    3ad77bb40d7a3660a89ecaf32466ef97 
+    f5d3d58503b9699de785895a96fdbaaf 
+    43b1cd7f598ece23881b00e3ed030688 
+    7b0c785e27e8ad3f8223207104725dd4 
+
+
+NOTE:   String length must be evenly divisible by 16byte (str_len % 16 == 0)
+        You should pad the end of the string with zeros if this is not the case.
+        For AES192/256 the key size is proportionally larger.
+
+*/
+
+
+/*****************************************************************************/
+/* Includes:                                                                 */
+/*****************************************************************************/
+#include <string.h> // CBC mode, for memset
+#include "aes.h"
+
+/*****************************************************************************/
+/* Defines:                                                                  */
+/*****************************************************************************/
+// The number of columns comprising a state in AES. This is a constant in AES. Value=4
+#define Nb 4
+
+#if defined(AES256) && (AES256 == 1)
+    #define Nk 8
+    #define Nr 14
+#elif defined(AES192) && (AES192 == 1)
+    #define Nk 6
+    #define Nr 12
+#else
+    #define Nk 4        // The number of 32 bit words in a key.
+    #define Nr 10       // The number of rounds in AES Cipher.
+#endif
+
+// jcallan@github points out that declaring Multiply as a function 
+// reduces code size considerably with the Keil ARM compiler.
+// See this link for more information: https://github.com/kokke/tiny-AES-C/pull/3
+#ifndef MULTIPLY_AS_A_FUNCTION
+  #define MULTIPLY_AS_A_FUNCTION 0
+#endif
+
+
+
+
+/*****************************************************************************/
+/* Private variables:                                                        */
+/*****************************************************************************/
+// state - array holding the intermediate results during decryption.
+typedef uint8_t state_t[4][4];
+
+
+
+// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM
+// The numbers below can be computed dynamically trading ROM for RAM - 
+// This can be useful in (embedded) bootloader applications, where ROM is often limited.
+static const uint8_t sbox[256] = {
+  //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
+  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
+
+#if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+static const uint8_t rsbox[256] = {
+  0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+  0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+  0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+  0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+  0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+  0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+  0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+  0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+  0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+  0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+  0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+  0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+  0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+  0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+  0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+  0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };
+#endif
+
+// The round constant word array, Rcon[i], contains the values given by 
+// x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8)
+static const uint8_t Rcon[11] = {
+  0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
+
+/*
+ * Jordan Goulder points out in PR #12 (https://github.com/kokke/tiny-AES-C/pull/12),
+ * that you can remove most of the elements in the Rcon array, because they are unused.
+ *
+ * From Wikipedia's article on the Rijndael key schedule @ https://en.wikipedia.org/wiki/Rijndael_key_schedule#Rcon
+ * 
+ * "Only the first some of these constants are actually used – up to rcon[10] for AES-128 (as 11 round keys are needed), 
+ *  up to rcon[8] for AES-192, up to rcon[7] for AES-256. rcon[0] is not used in AES algorithm."
+ */
+
+
+/*****************************************************************************/
+/* Private functions:                                                        */
+/*****************************************************************************/
+/*
+static uint8_t getSBoxValue(uint8_t num)
+{
+  return sbox[num];
+}
+*/
+#define getSBoxValue(num) (sbox[(num)])
+
+// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. 
+static void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key)
+{
+  unsigned i, j, k;
+  uint8_t tempa[4]; // Used for the column/row operations
+  
+  // The first round key is the key itself.
+  for (i = 0; i < Nk; ++i)
+  {
+    RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
+    RoundKey[(i * 4) + 1] = Key[(i * 4) + 1];
+    RoundKey[(i * 4) + 2] = Key[(i * 4) + 2];
+    RoundKey[(i * 4) + 3] = Key[(i * 4) + 3];
+  }
+
+  // All other round keys are found from the previous round keys.
+  for (i = Nk; i < Nb * (Nr + 1); ++i)
+  {
+    {
+      k = (i - 1) * 4;
+      tempa[0]=RoundKey[k + 0];
+      tempa[1]=RoundKey[k + 1];
+      tempa[2]=RoundKey[k + 2];
+      tempa[3]=RoundKey[k + 3];
+
+    }
+
+    if (i % Nk == 0)
+    {
+      // This function shifts the 4 bytes in a word to the left once.
+      // [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
+
+      // Function RotWord()
+      {
+        const uint8_t u8tmp = tempa[0];
+        tempa[0] = tempa[1];
+        tempa[1] = tempa[2];
+        tempa[2] = tempa[3];
+        tempa[3] = u8tmp;
+      }
+
+      // SubWord() is a function that takes a four-byte input word and 
+      // applies the S-box to each of the four bytes to produce an output word.
+
+      // Function Subword()
+      {
+        tempa[0] = getSBoxValue(tempa[0]);
+        tempa[1] = getSBoxValue(tempa[1]);
+        tempa[2] = getSBoxValue(tempa[2]);
+        tempa[3] = getSBoxValue(tempa[3]);
+      }
+
+      tempa[0] = tempa[0] ^ Rcon[i/Nk];
+    }
+#if defined(AES256) && (AES256 == 1)
+    if (i % Nk == 4)
+    {
+      // Function Subword()
+      {
+        tempa[0] = getSBoxValue(tempa[0]);
+        tempa[1] = getSBoxValue(tempa[1]);
+        tempa[2] = getSBoxValue(tempa[2]);
+        tempa[3] = getSBoxValue(tempa[3]);
+      }
+    }
+#endif
+    j = i * 4; k=(i - Nk) * 4;
+    RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0];
+    RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1];
+    RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2];
+    RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3];
+  }
+}
+
+void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key)
+{
+  KeyExpansion(ctx->RoundKey, key);
+}
+#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
+void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv)
+{
+  KeyExpansion(ctx->RoundKey, key);
+  memcpy (ctx->Iv, iv, AES_BLOCKLEN);
+}
+void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv)
+{
+  memcpy (ctx->Iv, iv, AES_BLOCKLEN);
+}
+#endif
+
+// This function adds the round key to state.
+// The round key is added to the state by an XOR function.
+static void AddRoundKey(uint8_t round, state_t* state, const uint8_t* RoundKey)
+{
+  uint8_t i,j;
+  for (i = 0; i < 4; ++i)
+  {
+    for (j = 0; j < 4; ++j)
+    {
+      (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j];
+    }
+  }
+}
+
+// The SubBytes Function Substitutes the values in the
+// state matrix with values in an S-box.
+static void SubBytes(state_t* state)
+{
+  uint8_t i, j;
+  for (i = 0; i < 4; ++i)
+  {
+    for (j = 0; j < 4; ++j)
+    {
+      (*state)[j][i] = getSBoxValue((*state)[j][i]);
+    }
+  }
+}
+
+// The ShiftRows() function shifts the rows in the state to the left.
+// Each row is shifted with different offset.
+// Offset = Row number. So the first row is not shifted.
+static void ShiftRows(state_t* state)
+{
+  uint8_t temp;
+
+  // Rotate first row 1 columns to left  
+  temp           = (*state)[0][1];
+  (*state)[0][1] = (*state)[1][1];
+  (*state)[1][1] = (*state)[2][1];
+  (*state)[2][1] = (*state)[3][1];
+  (*state)[3][1] = temp;
+
+  // Rotate second row 2 columns to left  
+  temp           = (*state)[0][2];
+  (*state)[0][2] = (*state)[2][2];
+  (*state)[2][2] = temp;
+
+  temp           = (*state)[1][2];
+  (*state)[1][2] = (*state)[3][2];
+  (*state)[3][2] = temp;
+
+  // Rotate third row 3 columns to left
+  temp           = (*state)[0][3];
+  (*state)[0][3] = (*state)[3][3];
+  (*state)[3][3] = (*state)[2][3];
+  (*state)[2][3] = (*state)[1][3];
+  (*state)[1][3] = temp;
+}
+
+static uint8_t xtime(uint8_t x)
+{
+  return ((x<<1) ^ (((x>>7) & 1) * 0x1b));
+}
+
+// MixColumns function mixes the columns of the state matrix
+static void MixColumns(state_t* state)
+{
+  uint8_t i;
+  uint8_t Tmp, Tm, t;
+  for (i = 0; i < 4; ++i)
+  {  
+    t   = (*state)[i][0];
+    Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3] ;
+    Tm  = (*state)[i][0] ^ (*state)[i][1] ; Tm = xtime(Tm);  (*state)[i][0] ^= Tm ^ Tmp ;
+    Tm  = (*state)[i][1] ^ (*state)[i][2] ; Tm = xtime(Tm);  (*state)[i][1] ^= Tm ^ Tmp ;
+    Tm  = (*state)[i][2] ^ (*state)[i][3] ; Tm = xtime(Tm);  (*state)[i][2] ^= Tm ^ Tmp ;
+    Tm  = (*state)[i][3] ^ t ;              Tm = xtime(Tm);  (*state)[i][3] ^= Tm ^ Tmp ;
+  }
+}
+
+// Multiply is used to multiply numbers in the field GF(2^8)
+// Note: The last call to xtime() is unneeded, but often ends up generating a smaller binary
+//       The compiler seems to be able to vectorize the operation better this way.
+//       See https://github.com/kokke/tiny-AES-c/pull/34
+#if MULTIPLY_AS_A_FUNCTION
+static uint8_t Multiply(uint8_t x, uint8_t y)
+{
+  return (((y & 1) * x) ^
+       ((y>>1 & 1) * xtime(x)) ^
+       ((y>>2 & 1) * xtime(xtime(x))) ^
+       ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^
+       ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */
+  }
+#else
+#define Multiply(x, y)                                \
+      (  ((y & 1) * x) ^                              \
+      ((y>>1 & 1) * xtime(x)) ^                       \
+      ((y>>2 & 1) * xtime(xtime(x))) ^                \
+      ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^         \
+      ((y>>4 & 1) * xtime(xtime(xtime(xtime(x))))))   \
+
+#endif
+
+#if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+/*
+static uint8_t getSBoxInvert(uint8_t num)
+{
+  return rsbox[num];
+}
+*/
+#define getSBoxInvert(num) (rsbox[(num)])
+
+// MixColumns function mixes the columns of the state matrix.
+// The method used to multiply may be difficult to understand for the inexperienced.
+// Please use the references to gain more information.
+static void InvMixColumns(state_t* state)
+{
+  int i;
+  uint8_t a, b, c, d;
+  for (i = 0; i < 4; ++i)
+  { 
+    a = (*state)[i][0];
+    b = (*state)[i][1];
+    c = (*state)[i][2];
+    d = (*state)[i][3];
+
+    (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
+    (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
+    (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
+    (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
+  }
+}
+
+
+// The SubBytes Function Substitutes the values in the
+// state matrix with values in an S-box.
+static void InvSubBytes(state_t* state)
+{
+  uint8_t i, j;
+  for (i = 0; i < 4; ++i)
+  {
+    for (j = 0; j < 4; ++j)
+    {
+      (*state)[j][i] = getSBoxInvert((*state)[j][i]);
+    }
+  }
+}
+
+static void InvShiftRows(state_t* state)
+{
+  uint8_t temp;
+
+  // Rotate first row 1 columns to right  
+  temp = (*state)[3][1];
+  (*state)[3][1] = (*state)[2][1];
+  (*state)[2][1] = (*state)[1][1];
+  (*state)[1][1] = (*state)[0][1];
+  (*state)[0][1] = temp;
+
+  // Rotate second row 2 columns to right 
+  temp = (*state)[0][2];
+  (*state)[0][2] = (*state)[2][2];
+  (*state)[2][2] = temp;
+
+  temp = (*state)[1][2];
+  (*state)[1][2] = (*state)[3][2];
+  (*state)[3][2] = temp;
+
+  // Rotate third row 3 columns to right
+  temp = (*state)[0][3];
+  (*state)[0][3] = (*state)[1][3];
+  (*state)[1][3] = (*state)[2][3];
+  (*state)[2][3] = (*state)[3][3];
+  (*state)[3][3] = temp;
+}
+#endif // #if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+
+// Cipher is the main function that encrypts the PlainText.
+static void Cipher(state_t* state, const uint8_t* RoundKey)
+{
+  uint8_t round = 0;
+
+  // Add the First round key to the state before starting the rounds.
+  AddRoundKey(0, state, RoundKey);
+
+  // There will be Nr rounds.
+  // The first Nr-1 rounds are identical.
+  // These Nr rounds are executed in the loop below.
+  // Last one without MixColumns()
+  for (round = 1; ; ++round)
+  {
+    SubBytes(state);
+    ShiftRows(state);
+    if (round == Nr) {
+      break;
+    }
+    MixColumns(state);
+    AddRoundKey(round, state, RoundKey);
+  }
+  // Add round key to last round
+  AddRoundKey(Nr, state, RoundKey);
+}
+
+#if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+static void InvCipher(state_t* state, const uint8_t* RoundKey)
+{
+  uint8_t round = 0;
+
+  // Add the First round key to the state before starting the rounds.
+  AddRoundKey(Nr, state, RoundKey);
+
+  // There will be Nr rounds.
+  // The first Nr-1 rounds are identical.
+  // These Nr rounds are executed in the loop below.
+  // Last one without InvMixColumn()
+  for (round = (Nr - 1); ; --round)
+  {
+    InvShiftRows(state);
+    InvSubBytes(state);
+    AddRoundKey(round, state, RoundKey);
+    if (round == 0) {
+      break;
+    }
+    InvMixColumns(state);
+  }
+
+}
+#endif // #if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+
+/*****************************************************************************/
+/* Public functions:                                                         */
+/*****************************************************************************/
+#if defined(ECB) && (ECB == 1)
+
+
+void AES_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf)
+{
+  // The next function call encrypts the PlainText with the Key using AES algorithm.
+  Cipher((state_t*)buf, ctx->RoundKey);
+}
+
+void AES_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf)
+{
+  // The next function call decrypts the PlainText with the Key using AES algorithm.
+  InvCipher((state_t*)buf, ctx->RoundKey);
+}
+
+
+#endif // #if defined(ECB) && (ECB == 1)
+
+
+
+
+
+#if defined(CBC) && (CBC == 1)
+
+
+static void XorWithIv(uint8_t* buf, const uint8_t* Iv)
+{
+  uint8_t i;
+  for (i = 0; i < AES_BLOCKLEN; ++i) // The block in AES is always 128bit no matter the key size
+  {
+    buf[i] ^= Iv[i];
+  }
+}
+
+void AES_CBC_encrypt_buffer(struct AES_ctx *ctx, uint8_t* buf, size_t length)
+{
+  size_t i;
+  uint8_t *Iv = ctx->Iv;
+  for (i = 0; i < length; i += AES_BLOCKLEN)
+  {
+    XorWithIv(buf, Iv);
+    Cipher((state_t*)buf, ctx->RoundKey);
+    Iv = buf;
+    buf += AES_BLOCKLEN;
+  }
+  /* store Iv in ctx for next call */
+  memcpy(ctx->Iv, Iv, AES_BLOCKLEN);
+}
+
+void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length)
+{
+  size_t i;
+  uint8_t storeNextIv[AES_BLOCKLEN];
+  for (i = 0; i < length; i += AES_BLOCKLEN)
+  {
+    memcpy(storeNextIv, buf, AES_BLOCKLEN);
+    InvCipher((state_t*)buf, ctx->RoundKey);
+    XorWithIv(buf, ctx->Iv);
+    memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN);
+    buf += AES_BLOCKLEN;
+  }
+
+}
+
+#endif // #if defined(CBC) && (CBC == 1)
+
+
+
+#if defined(CTR) && (CTR == 1)
+
+/* Symmetrical operation: same function for encrypting as for decrypting. Note any IV/nonce should never be reused with the same key */
+void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length)
+{
+  uint8_t buffer[AES_BLOCKLEN];
+  
+  size_t i;
+  int bi;
+  for (i = 0, bi = AES_BLOCKLEN; i < length; ++i, ++bi)
+  {
+    if (bi == AES_BLOCKLEN) /* we need to regen xor compliment in buffer */
+    {
+      
+      memcpy(buffer, ctx->Iv, AES_BLOCKLEN);
+      Cipher((state_t*)buffer,ctx->RoundKey);
+
+      /* Increment Iv and handle overflow */
+      for (bi = (AES_BLOCKLEN - 1); bi >= 0; --bi)
+      {
+	/* inc will overflow */
+        if (ctx->Iv[bi] == 255)
+	{
+          ctx->Iv[bi] = 0;
+          continue;
+        } 
+        ctx->Iv[bi] += 1;
+        break;   
+      }
+      bi = 0;
+    }
+
+    buf[i] = (buf[i] ^ buffer[bi]);
+  }
+}
+
+#endif // #if defined(CTR) && (CTR == 1)
+
diff --git a/verilog/dv/user_aes/aes.h b/verilog/dv/user_aes/aes.h
new file mode 100644
index 0000000..fecf80f
--- /dev/null
+++ b/verilog/dv/user_aes/aes.h
@@ -0,0 +1,93 @@
+#ifndef _AES_H_
+#define _AES_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define printf sc_printf
+
+// #define the macros below to 1/0 to enable/disable the mode of operation.
+//
+// CBC enables AES encryption in CBC-mode of operation.
+// CTR enables encryption in counter-mode.
+// ECB enables the basic ECB 16-byte block algorithm. All can be enabled simultaneously.
+
+// The #ifndef-guard allows it to be configured before #include'ing or at compile time.
+#ifndef CBC
+  #define CBC 1
+#endif
+
+#ifndef ECB
+  #define ECB 1
+#endif
+
+#ifndef CTR
+  #define CTR 1
+#endif
+
+
+#define AES128 1
+//#define AES192 1
+//#define AES256 1
+
+#define AES_BLOCKLEN 16 // Block length in bytes - AES is 128b block only
+
+#if defined(AES256) && (AES256 == 1)
+    #define AES_KEYLEN 32
+    #define AES_keyExpSize 240
+#elif defined(AES192) && (AES192 == 1)
+    #define AES_KEYLEN 24
+    #define AES_keyExpSize 208
+#else
+    #define AES_KEYLEN 16   // Key length in bytes
+    #define AES_keyExpSize 176
+#endif
+
+struct AES_ctx
+{
+  uint8_t RoundKey[AES_keyExpSize];
+#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
+  uint8_t Iv[AES_BLOCKLEN];
+#endif
+};
+
+void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key);
+#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
+void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv);
+void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv);
+#endif
+
+#if defined(ECB) && (ECB == 1)
+// buffer size is exactly AES_BLOCKLEN bytes; 
+// you need only AES_init_ctx as IV is not used in ECB 
+// NB: ECB is considered insecure for most uses
+void AES_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf);
+void AES_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf);
+
+#endif // #if defined(ECB) && (ECB == !)
+
+
+#if defined(CBC) && (CBC == 1)
+// buffer size MUST be mutile of AES_BLOCKLEN;
+// Suggest https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
+// NOTES: you need to set IV in ctx via AES_init_ctx_iv() or AES_ctx_set_iv()
+//        no IV should ever be reused with the same key 
+void AES_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length);
+void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length);
+
+#endif // #if defined(CBC) && (CBC == 1)
+
+
+#if defined(CTR) && (CTR == 1)
+
+// Same function for encrypting as for decrypting. 
+// IV is incremented for every block, and used after encryption as XOR-compliment for output
+// Suggesting https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
+// NOTES: you need to set IV in ctx with AES_init_ctx_iv() or AES_ctx_set_iv()
+//        no IV should ever be reused with the same key 
+void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length);
+
+#endif // #if defined(CTR) && (CTR == 1)
+
+
+#endif // _AES_H_
diff --git a/verilog/dv/user_aes/user_aes.c b/verilog/dv/user_aes/user_aes.c
new file mode 100644
index 0000000..f78b8fe
--- /dev/null
+++ b/verilog/dv/user_aes/user_aes.c
@@ -0,0 +1,391 @@
+//////////////////////////////////////////////////////////////////////////////
+// SPDX-FileCopyrightText: 2021, Dinesh Annayya
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileContributor: Dinesh Annayya <dinesha@opencores.org>
+// //////////////////////////////////////////////////////////////////////////
+#define SC_SIM_OUTPORT (0xf0000000)
+
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+// Enable ECB, CTR and CBC mode. Note this can be done before including aes.h or at compile-time.
+// E.g. with GCC by using the -D flag: gcc -c aes.c -DCBC=0 -DCTR=1 -DECB=1
+#define CBC 1
+#define CTR 1
+#define ECB 1
+
+#include "aes.h"
+
+
+static void phex(uint8_t* str);
+static int test_encrypt_cbc(void);
+static int test_decrypt_cbc(void);
+static int test_encrypt_ctr(void);
+static int test_decrypt_ctr(void);
+static int test_encrypt_ecb(void);
+static int test_decrypt_ecb(void);
+static void test_encrypt_ecb_verbose(void);
+
+#define reg_mprj_globl_reg0  (*(volatile uint32_t*)0x10020000)
+#define reg_mprj_globl_reg1  (*(volatile uint32_t*)0x10020004)
+#define reg_mprj_globl_reg2  (*(volatile uint32_t*)0x10020008)
+#define reg_mprj_globl_reg3  (*(volatile uint32_t*)0x1002000C)
+#define reg_mprj_globl_reg4  (*(volatile uint32_t*)0x10020010)
+#define reg_mprj_globl_reg5  (*(volatile uint32_t*)0x10020014)
+#define reg_mprj_globl_reg6  (*(volatile uint32_t*)0x10020018)
+#define reg_mprj_globl_reg7  (*(volatile uint32_t*)0x1002001C)
+#define reg_mprj_globl_reg8  (*(volatile uint32_t*)0x10020020)
+#define reg_mprj_globl_reg9  (*(volatile uint32_t*)0x10020024)
+#define reg_mprj_globl_reg10 (*(volatile uint32_t*)0x10020028)
+#define reg_mprj_globl_reg11 (*(volatile uint32_t*)0x1002002C)
+#define reg_mprj_globl_reg12 (*(volatile uint32_t*)0x10020030)
+#define reg_mprj_globl_reg13 (*(volatile uint32_t*)0x10020034)
+#define reg_mprj_globl_reg14 (*(volatile uint32_t*)0x10020038)
+#define reg_mprj_globl_reg15 (*(volatile uint32_t*)0x1002003C)
+#define reg_mprj_globl_reg16 (*(volatile uint32_t*)0x10020040)
+#define reg_mprj_globl_reg17 (*(volatile uint32_t*)0x10020044)
+#define reg_mprj_globl_reg18 (*(volatile uint32_t*)0x10020048)
+#define reg_mprj_globl_reg19 (*(volatile uint32_t*)0x1002004C)
+#define reg_mprj_globl_reg20 (*(volatile uint32_t*)0x10020050)
+#define reg_mprj_globl_reg21 (*(volatile uint32_t*)0x10020054)
+#define reg_mprj_globl_reg22 (*(volatile uint32_t*)0x10020058)
+#define reg_mprj_globl_reg23 (*(volatile uint32_t*)0x1002005C)
+#define reg_mprj_globl_reg24 (*(volatile uint32_t*)0x10020060)
+#define reg_mprj_globl_reg25 (*(volatile uint32_t*)0x10020064)
+#define reg_mprj_globl_reg26 (*(volatile uint32_t*)0x10020068)
+#define reg_mprj_globl_reg27 (*(volatile uint32_t*)0x1002006C)
+
+#define reg_mprg_pinmux_gpio_odata (*(volatile uint32_t*)0x10020018)
+
+int main(void)
+{
+    int exit;
+
+#if defined(AES256)
+    //printf("\nTesting AES256\n\n");
+#elif defined(AES192)
+    //printf("\nTesting AES192\n\n");
+#elif defined(AES128)
+    //printf("\nTesting AES128\n\n");
+#else
+    //printf("You need to specify a symbol between AES128, AES192 or AES256. Exiting");
+    return 0;
+#endif
+
+    reg_mprg_pinmux_gpio_odata  = 0x00000100; 
+    reg_mprj_globl_reg23  = 0x00000000; 
+    exit = test_encrypt_cbc();
+    reg_mprg_pinmux_gpio_odata  = 0x00000200; 
+    reg_mprj_globl_reg23  = exit;
+    exit += test_decrypt_cbc();
+    reg_mprg_pinmux_gpio_odata  = 0x00000300; 
+    reg_mprj_globl_reg23  = exit;
+    exit += test_encrypt_ctr();
+    reg_mprg_pinmux_gpio_odata  = 0x00000400; 
+    reg_mprj_globl_reg23  = exit;
+    exit += test_decrypt_ctr();
+    reg_mprg_pinmux_gpio_odata  = 0x00000500; 
+    reg_mprj_globl_reg23  = exit;
+    exit += test_decrypt_ecb();
+    reg_mprg_pinmux_gpio_odata  = 0x00000600; 
+    reg_mprj_globl_reg23  = exit;
+    exit += test_encrypt_ecb();
+    reg_mprg_pinmux_gpio_odata  = 0x00000700; 
+    reg_mprj_globl_reg23  = exit;
+    test_encrypt_ecb_verbose();
+    reg_mprg_pinmux_gpio_odata  = 0x00000800; 
+    reg_mprj_globl_reg23  = exit;
+
+    if(exit == 0) {
+        reg_mprg_pinmux_gpio_odata  = 0x00001800; 
+    } else {
+        reg_mprg_pinmux_gpio_odata  = 0x0000A800; 
+    }
+
+    return exit;
+}
+
+
+// prints string as hex
+static void phex(uint8_t* str)
+{
+
+#if defined(AES256)
+    uint8_t len = 32;
+#elif defined(AES192)
+    uint8_t len = 24;
+#elif defined(AES128)
+    uint8_t len = 16;
+#endif
+
+    unsigned char i;
+    //for (i = 0; i < len; ++i)
+    //    printf("%.2x", str[i]);
+    //printf("\n");
+}
+
+static void test_encrypt_ecb_verbose(void)
+{
+    // Example of more verbose verification
+
+    uint8_t i;
+
+    // 128bit key
+    uint8_t key[16] =        { (uint8_t) 0x2b, (uint8_t) 0x7e, (uint8_t) 0x15, (uint8_t) 0x16, (uint8_t) 0x28, (uint8_t) 0xae, (uint8_t) 0xd2, (uint8_t) 0xa6, (uint8_t) 0xab, (uint8_t) 0xf7, (uint8_t) 0x15, (uint8_t) 0x88, (uint8_t) 0x09, (uint8_t) 0xcf, (uint8_t) 0x4f, (uint8_t) 0x3c };
+    // 512bit text
+    uint8_t plain_text[64] = { (uint8_t) 0x6b, (uint8_t) 0xc1, (uint8_t) 0xbe, (uint8_t) 0xe2, (uint8_t) 0x2e, (uint8_t) 0x40, (uint8_t) 0x9f, (uint8_t) 0x96, (uint8_t) 0xe9, (uint8_t) 0x3d, (uint8_t) 0x7e, (uint8_t) 0x11, (uint8_t) 0x73, (uint8_t) 0x93, (uint8_t) 0x17, (uint8_t) 0x2a,
+                               (uint8_t) 0xae, (uint8_t) 0x2d, (uint8_t) 0x8a, (uint8_t) 0x57, (uint8_t) 0x1e, (uint8_t) 0x03, (uint8_t) 0xac, (uint8_t) 0x9c, (uint8_t) 0x9e, (uint8_t) 0xb7, (uint8_t) 0x6f, (uint8_t) 0xac, (uint8_t) 0x45, (uint8_t) 0xaf, (uint8_t) 0x8e, (uint8_t) 0x51,
+                               (uint8_t) 0x30, (uint8_t) 0xc8, (uint8_t) 0x1c, (uint8_t) 0x46, (uint8_t) 0xa3, (uint8_t) 0x5c, (uint8_t) 0xe4, (uint8_t) 0x11, (uint8_t) 0xe5, (uint8_t) 0xfb, (uint8_t) 0xc1, (uint8_t) 0x19, (uint8_t) 0x1a, (uint8_t) 0x0a, (uint8_t) 0x52, (uint8_t) 0xef,
+                               (uint8_t) 0xf6, (uint8_t) 0x9f, (uint8_t) 0x24, (uint8_t) 0x45, (uint8_t) 0xdf, (uint8_t) 0x4f, (uint8_t) 0x9b, (uint8_t) 0x17, (uint8_t) 0xad, (uint8_t) 0x2b, (uint8_t) 0x41, (uint8_t) 0x7b, (uint8_t) 0xe6, (uint8_t) 0x6c, (uint8_t) 0x37, (uint8_t) 0x10 };
+
+    // print text to encrypt, key and IV
+    //printf("ECB encrypt verbose:\n\n");
+    //printf("plain text:\n");
+    for (i = (uint8_t) 0; i < (uint8_t) 4; ++i)
+    {
+        phex(plain_text + i * (uint8_t) 16);
+    }
+    //printf("\n");
+
+    //printf("key:\n");
+    phex(key);
+    //printf("\n");
+
+    // print the resulting cipher as 4 x 16 byte strings
+    //printf("ciphertext:\n");
+    
+    struct AES_ctx ctx;
+    AES_init_ctx(&ctx, key);
+
+    for (i = 0; i < 4; ++i)
+    {
+      AES_ECB_encrypt(&ctx, plain_text + (i * 16));
+      phex(plain_text + (i * 16));
+    }
+    //printf("\n");
+}
+
+
+static int test_encrypt_ecb(void)
+{
+#if defined(AES256)
+    uint8_t key[] = { 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
+                      0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4 };
+    uint8_t out[] = { 0xf3, 0xee, 0xd1, 0xbd, 0xb5, 0xd2, 0xa0, 0x3c, 0x06, 0x4b, 0x5a, 0x7e, 0x3d, 0xb1, 0x81, 0xf8 };
+#elif defined(AES192)
+    uint8_t key[] = { 0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52, 0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5,
+                      0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b };
+    uint8_t out[] = { 0xbd, 0x33, 0x4f, 0x1d, 0x6e, 0x45, 0xf2, 0x5f, 0xf7, 0x12, 0xa2, 0x14, 0x57, 0x1f, 0xa5, 0xcc };
+#elif defined(AES128)
+    uint8_t key[] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
+    uint8_t out[] = { 0x3a, 0xd7, 0x7b, 0xb4, 0x0d, 0x7a, 0x36, 0x60, 0xa8, 0x9e, 0xca, 0xf3, 0x24, 0x66, 0xef, 0x97 };
+#endif
+
+    uint8_t in[]  = { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a };
+    struct AES_ctx ctx;
+
+    AES_init_ctx(&ctx, key);
+    AES_ECB_encrypt(&ctx, in);
+
+    //printf("ECB encrypt: ");
+
+    if (0 == memcmp((char*) out, (char*) in, 16)) {
+        //printf("SUCCESS!\n");
+	return(0);
+    } else {
+        //printf("FAILURE!\n");
+	return(1);
+    }
+}
+
+static int test_decrypt_cbc(void)
+{
+
+#if defined(AES256)
+    uint8_t key[] = { 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
+                      0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4 };
+    uint8_t in[]  = { 0xf5, 0x8c, 0x4c, 0x04, 0xd6, 0xe5, 0xf1, 0xba, 0x77, 0x9e, 0xab, 0xfb, 0x5f, 0x7b, 0xfb, 0xd6,
+                      0x9c, 0xfc, 0x4e, 0x96, 0x7e, 0xdb, 0x80, 0x8d, 0x67, 0x9f, 0x77, 0x7b, 0xc6, 0x70, 0x2c, 0x7d,
+                      0x39, 0xf2, 0x33, 0x69, 0xa9, 0xd9, 0xba, 0xcf, 0xa5, 0x30, 0xe2, 0x63, 0x04, 0x23, 0x14, 0x61,
+                      0xb2, 0xeb, 0x05, 0xe2, 0xc3, 0x9b, 0xe9, 0xfc, 0xda, 0x6c, 0x19, 0x07, 0x8c, 0x6a, 0x9d, 0x1b };
+#elif defined(AES192)
+    uint8_t key[] = { 0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52, 0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5, 0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b };
+    uint8_t in[]  = { 0x4f, 0x02, 0x1d, 0xb2, 0x43, 0xbc, 0x63, 0x3d, 0x71, 0x78, 0x18, 0x3a, 0x9f, 0xa0, 0x71, 0xe8,
+                      0xb4, 0xd9, 0xad, 0xa9, 0xad, 0x7d, 0xed, 0xf4, 0xe5, 0xe7, 0x38, 0x76, 0x3f, 0x69, 0x14, 0x5a,
+                      0x57, 0x1b, 0x24, 0x20, 0x12, 0xfb, 0x7a, 0xe0, 0x7f, 0xa9, 0xba, 0xac, 0x3d, 0xf1, 0x02, 0xe0,
+                      0x08, 0xb0, 0xe2, 0x79, 0x88, 0x59, 0x88, 0x81, 0xd9, 0x20, 0xa9, 0xe6, 0x4f, 0x56, 0x15, 0xcd };
+#elif defined(AES128)
+    uint8_t key[] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
+    uint8_t in[]  = { 0x76, 0x49, 0xab, 0xac, 0x81, 0x19, 0xb2, 0x46, 0xce, 0xe9, 0x8e, 0x9b, 0x12, 0xe9, 0x19, 0x7d,
+                      0x50, 0x86, 0xcb, 0x9b, 0x50, 0x72, 0x19, 0xee, 0x95, 0xdb, 0x11, 0x3a, 0x91, 0x76, 0x78, 0xb2,
+                      0x73, 0xbe, 0xd6, 0xb8, 0xe3, 0xc1, 0x74, 0x3b, 0x71, 0x16, 0xe6, 0x9e, 0x22, 0x22, 0x95, 0x16,
+                      0x3f, 0xf1, 0xca, 0xa1, 0x68, 0x1f, 0xac, 0x09, 0x12, 0x0e, 0xca, 0x30, 0x75, 0x86, 0xe1, 0xa7 };
+#endif
+    uint8_t iv[]  = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
+    uint8_t out[] = { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a,
+                      0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51,
+                      0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef,
+                      0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 };
+//  uint8_t buffer[64];
+    struct AES_ctx ctx;
+
+    AES_init_ctx_iv(&ctx, key, iv);
+    AES_CBC_decrypt_buffer(&ctx, in, 64);
+
+    //printf("CBC decrypt: ");
+
+    if (0 == memcmp((char*) out, (char*) in, 64)) {
+        //printf("SUCCESS!\n");
+	return(0);
+    } else {
+        //printf("FAILURE!\n");
+	return(1);
+    }
+}
+
+static int test_encrypt_cbc(void)
+{
+#if defined(AES256)
+    uint8_t key[] = { 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
+                      0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4 };
+    uint8_t out[] = { 0xf5, 0x8c, 0x4c, 0x04, 0xd6, 0xe5, 0xf1, 0xba, 0x77, 0x9e, 0xab, 0xfb, 0x5f, 0x7b, 0xfb, 0xd6,
+                      0x9c, 0xfc, 0x4e, 0x96, 0x7e, 0xdb, 0x80, 0x8d, 0x67, 0x9f, 0x77, 0x7b, 0xc6, 0x70, 0x2c, 0x7d,
+                      0x39, 0xf2, 0x33, 0x69, 0xa9, 0xd9, 0xba, 0xcf, 0xa5, 0x30, 0xe2, 0x63, 0x04, 0x23, 0x14, 0x61,
+                      0xb2, 0xeb, 0x05, 0xe2, 0xc3, 0x9b, 0xe9, 0xfc, 0xda, 0x6c, 0x19, 0x07, 0x8c, 0x6a, 0x9d, 0x1b };
+#elif defined(AES192)
+    uint8_t key[] = { 0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52, 0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5, 0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b };
+    uint8_t out[] = { 0x4f, 0x02, 0x1d, 0xb2, 0x43, 0xbc, 0x63, 0x3d, 0x71, 0x78, 0x18, 0x3a, 0x9f, 0xa0, 0x71, 0xe8,
+                      0xb4, 0xd9, 0xad, 0xa9, 0xad, 0x7d, 0xed, 0xf4, 0xe5, 0xe7, 0x38, 0x76, 0x3f, 0x69, 0x14, 0x5a,
+                      0x57, 0x1b, 0x24, 0x20, 0x12, 0xfb, 0x7a, 0xe0, 0x7f, 0xa9, 0xba, 0xac, 0x3d, 0xf1, 0x02, 0xe0,
+                      0x08, 0xb0, 0xe2, 0x79, 0x88, 0x59, 0x88, 0x81, 0xd9, 0x20, 0xa9, 0xe6, 0x4f, 0x56, 0x15, 0xcd };
+#elif defined(AES128)
+    uint8_t key[] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
+    uint8_t out[] = { 0x76, 0x49, 0xab, 0xac, 0x81, 0x19, 0xb2, 0x46, 0xce, 0xe9, 0x8e, 0x9b, 0x12, 0xe9, 0x19, 0x7d,
+                      0x50, 0x86, 0xcb, 0x9b, 0x50, 0x72, 0x19, 0xee, 0x95, 0xdb, 0x11, 0x3a, 0x91, 0x76, 0x78, 0xb2,
+                      0x73, 0xbe, 0xd6, 0xb8, 0xe3, 0xc1, 0x74, 0x3b, 0x71, 0x16, 0xe6, 0x9e, 0x22, 0x22, 0x95, 0x16,
+                      0x3f, 0xf1, 0xca, 0xa1, 0x68, 0x1f, 0xac, 0x09, 0x12, 0x0e, 0xca, 0x30, 0x75, 0x86, 0xe1, 0xa7 };
+#endif
+    uint8_t iv[]  = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
+    uint8_t in[]  = { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a,
+                      0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51,
+                      0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef,
+                      0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 };
+    struct AES_ctx ctx;
+
+    AES_init_ctx_iv(&ctx, key, iv);
+    AES_CBC_encrypt_buffer(&ctx, in, 64);
+
+    //printf("CBC encrypt: ");
+
+    if (0 == memcmp((char*) out, (char*) in, 64)) {
+        //printf("SUCCESS!\n");
+	return(0);
+    } else {
+        //printf("FAILURE!\n");
+	return(1);
+    }
+}
+
+static int test_xcrypt_ctr(const char* xcrypt);
+static int test_encrypt_ctr(void)
+{
+    return test_xcrypt_ctr("encrypt");
+}
+
+static int test_decrypt_ctr(void)
+{
+    return test_xcrypt_ctr("decrypt");
+}
+
+static int test_xcrypt_ctr(const char* xcrypt)
+{
+#if defined(AES256)
+    uint8_t key[32] = { 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
+                        0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4 };
+    uint8_t in[64]  = { 0x60, 0x1e, 0xc3, 0x13, 0x77, 0x57, 0x89, 0xa5, 0xb7, 0xa7, 0xf5, 0x04, 0xbb, 0xf3, 0xd2, 0x28, 
+                        0xf4, 0x43, 0xe3, 0xca, 0x4d, 0x62, 0xb5, 0x9a, 0xca, 0x84, 0xe9, 0x90, 0xca, 0xca, 0xf5, 0xc5, 
+                        0x2b, 0x09, 0x30, 0xda, 0xa2, 0x3d, 0xe9, 0x4c, 0xe8, 0x70, 0x17, 0xba, 0x2d, 0x84, 0x98, 0x8d, 
+                        0xdf, 0xc9, 0xc5, 0x8d, 0xb6, 0x7a, 0xad, 0xa6, 0x13, 0xc2, 0xdd, 0x08, 0x45, 0x79, 0x41, 0xa6 };
+#elif defined(AES192)
+    uint8_t key[24] = { 0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52, 0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5, 
+                        0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b };
+    uint8_t in[64]  = { 0x1a, 0xbc, 0x93, 0x24, 0x17, 0x52, 0x1c, 0xa2, 0x4f, 0x2b, 0x04, 0x59, 0xfe, 0x7e, 0x6e, 0x0b, 
+                        0x09, 0x03, 0x39, 0xec, 0x0a, 0xa6, 0xfa, 0xef, 0xd5, 0xcc, 0xc2, 0xc6, 0xf4, 0xce, 0x8e, 0x94, 
+                        0x1e, 0x36, 0xb2, 0x6b, 0xd1, 0xeb, 0xc6, 0x70, 0xd1, 0xbd, 0x1d, 0x66, 0x56, 0x20, 0xab, 0xf7, 
+                        0x4f, 0x78, 0xa7, 0xf6, 0xd2, 0x98, 0x09, 0x58, 0x5a, 0x97, 0xda, 0xec, 0x58, 0xc6, 0xb0, 0x50 };
+#elif defined(AES128)
+    uint8_t key[16] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
+    uint8_t in[64]  = { 0x87, 0x4d, 0x61, 0x91, 0xb6, 0x20, 0xe3, 0x26, 0x1b, 0xef, 0x68, 0x64, 0x99, 0x0d, 0xb6, 0xce,
+                        0x98, 0x06, 0xf6, 0x6b, 0x79, 0x70, 0xfd, 0xff, 0x86, 0x17, 0x18, 0x7b, 0xb9, 0xff, 0xfd, 0xff,
+                        0x5a, 0xe4, 0xdf, 0x3e, 0xdb, 0xd5, 0xd3, 0x5e, 0x5b, 0x4f, 0x09, 0x02, 0x0d, 0xb0, 0x3e, 0xab,
+                        0x1e, 0x03, 0x1d, 0xda, 0x2f, 0xbe, 0x03, 0xd1, 0x79, 0x21, 0x70, 0xa0, 0xf3, 0x00, 0x9c, 0xee };
+#endif
+    uint8_t iv[16]  = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff };
+    uint8_t out[64] = { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a,
+                        0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51,
+                        0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef,
+                        0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 };
+    struct AES_ctx ctx;
+    
+    AES_init_ctx_iv(&ctx, key, iv);
+    AES_CTR_xcrypt_buffer(&ctx, in, 64);
+  
+    //printf("CTR %s: ", xcrypt);
+  
+    if (0 == memcmp((char *) out, (char *) in, 64)) {
+        //printf("SUCCESS!\n");
+	return(0);
+    } else {
+        //printf("FAILURE!\n");
+	return(1);
+    }
+}
+
+
+static int test_decrypt_ecb(void)
+{
+#if defined(AES256)
+    uint8_t key[] = { 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
+                      0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4 };
+    uint8_t in[]  = { 0xf3, 0xee, 0xd1, 0xbd, 0xb5, 0xd2, 0xa0, 0x3c, 0x06, 0x4b, 0x5a, 0x7e, 0x3d, 0xb1, 0x81, 0xf8 };
+#elif defined(AES192)
+    uint8_t key[] = { 0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52, 0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5,
+                      0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b };
+    uint8_t in[]  = { 0xbd, 0x33, 0x4f, 0x1d, 0x6e, 0x45, 0xf2, 0x5f, 0xf7, 0x12, 0xa2, 0x14, 0x57, 0x1f, 0xa5, 0xcc };
+#elif defined(AES128)
+    uint8_t key[] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
+    uint8_t in[]  = { 0x3a, 0xd7, 0x7b, 0xb4, 0x0d, 0x7a, 0x36, 0x60, 0xa8, 0x9e, 0xca, 0xf3, 0x24, 0x66, 0xef, 0x97 };
+#endif
+
+    uint8_t out[]   = { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a };
+    struct AES_ctx ctx;
+    
+    AES_init_ctx(&ctx, key);
+    AES_ECB_decrypt(&ctx, in);
+
+    //printf("ECB decrypt: ");
+
+    if (0 == memcmp((char*) out, (char*) in, 16)) {
+        //printf("SUCCESS!\n");
+	return(0);
+    } else {
+        //printf("FAILURE!\n");
+	return(1);
+    }
+}
+
+
diff --git a/verilog/dv/user_aes/user_aes_tb.v b/verilog/dv/user_aes/user_aes_tb.v
new file mode 100644
index 0000000..3d18ba5
--- /dev/null
+++ b/verilog/dv/user_aes/user_aes_tb.v
@@ -0,0 +1,469 @@
+////////////////////////////////////////////////////////////////////////////
+// SPDX-FileCopyrightText:  2021 , Dinesh Annayya
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileContributor: Modified by Dinesh Annayya <dinesha@opencores.org>
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  Standalone User validation Test bench                       ////
+////                                                              ////
+////  This file is part of the Riscduino cores project            ////
+////                                                              ////
+////  Description                                                 ////
+////   This is a standalone test bench to validate the            ////
+////   Digital core with Risc core executing code from TCM/SRAM.  ////
+////   with icache and dcache bypass mode                         ////
+////                                                              ////
+////  To Do:                                                      ////
+////    nothing                                                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Dinesh Annayya, dinesha@opencores.org                 ////
+////                                                              ////
+////  Revision :                                                  ////
+////    0.1 - 16th Feb 2021, Dinesh A                             ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000 Authors and OPENCORES.ORG                 ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+
+`default_nettype wire
+
+`timescale 1 ns / 1 ns
+
+`include "sram_macros/sky130_sram_2kbyte_1rw1r_32x512_8.v"
+`include "uart_agent.v"
+module user_aes_tb;
+
+reg clock;
+reg wb_rst_i;
+reg power1, power2;
+reg power3, power4;
+
+reg        wbd_ext_cyc_i;  // strobe/request
+reg        wbd_ext_stb_i;  // strobe/request
+reg [31:0] wbd_ext_adr_i;  // address
+reg        wbd_ext_we_i;  // write
+reg [31:0] wbd_ext_dat_i;  // data output
+reg [3:0]  wbd_ext_sel_i;  // byte enable
+
+wire [31:0] wbd_ext_dat_o;  // data input
+wire        wbd_ext_ack_o;  // acknowlegement
+wire        wbd_ext_err_o;  // error
+
+// User I/O
+wire [37:0] io_oeb;
+wire [37:0] io_out;
+wire [37:0] io_in;
+
+wire gpio;
+wire [37:0] mprj_io;
+wire [7:0] mprj_io_0;
+reg         test_fail;
+reg [31:0] read_data;
+integer    d_risc_id;
+
+//----------------------------------
+// Uart Configuration
+// ---------------------------------
+reg [1:0]      uart_data_bit        ;
+reg	       uart_stop_bits       ; // 0: 1 stop bit; 1: 2 stop bit;
+reg	       uart_stick_parity    ; // 1: force even parity
+reg	       uart_parity_en       ; // parity enable
+reg	       uart_even_odd_parity ; // 0: odd parity; 1: even parity
+
+reg [7:0]      uart_data            ;
+reg [15:0]     uart_divisor         ;	// divided by n * 16
+reg [15:0]     uart_timeout         ;// wait time limit
+
+reg [15:0]     uart_rx_nu           ;
+reg [15:0]     uart_tx_nu           ;
+reg [7:0]      uart_write_data [0:39];
+reg 	       uart_fifo_enable     ;	// fifo mode disable
+
+
+	// External clock is used by default.  Make this artificially fast for the
+	// simulation.  Normally this would be a slow clock and the digital PLL
+	// would be the fast clock.
+
+	always #12.5 clock <= (clock === 1'b0);
+
+     /************* Port-B Mapping **********************************
+     *   Pin-14       PB0/CLKO/ICP1             digital_io[11]
+     *   Pin-15       PB1/SS[1]OC1A(PWM3)       digital_io[12]
+     *   Pin-16       PB2/SS[0]/OC1B(PWM4)      digital_io[13]
+     *   Pin-17       PB3/MOSI/OC2A(PWM5)       digital_io[14]
+     *   Pin-18       PB4/MISO                  digital_io[15]
+     *   Pin-19       PB5/SCK                   digital_io[16]
+     *   Pin-9        PB6/XTAL1/TOSC1           digital_io[6]
+     *   Pin-10       PB7/XTAL2/TOSC2           digital_io[7]
+     *   ********************************************************/
+
+     wire [7:0]  port_b_in = {   io_out[7],
+		                 io_out[6],
+		                 io_out[16],
+		                 io_out[15],
+			         io_out[14],
+			         io_out[13],
+		                 io_out[12],
+		                 io_out[11]
+			     };
+	initial begin
+		test_fail = 0;
+		clock = 0;
+                wbd_ext_cyc_i ='h0;  // strobe/request
+                wbd_ext_stb_i ='h0;  // strobe/request
+                wbd_ext_adr_i ='h0;  // address
+                wbd_ext_we_i  ='h0;  // write
+                wbd_ext_dat_i ='h0;  // data output
+                wbd_ext_sel_i ='h0;  // byte enable
+	end
+
+	`ifdef WFDUMP
+	   initial begin
+	   	$dumpfile("simx.vcd");
+	   	$dumpvars(2, user_aes_tb);
+	   	$dumpvars(0, user_aes_tb.u_top.u_riscv_top);
+	   	$dumpvars(0, user_aes_tb.u_top.u_uart_i2c_usb_spi.u_uart0_core);
+	   end
+       `endif
+
+	initial begin
+               uart_data_bit           = 2'b11;
+               uart_stop_bits          = 0; // 0: 1 stop bit; 1: 2 stop bit;
+               uart_stick_parity       = 0; // 1: force even parity
+               uart_parity_en          = 0; // parity enable
+               uart_even_odd_parity    = 1; // 0: odd parity; 1: even parity
+               uart_divisor            = 15;// divided by n * 16
+               uart_timeout            = 500;// wait time limit
+               uart_fifo_enable        = 0;	// fifo mode disable
+
+               #200; // Wait for reset removal
+               repeat (10) @(posedge clock);
+               $display("Monitor: Standalone User Uart Test Started");
+               
+               // Remove Wb Reset
+               wb_user_core_write(`ADDR_SPACE_WBHOST+`WBHOST_GLBL_CFG,'h1);
+
+               // Enable UART Multi Functional Ports
+               wb_user_core_write(`ADDR_SPACE_PINMUX+`PINMUX_GPIO_MULTI_FUNC,'h100);
+               
+               repeat (2) @(posedge clock);
+               #1;
+               // Remove all the reset
+               wb_user_core_write(`ADDR_SPACE_PINMUX+`PINMUX_GBL_CFG0,'h11F);
+
+               repeat (100) @(posedge clock);  // wait for Processor Get Ready
+
+               tb_uart.uart_init;
+               wb_user_core_write(`ADDR_SPACE_UART0+8'h0,{3'h0,2'b00,1'b1,1'b1,1'b1});  
+               tb_uart.control_setup (uart_data_bit, uart_stop_bits, uart_parity_en, uart_even_odd_parity, 
+                                              uart_stick_parity, uart_timeout, uart_divisor);
+
+		// Set the PORT-B Direction as Output
+                wb_user_core_write(`ADDR_SPACE_PINMUX+`PINMUX_GPIO_DSEL,'h0000FF00);
+		// Set the GPIO Output data: 0x00000000
+                wb_user_core_write(`ADDR_SPACE_PINMUX+`PINMUX_GPIO_ODATA,'h0000000);
+   
+               fork
+	          begin
+                     repeat (1400000) @(posedge clock); 
+	          end
+	          begin
+                     wait(port_b_in == 8'h18);
+	          end
+               join_any
+	
+	       wb_user_core_read_check(`ADDR_SPACE_PINMUX+`PINMUX_SOFT_REG_2,read_data,32'h00000000);
+
+               $display("###################################################");
+               if(test_fail == 0) begin
+                  `ifdef GL
+                      $display("Monitor: Standalone User AES Test (GL) Passed");
+                  `else
+                      $display("Monitor: Standalone User AES Test (RTL) Passed");
+                  `endif
+               end else begin
+                   `ifdef GL
+                       $display("Monitor: Standalone User AES Test (GL) Failed");
+                   `else
+                       $display("Monitor: Standalone User AES Test (RTL) Failed");
+                   `endif
+                end
+               $display("###################################################");
+               #100
+               $finish;
+
+	end
+
+	initial begin
+		wb_rst_i <= 1'b1;
+		#100;
+		wb_rst_i <= 1'b0;	    	// Release reset
+	end
+wire USER_VDD1V8 = 1'b1;
+wire VSS = 1'b0;
+
+user_project_wrapper u_top(
+`ifdef USE_POWER_PINS
+    .vccd1(USER_VDD1V8),	// User area 1 1.8V supply
+    .vssd1(VSS),	// User area 1 digital ground
+`endif
+    .wb_clk_i        (clock),  // System clock
+    .user_clock2     (1'b1),  // Real-time clock
+    .wb_rst_i        (wb_rst_i),  // Regular Reset signal
+
+    .wbs_cyc_i   (wbd_ext_cyc_i),  // strobe/request
+    .wbs_stb_i   (wbd_ext_stb_i),  // strobe/request
+    .wbs_adr_i   (wbd_ext_adr_i),  // address
+    .wbs_we_i    (wbd_ext_we_i),  // write
+    .wbs_dat_i   (wbd_ext_dat_i),  // data output
+    .wbs_sel_i   (wbd_ext_sel_i),  // byte enable
+
+    .wbs_dat_o   (wbd_ext_dat_o),  // data input
+    .wbs_ack_o   (wbd_ext_ack_o),  // acknowlegement
+
+ 
+    // Logic Analyzer Signals
+    .la_data_in      ('1) ,
+    .la_data_out     (),
+    .la_oenb         ('0),
+ 
+
+    // IOs
+    .io_in          (io_in)  ,
+    .io_out         (io_out) ,
+    .io_oeb         (io_oeb) ,
+
+    .user_irq       () 
+
+);
+
+`ifndef GL // Drive Power for Hold Fix Buf
+    // All standard cell need power hook-up for functionality work
+    initial begin
+
+    end
+`endif    
+
+//------------------------------------------------------
+//  Integrate the Serial flash with qurd support to
+//  user core using the gpio pads
+//  ----------------------------------------------------
+
+   wire flash_clk = io_out[24];
+   wire flash_csb = io_out[25];
+   // Creating Pad Delay
+   wire #1 io_oeb_29 = io_oeb[29];
+   wire #1 io_oeb_30 = io_oeb[30];
+   wire #1 io_oeb_31 = io_oeb[31];
+   wire #1 io_oeb_32 = io_oeb[32];
+   tri  #1 flash_io0 = (io_oeb_29== 1'b0) ? io_out[29] : 1'bz;
+   tri  #1 flash_io1 = (io_oeb_30== 1'b0) ? io_out[30] : 1'bz;
+   tri  #1 flash_io2 = (io_oeb_31== 1'b0) ? io_out[31] : 1'bz;
+   tri  #1 flash_io3 = (io_oeb_32== 1'b0) ? io_out[32] : 1'bz;
+
+   assign io_in[29] = flash_io0;
+   assign io_in[30] = flash_io1;
+   assign io_in[31] = flash_io2;
+   assign io_in[32] = flash_io3;
+
+   // Quard flash
+     s25fl256s #(.mem_file_name("user_aes.hex"),
+	         .otp_file_name("none"),
+                 .TimingModel("S25FL512SAGMFI010_F_30pF")) 
+		 u_spi_flash_256mb (
+           // Data Inputs/Outputs
+       .SI      (flash_io0),
+       .SO      (flash_io1),
+       // Controls
+       .SCK     (flash_clk),
+       .CSNeg   (flash_csb),
+       .WPNeg   (flash_io2),
+       .HOLDNeg (flash_io3),
+       .RSTNeg  (!wb_rst_i)
+
+       );
+
+
+//---------------------------
+//  UART Agent integration
+// --------------------------
+wire uart_txd,uart_rxd;
+
+assign uart_txd   = io_out[2];
+assign io_in[1]  = uart_rxd ;
+ 
+uart_agent tb_uart(
+	.mclk                (clock              ),
+	.txd                 (uart_rxd           ),
+	.rxd                 (uart_txd           )
+	);
+
+
+task wb_user_core_write;
+input [31:0] address;
+input [31:0] data;
+begin
+  repeat (1) @(posedge clock);
+  #1;
+  wbd_ext_adr_i =address;  // address
+  wbd_ext_we_i  ='h1;  // write
+  wbd_ext_dat_i =data;  // data output
+  wbd_ext_sel_i ='hF;  // byte enable
+  wbd_ext_cyc_i ='h1;  // strobe/request
+  wbd_ext_stb_i ='h1;  // strobe/request
+  wait(wbd_ext_ack_o == 1);
+  repeat (1) @(posedge clock);
+  #1;
+  wbd_ext_cyc_i ='h0;  // strobe/request
+  wbd_ext_stb_i ='h0;  // strobe/request
+  wbd_ext_adr_i ='h0;  // address
+  wbd_ext_we_i  ='h0;  // write
+  wbd_ext_dat_i ='h0;  // data output
+  wbd_ext_sel_i ='h0;  // byte enable
+  $display("DEBUG WB USER ACCESS WRITE Address : %x, Data : %x",address,data);
+  repeat (2) @(posedge clock);
+end
+endtask
+
+task  wb_user_core_read;
+input [31:0] address;
+output [31:0] data;
+reg    [31:0] data;
+begin
+  repeat (1) @(posedge clock);
+  #1;
+  wbd_ext_adr_i =address;  // address
+  wbd_ext_we_i  ='h0;  // write
+  wbd_ext_dat_i ='0;  // data output
+  wbd_ext_sel_i ='hF;  // byte enable
+  wbd_ext_cyc_i ='h1;  // strobe/request
+  wbd_ext_stb_i ='h1;  // strobe/request
+  wait(wbd_ext_ack_o == 1);
+  repeat (1) @(negedge clock);
+  data  = wbd_ext_dat_o;  
+  repeat (1) @(posedge clock);
+  #1;
+  wbd_ext_cyc_i ='h0;  // strobe/request
+  wbd_ext_stb_i ='h0;  // strobe/request
+  wbd_ext_adr_i ='h0;  // address
+  wbd_ext_we_i  ='h0;  // write
+  wbd_ext_dat_i ='h0;  // data output
+  wbd_ext_sel_i ='h0;  // byte enable
+  $display("DEBUG WB USER ACCESS READ Address : %x, Data : %x",address,data);
+  repeat (2) @(posedge clock);
+end
+endtask
+
+task  wb_user_core_read_check;
+input [31:0] address;
+output [31:0] data;
+input [31:0] cmp_data;
+reg    [31:0] data;
+begin
+  repeat (1) @(posedge clock);
+  #1;
+  wbd_ext_adr_i =address;  // address
+  wbd_ext_we_i  ='h0;  // write
+  wbd_ext_dat_i ='0;  // data output
+  wbd_ext_sel_i ='hF;  // byte enable
+  wbd_ext_cyc_i ='h1;  // strobe/request
+  wbd_ext_stb_i ='h1;  // strobe/request
+  wait(wbd_ext_ack_o == 1);
+  repeat (1) @(negedge clock);
+  data  = wbd_ext_dat_o;  
+  repeat (1) @(posedge clock);
+  #1;
+  wbd_ext_cyc_i ='h0;  // strobe/request
+  wbd_ext_stb_i ='h0;  // strobe/request
+  wbd_ext_adr_i ='h0;  // address
+  wbd_ext_we_i  ='h0;  // write
+  wbd_ext_dat_i ='h0;  // data output
+  wbd_ext_sel_i ='h0;  // byte enable
+  if(data !== cmp_data) begin
+     $display("ERROR : WB USER ACCESS READ  Address : 0x%x, Exd: 0x%x Rxd: 0x%x ",address,cmp_data,data);
+     test_fail = 1;
+  end else begin
+     $display("STATUS: WB USER ACCESS READ  Address : 0x%x, Data : 0x%x",address,data);
+  end
+  repeat (2) @(posedge clock);
+end
+endtask
+
+`ifdef GL
+
+wire        wbd_spi_stb_i   = u_top.u_qspi_master.wbd_stb_i;
+wire        wbd_spi_ack_o   = u_top.u_qspi_master.wbd_ack_o;
+wire        wbd_spi_we_i    = u_top.u_qspi_master.wbd_we_i;
+wire [31:0] wbd_spi_adr_i   = u_top.u_qspi_master.wbd_adr_i;
+wire [31:0] wbd_spi_dat_i   = u_top.u_qspi_master.wbd_dat_i;
+wire [31:0] wbd_spi_dat_o   = u_top.u_qspi_master.wbd_dat_o;
+wire [3:0]  wbd_spi_sel_i   = u_top.u_qspi_master.wbd_sel_i;
+
+wire        wbd_uart_stb_i  = u_top.u_uart_i2c_usb_spi.reg_cs;
+wire        wbd_uart_ack_o  = u_top.u_uart_i2c_usb_spi.reg_ack;
+wire        wbd_uart_we_i   = u_top.u_uart_i2c_usb_spi.reg_wr;
+wire [8:0]  wbd_uart_adr_i  = u_top.u_uart_i2c_usb_spi.reg_addr;
+wire [7:0]  wbd_uart_dat_i  = u_top.u_uart_i2c_usb_spi.reg_wdata;
+wire [7:0]  wbd_uart_dat_o  = u_top.u_uart_i2c_usb_spi.reg_rdata;
+wire        wbd_uart_sel_i  = u_top.u_uart_i2c_usb_spi.reg_be;
+
+`endif
+
+/**
+`ifdef GL
+//-----------------------------------------------------------------------------
+// RISC IMEM amd DMEM Monitoring TASK
+//-----------------------------------------------------------------------------
+
+`define RISC_CORE  user_uart_tb.u_top.u_core.u_riscv_top
+
+always@(posedge `RISC_CORE.wb_clk) begin
+    if(`RISC_CORE.wbd_imem_ack_i)
+          $display("RISCV-DEBUG => IMEM ADDRESS: %x Read Data : %x", `RISC_CORE.wbd_imem_adr_o,`RISC_CORE.wbd_imem_dat_i);
+    if(`RISC_CORE.wbd_dmem_ack_i && `RISC_CORE.wbd_dmem_we_o)
+          $display("RISCV-DEBUG => DMEM ADDRESS: %x Write Data: %x Resonse: %x", `RISC_CORE.wbd_dmem_adr_o,`RISC_CORE.wbd_dmem_dat_o);
+    if(`RISC_CORE.wbd_dmem_ack_i && !`RISC_CORE.wbd_dmem_we_o)
+          $display("RISCV-DEBUG => DMEM ADDRESS: %x READ Data : %x Resonse: %x", `RISC_CORE.wbd_dmem_adr_o,`RISC_CORE.wbd_dmem_dat_i);
+end
+
+`endif
+**/
+endmodule
+`include "s25fl256s.sv"
+`default_nettype wire