add spinal cocotb code
diff --git a/README.md b/README.md
index 4acbc90..4602c82 100644
--- a/README.md
+++ b/README.md
@@ -49,26 +49,26 @@
 
 ## Code Structure
 
-To compatible with Caravel user project, the Spinal HDL, cocotb code is seperated from Caravel user project code.
+To be compatible with the Caravel user project, the Spinal HDL and cocotb code is separated from the Caravel user project code.
 
 ### Spinal HDL and cocotb Code
 
 The Spinal HDL and cocotb code is located in [spinal-cocotb](./spinal-cocotb). Some important code files are:
 * [DmaController.scala](./spinal-cocotb/SpinalNet/src/main/scala/dma/DmaController.scala) is the DMA controller implemented in Spinal HDL;
 * [DmaMem.scala](./spinal-cocotb/SpinalNet/src/main/scala/dma/DmaMem.scala) is the top module that connect the DMA controller and the SDRAM controller;
-* [DmaControllerSim.scala](./spinal-cocotb/SpinalNet/src/main/scala/sdram/DmaControllerSim.scala) is the DMA controller simulation implemented in Spinal HDL;
+* [DmaControllerSim.scala](./spinal-cocotb/SpinalNet/src/main/scala/dma/DmaControllerSim.scala) is the DMA controller simulation implemented in Spinal HDL;
 * [SdramController.scala](./spinal-cocotb/SpinalNet/src/main/scala/sdram/SdramController.scala) is the SDRAM controller implemented in Spinal HDL;
-* [SdramControllerTest.py](./spinal-cocotb/SpinalNet/test/src/python/sdram/SdramControllerTest.py) is the SDRAM controller test bench implemented in cocotb.
+* [SdramControllerTest.py](./spinal-cocotb/SpinalNet/test/src/python/sdram_controller/SdramControllerTest.py) is the SDRAM controller test bench implemented in cocotb.
 
 To build and test the Spinal HDL code, there are two shell scripts:
 * [setup.sh](./spinal-cocotb/setup.sh) is the script to install the dependent libraries;
 * [run.sh](./spinal-cocotb/run.sh) is the script to build and run the Spinal HDL and cocotb code.
 
-The `run.sh` script will generate the Verilog code, [DmaMem.v](./spinal-cocotb/DmaMem.v), used in Caravel user project for post processing.
+The `run.sh` script will generate the Verilog code, `DmaMem.v`, under [spinal-cocotb](./spinal-cocotb), which is used in Caravel user project for post-processing.
 `DmaMem.v` will be moved to the Verilog code directory of the Caravel user project.
 
 ### Caravel Code
 
 The Caravel user project follows the code structure of the example project exactly. Some important code files are:
 * [DmaMem.v](./verilog/rtl/DmaMem.v) is the Verilog code generated from `DmaMem.scala` by Spinal HDL;
-* [axi_dma.v](./verilog/rtl/axi_dma.v) is the wrapper to `DmaMem.v`, and is instantiated in `user_project_wrapper.v`;
+* [axi_dma.v](./verilog/rtl/axi_dma.v) is the wrapper to `DmaMem.v`, and is instantiated in `user_project_wrapper.v`;
\ No newline at end of file
diff --git a/spinal-cocotb/SpinalNet/src/main/scala/dma/AxiMemorySim.scala b/spinal-cocotb/SpinalNet/src/main/scala/dma/AxiMemorySim.scala
new file mode 100644
index 0000000..549d094
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/src/main/scala/dma/AxiMemorySim.scala
@@ -0,0 +1,601 @@
+package dma
+
+import spinal.core._
+import spinal.sim._
+import spinal.core.sim._
+import spinal.lib._
+import spinal.lib.bus.amba4.axi._
+
+import scala.collection.mutable
+import scala.collection.concurrent.TrieMap
+import java.nio.file.Paths
+import java.nio.file.Files
+import java.awt.image.BufferedImage
+import java.io.ByteArrayInputStream
+import javax.imageio.ImageIO
+import java.io.File
+import java.awt.image.Raster
+import java.awt.image.DataBufferByte
+import scala.util.Random
+
+/** Fixed-size page of byte storage backing the sparse simulation memory.
+  *
+  * @param size Capacity of the page in bytes.
+  */
+@SuppressWarnings(Array("scalafix:DisableSyntax.var"))
+class MemoryPage(size: Int) {
+  val data = new Array[Byte](size)
+
+  /** Overwrites every byte of the page with `value`. */
+  def clear(value: Byte): Unit =
+    java.util.Arrays.fill(data, value)
+
+  /** Returns the byte stored at `offset`. */
+  def read(offset: Int): Byte = data(offset)
+
+  /** Stores the byte `data` at `offset`. */
+  def write(offset: Int, data: Byte): Unit =
+    this.data.update(offset, data)
+
+  /** Copies bytes out of this page.
+    *
+    * @param offset Offset into the page of the first byte to read.
+    * @param len Number of bytes requested.
+    * @return A new array holding the bytes read; shorter than `len` when the
+    *         request runs past the end of the page.
+    */
+  def readArray(offset: Int, len: Int): Array[Byte] = {
+    val count = scala.math.min(len, size - offset)
+    val out = new Array[Byte](count)
+    var idx = 0
+    while (idx < count) {
+      out(idx) = data(offset + idx)
+      idx += 1
+    }
+    out
+  }
+
+  /** Copies bytes into this page.
+    *
+    * @param offset Offset into the page of the first byte to overwrite.
+    * @param data Source bytes.
+    * @return Number of bytes stored; less than `data.length` when the write
+    *         runs past the end of the page.
+    */
+  def writeArray(offset: Int, data: Array[Byte]): Int = {
+    val count = scala.math.min(data.length, size - offset)
+    (0 until count).foreach { k =>
+      this.data(offset + k) = data(k)
+    }
+    count
+  }
+}
+
+/** Sparse byte-addressable memory model built from lazily allocated 1 MiB pages.
+  *
+  * Pages are created on first write and pre-filled with 0xcd; reading a page
+  * that was never written yields 0xef bytes without allocating it.
+  */
+@SuppressWarnings(Array("scalafix:DisableSyntax.null", "scalafix:DisableSyntax.var"))
+case class SparseMemory() {
+  val memory = Array.fill[MemoryPage](4096)(null)
+
+  /** Creates a fresh page filled with 0xcd. */
+  def allocPage(): MemoryPage = {
+    val page = new MemoryPage(1024 * 1024)
+    page.clear(0xcd.toByte)
+    page
+  }
+
+  /** Creates a throw-away page filled with 0xef, used to answer unmapped reads. */
+  def invalidPage(): MemoryPage = {
+    val page = new MemoryPage(1024 * 1024)
+    page.clear(0xef.toByte)
+    page
+  }
+
+  /** Returns the page at `index`, allocating (and logging) it on first use. */
+  def getElseAllocPage(index: Int): MemoryPage = {
+    if (memory(index) == null) {
+      println(s"Adding page ${index} at 0x${(index << 20).toHexString}")
+      memory(index) = allocPage()
+    }
+    memory(index)
+  }
+
+  /** Returns the page at `index`, or a 0xef-filled dummy page if it was never allocated. */
+  def getElseInvalidPage(index: Int): MemoryPage = {
+    if (memory(index) == null) {
+      println(
+        s"Page fault while reading page ${index} (0x${(index << 20).toHexString})"
+      )
+      invalidPage()
+    } else
+      memory(index)
+  }
+
+  /** Index of the 1 MiB page containing `address`. */
+  def getPageIndex(address: Long): Int = (address >> 20).toInt
+
+  /** Byte offset of `address` inside its page (the low 20 address bits). */
+  def getOffset(address: Long): Int = (address & ((1 << 20) - 1)).toInt
+
+  /** Reads one byte; addresses in unallocated pages read as 0xef. */
+  def read(address: Long): Byte =
+    getElseInvalidPage(getPageIndex(address)).read(getOffset(address))
+
+  /** Writes one byte, allocating the containing page if necessary. */
+  def write(address: Long, data: Byte): Unit =
+    getElseAllocPage(getPageIndex(address)).write(getOffset(address), data)
+
+  /** Reads `len` bytes starting at `address`, continuing across page boundaries.
+    *
+    * @param address Address of the first byte.
+    * @param len Number of bytes to read.
+    * @return The bytes read; unallocated pages contribute 0xef bytes.
+    */
+  def readArray(address: Long, len: Long): Array[Byte] = {
+    val startPageIndex = getPageIndex(address)
+    val endPageIndex = getPageIndex(address + len - 1)
+    var offset = getOffset(address)
+    val buffer = new mutable.ArrayBuffer[Byte](0)
+    for (i <- startPageIndex to endPageIndex) {
+      val page = getElseInvalidPage(i)
+      buffer.appendAll(page.readArray(offset, len.toInt - buffer.length))
+      offset = 0 // every page after the first is read from its start
+    }
+    buffer.toArray
+  }
+
+  /** Writes `data` starting at `address`, continuing across page boundaries.
+    *
+    * @param address Address of the first byte.
+    * @param data Bytes to store.
+    */
+  def writeArray(address: Long, data: Array[Byte]): Unit = {
+    val startPageIndex = getPageIndex(address)
+    val endPageIndex = getPageIndex(address + data.length - 1)
+    var offset = getOffset(address)
+    // Bytes still to be stored. `drop` returns a new array, so its result must
+    // be kept; otherwise every following page receives the head of `data` again.
+    var remaining = data
+    for (i <- startPageIndex to endPageIndex) {
+      val bytesWritten = getElseAllocPage(i).writeArray(offset, remaining)
+      remaining = remaining.drop(bytesWritten)
+      offset = 0
+    }
+  }
+
+  /** Writes only the bytes of `data` whose strobe bit is set.
+    * Bit `i` of `strb` enables `data(i)`, which is stored at `address + i`. A Long
+    * holds 64 strobe bits, so bus widths of up to 512 bits are supported.
+    */
+  def writeArrayStrb(address: Long, data: Array[Byte], strb: Long): Unit = {
+    for (i <- 0 until data.length) {
+      // The mask is built as a Long and tested with != 0: an Int shift wraps
+      // at bit 32, and selecting bit 63 gives a negative value.
+      if ((strb & (1L << i)) != 0) {
+        write(address + i, data(i))
+      }
+    }
+  }
+
+  /** Reads an unsigned little-endian integer from memory.
+    *
+    * @param address Read address.
+    * @param length Number of bytes to read.
+    * @return Non-negative BigInt read from the given address.
+    */
+  def readBigInt(address: Long, length: Int): BigInt = {
+    // BigInt takes big-endian two's complement bytes: reverse the little-endian
+    // data and prepend 0x00 so the value is never interpreted as negative.
+    BigInt(0.toByte +: readArray(address, length).reverse)
+  }
+
+  /** Little-endian bytes of `data`, trimmed or zero-padded to exactly `width` bytes. */
+  private def toLittleEndianBytes(data: BigInt, width: Int): Array[Byte] =
+    data.toByteArray.reverse.take(width).padTo(width, 0.toByte)
+
+  /** Writes a BigInt value to the given address.
+    * The BigInt will be resized to a byte Array of given width.
+    * The data will be trimmed if it is bigger than the given width.
+    * If it is smaller, the unused bytes will be filled with '0x00'.
+    *
+    * @param address Write address.
+    * @param data Data to be written.
+    * @param width Width of the byte Array the data is resized to (if necessary).
+    */
+  def writeBigInt(address: Long, data: BigInt, width: Int): Unit =
+    writeArray(address, toLittleEndianBytes(data, width))
+
+  /** Like `writeBigInt`, but only the bytes enabled by `strb` are stored.
+    *
+    * @param address Write address.
+    * @param data Data to be written.
+    * @param width Width in bytes the data is resized to.
+    * @param strb Byte strobes; bit `i` enables byte `i` of the resized data.
+    */
+  def writeBigIntStrb(
+      address: Long,
+      data: BigInt,
+      width: Int,
+      strb: Long
+  ): Unit =
+    writeArrayStrb(address, toLittleEndianBytes(data, width), strb)
+
+  /** Copies the whole content of `file` into memory starting at `address`. */
+  def loadBinary(address: Long, file: String): Unit = {
+    val byteArray = Files.readAllBytes(Paths.get(file))
+    writeArray(address, byteArray)
+    println(
+      s"Loading 0x${byteArray.length.toHexString} bytes from ${file} to 0x${address.toHexString}"
+    )
+  }
+
+  /** Fills `length` consecutive `width`-byte words, each holding its own address. */
+  def loadDebugSequence(address: Long, length: Int, width: Int): Unit =
+    for (i <- 0 until length)
+      writeBigInt(address + i * width, BigInt(address + i * width), width)
+
+  //def saveImage(address : Long, len : Long, file : String) : Unit = {
+  //  val byteArray = readArray(address, len)
+  //  val img = new BufferedImage(480, 640, BufferedImage.TYPE_INT_RGB)
+  //  img.setData(Raster.createRaster(img.getSampleModel(), new DataBufferByte(byteArray, byteArray.length), null))
+  //  ImageIO.write(img, "png", new File(file));
+  //}
+
+  /** Saves `len` bytes starting at `address` to `file`. */
+  def saveBinary(address: Long, len: Long, file: String): Unit = {
+    val byteArray = readArray(address, len)
+    Files.write(Paths.get(file), byteArray)
+
+    println(
+      s"Saving 0x${len.toHexString} bytes from 0x${address.toHexString} to ${file}"
+    )
+  }
+}
+
+/** One accepted AXI burst waiting to be served by the simulated memory.
+  *
+  * @param address Address taken from the AR/AW channel.
+  * @param burstLength Raw AxLEN value (beats - 1); loops run `0 to burstLength`.
+  * @param id Transaction id taken from the AR/AW channel.
+  */
+case class AxiJob(address: Long, burstLength: Int, id: Int)
+
+/** Configuration class for the AxiMemorySim.
+  *
+  * @param maxOutstandingReads  Read commands queued before ARREADY is held low.
+  * @param maxOutstandingWrites Write commands queued before AWREADY is held low.
+  * @param readResponseDelay    Currently unused by AxiMemorySim (read delay is still a todo).
+  * @param writeResponseDelay   Sampling edges between the last write beat and BVALID.
+  * @param interruptProbability Chance in percent of stalling before a data beat.
+  * @param interruptMaxDelay    Upper bound, in sampling edges, of such a stall.
+  * @param useAlteraBehavior    Couple write command and write channel as in the Altera Cyclone 5 F2H_SDRAM port.
+  */
+case class AxiMemorySimConfig(
+    maxOutstandingReads: Int = 8, maxOutstandingWrites: Int = 8,
+    readResponseDelay: Int = 0, writeResponseDelay: Int = 0,
+    interruptProbability: Int = 0, interruptMaxDelay: Int = 0,
+    useAlteraBehavior: Boolean = false)
+
+@SuppressWarnings(
+  Array(
+    "scalafix:DisableSyntax.var"
+  )
+)
+case class AxiMemorySim(
+    axi: Axi4,
+    clockDomain: ClockDomain,
+    config: AxiMemorySimConfig
+) {
+  val memory = SparseMemory()
+  val pending_reads = new mutable.Queue[AxiJob]
+  val pending_writes = new mutable.Queue[AxiJob]
+
+  /** Bus word width in bytes */
+  val busWordWidth = axi.config.dataWidth / 8
+
+  /** Bundles an accepted command into an AxiJob. */
+  def newAxiJob(address: Long, burstLength: Int, id: Int): AxiJob =
+    AxiJob(address = address, burstLength = burstLength, id = id)
+
+  /** Forks one simulation thread per AXI channel handler.
+    *
+    * The read side always runs the AR and R handlers. The write side runs
+    * either the coupled Altera-style handler or separate AW and W/B handlers,
+    * as selected by `config.useAlteraBehavior`.
+    */
+  def start(): Unit = {
+    // Read path: command and data channels run as independent threads.
+    fork { handleAr(axi.ar) }
+    fork { handleR(axi.r) }
+
+    // Write path
+    config.useAlteraBehavior match {
+      case true =>
+        // Coupled AW/W/B handling
+        fork { handleAwAndW(axi.w, axi.aw, axi.b) }
+
+      case false =>
+        // Independent command and data/response threads
+        fork { handleAw(axi.aw) }
+        fork { handleW(axi.w, axi.b) }
+    }
+  }
+
+  /** Simulation thread for the AR channel.
+    * Accepts read commands one at a time and queues each as a read job.
+    */
+  def handleAr(ar: Stream[Axi4Ar]): Unit = {
+    println("Handling AXI4 Master read cmds...")
+    ar.ready #= false
+
+    while (true) {
+      // Offer ARREADY until a sampling edge sees ARVALID, then drop it again.
+      ar.ready #= true
+      clockDomain.waitSamplingWhere(ar.valid.toBoolean)
+      ar.ready #= false
+
+      // The burst must stay inside one 4 KiB window. The beat size is taken as
+      // 2^(size - 3) bytes, i.e. this check treats ARSIZE as log2 of the beat width in bits.
+      // NOTE(review): AXI defines AxSIZE as log2(bytes per beat) — confirm against the master.
+      assert(
+        assertion = 4096 >= (
+          (ar.payload.len.toLong + 1) * scala.math
+            .pow(2, ar.payload.size.toInt - 3)
+            .toInt + (ar.payload.addr.toBigInt & 4095)
+        ),
+        message = s"""
+          Read request crossing 4k boundary (
+            addr=${ar.payload.addr.toBigInt},
+            len=${ar.payload.len.toLong + 1}
+            size=${ar.payload.size.toLong}
+            end=${(ar.payload.len.toLong + 1) * scala.math
+          .pow(2, ar.payload.size.toInt - 3)
+          .toInt + (ar.payload.addr.toBigInt & 4095)}
+          )"""
+      )
+
+      println(
+        s"create new AXI read job with id=${ar.payload.id.toInt}, addr=${ar.payload.addr.toLong}, burst len=${ar.payload.len.toInt}"
+      )
+      pending_reads += newAxiJob(
+        ar.payload.addr.toLong,
+        ar.payload.len.toInt,
+        ar.payload.id.toInt
+      )
+
+      // Back-pressure: stop accepting commands while the job queue is full.
+      if (pending_reads.length >= config.maxOutstandingReads)
+        clockDomain.waitSamplingWhere(
+          pending_reads.length < config.maxOutstandingReads
+        )
+    }
+  }
+
+  /** Simulation thread for the R channel.
+    * Takes queued read jobs one at a time and sends their data beats from memory.
+    */
+  def handleR(r: Stream[Axi4R]): Unit = {
+    println("Handling AXI4 Master read resp...")
+    val random = Random
+
+    r.valid #= false
+    r.payload.last #= false
+
+    while (true) {
+      clockDomain.waitSampling(1)
+
+      // todo: implement read issuing delay
+      if (pending_reads.nonEmpty) {
+        // The job is removed from the queue before its data is sent.
+        var job = pending_reads.dequeue()
+        r.payload.id #= job.id
+        r.valid #= true
+
+        // Beat index; the burst has burstLength + 1 beats.
+        var i = 0
+        while (i <= job.burstLength) {
+          if (config.interruptProbability > random.nextInt(100)) {
+            // Random stall: RVALID is dropped for 0..interruptMaxDelay sampling edges.
+            r.valid #= false
+            clockDomain.waitSampling(
+              random.nextInt(config.interruptMaxDelay + 1)
+            )
+            r.valid #= true
+          } else {
+            if (i == job.burstLength)
+              r.payload.last #= true
+            r.payload.data #= memory.readBigInt(
+              job.address + i * busWordWidth,
+              busWordWidth
+            )
+            clockDomain.waitSamplingWhere(
+              r.ready.toBoolean
+            ) // the beat is held until RREADY is sampled high
+            i = i + 1
+            val addr = job.address + (i - 1) * busWordWidth
+            println(
+              f"AXI4 burst ${i}-th read: addr=$addr%d, data=${r.payload.data.toBigInt}%x, width=${axi.config.dataWidth}, len=${job.burstLength + 1}"
+            )
+          }
+        }
+
+        r.valid #= false
+        r.payload.last #= false
+
+        println(
+          "AXI4 read rsp: addr=0x" + job.address.toLong.toHexString + " count=" + (job.burstLength + 1)
+        )
+      }
+    }
+  }
+
+  /** Simulation thread for the AW channel.
+    * Accepts write commands one at a time and queues each as a write job.
+    */
+  def handleAw(aw: Stream[Axi4Aw]): Unit = {
+    println("Handling AXI4 Master write cmds...")
+    aw.ready #= false
+    while (true) {
+      aw.ready #= true
+      clockDomain.waitSamplingWhere(aw.valid.toBoolean)
+      aw.ready #= false
+      // The burst must stay inside one 4 KiB window; the beat size is taken as 2^(size - 3) bytes.
+      assert(
+        assertion = 4096 >= (
+          (aw.payload.len.toLong + 1) * scala.math
+            .pow(2, aw.payload.size.toInt - 3)
+            .toInt + (aw.payload.addr.toBigInt & 4095)
+        ),
+        message = s"""
+          Write request crossing 4k boundary (
+            addr=${aw.payload.addr.toBigInt},
+            len=${aw.payload.len.toLong + 1}
+            size=${aw.payload.size.toLong}
+            end=${(aw.payload.len.toLong + 1) * scala.math
+          .pow(2, aw.payload.size.toInt - 3)
+          .toInt + (aw.payload.addr.toBigInt & 4095)}
+          )"""
+      )
+
+      println(
+        s"create new AXI write job with id=${aw.payload.id.toInt}, addr=${aw.payload.addr.toLong}, burst len=${aw.payload.len.toInt}"
+      )
+      pending_writes += newAxiJob(
+        aw.payload.addr.toLong,
+        aw.payload.len.toInt,
+        aw.payload.id.toInt
+      )
+
+      // Back-pressure: stop accepting commands while the job queue is full.
+      if (pending_writes.length >= config.maxOutstandingWrites)
+        clockDomain.waitSamplingWhere(
+          pending_writes.length < config.maxOutstandingWrites
+        )
+    }
+  }
+
+  /** Simulation thread for the W and B channels (decoupled mode).
+    * Serves the oldest queued write job: takes its data beats, stores the
+    * strobed bytes in memory, then sends the write response.
+    */
+  def handleW(w: Stream[Axi4W], b: Stream[Axi4B]): Unit = {
+    println("Handling AXI4 Master write...")
+
+    w.ready #= false
+    b.valid #= false
+
+    while (true) {
+      // The job queue is polled every 10 sampling edges.
+      clockDomain.waitSampling(10)
+      if (pending_writes.nonEmpty) {
+        // Peek only: the job stays queued until its response has been accepted.
+        val job = pending_writes.front
+        val beats = job.burstLength + 1
+
+        w.ready #= true
+        for (i <- 0 until beats) {
+          clockDomain.waitSamplingWhere(w.valid.toBoolean)
+          val addr = job.address + i * busWordWidth
+          memory.writeBigIntStrb(
+            addr, w.payload.data.toBigInt, busWordWidth, w.payload.strb.toLong
+          )
+          println(
+            f"AXI4 burst ${i}-th write: addr=$addr, strb=${w.payload.strb.toLong}%x, data=${w.payload.data.toBigInt}%x, width=$busWordWidth, len=${job.burstLength + 1}"
+          )
+        }
+        w.ready #= false
+
+        clockDomain.waitSampling(config.writeResponseDelay)
+
+        b.valid #= true
+        b.payload.id #= job.id
+        b.payload.resp #= 0
+        clockDomain.waitSamplingWhere(b.ready.toBoolean)
+        b.valid #= false
+
+        pending_writes.dequeue()
+
+        println(
+          "AXI4 write: addr=0x" + job.address.toHexString + " count=" + beats
+        )
+      }
+    }
+  }
+
+  /** Handle write command, write, and write response channel as implemented
+    * by Altera/Intel on their Cyclone 5 platform.
+    * Their implementation behaves as all three channels are coupled. The
+    * implementation waits until all words for a write operation have been
+    * transferred. Then it asserts the AWREADY to accept the write command.
+    * After that, BVALID is asserted with BID set to the accepted AWID.
+    *
+    * @param w  AXI write channel
+    * @param aw AXI write command channel
+    * @param b  AXI write response channel
+    */
+  def handleAwAndW(
+      w: Stream[Axi4W],
+      aw: Stream[Axi4Aw],
+      b: Stream[Axi4B]
+  ): Unit = {
+    println(
+      "Handling AXI4 Master write cmds and write (Altera/Intel behavior)..."
+    )
+
+    val random = Random
+    aw.ready #= false
+    w.ready #= false
+    b.valid #= false
+
+    while (true) {
+      clockDomain.waitSamplingWhere(aw.valid.toBoolean && w.valid.toBoolean)
+      w.ready #= true
+      // NOTE(review): unlike handleAw, this check adds AWLEN (beats - 1) to the byte offset, not a byte count.
+      assert(
+        assertion =
+          (aw.payload.len.toBigInt + (aw.payload.addr.toBigInt & 4095)) <= 4095,
+        message =
+          s"Write request crossing 4k boundary (addr=${aw.payload.addr.toBigInt
+            .toString(16)}, len=${aw.payload.len.toLong.toHexString}"
+      )
+      // AWREADY is still low here, so the master is expected to keep presenting
+      // this command; capture it once for the data and response phases.
+      val awAddr = aw.payload.addr.toLong
+      val awLen = aw.payload.len.toInt
+      val awId = aw.payload.id.toInt
+      var i = 0
+      while (i <= awLen) {
+        if (config.interruptProbability > random.nextInt(100)) {
+          w.ready #= false
+          clockDomain.waitSampling(random.nextInt(config.interruptMaxDelay + 1))
+          w.ready #= true
+        } else {
+          clockDomain.waitSamplingWhere(w.valid.toBoolean)
+          memory.writeBigIntStrb(
+            awAddr + i * busWordWidth,
+            w.payload.data.toBigInt,
+            busWordWidth,
+            w.payload.strb.toLong // honor WSTRB, as handleW does
+          )
+          i = i + 1
+        }
+      }
+
+      aw.ready #= true
+      clockDomain.waitSampling(1)
+      aw.ready #= false
+      w.ready #= false
+
+      // Handle write response
+      clockDomain.waitSampling(config.writeResponseDelay)
+      b.valid #= true
+      b.payload.id #= awId // BID echoes the AWID of the write being answered
+      b.payload.resp #= 0
+      clockDomain.waitSamplingWhere(b.ready.toBoolean)
+      b.valid #= false
+    }
+  }
+}
diff --git a/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaController.scala b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaController.scala
new file mode 100644
index 0000000..4e71807
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaController.scala
@@ -0,0 +1,754 @@
+package dma
+
+import spinal.core._
+import spinal.lib._
+import spinal.lib.bus.amba4.axi._
+import spinal.lib.fsm._
+
+/** Static parameters of the DMA engine and of the AXI4 bus it masters.
+  * `burstLen` is limited to 256 beats and to `xySizeMax`; `dataWidth` must be
+  * a multiple of 8 bits.
+  */
+case class DmaConfig(
+    addressWidth: Int = 32,
+    bufDepth: Int = 24,
+    burstLen: Int = 16,
+    dataWidth: Int = 32,
+    littleEndien: Boolean = true,
+    idWidth: Int = 4,
+    xySizeMax: Int = 65536
+) {
+  val busByteSize = dataWidth / 8
+  val burstLenWidth = 8 // log2Up(burstLen)
+  val burstByteSize = burstLen * busByteSize
+  val fullStrbBits = scala.math.pow(2, busByteSize).toInt - 1 // all bits valid; exact only while busByteSize < 31
+  val xySizeWidth = log2Up(xySizeMax)
+
+  require(dataWidth % 8 == 0, s"$dataWidth % 8 == 0 assert failed")
+  require(burstLen <= xySizeMax, s"$burstLen <= $xySizeMax assert failed")
+  require(burstLen <= 256, s"$burstLen <= 256 assert failed")
+  require(xySizeWidth < addressWidth, s"$xySizeWidth < $addressWidth assert failed")
+  val axiConfig = Axi4Config(
+    addressWidth = addressWidth,
+    dataWidth = dataWidth,
+    idWidth = idWidth,
+    useId = true,
+    useQos = false,
+    useRegion = false,
+    useLock = false,
+    useCache = false,
+    useProt = false
+  )
+}
+
+/** Top-level DMA: a read engine and a write engine sharing one AXI4 master port.
+  * Read data reaches the write engine through a queue of dmaConfig.bufDepth entries.
+  */
+class DmaController(dmaConfig: DmaConfig) extends Component {
+  val io = new Bundle {
+    val param = slave(Param(dmaConfig.addressWidth, dmaConfig.xySizeWidth))
+    val axi = master(Axi4(dmaConfig.axiConfig))
+    val ctrl = slave(Ctrl())
+  }
+
+  val read = new DmaRead(dmaConfig)
+  val write = new DmaWrite(dmaConfig)
+
+  // Both engines receive the same start/halt controls and the same parameters.
+  read.io.ctrl.start := io.ctrl.start
+  read.io.ctrl.halt := io.ctrl.halt
+  read.io.param := io.param
+  write.io.ctrl.start := io.ctrl.start
+  write.io.ctrl.halt := io.ctrl.halt
+  write.io.param := io.param
+  // The read output stream is buffered (Stream.queue) before it feeds the write engine.
+  write.io.din << read.io.dout.queue(dmaConfig.bufDepth)
+
+  io.ctrl.busy := read.io.ctrl.busy || write.io.ctrl.busy
+  io.ctrl.done := write.io.ctrl.done // No need to consider read done
+
+  // Connect the read-only and write-only halves to the full AXI4 interface.
+  io.axi << read.io.axiR
+  io.axi << write.io.axiW
+}
+
+class DmaRead(dmaConfig: DmaConfig) extends Component {
+  val io = new Bundle {
+    val dout = master(Stream(Bits(dmaConfig.dataWidth bits)))
+    val param = slave(Param(dmaConfig.addressWidth, dmaConfig.xySizeWidth))
+    val axiR = master(Axi4ReadOnly(dmaConfig.axiConfig))
+    val ctrl = slave(Ctrl())
+  }
+  // Default value; the read FSM drives done high when the last row has been read.
+  io.ctrl.done := False
+  val busyReg = Reg(Bool) init (False)
+  when(io.ctrl.start) {
+    busyReg := True
+  } elsewhen (io.ctrl.done) {
+    busyReg := False
+  }
+  io.ctrl.busy := busyReg
+
+  val id = 3 // AXI ID driven on ARID for every read command
+
+  val arValidReg = Reg(False) init (False)
+  val burstLenReg = Reg(UInt(dmaConfig.burstLenWidth bits)) init (0)
+  val nxtBurstLen = UInt(dmaConfig.burstLenWidth bits)
+
+  val curRowAddrReg = Reg(UInt(dmaConfig.addressWidth bits)) init (0)
+  val alignOffsetReg = Reg(UInt(log2Up(dmaConfig.busByteSize) bits)) init (0)
+  val alignOffsetNxt = UInt(
+    log2Up(dmaConfig.busByteSize) bits
+  ) // The alignment offset of row start address to bus width
+  val curAlignedAddrReg = Reg(UInt(dmaConfig.addressWidth bits)) init (0)
+  val curAlignedRowAddr = curRowAddrReg - alignOffsetReg
+  val rowByteSize = io.param.xsize
+  val nxtAlignedAddr =
+    curAlignedAddrReg + (burstLenReg << log2Up(dmaConfig.busByteSize))
+  val srcRowGap = io.param.xsize + io.param.srcystep
+  val nxtRowAddr = curRowAddrReg + srcRowGap
+
+  val runSignal = ~io.ctrl.halt
+  val idMatch = io.axiR.r.id === id
+  // AR channel: INCR bursts of burstLenReg beats starting at curAlignedAddrReg.
+  io.axiR.ar.id := id
+  io.axiR.ar.addr := curAlignedAddrReg // read addr
+  io.axiR.ar.len := burstLenReg - 1
+  // NOTE(review): AXI encodes ARSIZE as log2(bytes per beat); this drives log2Up of the width in bits — confirm.
+  io.axiR.ar.size := log2Up(dmaConfig.dataWidth)
+  io.axiR.ar.burst := Axi4.burst.INCR
+  if (dmaConfig.axiConfig.useLock) {
+    io.axiR.ar.lock := Axi4.lock.NORMAL
+  }
+  if (dmaConfig.axiConfig.useCache) {
+    io.axiR.ar.cache := B(0, 2 bits) ## io.param.cf ## io.param.bf
+  }
+  if (dmaConfig.axiConfig.useProt) {
+    io.axiR.ar.prot := 2
+  }
+  io.axiR.ar.valid := arValidReg
+
+  val dataPreReg = Reg(Bits(dmaConfig.dataWidth bits)) init (0)
+  val curBeatBytes = UInt((log2Up(dmaConfig.busByteSize) + 1) bits)
+  val output = Bits(dmaConfig.dataWidth bits)
+  val doutValid = False
+  val rReady = False
+  // Output stream and RREADY are driven from these signals, which the FSM overrides per state.
+  io.dout.valid := doutValid
+  io.axiR.r.ready := rReady
+  io.dout.payload := output
+
+  val rowReadCntReg =
+    Reg(UInt(dmaConfig.xySizeWidth bits)) init (0) // Number of rows read
+  val rowReadCntNxt = rowReadCntReg + 1
+  val colByteReadCntReg =
+    Reg(
+      UInt(dmaConfig.xySizeWidth bits)
+    ) init (0) // Number of bytes read in a row
+  val colByteReadCntNxt = colByteReadCntReg + curBeatBytes
+  val colRemainByteCntReg = Reg(
+    UInt(dmaConfig.xySizeWidth bits)
+  ) init (rowByteSize) // Number of bytes left to read
+  val colRemainByteCntNxt = colRemainByteCntReg - curBeatBytes
+
+  val rowFirstBurstReg = Reg(Bool) init (False)
+  val rowFirstBeatReg = Reg(Bool) init (False)
+  //val rowLastBeat         = False//colRemainByteCntReg <= dmaConfig.busByteSize
+  val nxtRow = False
+
+  // Read FSM with states IDLE, AR (send read address), FR (first, non-aligned beat), BR (burst data beats) and LAST (final output word).
+  val fsmR = new StateMachine {
+    val IDLE: State = new State with EntryPoint {
+      whenIsActive {
+        arValidReg := False
+        alignOffsetReg := 0
+        burstLenReg := 0
+        curRowAddrReg := io.param.sar - srcRowGap // TODO: refactor this
+        curAlignedAddrReg := 0
+        dataPreReg := 0
+
+        rowFirstBurstReg := False
+        rowFirstBeatReg := False
+
+        rowReadCntReg := 0
+        colByteReadCntReg := 0
+        colRemainByteCntReg := rowByteSize
+        // On start (and not halted): load the first row address and raise ARVALID.
+        when(runSignal && io.ctrl.start) {
+          arValidReg := True
+          alignOffsetReg := alignOffsetNxt
+          burstLenReg := nxtBurstLen
+
+          curRowAddrReg := io.param.sar
+          curAlignedAddrReg := io.param.sar - alignOffsetNxt
+          //colRemainByteCntReg := rowByteSize
+
+          rowFirstBeatReg := True
+          rowFirstBurstReg := True
+          nxtRow := True
+
+          goto(AR)
+        }
+      }
+    }
+
+    val AR: State = new State { // Send AXI read address
+      whenIsActive {
+        when(runSignal && io.axiR.ar.valid && io.axiR.ar.ready) {
+          arValidReg := False
+          when(alignOffsetReg =/= 0 && rowFirstBeatReg) {
+            goto(FR)
+          } otherwise {
+            goto(BR)
+          }
+        }
+      }
+    }
+
+    val FR: State = new State { // First read, read non-aligned data
+      whenIsActive {
+        rReady := True
+        doutValid := False // No output, just cache first read
+
+        when(runSignal && io.axiR.r.valid && io.axiR.r.ready) {
+          colByteReadCntReg := colByteReadCntNxt
+          colRemainByteCntReg := colRemainByteCntNxt
+          rowFirstBeatReg := False
+
+          when(io.axiR.r.last) {
+            // Minimum number of bytes to transfer is 2 * busByteSize
+            curAlignedAddrReg := nxtAlignedAddr
+            burstLenReg := nxtBurstLen
+            arValidReg := True
+
+            goto(AR)
+          } otherwise {
+            goto(BR)
+          }
+        }
+      }
+    }
+
+    val BR: State = new State { // Burst aligned read
+      whenIsActive {
+        doutValid := io.axiR.r.valid
+        rReady := io.dout.ready
+
+        when(runSignal && io.axiR.r.valid && io.axiR.r.ready) {
+          colByteReadCntReg := colByteReadCntNxt
+          colRemainByteCntReg := colRemainByteCntNxt
+          rowFirstBeatReg := False
+        }
+
+        when(
+          runSignal && io.axiR.r.valid && io.axiR.r.ready && io.axiR.r.last
+        ) { // Prepare next read address
+          rowFirstBurstReg := False
+          //burstLenReg := nxtBurstLen
+
+          when(colByteReadCntNxt < rowByteSize) { // Continue read same row
+            curAlignedAddrReg := nxtAlignedAddr
+            burstLenReg := nxtBurstLen
+            arValidReg := True
+
+            goto(AR)
+          } otherwise { // Finish read one row
+            //rowLastBeat       := True
+            colByteReadCntReg := 0
+            colRemainByteCntReg := rowByteSize
+
+            when(alignOffsetReg =/= 0) {
+              goto(LAST)
+            } elsewhen (rowReadCntNxt < io.param.ysize) {
+              burstLenReg := nxtBurstLen
+              //colRemainByteCntReg := rowByteSize
+
+              rowReadCntReg := rowReadCntNxt
+              rowFirstBeatReg := True
+              rowFirstBurstReg := True
+
+              nxtRow := True
+              alignOffsetReg := alignOffsetNxt
+              curAlignedAddrReg := nxtRowAddr - alignOffsetNxt
+              curRowAddrReg := nxtRowAddr
+
+              arValidReg := True
+              goto(AR)
+            } otherwise { // Finish read all rows
+              io.ctrl.done := True
+              goto(IDLE)
+            }
+          }
+        }
+      }
+    }
+
+    val LAST: State = new State { // Send last beat when non-aligned read
+      whenIsActive {
+        rReady := False
+        doutValid := True
+        when(runSignal && io.dout.valid && io.dout.ready) {
+          when(rowReadCntNxt < io.param.ysize) { // Send next row address
+            burstLenReg := nxtBurstLen
+            //colRemainByteCntReg := rowByteSize
+
+            rowReadCntReg := rowReadCntNxt
+            rowFirstBeatReg := True
+            rowFirstBurstReg := True
+
+            nxtRow := True
+            alignOffsetReg := alignOffsetNxt
+            curAlignedAddrReg := nxtRowAddr - alignOffsetNxt
+            curRowAddrReg := nxtRowAddr
+
+            arValidReg := True
+            goto(AR)
+          } otherwise { // Finish read all rows
+            io.ctrl.done := True
+            goto(IDLE)
+          }
+        }
+      }
+    }
+  }
+
+  // Combinational logic driving alignOffsetNxt and nxtBurstLen (beat count of the next read burst).
+  val computeNextReadBurstLen = new Area {
+    if (dmaConfig.busByteSize > 1) {
+      alignOffsetNxt := nxtRowAddr((log2Up(dmaConfig.busByteSize) - 1) downto 0)
+    } else {
+      alignOffsetNxt := 0
+    }
+    val nxtAlignedRowAddr = nxtRowAddr - alignOffsetNxt
+    val tmpBurstByteSizeIn4K = (4096 - nxtAlignedRowAddr(0, 12 bits)) // bytes up to the next 4 KiB boundary
+    val nxtAlignedRowByteSize = rowByteSize + alignOffsetNxt
+    val rowCross4K = (tmpBurstByteSizeIn4K < dmaConfig.burstByteSize
+      && tmpBurstByteSizeIn4K < nxtAlignedRowByteSize)
+    when(nxtRow) {
+      when(rowCross4K) {
+        nxtBurstLen := (tmpBurstByteSizeIn4K >> (log2Up(
+          dmaConfig.busByteSize
+        ))).resized
+      } elsewhen (dmaConfig.burstByteSize < nxtAlignedRowByteSize) {
+        nxtBurstLen := dmaConfig.burstLen
+      } otherwise {
+        when(
+          nxtAlignedRowByteSize(
+            (log2Up(dmaConfig.busByteSize) - 1) downto 0
+          ) =/= 0
+        ) {
+          nxtBurstLen := ((nxtAlignedRowByteSize >> (log2Up(
+            dmaConfig.busByteSize
+          ))) + 1).resized
+        } otherwise {
+          nxtBurstLen := (nxtAlignedRowByteSize >> (log2Up(
+            dmaConfig.busByteSize
+          ))).resized
+        }
+      }
+    } elsewhen (colRemainByteCntNxt < dmaConfig.burstByteSize) {
+      when(
+        colRemainByteCntReg((log2Up(dmaConfig.busByteSize) - 1) downto 0) =/= 0
+      ) {
+        nxtBurstLen := ((colRemainByteCntNxt >> (log2Up(
+          dmaConfig.busByteSize
+        ))) + 1).resized
+      } otherwise {
+        nxtBurstLen := (colRemainByteCntNxt >> (log2Up(
+          dmaConfig.busByteSize
+        ))).resized
+      }
+    } otherwise {
+      nxtBurstLen := dmaConfig.burstLen
+    }
+  }
+
+  val computeNextReadPayload = new Area { // Drives curBeatBytes and output, and captures each accepted R beat in dataPreReg
+    // Author's stated assumption: burst length is at least 2, so the minimum transfer is twice the bus byte size
+    when(rowFirstBeatReg) {
+      // The first read beat of a row only counts the bytes after the alignment offset
+      curBeatBytes := dmaConfig.busByteSize - alignOffsetReg
+      // } elsewhen (rowLastBeat) {
+      //   // colRemainByteCntReg is smaller than dmaConfig.busByteSize
+      //   curBeatBytes := (rowByteSize - colByteReadCntReg).resized//colRemainByteCntReg.resized
+    } otherwise {
+      curBeatBytes := dmaConfig.busByteSize
+    }
+    // Remember the last accepted read beat so it can be merged with the following one
+    when(runSignal && io.axiR.r.valid && io.axiR.r.ready) {
+      dataPreReg := io.axiR.r.data
+    }
+    // Unaligned rows: build each output word from the previous beat (dataPreReg) and the current beat
+    when(alignOffsetReg =/= 0) {
+      switch(alignOffsetReg) {
+        for (off <- 0 until dmaConfig.busByteSize) { // One elaboration-time case per possible byte offset
+          is(off) {
+            val paddingWidth = off << log2Up(8) // off * 8
+            val restWidth =
+              (dmaConfig.busByteSize - off) << log2Up(
+                8
+              ) // (busByteSize - off) * 8
+
+            if (dmaConfig.littleEndien) { // High part: low paddingWidth bits of the current beat; low part: top restWidth bits of dataPreReg
+              output := io.axiR.r.data(0, paddingWidth bits) ## dataPreReg(
+                paddingWidth,
+                restWidth bits
+              )
+            } else { // Same two slices, concatenated in the opposite order
+              output := dataPreReg(paddingWidth, restWidth bits) ## io.axiR.r
+                .data(0, paddingWidth bits)
+            }
+          }
+        }
+      }
+    } otherwise { // Aligned rows pass the read data straight through
+      output := io.axiR.r.data
+    }
+  }
+}
+
+class DmaWrite(dmaConfig: DmaConfig) extends Component { // Write side of the DMA: sends io.din data row by row over the AXI4 write channels
+  val io = new Bundle {
+    val din = slave(Stream(Bits(dmaConfig.dataWidth bits)))
+    val param = slave(Param(dmaConfig.addressWidth, dmaConfig.xySizeWidth))
+    val axiW = master(Axi4WriteOnly(dmaConfig.axiConfig))
+    val ctrl = slave(Ctrl())
+  }
+  // done defaults to False; state B of fsmW overrides it once the last row has been acknowledged
+  io.ctrl.done := False
+
+  val busyReg = Reg(Bool) init (False)
+  when(io.ctrl.start) {
+    busyReg := True
+  } elsewhen (io.ctrl.done) {
+    busyReg := False
+  }
+  io.ctrl.busy := busyReg
+
+  val id = 5 // Fixed AXI ID driven on every AW request
+
+  val awValidReg = Reg(Bool) init (False)
+  val burstLenReg = Reg(UInt(dmaConfig.burstLenWidth bits)) init (0)
+  val nxtBurstLen = UInt(dmaConfig.burstLenWidth bits)
+
+  val curRowAddrReg = Reg(UInt(dmaConfig.addressWidth bits)) init (0)
+  val alignOffsetReg = Reg(UInt(log2Up(dmaConfig.busByteSize) bits)) init (0)
+  val alignOffsetNxt = UInt(
+    log2Up(dmaConfig.busByteSize) bits
+  ) // The alignment offset of row start address to bus width
+  val curAlignedAddrReg = Reg(UInt(dmaConfig.addressWidth bits)) init (0)
+  val curAlignedRowAddr = curRowAddrReg - alignOffsetReg
+  val rowByteSize = io.param.xsize
+  val nxtAlignedAddr =
+    curAlignedAddrReg + (burstLenReg << log2Up(dmaConfig.busByteSize))
+  val dstRowGap = io.param.xsize + io.param.dstystep
+  val nxtRowAddr = curRowAddrReg + dstRowGap
+  val beatCnt = Counter(
+    start = 0,
+    end = dmaConfig.burstLen
+  ) // Count how many transfers in a burst
+
+  val runSignal = ~io.ctrl.halt
+  val idMatch = io.axiW.b.id === id // Not referenced anywhere else in this class
+
+  io.axiW.aw.id := id
+  io.axiW.aw.addr := curAlignedAddrReg // write addr
+  io.axiW.aw.len := burstLenReg - 1
+  io.axiW.aw.size := log2Up(dmaConfig.dataWidth) // NOTE(review): AXI AxSIZE encodes log2(bytes per beat); this is log2 of the bit width - confirm
+  io.axiW.aw.burst := Axi4.burst.INCR
+  if (dmaConfig.axiConfig.useLock) { // Each optional AW field is guarded by its own Axi4Config flag
+    io.axiW.aw.lock := Axi4.lock.NORMAL
+  }
+  if (dmaConfig.axiConfig.useCache) {
+    io.axiW.aw.cache := B(0, 2 bits) ## io.param.cf ## io.param.bf
+  }
+  if (dmaConfig.axiConfig.useProt) {
+    io.axiW.aw.prot := 2 // Unprivileged non-secure data access
+  }
+  io.axiW.aw.valid := awValidReg
+
+  val bReadyReg = Reg(Bool) init (False)
+  io.axiW.b.ready := bReadyReg
+
+  io.axiW.w.last := beatCnt === (burstLenReg - 1)
+  when(io.axiW.w.last) {
+    beatCnt.clear()
+  }
+
+  val dinPrevReg = Reg(Bits(dmaConfig.dataWidth bits)) init (0)
+  val curBeatBytes = UInt((log2Up(dmaConfig.busByteSize) + 1) bits)
+  val strobe = UInt(dmaConfig.busByteSize bits)
+  val payload = Bits(dmaConfig.dataWidth bits)
+  val dinReady = False // Default value; overridden inside the W and LAST states
+  val wValid = False // Default value; overridden inside the W and LAST states
+
+  // Send write data to AXI write channel
+  io.axiW.w.valid := wValid
+  io.din.ready := dinReady
+  io.axiW.w.data := payload
+  io.axiW.w.strb := strobe.asBits
+
+  val rowWriteCntReg =
+    Reg(UInt(dmaConfig.xySizeWidth bits)) init (0) // Number of rows write
+  val rowWriteCntNxt = rowWriteCntReg + 1
+  val colByteWriteCntReg =
+    Reg(
+      UInt(dmaConfig.xySizeWidth bits)
+    ) init (0) // Number of bytes written in a row
+  val colByteWriteCntNxt =
+    colByteWriteCntReg + curBeatBytes // Each burst beat transfers bus width data including invalid ones
+  val colRemainByteCntReg = Reg(
+    UInt(dmaConfig.xySizeWidth bits)
+  ) init (rowByteSize) // Number of bytes left to write
+  val colRemainByteCntNxt = colRemainByteCntReg - curBeatBytes
+
+  val rowFirstBurstReg = Reg(Bool) init (False)
+  val rowFirstBeatReg = Reg(Bool) init (False)
+  val rowLastBeat = colRemainByteCntReg <= dmaConfig.busByteSize
+  val nxtRow = False // Default value; set to True by the FSM when a new row is being set up
+
+  val fsmW = new StateMachine { // IDLE -> W -> (LAST) -> B, then back to W for more data or to IDLE when done
+    val IDLE: State = new State with EntryPoint { // Holds all bookkeeping at reset values until start
+      whenIsActive {
+        awValidReg := False
+        alignOffsetReg := 0
+        burstLenReg := 0
+        curRowAddrReg := io.param.dar - dstRowGap // TODO: refactor this
+        curAlignedAddrReg := 0
+
+        bReadyReg := False
+        dinPrevReg := 0
+
+        rowFirstBurstReg := False
+        rowFirstBeatReg := False
+
+        rowWriteCntReg := 0
+        colByteWriteCntReg := 0
+        colRemainByteCntReg := rowByteSize
+
+        when(runSignal && io.ctrl.start) { // Later assignments win: set up the first row and first AW request
+          awValidReg := True
+          alignOffsetReg := alignOffsetNxt
+          burstLenReg := nxtBurstLen
+
+          curRowAddrReg := io.param.dar
+          curAlignedAddrReg := io.param.dar - alignOffsetNxt
+          //colRemainByteCntReg := rowByteSize
+
+          rowFirstBeatReg := True
+          rowFirstBurstReg := True
+          nxtRow := True
+          //goto(AW)
+          goto(W)
+        }
+      }
+    }
+    /*
+    val AW: State = new State {  // Send AXI write address
+      whenIsActive {
+        when (runSignal && io.axiW.aw.valid && io.axiW.aw.ready) {
+          awValidReg := False
+          goto(W)
+        }
+      }
+    }
+     */
+    val W: State = new State { // Address and data phase: AW handshake and W beats are handled in the same state
+      onEntry {
+        beatCnt.clear()
+      }
+      whenIsActive {
+        when(runSignal && io.axiW.aw.valid && io.axiW.aw.ready) {
+          awValidReg := False
+        }
+        // Pass the input stream handshake straight through to the AXI W channel
+        dinReady := io.axiW.w.ready
+        wValid := io.din.valid
+
+        when(runSignal && io.axiW.w.valid && io.axiW.w.ready) {
+          beatCnt.increment()
+
+          colByteWriteCntReg := colByteWriteCntNxt
+          colRemainByteCntReg := colRemainByteCntNxt
+          rowFirstBeatReg := False
+        }
+
+        when(runSignal) {
+          when(io.axiW.w.valid && io.axiW.w.ready) {
+            when(io.axiW.w.last) {
+              rowFirstBurstReg := False
+              beatCnt.clear()
+
+              bReadyReg := True
+              goto(B)
+            } elsewhen (alignOffsetReg =/= 0 && colRemainByteCntNxt < dmaConfig.busByteSize) {
+              goto(LAST)
+            }
+          }
+        }
+      }
+    }
+
+    val LAST: State = new State { // Emits the final beat of an unaligned row without consuming io.din
+      whenIsActive {
+        dinReady := False
+        wValid := True
+
+        when(
+          runSignal && io.axiW.w.last && io.axiW.w.valid && io.axiW.w.ready
+        ) {
+          rowFirstBurstReg := False
+          beatCnt.clear()
+
+          bReadyReg := True
+          goto(B)
+        }
+      }
+    }
+
+    val B: State = new State { // Waits for the write response, then continues the row, starts the next row, or finishes
+      whenIsActive {
+        // TODO: handle BRESP error
+        when(runSignal && io.axiW.b.valid && io.axiW.b.ready) { // Prepare next write address
+          bReadyReg := False
+          burstLenReg := nxtBurstLen
+
+          when(colByteWriteCntReg < rowByteSize) { // Continue write same row
+            curAlignedAddrReg := nxtAlignedAddr
+            awValidReg := True
+
+            //goto(AW)
+            goto(W)
+          } otherwise { // Finish write one row
+            colByteWriteCntReg := 0
+            colRemainByteCntReg := rowByteSize
+
+            when(rowWriteCntNxt < io.param.ysize) {
+              rowWriteCntReg := rowWriteCntNxt
+              rowFirstBeatReg := True
+              rowFirstBurstReg := True
+
+              nxtRow := True
+              alignOffsetReg := alignOffsetNxt
+              curAlignedAddrReg := nxtRowAddr - alignOffsetNxt
+              curRowAddrReg := nxtRowAddr
+
+              awValidReg := True
+              //goto(AW)
+              goto(W)
+            } otherwise { // Finish write all rows
+              io.ctrl.done := True
+              goto(IDLE)
+            }
+          }
+        }
+      }
+    }
+  }
+
+  val computeNextWriteBurstLen = new Area { // Combinational logic driving alignOffsetNxt and nxtBurstLen
+    if (dmaConfig.busByteSize > 1) {
+      alignOffsetNxt := nxtRowAddr((log2Up(dmaConfig.busByteSize) - 1) downto 0)
+    } else {
+      alignOffsetNxt := 0
+    }
+    // The values below describe the next row after its start address is aligned down by alignOffsetNxt
+    val nxtAlignedRowAddr = nxtRowAddr - alignOffsetNxt
+    val tmpBurstByteSizeIn4K = (4096 - nxtAlignedRowAddr(0, 12 bits)) // 4096 minus the low 12 address bits
+    val nxtAlignedRowByteSize = rowByteSize + alignOffsetNxt // Row bytes plus the leading alignment offset
+    val rowCross4K = (tmpBurstByteSizeIn4K < dmaConfig.burstByteSize
+      && tmpBurstByteSizeIn4K < nxtAlignedRowByteSize)
+    when(nxtRow) {
+      when(rowCross4K) { // Limit the burst to the bytes left before the 4096-byte boundary
+        nxtBurstLen := (tmpBurstByteSizeIn4K >> (log2Up(
+          dmaConfig.busByteSize
+        ))).resized
+      } elsewhen (dmaConfig.burstByteSize < nxtAlignedRowByteSize) { // Row is larger than one full burst
+        nxtBurstLen := dmaConfig.burstLen
+      } otherwise { // Row fits in one burst: size >> log2Up(busByteSize), plus one if the low bits are non-zero
+        when(
+          nxtAlignedRowByteSize(
+            (log2Up(dmaConfig.busByteSize) - 1) downto 0
+          ) =/= 0
+        ) {
+          nxtBurstLen := ((nxtAlignedRowByteSize >> (log2Up(
+            dmaConfig.busByteSize
+          ))) + 1).resized
+        } otherwise {
+          nxtBurstLen := (nxtAlignedRowByteSize >> (log2Up(
+            dmaConfig.busByteSize
+          ))).resized
+        }
+      }
+    } elsewhen (colRemainByteCntReg < dmaConfig.burstByteSize) { // Same row, less than a full burst remaining
+      when(
+        colRemainByteCntReg((log2Up(dmaConfig.busByteSize) - 1) downto 0) =/= 0
+      ) {
+        nxtBurstLen := ((colRemainByteCntReg >> (log2Up(
+          dmaConfig.busByteSize
+        ))) + 1).resized
+      } otherwise {
+        nxtBurstLen := (colRemainByteCntReg >> (log2Up(
+          dmaConfig.busByteSize
+        ))).resized
+      }
+    } otherwise { // Same row, at least one full burst remaining
+      nxtBurstLen := dmaConfig.burstLen
+    }
+  }
+
+  val computeNextWritePayload = new Area { // Drives strobe, curBeatBytes and payload for the current W beat
+    // Author's stated assumption: burst length is at least 2, so the minimum transfer is twice the bus byte size
+    when(rowFirstBeatReg) {
+      // Mask padding bits as invalid for first write beat
+      strobe := (dmaConfig.fullStrbBits - ((U(1) << alignOffsetReg) - 1))
+      curBeatBytes := dmaConfig.busByteSize - alignOffsetReg
+    } elsewhen (rowLastBeat) {
+      // In this case colRemainByteCntReg is smaller than dmaConfig.busByteSize
+      strobe := ((U(1) << colRemainByteCntReg) - 1).resized
+      curBeatBytes := colRemainByteCntReg.resized
+    } otherwise {
+      strobe := dmaConfig.fullStrbBits
+      curBeatBytes := dmaConfig.busByteSize
+    }
+    // Remember the last accepted input word so it can be merged with the following one
+    when(io.din.valid && io.din.ready) {
+      dinPrevReg := io.din.payload
+    }
+    // Build each W data word from the previous input word (dinPrevReg) and the current one
+    switch(alignOffsetReg) {
+      for (off <- 0 until dmaConfig.busByteSize) { // One elaboration-time case per possible byte offset
+        is(off) {
+          val paddingWidth = off << log2Up(8) // off * 8
+          val restWidth =
+            (dmaConfig.busByteSize - off) << log2Up(
+              8
+            ) // (busByteSize - off) * 8
+
+          if (dmaConfig.littleEndien) { // High part: low restWidth bits of the current input; low part: top paddingWidth bits of dinPrevReg
+            payload := io.din.payload(0, restWidth bits) ## dinPrevReg(
+              restWidth,
+              paddingWidth bits
+            )
+          } else { // Same two slices, concatenated in the opposite order
+            payload := dinPrevReg(restWidth, paddingWidth bits) ## io.din
+              .payload(0, restWidth bits)
+          }
+        }
+      }
+    }
+  }
+}
+
+object DmaController {
+  // Elaborates a small DmaController (8-bit data, 16-bit address) to Verilog
+  // and prints the pruned-signal report of that run.
+  def main(args: Array[String]): Unit = {
+    val smallDmaCfg =
+      DmaConfig(addressWidth = 16, burstLen = 8, bufDepth = 24, dataWidth = 8, xySizeMax = 256)
+
+    val verilogReport = SpinalVerilog(new DmaController(smallDmaCfg))
+
+    verilogReport.printPruned()
+  }
+}
diff --git a/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaControllerSim.scala b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaControllerSim.scala
new file mode 100644
index 0000000..826444f
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaControllerSim.scala
@@ -0,0 +1,139 @@
+package dma
+
+import spinal.core._
+import spinal.core.sim._
+import spinal.lib._
+import spinal.lib.bus.amba4.axi._
+// import spinal.lib.bus.amba4.axi.sim._
+import spinal.lib.sim._
+
+import scala.collection.mutable
+
+object DmaControllerSim extends App { // Simulation entry point: runs one 2D DMA copy against an AxiMemorySim and checks the result
+  val dmaConfig = DmaConfig(
+    addressWidth = 32,
+    burstLen = 8,
+    bufDepth = 32,
+    dataWidth = 32,
+    xySizeMax = 256
+  )
+  // Runs one transfer: drives io.param, fills the source region, pulses start, waits for done, then verifies the destination
+  def runDma(
+      dut: DmaController,
+      axiMem: AxiMemorySim,
+      sar: Int,
+      dar: Int,
+      xsize: Int,
+      ysize: Int,
+      srcystep: Int,
+      dstystep: Int
+  ): Unit = {
+    val queue = mutable.Queue[Int]() // Expected bytes in the order written; dequeued during the check
+
+    dut.io.ctrl.start #= false
+    dut.io.ctrl.halt #= false
+
+    dut.io.param.sar #= sar
+    dut.io.param.dar #= dar
+    dut.io.param.xsize #= xsize // At least bus byte size
+    dut.io.param.ysize #= ysize
+    dut.io.param.srcystep #= srcystep
+    dut.io.param.dstystep #= dstystep
+    dut.io.param.llr #= 0
+    dut.io.param.bf #= true
+    dut.io.param.cf #= true
+
+    sleep(0) // Make io.param assignment effective
+    // val axiMem = AxiMemorySim(
+    //   dut.io.axi,
+    //   dut.clockDomain,
+    //   AxiMemorySimConfig(writeResponseDelay = 0)
+    // )
+
+    // Prepare memory data
+    val srcBeginAddr = dut.io.param.sar.toLong
+    val srcRowGap = dut.io.param.xsize.toInt + dut.io.param.srcystep.toInt // Byte distance between source row starts
+    val rowSize = dut.io.param.xsize.toInt
+    for (y <- 0 until dut.io.param.ysize.toInt) {
+      for (x <- 0 until dut.io.param.xsize.toInt) {
+        val inc = y * srcRowGap + x
+        val d = (y * rowSize + x) % 128 // Pattern stays in 0..127, so d.toByte is never negative
+        val addr = srcBeginAddr + inc
+
+        axiMem.memory.write(addr, d.toByte)
+        println(f"prepare: addr=$addr, data=${d}=${d}%x")
+        queue.enqueue(d)
+      }
+    }
+    // Let the design idle for 10 samples, then assert start for one sample
+    dut.clockDomain.waitSampling(10)
+    dut.io.ctrl.start #= true
+    axiMem.start()
+    dut.clockDomain.waitSampling()
+    dut.io.ctrl.start #= false
+
+    waitUntil(dut.io.ctrl.done.toBoolean)
+    dut.clockDomain.waitSampling(2)
+    // Read the destination back with the destination row gap and compare with the expected bytes
+    val dstBeginAddr = dut.io.param.dar.toLong
+    val dstRowGap = dut.io.param.xsize.toInt + dut.io.param.dstystep.toInt
+    for (y <- 0 until dut.io.param.ysize.toInt) {
+      for (x <- 0 until dut.io.param.xsize.toInt) {
+        val inc = y * dstRowGap + x
+        val addr = dstBeginAddr + inc
+
+        val b = axiMem.memory.read(addr)
+        println(f"check: addr=$addr, data=${b.toInt}=${b.toInt}%x")
+        val t = queue.dequeue()
+        assert(b.toInt == t, s"${b.toInt}==${t} assert failed")
+      }
+    }
+  }
+
+  SimConfig.withWave
+    .compile(new DmaController(dmaConfig))
+    .doSim { dut =>
+      val axiMem = AxiMemorySim(
+        dut.io.axi,
+        dut.clockDomain,
+        AxiMemorySimConfig(writeResponseDelay = 0)
+      )
+
+      dut.clockDomain.forkStimulus(5)
+
+      val sar = 4091 // 5 bytes below 4096: unaligned, and the first row crosses the 4096-byte boundary
+      val dar = 8183 // 9 bytes below 8192: unaligned, and the first row crosses the 8192-byte boundary
+      val xsize = 57 // At least twice bus byte size, minimum burst length is 2
+      val ysize = 2
+      val srcystep = 5
+      val dstystep = 9
+      runDma(
+        dut,
+        axiMem,
+        sar,
+        dar,
+        xsize,
+        ysize,
+        srcystep,
+        dstystep
+      )
+
+    // dut.clockDomain.waitSampling(2)
+    // sar      = 8183
+    // dar      = 4091
+    // // xsize    = 57 // At least bus byte size
+    // // ysize    = 2
+    // // srcystep = 5
+    // // dstystep = 9
+    // runDma(
+    //   dut,
+    //   axiMem,
+    //   sar,
+    //   dar,
+    //   xsize,
+    //   ysize,
+    //   srcystep,
+    //   dstystep
+    // )
+    }
+}
diff --git a/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaIf.scala b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaIf.scala
new file mode 100644
index 0000000..2375dbb
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaIf.scala
@@ -0,0 +1,53 @@
+package dma
+
+import spinal.core._
+import spinal.lib._
+
+// Control/status bundle: the master side drives start and halt and reads back busy and done
+case class Ctrl() extends Bundle with IMasterSlave {
+  val start = Bool // Driven by the master
+  val busy = Bool // Driven by the slave
+  val done = Bool // Driven by the slave
+  val halt = Bool // Driven by the master
+
+  override def asMaster(): Unit = { // Signal directions as seen from the master side
+    out(start, halt)
+    in(busy, done)
+  }
+}
+
+case class Param(addressWidth: Int, xySizeWidth: Int) // Transfer parameter bundle; every field is an output of the master side
+    extends Bundle
+    with IMasterSlave {
+  val sar = UInt(addressWidth bits) // source byte addr
+  val dar = UInt(addressWidth bits) // destination byte addr
+  val xsize = UInt(
+    xySizeWidth bits
+  ) // 2D DMA x-dir transfer byte size, cnt from 0
+  val ysize = UInt(xySizeWidth bits) // 2D DMA y-dir transfer lines, cnt from 0
+  val srcystep = UInt(
+    xySizeWidth bits
+  ) // source byte addr offset between each line, cnt from 1
+  val dstystep = UInt(
+    xySizeWidth bits
+  ) // destination byte addr offset between each line, cnt from 1
+  val llr = UInt(
+    addressWidth bits
+  ) // DMA cmd linked list base addr (addr pointer)
+  val bf = Bool // bufferable flag in AXI cmd
+  val cf = Bool // cacheable flag in AXI cmd
+
+  override def asMaster(): Unit = { // All parameters flow from master to slave
+    out(
+      sar,
+      dar,
+      xsize,
+      ysize,
+      srcystep,
+      dstystep,
+      llr,
+      bf,
+      cf
+    )
+  }
+}
diff --git a/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaMem.scala b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaMem.scala
new file mode 100644
index 0000000..ccfe3d9
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/src/main/scala/dma/DmaMem.scala
@@ -0,0 +1,104 @@
+package dma
+
+import spinal.core._
+import spinal.lib._
+import spinal.lib.bus.wishbone._
+import spinal.lib.bus.amba4.axi._
+import spinal.lib.bus.regif.AccessType._
+import spinal.lib.memory.sdram.sdr.{MT48LC16M16A2, SdramInterface}
+
+import sdram._
+
+class DmaMem( // Top level: a Wishbone-programmed DmaController whose AXI port feeds an SdramController
+    addressWidth: Int = 32,
+    bufSize: Int = 16,
+    burstLen: Int = 8,
+    dataWidth: Int = 32,
+    idWidth: Int = 4,
+    selWidth: Int = 4
+) extends Component {
+  val axiConfig = Axi4Config( // NOTE(review): not referenced anywhere else in this class
+    addressWidth = addressWidth, //log2Up(memByteSize)
+    dataWidth = dataWidth,
+    idWidth = idWidth,
+    useLock = false,
+    useRegion = false,
+    useCache = false,
+    useProt = false,
+    useQos = false
+  )
+
+  val wbConfig = WishboneConfig(
+    addressWidth = addressWidth, //log2Up(memByteSize),
+    dataWidth = dataWidth,
+    selWidth = selWidth
+  )
+
+  val dmaConfig = DmaConfig(
+    addressWidth = addressWidth,
+    burstLen = burstLen,
+    bufDepth = bufSize,
+    dataWidth = dataWidth,
+    xySizeMax = 256
+  )
+
+  val sdramConfig = SdramConfig( // NOTE(review): never used; sdramArea below builds the controller from a default SdramConfig()
+    CAS = 2,
+    addressWidth = addressWidth,
+    burstLen = burstLen,
+    busDataWidth = dataWidth,
+    idWidth = idWidth
+  )
+
+  val sdramDevice = MT48LC16M16A2
+
+  val io = new Bundle {
+    val ctrl = slave(Ctrl())
+    val sdram = master(SdramInterface(sdramDevice.layout))
+    val wb = slave(
+      Wishbone(wbConfig)
+    )
+  }
+  // Two registers on the Wishbone bus hold the DMA source and destination addresses
+  val busif = BusInterface(io.wb, (0, 100 Byte))
+  val SAR_REG = busif.newReg(doc = "DMA src address")
+  val DAR_REG = busif.newReg(doc = "DMA dst address")
+
+  val srcAddr = SAR_REG.field(addressWidth bits, RW)
+  val dstAddr = DAR_REG.field(addressWidth bits, RW)
+
+  val dmaArea = new Area { // Only sar/dar come from registers; every other transfer parameter is a constant
+    val dmaController = new DmaController(dmaConfig)
+    dmaController.io.ctrl <> io.ctrl
+
+    dmaController.io.param.sar := srcAddr.asUInt
+    dmaController.io.param.dar := dstAddr.asUInt
+    dmaController.io.param.xsize := 2 * dmaConfig.busByteSize // At least twice bus byte size
+    dmaController.io.param.ysize := 1
+    dmaController.io.param.srcystep := 0
+    dmaController.io.param.dstystep := 0
+    dmaController.io.param.llr := 0
+    dmaController.io.param.bf := True
+    dmaController.io.param.cf := True
+  }
+
+  val sdramArea = new Area {
+    val sdramController = new SdramController(
+      sdramDevice.layout,
+      sdramDevice.timingGrade7,
+      SdramConfig() // NOTE(review): defaults (CAS = 3, burstLen = 16) rather than the sdramConfig val above - confirm intended
+    )
+    io.sdram <> sdramController.io.sdram
+  }
+  // The DMA controller is the AXI master of the SDRAM controller
+  sdramArea.sdramController.io.axi << dmaArea.dmaController.io.axi
+}
+
+object DmaMem {
+  // Generates Verilog for a default-parameter DmaMem using a 100 MHz default clock domain frequency.
+  def main(args: Array[String]): Unit = {
+    val spinalCfg = SpinalConfig(defaultClockDomainFrequency = FixedFrequency(100 MHz))
+    val verilogReport = spinalCfg.generateVerilog(new DmaMem())
+    verilogReport.printPruned()
+  }
+}
diff --git a/spinal-cocotb/SpinalNet/src/main/scala/dma/WishboneBusInterface.scala b/spinal-cocotb/SpinalNet/src/main/scala/dma/WishboneBusInterface.scala
new file mode 100644
index 0000000..64b3774
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/src/main/scala/dma/WishboneBusInterface.scala
@@ -0,0 +1,91 @@
+package dma
+
+import spinal.core._
+import spinal.lib.bus.misc.SizeMapping
+import spinal.lib.bus.regif.{BusIf, ClassName}
+import spinal.lib.bus.wishbone.Wishbone
+
+case class WishboneBusInterface( // Adapts a Wishbone slave port to the regif BusIf register-bank interface
+    bus: Wishbone,
+    sizeMap: SizeMapping,
+    selId: Int = 0,
+    readSync: Boolean = true,
+    regPre: String = ""
+)(implicit moduleName: ClassName)
+    extends BusIf {
+  override def getModuleName = moduleName.name
+  // Read-side signals: registers when readSync is set, otherwise combinational with constant defaults
+  val readError = Bool()
+  val readData = Bits(bus.config.dataWidth bits)
+
+  if (readSync) {
+    readError.setAsReg() init False
+    readData.setAsReg() init 0
+  } else {
+    readError := False
+    readData := 0
+  }
+  // ACK defaults high; readHalt()/writeHalt() below override it low
+  bus.ACK := True
+  bus.DAT_MISO := readData
+  if (bus.config.useERR) bus.ERR := readError
+
+  val selMatch = if (bus.config.useSEL) bus.SEL(selId) else True // Only bit selId of SEL qualifies an access
+  val askWrite = (selMatch && bus.CYC && bus.STB && bus.WE).allowPruning()
+  val askRead = (selMatch && bus.CYC && bus.STB && !bus.WE).allowPruning()
+  val doWrite =
+    (selMatch && bus.CYC && bus.STB && bus.ACK && bus.WE).allowPruning()
+  val doRead =
+    (selMatch && bus.CYC && bus.STB && bus.ACK && !bus.WE).allowPruning()
+  val writeData = bus.DAT_MOSI // Write data comes from the master (MOSI); DAT_MISO is this slave's own read output
+
+  override def readAddress() = bus.ADR
+  override def writeAddress() = bus.ADR
+
+  override def readHalt() = bus.ACK := False
+  override def writeHalt() = bus.ACK := False
+
+  override def busDataWidth = bus.config.dataWidth
+}
+
+object BusInterface { // Factory overloads that wrap a Wishbone bus in a WishboneBusInterface and return it as a BusIf
+  def apply(bus: Wishbone, sizeMap: SizeMapping)(implicit
+      moduleName: ClassName
+  ): BusIf = WishboneBusInterface(bus, sizeMap)(moduleName)
+  def apply(bus: Wishbone, sizeMap: SizeMapping, selID: Int)(implicit
+      moduleName: ClassName
+  ): BusIf = WishboneBusInterface(bus, sizeMap, selID)(moduleName)
+  def apply(bus: Wishbone, sizeMap: SizeMapping, selID: Int, regPre: String)(
+      implicit moduleName: ClassName
+  ): BusIf =
+    WishboneBusInterface(bus, sizeMap, selID, regPre = regPre)(moduleName)
+  def apply(bus: Wishbone, sizeMap: SizeMapping, selID: Int, readSync: Boolean)(
+      implicit moduleName: ClassName
+  ): BusIf = WishboneBusInterface(bus, sizeMap, selID, readSync)(moduleName)
+  def apply(
+      bus: Wishbone,
+      sizeMap: SizeMapping,
+      selID: Int,
+      readSync: Boolean,
+      regPre: String
+  )(implicit moduleName: ClassName): BusIf =
+    WishboneBusInterface(bus, sizeMap, selID, readSync, regPre = regPre)(
+      moduleName
+    )
+  // The commented-out overloads below are for other bus types (Apb3, AhbLite3, Axi4, AxiLite4) and are disabled here
+//  def apply(bus: Apb3, sizeMap: SizeMapping, selID: Int)(implicit moduleName: ClassName): BusIf = Apb3BusInterface(bus, sizeMap, selID)(moduleName)
+//  def apply(bus: Apb3, sizeMap: SizeMapping, selID: Int, regPre: String)(implicit moduleName: ClassName): BusIf = Apb3BusInterface(bus, sizeMap, selID, regPre = regPre)(moduleName)
+//  def apply(bus: Apb3, sizeMap: SizeMapping, selID: Int, readSync: Boolean)(implicit moduleName: ClassName): BusIf = Apb3BusInterface(bus, sizeMap, selID, readSync)(moduleName)
+//  def apply(bus: Apb3, sizeMap: SizeMapping, selID: Int, readSync: Boolean, regPre: String)(implicit moduleName: ClassName): BusIf = Apb3BusInterface(bus, sizeMap, selID, readSync, regPre = regPre)(moduleName)
+
+//  def apply(bus: AhbLite3, sizeMap: SizeMapping)(implicit moduleName: ClassName): BusIf = AhbLite3BusInterface(bus, sizeMap)(moduleName)
+//  def apply(bus: AhbLite3, sizeMap: SizeMapping, regPre: String)(implicit moduleName: ClassName): BusIf = AhbLite3BusInterface(bus, sizeMap, regPre = regPre)(moduleName)
+//  def apply(bus: AhbLite3, sizeMap: SizeMapping, readSync: Boolean)(implicit moduleName: ClassName): BusIf = AhbLite3BusInterface(bus, sizeMap, readSync)(moduleName)
+//  def apply(bus: AhbLite3, sizeMap: SizeMapping, readSync: Boolean, regPre: String)(implicit moduleName: ClassName): BusIf = AhbLite3BusInterface(bus, sizeMap, readSync, regPre = regPre)(moduleName)
+
+//  def apply(bus: Axi4, sizeMap: SizeMapping): BusIf = Axi4BusInterface(bus, sizeMap)
+//  def apply(bus: Axi4, sizeMap: SizeMapping, readSync: Boolean): BusIf = Axi4BusInterface(bus, sizeMap)
+//
+//  def apply(bus: AxiLite4, sizeMap: SizeMapping): BusIf = AxiLite4BusInterface(bus, sizeMap)
+//  def apply(bus: AxiLite4, sizeMap: SizeMapping, readSync: Boolean): BusIf = AxiLite4BusInterface(bus, sizeMap)
+}
diff --git a/spinal-cocotb/SpinalNet/src/main/scala/sdram/SdramController.scala b/spinal-cocotb/SpinalNet/src/main/scala/sdram/SdramController.scala
new file mode 100644
index 0000000..786f4c0
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/src/main/scala/sdram/SdramController.scala
@@ -0,0 +1,553 @@
+package sdram
+
+import scala.math.BigDecimal.RoundingMode
+import spinal.core._
+import spinal.lib._
+import spinal.lib.bus.amba4.axi._
+import spinal.lib.fsm._
+import spinal.lib.io.TriStateArray
+import spinal.lib.memory.sdram._
+import spinal.lib.memory.sdram.sdr.{MT48LC16M16A2, SdramTimings, SdramInterface}
+
+case class SdramConfig( // Static parameters of the SDRAM controller and the AXI4 configuration derived from them
+    CAS: Int = 3,
+    addressWidth: Int = 32,
+    burstLen: Int = 16,
+    busDataWidth: Int = 32,
+    idWidth: Int = 4
+) {
+  val busByteSize = busDataWidth / 8
+  val burstLenWidth = 8 // AXI4 burst length width // log2Up(burstLen)
+  val burstByteSize = burstLen * busByteSize
+  val fullStrbBits = scala.math.pow(2, busByteSize).toInt - 1 // all bits valid
+  val bufDepth = burstLen * 4
+  // Parameter checks; the messages state exactly the condition that is tested
+  require(
+    busDataWidth % 8 == 0,
+    s"${busDataWidth} % 8 == 0 bus data width assertion failed"
+  )
+  require(burstLen <= 256, s"$burstLen <= 256 burst length assertion failed")
+
+  val axiConfig = Axi4Config( // Only the ID signal is enabled; QoS, region, lock, cache and prot are disabled
+    addressWidth = addressWidth,
+    dataWidth = busDataWidth,
+    idWidth = idWidth,
+    useId = true,
+    useQos = false,
+    useRegion = false,
+    useLock = false,
+    useCache = false,
+    useProt = false
+  )
+}
+
+class SdramController(l: SdramLayout, t: SdramTimings, c: SdramConfig)
+    extends Component {
+  require(c.burstLen >= 4, "burst length at least 4")
+  require(
+    c.burstLen <= l.columnSize,
+    "burst length should be less than column size"
+  )
+  require(
+    c.busDataWidth % l.dataWidth == 0,
+    "AXI bus data width is not divisible by SDRAM data width"
+  )
+
+  val DATA_WIDTH_MULIPLER = c.busDataWidth / l.dataWidth
+  val dataWidthMultiplerSet = Set(1, 2, 4, 8, 16, 32, 64, 128, 256)
+  require(
+    dataWidthMultiplerSet(DATA_WIDTH_MULIPLER),
+    "DATA_WIDTH_MULIPLER is not valid"
+  )
+
+  val CMD_UNSELECTED = B"4'b1000"
+  val CMD_NOP = B"4'b0111"
+  val CMD_ACTIVE = B"4'b0011"
+  val CMD_READ = B"4'b0101"
+  val CMD_WRITE = B"4'b0100"
+  val CMD_BURST_TERMINATE = B"4'b0110"
+  val CMD_PRECHARGE = B"4'b0010"
+  val CMD_REFRESH = B"4'b0001"
+  val CMD_LOAD_MODE_REG = B"4'b0000"
+
+  val DQM_ALL_VALID = B(0, l.bytePerWord bits) // 2'b00
+  val DQM_ALL_INVALID = ~DQM_ALL_VALID // High means invalid
+  val DQM_READ_DELAY_CYCLES = 2
+
+  val MODE_VALUE =
+    U"6'b000_0_00" @@ U(c.CAS, 3 bits) @@ U"4'b0_111" // sequential full page
+  val ALL_BANK_ADDR = 1 << 10
+
+  val io = new Bundle {
+    val axi = slave(Axi4(c.axiConfig))
+    val sdram = master(SdramInterface(l)) // setAsReg
+    val initDone = out Bool
+
+    axi.addAttribute(name = "IOB", value = "TRUE")
+    sdram.addAttribute(name = "IOB", value = "TRUE")
+    initDone.addAttribute(name = "IOB", value = "TRUE")
+  }
+  val awFifo = StreamFifo(Axi4Aw(c.axiConfig), c.bufDepth)
+  val wFifo = StreamFifo(Axi4W(c.axiConfig), c.bufDepth)
+  val bFifo = StreamFifo(Axi4B(c.axiConfig), c.bufDepth)
+  val arFifo = StreamFifo(Axi4Ar(c.axiConfig), c.bufDepth)
+  val rFifo = StreamFifo(Axi4R(c.axiConfig), c.bufDepth)
+  awFifo.io.push << io.axi.aw
+  wFifo.io.push << io.axi.w
+  io.axi.b << bFifo.io.pop
+  arFifo.io.push << io.axi.ar
+  io.axi.r << rFifo.io.pop
+
+  val commandReg = Reg(Bits(4 bits)) init (0)
+  val addressReg = Reg(UInt(l.chipAddressWidth bits)) init (0)
+  val bankAddrReg = Reg(UInt(l.bankWidth bits)) init (0)
+  val burstLenReg = Reg(UInt(c.burstLenWidth bits)) init (0)
+  val columnAddrReg = Reg(UInt(l.columnWidth bits)) init (0)
+  val busReadDataReg = Reg(Bits(c.busDataWidth bits)) init (0)
+  val busReadDataVldReg = Reg(Bool) init (False)
+  val busReadDataLastReg = Reg(Bool) init (False)
+  val opIdReg = Reg(UInt(c.idWidth bits)) init (0)
+  val strobeReg = Reg(Bits(c.busByteSize bits)) init (0)
+  val busWriteDataReg = Reg(Bits(c.busDataWidth bits)) init (0)
+  val busDataShiftCnt =
+    Reg(UInt((log2Up(DATA_WIDTH_MULIPLER) + 1) bits)) init (0)
+
+  val writeMask = strobeReg((l.bytePerWord - 1) downto 0)
+  val busWrite = busWriteDataReg((l.dataWidth - 1) downto 0)
+  val mask = Bits(l.bytePerWord bits)
+
+  awFifo.io.pop.ready := False
+  wFifo.io.pop.ready := False
+  bFifo.io.push.valid := False
+  arFifo.io.pop.ready := False
+
+  bFifo.io.push.payload.id := opIdReg
+  bFifo.io.push.payload.setOKAY()
+  rFifo.io.push.payload.data := busReadDataReg
+  rFifo.io.push.payload.id := opIdReg
+  rFifo.io.push.payload.last := busReadDataLastReg
+  rFifo.io.push.payload.setOKAY()
+  rFifo.io.push.valid := busReadDataVldReg
+
+  io.sdram.BA := bankAddrReg.asBits
+  io.sdram.ADDR := addressReg.asBits
+  io.sdram.DQM := mask
+  io.sdram.CKE := True
+  io.sdram.CSn := commandReg(3)
+  io.sdram.RASn := commandReg(2)
+  io.sdram.CASn := commandReg(1)
+  io.sdram.WEn := commandReg(0)
+  io.sdram.DQ.write := busWrite //wFifo.io.pop.payload.data
+  io.initDone := False
+
+  commandReg := CMD_NOP
+  busReadDataLastReg := False
+
+  assert(
+    assertion = (columnAddrReg < l.columnSize - c.burstLen),
+    message = "invalid column address and burst length",
+    severity = ERROR
+  )
+  assert(
+    assertion =
+      awFifo.io.pop.payload.isINCR() && arFifo.io.pop.payload.isINCR(),
+    message = "only burst type INCR allowed",
+    severity = ERROR
+  )
+  assert(
+    assertion = awFifo.io.pop.payload.len < (256 >> DATA_WIDTH_MULIPLER),
+    message = s"burst length should be less than 256/${DATA_WIDTH_MULIPLER}",
+    severity = ERROR
+  )
+
+  val clkFrequancy = ClockDomain.current.frequency.getValue
+  def timeToCycles(time: TimeNumber): BigInt =
+    (clkFrequancy * time).setScale(0, RoundingMode.UP).toBigInt
+
+  def cycleCounter(cycleMax: BigInt) = new Area { // down-counter sized for delays of up to cycleMax cycles
+    val counter = Reg(UInt(log2Up(cycleMax) bits)) init (0)
+    val busy = counter =/= 0 // true until the counter has run down to zero
+    if (cycleMax > 1) { // elaboration-time test: no decrement logic when cycleMax is 1
+      when(busy) {
+        counter := counter - 1
+      }
+    }
+    def setCycles(cycles: BigInt) = { // constant delay; the range check runs while the hardware is generated
+      assert(
+        cycles <= cycleMax && cycles > 0,
+        s"invalid counter cycle=${cycles}, cycleMax=${cycleMax}"
+      )
+      counter := cycles - 1
+    }
+    def setCycles(cycles: UInt) = { // delay taken from a signal; the range check is on hardware values
+      assert(
+        cycles <= cycleMax && cycles > 0,
+        s"invalid counter cycle=${cycles}, cycleMax=${cycleMax}"
+      )
+      counter := (cycles - 1).resized
+    }
+    def setTime(time: TimeNumber) = setCycles( // converts a time to cycles before loading the counter
+      timeToCycles(time).max(1)
+    ) // Minimum 1 cycles
+  }
+  def timeCounter(timeMax: TimeNumber) = cycleCounter(timeToCycles(timeMax)) // cycleCounter sized from a time
+
+  val initCounter = timeCounter(t.tPOW) // used by initFsm.INIT_WAIT for the tPOW wait
+  val stateCounter = timeCounter(
+    t.tRFC
+  ) // tRFC is the largest delay except tPOW and tREF
+  val refreshCounter = CounterFreeRun(timeToCycles(t.tREF / (1 << l.rowWidth)))
+  // refreshCounter wraps once every tREF / 2^rowWidth; the flags below feed the arbitration in fsm.IDLE
+  val initPeriod = Bool
+  val refreshReqReg = Reg(Bool) init (False)
+  val preReqIsWriteReg = Reg(Bool) init (False) // set by writeFsm on exit, cleared by readFsm on exit
+  val readReq = // a read is pending and rFifo reports at least `len` free entries
+    arFifo.io.pop.valid && arFifo.io.pop.payload.len <= rFifo.io.availability
+  val writeReq = // a write is pending, wFifo already holds at least `len` entries and bFifo has room
+    awFifo.io.pop.valid && awFifo.io.pop.payload.len <= wFifo.io.occupancy && bFifo.io.availability > 0
+
+  val initFsm = new StateMachine { // start-up sequence: wait tPOW, precharge, refresh twice, load mode register
+    val INIT_WAIT: State = new State with EntryPoint {
+      onEntry {
+        initCounter.setTime(t.tPOW)
+      } whenIsActive {
+        when(!initCounter.busy) {
+          goto(INIT_PRECHARGE)
+        }
+      }
+    }
+    // Precharge with ALL_BANK_ADDR on the address pins, then wait tRP
+    val INIT_PRECHARGE: State = new State {
+      onEntry {
+        addressReg := ALL_BANK_ADDR
+        commandReg := CMD_PRECHARGE
+        stateCounter.setTime(t.tRP)
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          goto(INIT_REFRESH_1)
+        }
+      }
+    }
+    // First refresh command, followed by a tRFC wait
+    val INIT_REFRESH_1: State = new State {
+      onEntry {
+        commandReg := CMD_REFRESH
+        stateCounter.setTime(t.tRFC)
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          goto(INIT_REFRESH_2)
+        }
+      }
+    }
+    // Second refresh command, followed by a tRFC wait
+    val INIT_REFRESH_2: State = new State {
+      onEntry {
+        commandReg := CMD_REFRESH
+        stateCounter.setTime(t.tRFC)
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          goto(INIT_LOAD_MODE_REG)
+        }
+      }
+    }
+    // Put MODE_VALUE on the address pins with the load-mode command, wait cMRD cycles, then finish
+    val INIT_LOAD_MODE_REG: State = new State {
+      onEntry {
+        addressReg := MODE_VALUE
+        commandReg := CMD_LOAD_MODE_REG
+        stateCounter.setCycles(t.cMRD)
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          exit()
+        }
+      }
+    }
+  }
+
+  val refreshFsm = new StateMachine { // refresh sequence: precharge all banks (tRP), then auto refresh (tRFC)
+    val REFRESH_PRECHARGE: State = new State with EntryPoint {
+      onEntry {
+        addressReg := ALL_BANK_ADDR
+        commandReg := CMD_PRECHARGE // precharge comes first, in the same order as initFsm
+        stateCounter.setTime(t.tRP)
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          goto(REFRESH)
+        }
+      }
+    }
+    // Issue the refresh command, wait tRFC, and clear the pending refresh request when leaving
+    val REFRESH: State = new State {
+      onEntry {
+        commandReg := CMD_REFRESH // the tRFC wait below belongs to this command
+        stateCounter.setTime(t.tRFC)
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          exit()
+        }
+      } onExit {
+        refreshReqReg := False
+      }
+    }
+  }
+
+  val writeFsm = new StateMachine { // one AXI write burst: activate row, stream data, terminate, respond
+    val ACTIVE_WRITE: State = new State with EntryPoint {
+      onEntry {
+        commandReg := CMD_ACTIVE
+        bankAddrReg := awFifo.io.pop.payload
+          .addr(l.rowWidth + l.columnWidth, l.bankWidth bits) // Bank address: the bits right above the row
+        addressReg := awFifo.io.pop.payload.addr(
+          (l.rowWidth + l.columnWidth - 1) downto l.columnWidth
+        ) // Row address
+        columnAddrReg := awFifo.io.pop.payload
+          .addr((l.columnWidth - 1) downto 0)
+          .resized // Column address
+        opIdReg := awFifo.io.pop.payload.id
+        // Burst length counted in SDRAM words: AXI len scaled by the bus/SDRAM width ratio
+        burstLenReg := (awFifo.io.pop.payload.len << log2Up(
+          DATA_WIDTH_MULIPLER
+        )).resized
+        stateCounter.setTime(t.tRCD)
+        // Consume the address request now that all of its fields are latched
+        awFifo.io.pop.ready := True // awFifo.io.pop.valid must be true here
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          goto(BURST_WRITE)
+        }
+      }
+    }
+    // Issue the write command and feed one SDRAM word per cycle from wFifo
+    val BURST_WRITE: State = new State {
+      onEntry {
+        addressReg := columnAddrReg.resized
+        commandReg := CMD_WRITE
+        stateCounter.setCycles(burstLenReg)
+        // Latch the first bus word together with its strobes and pop it from wFifo
+        strobeReg := wFifo.io.pop.payload.strb
+        busWriteDataReg := wFifo.io.pop.payload.data
+        wFifo.io.pop.ready := True // wFifo.io.pop.valid must be true here
+        busDataShiftCnt := DATA_WIDTH_MULIPLER - 1
+      } whenIsActive {
+        if (DATA_WIDTH_MULIPLER > 1) { // elaboration-time choice: bus word wider than the SDRAM word
+          strobeReg := (strobeReg >> l.bytePerWord).resized
+          busWriteDataReg := (busWriteDataReg >> l.dataWidth).resized
+          when(busDataShiftCnt > 0) {
+            busDataShiftCnt := busDataShiftCnt - 1
+          } otherwise { // current bus word fully shifted out: these later assignments load the next one
+            strobeReg := wFifo.io.pop.payload.strb
+            busWriteDataReg := wFifo.io.pop.payload.data
+            wFifo.io.pop.ready := True // wFifo.io.pop.valid must be true here
+            busDataShiftCnt := DATA_WIDTH_MULIPLER - 1
+          }
+        } else {
+          busWriteDataReg := wFifo.io.pop.payload.data
+          wFifo.io.pop.ready := stateCounter.busy // wFifo.io.pop.valid must be true here
+        }
+        // Leave once the programmed number of SDRAM words has elapsed
+        when(!stateCounter.busy) {
+          goto(TERM_WRITE)
+        }
+      }
+    }
+    // Terminate the burst and push the write response into bFifo
+    val TERM_WRITE: State = new State {
+      onEntry {
+        commandReg := CMD_BURST_TERMINATE
+      } whenIsActive {
+        exit() // Must be one cycle, because AXI4 write response will be sent in this cycle right after burst write finish
+      } onExit {
+        preReqIsWriteReg := True
+        bFifo.io.push.valid := True // bFifo.io.push.ready must be true here
+      }
+    }
+  }
+
+  val readFsm = new StateMachine { // one AXI read burst: activate row, issue read, wait CAS, collect data
+    val ACTIVE: State = new State with EntryPoint {
+      onEntry {
+        commandReg := CMD_ACTIVE
+        bankAddrReg := arFifo.io.pop.payload
+          .addr(l.rowWidth + l.columnWidth, l.bankWidth bits) // Bank address: the bits right above the row
+        addressReg := arFifo.io.pop.payload.addr(
+          (l.rowWidth + l.columnWidth - 1) downto l.columnWidth
+        ) // Row address
+        columnAddrReg := arFifo.io.pop.payload
+          .addr((l.columnWidth - 1) downto 0)
+          .resized // Column address
+        opIdReg := arFifo.io.pop.payload.id
+        // Burst length counted in SDRAM words: AXI len scaled by the bus/SDRAM width ratio
+        burstLenReg := (arFifo.io.pop.payload.len << log2Up(
+          DATA_WIDTH_MULIPLER
+        )).resized
+        stateCounter.setTime(t.tRCD)
+        // Consume the address request now that all of its fields are latched
+        arFifo.io.pop.ready := True // arFifo.io.pop.valid must be true here
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          goto(SEND_READ_CMD)
+        }
+      }
+    }
+    // Issue the read command with the latched column address and wait c.CAS cycles
+    val SEND_READ_CMD: State = new State {
+      onEntry {
+        addressReg := columnAddrReg.resized
+        commandReg := CMD_READ
+        stateCounter.setCycles(c.CAS)
+      } whenIsActive {
+        when(!stateCounter.busy) {
+          goto(BURST_READ)
+        }
+      }
+    }
+    // Count burstLenReg cycles; burst terminate is issued when the counter equals c.CAS
+    val BURST_READ: State = new State {
+      onEntry {
+        stateCounter.setCycles(burstLenReg)
+      } whenIsActive {
+        when(stateCounter.counter === c.CAS) {
+          commandReg := CMD_BURST_TERMINATE
+        } elsewhen (!stateCounter.busy) {
+          exit()
+        }
+      } onExit {
+        preReqIsWriteReg := False
+        busReadDataLastReg := True
+      }
+    }
+  }
+
+  val fsm = new StateMachine { // top-level sequencer that runs the init / refresh / write / read sub-FSMs
+    val INIT: State = new StateFsm(initFsm) with EntryPoint {
+      whenCompleted {
+        goto(IDLE)
+      } onExit {
+        io.initDone := True
+      }
+    }
+    // Arbitration: refresh first; if both directions are pending, take the one not served last
+    val IDLE: State = new State {
+      whenIsActive {
+        when(refreshReqReg) {
+          goto(REFRESH)
+        } elsewhen (readReq && writeReq) {
+          when(preReqIsWriteReg) {
+            goto(READ)
+          } otherwise {
+            goto(WRITE)
+          }
+        } elsewhen (writeReq && !readReq) {
+          goto(WRITE)
+        } elsewhen (readReq && !writeReq) {
+          goto(READ)
+        }
+      }
+    }
+    // Run refreshFsm to completion, then return to IDLE
+    val REFRESH: State = new StateFsm(refreshFsm) {
+      whenCompleted {
+        goto(IDLE)
+      }
+    }
+    // Run writeFsm, then precharge
+    val WRITE: State = new StateFsm(writeFsm) {
+      whenCompleted {
+        goto(PRECHARGE)
+      }
+    }
+    // Run readFsm, then precharge
+    val READ: State = new StateFsm(readFsm) {
+      whenCompleted {
+        goto(PRECHARGE)
+      }
+    }
+    // NOTE(review): goes straight back to IDLE with no explicit tRP wait; addressReg is not changed here — confirm
+    val PRECHARGE: State = new State {
+      onEntry {
+        commandReg := CMD_PRECHARGE
+      } whenIsActive {
+        goto(IDLE)
+      }
+    }
+  }
+
+  // assert(
+  //   assertion = writeFsm.isEntering(writeFsm.TERM_WRITE) && wFifo.io.pop.payload.last,
+  //   message = "write burst finish requires AXI AWLAST is true",
+  //   severity = ERROR
+  // )
+
+  initPeriod := fsm.isActive(fsm.INIT) // true while the top-level FSM is in INIT
+  // DQM selection: per-lane mask during write bursts, all valid during reads, all invalid otherwise
+  when(writeFsm.isActive(writeFsm.BURST_WRITE)) {
+    mask := DQM_ALL_VALID | ~writeMask // NOTE(review): presumably a set DQM bit masks its lane — confirm
+  } elsewhen (fsm.isActive(fsm.READ)) {
+    mask := DQM_ALL_VALID
+  } otherwise {
+    mask := DQM_ALL_INVALID
+  }
+
+  // Handle SDRAM read: capture DQ on the falling clock edge whenever no DQ write-enable bit is set
+  val readArea = new ClockingArea(
+    clockDomain = ClockDomain(
+      clock = ClockDomain.current.clock,
+      reset = ClockDomain.current.reset,
+      config = ClockDomainConfig(clockEdge = FALLING) // NOTE(review): other settings of the current config are not copied
+    )
+  ) {
+    val readReg = RegNextWhen(io.sdram.DQ.read, io.sdram.DQ.writeEnable === 0)
+  }
+
+  val startBurstReadReg = RegNext(readFsm.isEntering(readFsm.BURST_READ)) // delayed "entering BURST_READ" flag
+  if (DATA_WIDTH_MULIPLER > 1) { // elaboration-time choice: several SDRAM words form one bus word
+    when(startBurstReadReg) {
+      busDataShiftCnt := DATA_WIDTH_MULIPLER - 1
+    }
+    // Valid defaults to False and is raised only when the shift counter has reached zero
+    busReadDataVldReg := False
+    when(readFsm.isActive(readFsm.BURST_READ)) {
+      // Little Endian
+      busReadDataReg := readArea.readReg ## busReadDataReg(
+        (c.busDataWidth - 1) downto l.dataWidth
+      )
+      // Big Endian
+      // busReadDataReg := busReadDataReg(
+      //   (c.busDataWidth - l.dataWidth - 1) downto 0
+      // ) ## readArea.readReg
+      when(busDataShiftCnt > 0) {
+        busDataShiftCnt := busDataShiftCnt - 1
+      } otherwise {
+        busReadDataVldReg := True
+        busDataShiftCnt := DATA_WIDTH_MULIPLER - 1
+      }
+    }
+  } else { // same width on both sides: every captured word is a complete bus word
+    busReadDataReg := readArea.readReg
+    busReadDataVldReg := readFsm.isActive(readFsm.BURST_READ)
+  }
+
+  when(!initPeriod && refreshCounter.willOverflow) { // outside INIT, each counter wrap requests a refresh
+    refreshReqReg := True
+  }
+  // All DQ write-enable bits are set only while writeFsm is in BURST_WRITE
+  when(writeFsm.isActive(writeFsm.BURST_WRITE)) {
+    io.sdram.DQ.writeEnable.setAll()
+  } otherwise {
+    io.sdram.DQ.writeEnable := 0
+  }
+}
+
+object SdramController {
+  /** Entry point: elaborates the controller for the MT48LC16M16A2
+    * (timingGrade7 timings, default SdramConfig) with a fixed 100 MHz
+    * default clock domain frequency and generates its Verilog.
+    */
+  def main(args: Array[String]): Unit = {
+    val chip = MT48LC16M16A2
+    val spinalConfig =
+      SpinalConfig(defaultClockDomainFrequency = FixedFrequency(100 MHz))
+    def buildTop() = new SdramController(chip.layout, chip.timingGrade7, SdramConfig())
+    spinalConfig.generateVerilog(buildTop())
+  }
+}
diff --git a/spinal-cocotb/SpinalNet/test/src/python/common/timescale.v b/spinal-cocotb/SpinalNet/test/src/python/common/timescale.v
new file mode 100644
index 0000000..e66f979
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/python/common/timescale.v
@@ -0,0 +1 @@
+`timescale 1ns/1ps
diff --git a/spinal-cocotb/SpinalNet/test/src/python/sdram_controller/Makefile b/spinal-cocotb/SpinalNet/test/src/python/sdram_controller/Makefile
new file mode 100644
index 0000000..0993202
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/python/sdram_controller/Makefile
@@ -0,0 +1,15 @@
+#SIM ?= icarus
+# cocotb build description for the SDRAM controller test bench
+# Sources: generated controller RTL, Verilog test bench, SDRAM model and the shared timescale file
+# ../../../../../simWorkspace/SdramControllerTb/rtl/SdramControllerTb.v
+VERILOG_SOURCES += ../../../../../simWorkspace/SdramController/rtl/SdramController.v ../../verilog/tb_sdram_controller.v ../../verilog/mt48lc16m16a2.v ../common/timescale.v
+TOPLEVEL=tb_sdram_controller
+MODULE=SdramControllerTest
+
+# Python path to cocotblib
+export PYTHONPATH := ..
+export COCOTB_RESOLVE_X := ZEROS
+#export COCOTB_LOG_LEVEL := DEBUG
+#export COCOTB_SCHEDULER_DEBUG := 1
+include $(shell cocotb-config --makefiles)/Makefile.sim
+#include ../common/Makefile.sim
diff --git a/spinal-cocotb/SpinalNet/test/src/python/sdram_controller/SdramControllerTest.py b/spinal-cocotb/SpinalNet/test/src/python/sdram_controller/SdramControllerTest.py
new file mode 100644
index 0000000..9f08633
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/python/sdram_controller/SdramControllerTest.py
@@ -0,0 +1,107 @@
+import cocotb
+from cocotb import utils
+from cocotb.triggers import Timer, RisingEdge
+from cocotblib.misc import simulationSpeedPrinter
+
+async def ClockDomainAsyncResetCustom(clk, reset):
+    if reset:
+        reset <= 1
+    clk <= 0
+    await Timer(100000)
+    if reset:
+        reset <= 0
+    while True:
+        clk <= 0
+        await Timer(3750)
+        clk <= 1
+        await Timer(3750)
+
+async def waitUntil(clk, cond):
+    while True:
+        if cond():
+            break
+        await RisingEdge(clk)
+
+@cocotb.test()
+async def testFunc(dut):
+    cocotb.fork(ClockDomainAsyncResetCustom(dut.clk, dut.reset))
+    cocotb.fork(simulationSpeedPrinter(dut.clk))
+
+    dut.io_axi_aw_valid <= False
+    dut.io_axi_w_valid  <= False
+    dut.io_axi_b_ready  <= False
+    dut.io_axi_ar_valid <= False
+    dut.io_axi_r_ready  <= False
+
+    await waitUntil(dut.clk, lambda: int(dut.io_initDone) == 1)
+    print("init done at: {}".format(utils.get_sim_time(units='ns')))
+    dut.io_axi_b_ready  <= True
+
+    busDataWidth = 32
+    sdramDQWidth = 16
+    dataWidthMultipler = int(busDataWidth / sdramDQWidth) # AXI burst size in bytes
+    strobe = int(pow(2, pow(2, dataWidthMultipler)) - 1)
+    burstLen = 4
+    rangeMax = 16
+    print("bus width = {}, SDRAM DQ width = {}, strobe = {}, burst length = {}".format(
+        busDataWidth, sdramDQWidth, strobe, burstLen
+    ))
+
+    for i in range(rangeMax):
+        dut.io_axi_aw_valid         <= True
+        dut.io_axi_aw_payload_addr  <= i * burstLen * dataWidthMultipler
+        dut.io_axi_aw_payload_id    <= i
+        dut.io_axi_aw_payload_len   <= burstLen
+        dut.io_axi_aw_payload_size  <= dataWidthMultipler
+        dut.io_axi_aw_payload_burst <= 1 # INCR
+        await RisingEdge(dut.clk)
+        await waitUntil(dut.clk, lambda: (int(dut.io_axi_aw_valid) == 1 and int(dut.io_axi_aw_ready) == 1))
+        dut.io_axi_aw_valid <= False
+
+        for d in range(burstLen):
+            dut.io_axi_w_valid        <= True
+            dut.io_axi_w_payload_data <= i * burstLen + d
+            dut.io_axi_w_payload_strb <= strobe
+            dut.io_axi_w_payload_last <= ((d + 1) % burstLen == 0)
+            await RisingEdge(dut.clk)
+            await waitUntil(dut.clk, lambda: (int(dut.io_axi_w_valid) == 1 and int(dut.io_axi_w_ready) == 1))
+            dut.io_axi_w_valid <= False
+
+            print("write: addr={}, data={}, id={}, last={}".format(
+                i * burstLen + d,
+                int(dut.io_axi_w_payload_data),
+                int(dut.io_axi_aw_payload_id),
+                int(dut.io_axi_w_payload_last)
+            ))
+
+        await waitUntil(dut.clk, lambda: (int(dut.io_axi_b_valid) == 1 and int(dut.io_axi_b_ready) == 1))
+
+    dut.io_axi_b_ready  <= False
+    dut.io_axi_r_ready  <= True
+
+    for i in range(rangeMax):
+        dut.io_axi_ar_valid         <= True
+        dut.io_axi_ar_payload_addr  <= i * burstLen * dataWidthMultipler
+        dut.io_axi_ar_payload_id    <= i
+        dut.io_axi_ar_payload_len   <= burstLen
+        dut.io_axi_ar_payload_size  <= dataWidthMultipler
+        dut.io_axi_ar_payload_burst <= 1 # INCR
+        await RisingEdge(dut.clk)
+        await waitUntil(dut.clk, lambda: (int(dut.io_axi_ar_valid) == 1 and int(dut.io_axi_ar_ready) == 1))
+        dut.io_axi_ar_valid <= False
+
+        for d in range(burstLen):
+            await RisingEdge(dut.clk)
+            await waitUntil(dut.clk, lambda: (int(dut.io_axi_r_valid) == 1 and int(dut.io_axi_r_ready) == 1))
+
+            print("read: addr={}, data={}, id={}, last={}".format(
+                (i * burstLen + d),
+                int(dut.io_axi_r_payload_data),
+                int(dut.io_axi_r_payload_id),
+                int(dut.io_axi_r_payload_last)
+            ))
+            assert int(dut.io_axi_r_payload_data) == (i * burstLen + d), "read data not match"
+
+    dut.io_axi_r_ready  <= False
+    print("finished at: {}".format(utils.get_sim_time(units='ns')))
+    await RisingEdge(dut.clk)
diff --git a/spinal-cocotb/SpinalNet/test/src/scala/dma/DmaControllerTest.scala b/spinal-cocotb/SpinalNet/test/src/scala/dma/DmaControllerTest.scala
new file mode 100644
index 0000000..a2fed7f
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/scala/dma/DmaControllerTest.scala
@@ -0,0 +1,15 @@
+ 
+package dma
+
+import org.scalatest.funsuite.AnyFunSuite
+
+import spinal.core._
+import spinal.core.sim._
+import spinal.lib._
+import spinal.sim._
+
+class DmaControllerTest extends AnyFunSuite { // runs the DMA controller simulation as a ScalaTest case
+  test("dma controller test") {
+    DmaControllerSim.main(Array.empty[String]) // empty argument array instead of null
+  }
+}
\ No newline at end of file
diff --git a/spinal-cocotb/SpinalNet/test/src/scala/sdram/SdramControllerTest.scala b/spinal-cocotb/SpinalNet/test/src/scala/sdram/SdramControllerTest.scala
new file mode 100644
index 0000000..5a43781
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/scala/sdram/SdramControllerTest.scala
@@ -0,0 +1,114 @@
+package sdram
+
+import org.scalatest.funsuite.AnyFunSuite
+
+import spinal.core._
+import spinal.core.sim._
+import spinal.lib._
+import spinal.lib.com.eth._
+import spinal.sim._
+import spinal.lib.memory.sdram.SdramLayout
+import spinal.lib.memory.sdram.sdr.{MT48LC16M16A2, SdramTimings, SdramInterface}
+
+import testutils.CocotbRunner
+
+case class mt48lc16m16a2(l: SdramLayout) extends BlackBox { // black-box wrapper around the Verilog SDRAM model
+  val io = new Bundle {
+    val sdram = slave(SdramInterface(l)).setName("") // empty name — presumably so ports match the model's names; confirm
+  }
+  // Remove the io prefix from the port names and register the Verilog model file
+  noIoPrefix()
+  addRTLPath("./SpinalNet/test/src/verilog/mt48lc16m16a2.v")
+}
+
+/*
+case class SdramModelPlus(l: SdramLayout) extends BlackBox {
+  val io = new Bundle {
+    val Dq    = inout Bits(l.dataWidth bits)
+    val Addr  = in Bits(l.chipAddressWidth bits)
+    val Ba    = in Bits(l.bankWidth bits)
+    val Clk   = in Bool
+    val Cke   = in Bool
+    val Ras_n = in Bool
+    val Cas_n = in Bool
+    val Cs_n  = in Bool
+    val We_n  = in Bool
+    val Dqm   = in Bits(l.bytePerWord bits)
+    val Debug = in Bool
+  }
+  mapCurrentClockDomain(io.Clk)
+  noIoPrefix()
+  addGeneric("addr_bits", l.chipAddressWidth)
+  addGeneric("data_bits", l.dataWidth)
+  addGeneric("col_bits", l.columnWidth)
+  addRTLPath("./SpinalNet/test/src/verilog/SdramModelPlus.v")
+}
+
+class SdramControllerTb(l: SdramLayout, t: SdramTimings, c: SdramConfig) extends Component {
+  val io = new Bundle {
+    val bus = slave(SdramBus(l, c))
+  }
+
+  val controller = new SdramController(l, t, c)
+  controller.io.bus <> io.bus
+
+  val sdramDevice = SdramModelPlus(l)
+  sdramDevice.io.Addr := controller.io.sdram.ADDR
+  sdramDevice.io.Ba := controller.io.sdram.BA
+  sdramDevice.io.Cke := controller.io.sdram.CKE
+  sdramDevice.io.Ras_n := controller.io.sdram.RASn
+  sdramDevice.io.Cas_n := controller.io.sdram.CASn
+  sdramDevice.io.Cs_n := controller.io.sdram.CSn
+  sdramDevice.io.We_n := controller.io.sdram.WEn
+  sdramDevice.io.Dqm := controller.io.sdram.DQM
+  sdramDevice.io.Debug := True
+  when (controller.io.sdram.DQ.writeEnable === 1) {
+    sdramDevice.io.Dq := controller.io.sdram.DQ.write
+  }
+  controller.io.sdram.DQ.read := sdramDevice.io.Dq
+  
+  // val sdramDevice = mt48lc16m16a2(l)
+  // sdramDevice.io.sdram <> controller.io.sdram
+}
+*/
+
+class SdramControllerTest extends AnyFunSuite { // elaborates the controller, then runs the cocotb bench on it
+  val device = MT48LC16M16A2
+  val l = device.layout
+  val t = device.timingGrade7
+  val c = SdramConfig()
+  // NOTE(review): timings are elaborated for 200 MHz below — confirm this matches the clock the cocotb bench drives
+  test("sdram test") {
+    // Compiled only for its side effect: the cocotb Makefile reads the RTL from simWorkspace/SdramController/rtl
+    SimConfig.withWave
+      .withConfig(SpinalConfig(defaultClockDomainFrequency = FixedFrequency(200 MHz)))
+      .compile(new SdramController(l, t, c))
+    assert(CocotbRunner("./SpinalNet/test/src/python/sdram_controller"), "Simulation faild")
+    println("SUCCESS")
+  }
+
+/*
+  def simTest(dut: SdramControllerTb) {
+    SimTimeout(1000)
+    dut.clockDomain.forkStimulus(2)
+
+    dut.io.bus.cmd.valid #= true
+    dut.io.bus.cmd.address  #= 0
+    dut.io.bus.cmd.write    #= true
+    dut.io.bus.cmd.data     #= 127
+    dut.io.bus.cmd.burstLen #= 1
+    dut.io.bus.cmd.mask     #= 3
+    dut.io.bus.cmd.opId       #= 1
+    dut.io.bus.cmd.last     #= true
+    dut.io.bus.rsp.ready #= false
+  }
+
+  test("sdram test") {
+    SimConfig
+    .withWave
+    .withConfig(SpinalConfig(defaultClockDomainFrequency = FixedFrequency(100 MHz)))
+    .compile(new SdramControllerTb(l, t, c))
+    .doSim(simTest(_))
+  }
+*/
+}
diff --git a/spinal-cocotb/SpinalNet/test/src/scala/testutils/CocotbRunner.scala b/spinal-cocotb/SpinalNet/test/src/scala/testutils/CocotbRunner.scala
new file mode 100644
index 0000000..1e76b30
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/scala/testutils/CocotbRunner.scala
@@ -0,0 +1,50 @@
+package testutils
+
+import scala.sys.process.{Process, ProcessIO}
+
+object CocotbRunner {
+  // Helper for ScalaTest cases: runs a cocotb Makefile project and evaluates its results.xml.
+  /** Runs `make` in the cocotb project at `path`; true when results.xml reports no failure or skip. */
+  def apply(path: String): Boolean = {
+    // results.xml is deleted first so a stale report can never be mistaken for this run's verdict
+    doCmd(Seq(s"cd $path", "rm -f results.xml", "make"))
+    getCocotbPass(path)
+  }
+
+  /** Feeds `cmds`, one per line, to a `sh` process, then prints its collected stdout and stderr. */
+  def doCmd(cmds: Seq[String]): Unit = {
+    var out, err: String = ""
+    // Drain a process stream into one string (each line prefixed by a newline), always closing it.
+    def slurp(stream: java.io.InputStream): String =
+      try scala.io.Source.fromInputStream(stream).getLines().map("\n" + _).mkString
+      finally stream.close()
+    val io = new ProcessIO(
+      stdin => {
+        try cmds.foreach(cmd => stdin.write((cmd + "\n").getBytes))
+        finally stdin.close()
+      },
+      stdout => { out = slurp(stdout) },
+      stderr => { err = slurp(stderr) }
+    )
+    val exitCode = Process("sh").run(io).exitValue()
+    println(out)
+    println(err)
+    // Report a non-zero exit status so a failing command sequence is visible in the log.
+    if (exitCode != 0) println(s"shell exited with code $exitCode")
+  }
+
+  /**
+    * Reads `path`/results.xml and returns false when any line mentions "failure" or "skipped".
+    * A missing report also yields false: the run produced no verdict at all.
+    */
+  def getCocotbPass(path: String): Boolean = {
+    val report = new java.io.File(path, "results.xml")
+    if (!report.isFile) false
+    else {
+      val source = scala.io.Source.fromFile(report)
+      // The report is closed even when reading it throws.
+      try !source.getLines().exists(line => line.contains("failure") || line.contains("skipped"))
+      finally source.close()
+    }
+  }
+}
diff --git a/spinal-cocotb/SpinalNet/test/src/verilog/mt48lc16m16a2.v b/spinal-cocotb/SpinalNet/test/src/verilog/mt48lc16m16a2.v
new file mode 100644
index 0000000..dca98d8
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/verilog/mt48lc16m16a2.v
@@ -0,0 +1,1072 @@
+/**************************************************************************
+*
+*    File Name:  MT48LC16M16A2.V  
+*      Version:  2.1
+*         Date:  June 6th, 2002
+*        Model:  BUS Functional
+*    Simulator:  Model Technology
+*
+* Dependencies:  None
+*
+*        Email:  modelsupport@micron.com
+*      Company:  Micron Technology, Inc.
+*        Model:  MT48LC16M16A2 (4Meg x 16 x 4 Banks)
+*
+*  Description:  Micron 256Mb SDRAM Verilog model
+*
+*   Limitation:  - Doesn't check for 8192 cycle refresh
+*
+*         Note:  - Set simulator resolution to "ps" accuracy
+*                - Set Debug = 0 to disable $display messages
+*
+*   Disclaimer:  THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY 
+*                WHATSOEVER AND MICRON SPECIFICALLY DISCLAIMS ANY 
+*                IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
+*                A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
+*
+*                Copyright (c) 2001 Micron Semiconductor Products, Inc.
+*                All rights reserved
+*
+* Rev  Author          Date        Changes
+* ---  --------------------------  ---------------------------------------
+* 2.1  SH              06/06/2002  - Typo in bank multiplex
+*      Micron Technology Inc.
+*
+* 2.0  SH              04/30/2002  - Second release
+*      Micron Technology Inc.
+*
+**************************************************************************/
+
+`timescale 1ns / 1ps
+
+module mt48lc16m16a2 (Dq, Addr, Ba, Clk, Cke, Cs_n, Ras_n, Cas_n, We_n, Dqm);
+
+    parameter addr_bits =      13;
+    parameter data_bits =      16;
+    parameter col_bits  =       9;
+    parameter mem_sizes = 100000;
+
+    inout     [data_bits - 1 : 0] Dq;
+    input     [addr_bits - 1 : 0] Addr;
+    input                 [1 : 0] Ba;
+    input                         Clk;
+    input                         Cke;
+    input                         Cs_n;
+    input                         Ras_n;
+    input                         Cas_n;
+    input                         We_n;
+    input                 [1 : 0] Dqm;
+
+    reg       [data_bits - 1 : 0] Bank0 [0 : mem_sizes];
+    reg       [data_bits - 1 : 0] Bank1 [0 : mem_sizes];
+    reg       [data_bits - 1 : 0] Bank2 [0 : mem_sizes];
+    reg       [data_bits - 1 : 0] Bank3 [0 : mem_sizes];
+
+    reg                   [1 : 0] Bank_addr [0 : 3];                // Bank Address Pipeline
+    reg        [col_bits - 1 : 0] Col_addr [0 : 3];                 // Column Address Pipeline
+    reg                   [3 : 0] Command [0 : 3];                  // Command Operation Pipeline
+    reg                   [1 : 0] Dqm_reg0, Dqm_reg1;               // DQM Operation Pipeline
+    reg       [addr_bits - 1 : 0] B0_row_addr, B1_row_addr, B2_row_addr, B3_row_addr;
+
+    reg       [addr_bits - 1 : 0] Mode_reg;
+    reg       [data_bits - 1 : 0] Dq_reg, Dq_dqm;
+    reg        [col_bits - 1 : 0] Col_temp, Burst_counter;
+
+    reg                           Act_b0, Act_b1, Act_b2, Act_b3;   // Bank Activate
+    reg                           Pc_b0, Pc_b1, Pc_b2, Pc_b3;       // Bank Precharge
+
+    reg                   [1 : 0] Bank_precharge       [0 : 3];     // Precharge Command
+    reg                           A10_precharge        [0 : 3];     // Addr[10] = 1 (All banks)
+    reg                           Auto_precharge       [0 : 3];     // RW Auto Precharge (Bank)
+    reg                           Read_precharge       [0 : 3];     // R  Auto Precharge
+    reg                           Write_precharge      [0 : 3];     //  W Auto Precharge
+    reg                           RW_interrupt_read    [0 : 3];     // RW Interrupt Read with Auto Precharge
+    reg                           RW_interrupt_write   [0 : 3];     // RW Interrupt Write with Auto Precharge
+    reg                   [1 : 0] RW_interrupt_bank;                // RW Interrupt Bank
+    integer                       RW_interrupt_counter [0 : 3];     // RW Interrupt Counter
+    integer                       Count_precharge      [0 : 3];     // RW Auto Precharge Counter
+
+    reg                           Data_in_enable;
+    reg                           Data_out_enable;
+
+    reg                   [1 : 0] Bank, Prev_bank;
+    reg       [addr_bits - 1 : 0] Row;
+    reg        [col_bits - 1 : 0] Col, Col_brst;
+
+    // Internal system clock
+    reg                           CkeZ, Sys_clk;
+
+    // Commands Decode
+    wire      Active_enable    = ~Cs_n & ~Ras_n &  Cas_n &  We_n;
+    wire      Aref_enable      = ~Cs_n & ~Ras_n & ~Cas_n &  We_n;
+    wire      Burst_term       = ~Cs_n &  Ras_n &  Cas_n & ~We_n;
+    wire      Mode_reg_enable  = ~Cs_n & ~Ras_n & ~Cas_n & ~We_n;
+    wire      Prech_enable     = ~Cs_n & ~Ras_n &  Cas_n & ~We_n;
+    wire      Read_enable      = ~Cs_n &  Ras_n & ~Cas_n &  We_n;
+    wire      Write_enable     = ~Cs_n &  Ras_n & ~Cas_n & ~We_n;
+
+    // Burst Length Decode
+    wire      Burst_length_1   = ~Mode_reg[2] & ~Mode_reg[1] & ~Mode_reg[0];
+    wire      Burst_length_2   = ~Mode_reg[2] & ~Mode_reg[1] &  Mode_reg[0];
+    wire      Burst_length_4   = ~Mode_reg[2] &  Mode_reg[1] & ~Mode_reg[0];
+    wire      Burst_length_8   = ~Mode_reg[2] &  Mode_reg[1] &  Mode_reg[0];
+    wire      Burst_length_f   =  Mode_reg[2] &  Mode_reg[1] &  Mode_reg[0];
+
+    // CAS Latency Decode
+    wire      Cas_latency_2    = ~Mode_reg[6] &  Mode_reg[5] & ~Mode_reg[4];
+    wire      Cas_latency_3    = ~Mode_reg[6] &  Mode_reg[5] &  Mode_reg[4];
+
+    // Write Burst Mode
+    wire      Write_burst_mode = Mode_reg[9];
+
+    wire      Debug            = 1'b0;                          // Debug messages : 1 = On
+    wire      Dq_chk           = Sys_clk & Data_in_enable;      // Check setup/hold time for DQ
+    
+    assign    Dq               = Dq_reg;                        // DQ buffer
+
+    // Commands Operation
+    `define   ACT       0
+    `define   NOP       1
+    `define   READ      2
+    `define   WRITE     3
+    `define   PRECH     4
+    `define   A_REF     5
+    `define   BST       6
+    `define   LMR       7
+
+    // Timing Parameters for -7E PC133 CL2
+    parameter tAC  =   5.4;
+    parameter tHZ  =   5.4;
+    parameter tOH  =   3.0;
+    parameter tMRD =   2.0;     // 2 Clk Cycles
+    parameter tRAS =  37.0;
+    parameter tRC  =  60.0;
+    parameter tRCD =  15.0;
+    parameter tRFC =  66.0;
+    parameter tRP  =  15.0;
+    parameter tRRD =  14.0;
+    parameter tWRa =   7.0;     // A2 Version - Auto precharge mode (1 Clk + 7 ns)
+    parameter tWRm =  14.0;     // A2 Version - Manual precharge mode (14 ns)
+
+    // Timing Check variable
+    time  MRD_chk;
+    time  WR_chkm [0 : 3];
+    time  RFC_chk, RRD_chk;
+    time  RC_chk0, RC_chk1, RC_chk2, RC_chk3;
+    time  RAS_chk0, RAS_chk1, RAS_chk2, RAS_chk3;
+    time  RCD_chk0, RCD_chk1, RCD_chk2, RCD_chk3;
+    time  RP_chk0, RP_chk1, RP_chk2, RP_chk3;
+
+    initial begin
+        Dq_reg = {data_bits{1'bz}};
+        Data_in_enable = 0; Data_out_enable = 0;
+        Act_b0 = 1; Act_b1 = 1; Act_b2 = 1; Act_b3 = 1;
+        Pc_b0 = 0; Pc_b1 = 0; Pc_b2 = 0; Pc_b3 = 0;
+        WR_chkm[0] = 0; WR_chkm[1] = 0; WR_chkm[2] = 0; WR_chkm[3] = 0;
+        RW_interrupt_read[0] = 0; RW_interrupt_read[1] = 0; RW_interrupt_read[2] = 0; RW_interrupt_read[3] = 0;
+        RW_interrupt_write[0] = 0; RW_interrupt_write[1] = 0; RW_interrupt_write[2] = 0; RW_interrupt_write[3] = 0;
+        MRD_chk = 0; RFC_chk = 0; RRD_chk = 0;
+        RAS_chk0 = 0; RAS_chk1 = 0; RAS_chk2 = 0; RAS_chk3 = 0;
+        RCD_chk0 = 0; RCD_chk1 = 0; RCD_chk2 = 0; RCD_chk3 = 0;
+        RC_chk0 = 0; RC_chk1 = 0; RC_chk2 = 0; RC_chk3 = 0;
+        RP_chk0 = 0; RP_chk1 = 0; RP_chk2 = 0; RP_chk3 = 0;
+        $timeformat (-9, 1, " ns", 12);
+    end
+
+    // System clock generator
+    always begin
+        @ (posedge Clk) begin
+            Sys_clk = CkeZ;
+            CkeZ = Cke;
+        end
+        @ (negedge Clk) begin
+            Sys_clk = 1'b0;
+        end
+    end
+
+    always @ (posedge Sys_clk) begin   // Main command/timing/data engine; Sys_clk is Clk gated by the delayed CKE
+        // Internal command pipeline: every stage shifts one slot toward index 0, a NOP enters at index 3
+        Command[0] = Command[1];
+        Command[1] = Command[2];
+        Command[2] = Command[3];
+        Command[3] = `NOP;
+
+        Col_addr[0] = Col_addr[1];
+        Col_addr[1] = Col_addr[2];
+        Col_addr[2] = Col_addr[3];
+        Col_addr[3] = {col_bits{1'b0}};
+
+        Bank_addr[0] = Bank_addr[1];
+        Bank_addr[1] = Bank_addr[2];
+        Bank_addr[2] = Bank_addr[3];
+        Bank_addr[3] = 2'b0;
+
+        Bank_precharge[0] = Bank_precharge[1];
+        Bank_precharge[1] = Bank_precharge[2];
+        Bank_precharge[2] = Bank_precharge[3];
+        Bank_precharge[3] = 2'b0;
+
+        A10_precharge[0] = A10_precharge[1];
+        A10_precharge[1] = A10_precharge[2];
+        A10_precharge[2] = A10_precharge[3];
+        A10_precharge[3] = 1'b0;
+
+        // Dqm pipeline for Read (Dqm_reg0 is the value the read path below applies)
+        Dqm_reg0 = Dqm_reg1;
+        Dqm_reg1 = Dqm;
+
+        // Read or Write with Auto Precharge Counter (counts clocks while a bank has auto precharge pending)
+        if (Auto_precharge[0] === 1'b1) begin
+            Count_precharge[0] = Count_precharge[0] + 1;
+        end
+        if (Auto_precharge[1] === 1'b1) begin
+            Count_precharge[1] = Count_precharge[1] + 1;
+        end
+        if (Auto_precharge[2] === 1'b1) begin
+            Count_precharge[2] = Count_precharge[2] + 1;
+        end
+        if (Auto_precharge[3] === 1'b1) begin
+            Count_precharge[3] = Count_precharge[3] + 1;
+        end
+
+        // Read or Write Interrupt Counter (counts clocks since a write with auto precharge was interrupted)
+        if (RW_interrupt_write[0] === 1'b1) begin
+            RW_interrupt_counter[0] = RW_interrupt_counter[0] + 1;
+        end
+        if (RW_interrupt_write[1] === 1'b1) begin
+            RW_interrupt_counter[1] = RW_interrupt_counter[1] + 1;
+        end
+        if (RW_interrupt_write[2] === 1'b1) begin
+            RW_interrupt_counter[2] = RW_interrupt_counter[2] + 1;
+        end
+        if (RW_interrupt_write[3] === 1'b1) begin
+            RW_interrupt_counter[3] = RW_interrupt_counter[3] + 1;
+        end
+
+        // tMRD Counter: clock cycles since the last Load Mode Register (reset to 0 in the LMR branch)
+        MRD_chk = MRD_chk + 1;
+
+        // Auto Refresh
+        if (Aref_enable === 1'b1) begin
+            if (Debug) begin
+                $display ("%m : at time %t AREF : Auto Refresh", $time);
+            end
+
+            // Auto Refresh to Auto Refresh
+            if ($time - RFC_chk < tRFC) begin
+                $display ("%m : at time %t ERROR: tRFC violation during Auto Refresh", $time);
+            end
+
+            // Precharge to Auto Refresh
+            if (($time - RP_chk0 < tRP) || ($time - RP_chk1 < tRP) ||
+                ($time - RP_chk2 < tRP) || ($time - RP_chk3 < tRP)) begin
+                $display ("%m : at time %t ERROR: tRP violation during Auto Refresh", $time);
+            end
+
+            // Precharge to Refresh
+            if (Pc_b0 === 1'b0 || Pc_b1 === 1'b0 || Pc_b2 === 1'b0 || Pc_b3 === 1'b0) begin
+                $display ("%m : at time %t ERROR: All banks must be Precharge before Auto Refresh", $time);
+            end
+
+            // Load Mode Register to Auto Refresh
+            if (MRD_chk < tMRD) begin
+                $display ("%m : at time %t ERROR: tMRD violation during Auto Refresh", $time);
+            end
+
+            // Record Current tRFC time
+            RFC_chk = $time;
+        end
+        
+        // Load Mode Register
+        if (Mode_reg_enable === 1'b1) begin
+            // Register Mode
+            Mode_reg = Addr;
+
+            // Decode CAS Latency, Burst Length, Burst Type, and Write Burst Mode
+            if (Debug) begin
+                $display ("%m : at time %t LMR  : Load Mode Register", $time);
+                // CAS Latency
+                case (Addr[6 : 4])
+                    3'b010  : $display ("%m :                             CAS Latency      = 2");
+                    3'b011  : $display ("%m :                             CAS Latency      = 3");
+                    default : $display ("%m :                             CAS Latency      = Reserved");
+                endcase
+
+                // Burst Length
+                case (Addr[2 : 0])
+                    3'b000  : $display ("%m :                             Burst Length     = 1");
+                    3'b001  : $display ("%m :                             Burst Length     = 2");
+                    3'b010  : $display ("%m :                             Burst Length     = 4");
+                    3'b011  : $display ("%m :                             Burst Length     = 8");
+                    3'b111  : $display ("%m :                             Burst Length     = Full");
+                    default : $display ("%m :                             Burst Length     = Reserved");
+                endcase
+
+                // Burst Type
+                if (Addr[3] === 1'b0) begin
+                    $display ("%m :                             Burst Type       = Sequential");
+                end else if (Addr[3] === 1'b1) begin
+                    $display ("%m :                             Burst Type       = Interleaved");
+                end else begin
+                    $display ("%m :                             Burst Type       = Reserved");
+                end
+
+                // Write Burst Mode
+                if (Addr[9] === 1'b0) begin
+                    $display ("%m :                             Write Burst Mode = Programmed Burst Length");
+                end else if (Addr[9] === 1'b1) begin
+                    $display ("%m :                             Write Burst Mode = Single Location Access");
+                end else begin
+                    $display ("%m :                             Write Burst Mode = Reserved");
+                end
+            end
+
+            // Every bank must be precharged before Load Mode Register: flag ANY open bank (same test as Auto Refresh)
+            if (Pc_b0 === 1'b0 || Pc_b1 === 1'b0 || Pc_b2 === 1'b0 || Pc_b3 === 1'b0) begin
+                $display ("%m : at time %t ERROR: all banks must be Precharge before Load Mode Register", $time);
+            end
+
+            // Precharge to Load Mode Register
+            if (($time - RP_chk0 < tRP) || ($time - RP_chk1 < tRP) ||
+                ($time - RP_chk2 < tRP) || ($time - RP_chk3 < tRP)) begin
+                $display ("%m : at time %t ERROR: tRP violation during Load Mode Register", $time);
+            end
+
+            // Auto Refresh to Load Mode Register
+            if ($time - RFC_chk < tRFC) begin
+                $display ("%m : at time %t ERROR: tRFC violation during Load Mode Register", $time);
+            end
+
+            // Load Mode Register to Load Mode Register
+            if (MRD_chk < tMRD) begin
+                $display ("%m : at time %t ERROR: tMRD violation during Load Mode Register", $time);
+            end
+
+            // Reset MRD Counter
+            MRD_chk = 0;
+        end
+        
+        // Active Block (Latch Bank Address and Row Address)
+        if (Active_enable === 1'b1) begin
+            // Activate an open bank can corrupt data
+            if ((Ba === 2'b00 && Act_b0 === 1'b1) || (Ba === 2'b01 && Act_b1 === 1'b1) ||
+                (Ba === 2'b10 && Act_b2 === 1'b1) || (Ba === 2'b11 && Act_b3 === 1'b1)) begin
+                $display ("%m : at time %t ERROR: Bank already activated -- data can be corrupted", $time);
+            end
+
+            // Activate Bank 0
+            if (Ba === 2'b00 && Pc_b0 === 1'b1) begin
+                // Debug Message
+                if (Debug) begin
+                    $display ("%m : at time %t ACT  : Bank = 0 Row = %d", $time, Addr);
+                end
+
+                // ACTIVE to ACTIVE command period
+                if ($time - RC_chk0 < tRC) begin
+                    $display ("%m : at time %t ERROR: tRC violation during Activate bank 0", $time);
+                end
+
+                // Precharge to Activate Bank 0
+                if ($time - RP_chk0 < tRP) begin
+                    $display ("%m : at time %t ERROR: tRP violation during Activate bank 0", $time);
+                end
+
+                // Record variables
+                Act_b0 = 1'b1;
+                Pc_b0 = 1'b0;
+                B0_row_addr = Addr [addr_bits - 1 : 0];
+                RAS_chk0 = $time;
+                RC_chk0 = $time;
+                RCD_chk0 = $time;
+            end
+
+            if (Ba == 2'b01 && Pc_b1 == 1'b1) begin                         // Activate Bank 1
+                // Debug Message
+                if (Debug) begin
+                    $display ("%m : at time %t ACT  : Bank = 1 Row = %d", $time, Addr);
+                end
+
+                // ACTIVE to ACTIVE command period
+                if ($time - RC_chk1 < tRC) begin
+                    $display ("%m : at time %t ERROR: tRC violation during Activate bank 1", $time);
+                end
+
+                // Precharge to Activate Bank 1
+                if ($time - RP_chk1 < tRP) begin
+                    $display ("%m : at time %t ERROR: tRP violation during Activate bank 1", $time);
+                end
+
+                // Record variables
+                Act_b1 = 1'b1;
+                Pc_b1 = 1'b0;
+                B1_row_addr = Addr [addr_bits - 1 : 0];
+                RAS_chk1 = $time;
+                RC_chk1 = $time;
+                RCD_chk1 = $time;
+            end
+
+            if (Ba == 2'b10 && Pc_b2 == 1'b1) begin                         // Activate Bank 2
+                // Debug Message
+                if (Debug) begin
+                    $display ("%m : at time %t ACT  : Bank = 2 Row = %d", $time, Addr);
+                end
+
+                // ACTIVE to ACTIVE command period
+                if ($time - RC_chk2 < tRC) begin
+                    $display ("%m : at time %t ERROR: tRC violation during Activate bank 2", $time);
+                end
+
+                // Precharge to Activate Bank 2
+                if ($time - RP_chk2 < tRP) begin
+                    $display ("%m : at time %t ERROR: tRP violation during Activate bank 2", $time);
+                end
+
+                // Record variables
+                Act_b2 = 1'b1;
+                Pc_b2 = 1'b0;
+                B2_row_addr = Addr [addr_bits - 1 : 0];
+                RAS_chk2 = $time;
+                RC_chk2 = $time;
+                RCD_chk2 = $time;
+            end
+
+            if (Ba == 2'b11 && Pc_b3 == 1'b1) begin                         // Activate Bank 3
+                // Debug Message
+                if (Debug) begin
+                    $display ("%m : at time %t ACT  : Bank = 3 Row = %d", $time, Addr);
+                end
+
+                // ACTIVE to ACTIVE command period
+                if ($time - RC_chk3 < tRC) begin
+                    $display ("%m : at time %t ERROR: tRC violation during Activate bank 3", $time);
+                end
+
+                // Precharge to Activate Bank 3
+                if ($time - RP_chk3 < tRP) begin
+                    $display ("%m : at time %t ERROR: tRP violation during Activate bank 3", $time);
+                end
+
+                // Record variables
+                Act_b3 = 1'b1;
+                Pc_b3 = 1'b0;
+                B3_row_addr = Addr [addr_bits - 1 : 0];
+                RAS_chk3 = $time;
+                RC_chk3 = $time;
+                RCD_chk3 = $time;
+            end
+
+            // Active Bank A to Active Bank B
+            if ((Prev_bank != Ba) && ($time - RRD_chk < tRRD)) begin
+                $display ("%m : at time %t ERROR: tRRD violation during Activate bank = %d", $time, Ba);
+            end
+
+            // Auto Refresh to Activate
+            if ($time - RFC_chk < tRFC) begin
+                $display ("%m : at time %t ERROR: tRFC violation during Activate bank = %d", $time, Ba);
+            end
+
+            // Load Mode Register to Active
+            if (MRD_chk < tMRD ) begin
+                $display ("%m : at time %t ERROR: tMRD violation during Activate bank = %d", $time, Ba);
+            end
+
+            // Record variables for checking violation
+            RRD_chk = $time;
+            Prev_bank = Ba;
+        end
+        
+        // Precharge Block
+        if (Prech_enable == 1'b1) begin
+            // Load Mode Register to Precharge (MRD_chk is a cycle counter, so compare it directly as the other tMRD checks do)
+            if (MRD_chk < tMRD) begin
+                $display ("%m : at time %t ERROR: tMRD violation during Precharge", $time);
+            end
+
+            // Precharge Bank 0 (A10 high selects all banks, otherwise Ba selects the bank)
+            if ((Addr[10] === 1'b1 || (Addr[10] === 1'b0 && Ba === 2'b00)) && Act_b0 === 1'b1) begin
+                Act_b0 = 1'b0;
+                Pc_b0 = 1'b1;
+                RP_chk0 = $time;
+
+                // Activate to Precharge
+                if ($time - RAS_chk0 < tRAS) begin
+                    $display ("%m : at time %t ERROR: tRAS violation during Precharge", $time);
+                end
+
+                // tWR violation check for write
+                if ($time - WR_chkm[0] < tWRm) begin
+                    $display ("%m : at time %t ERROR: tWR violation during Precharge", $time);
+                end
+            end
+
+            // Precharge Bank 1
+            if ((Addr[10] === 1'b1 || (Addr[10] === 1'b0 && Ba === 2'b01)) && Act_b1 === 1'b1) begin
+                Act_b1 = 1'b0;
+                Pc_b1 = 1'b1;
+                RP_chk1 = $time;
+
+                // Activate to Precharge
+                if ($time - RAS_chk1 < tRAS) begin
+                    $display ("%m : at time %t ERROR: tRAS violation during Precharge", $time);
+                end
+
+                // tWR violation check for write
+                if ($time - WR_chkm[1] < tWRm) begin
+                    $display ("%m : at time %t ERROR: tWR violation during Precharge", $time);
+                end
+            end
+
+            // Precharge Bank 2
+            if ((Addr[10] === 1'b1 || (Addr[10] === 1'b0 && Ba === 2'b10)) && Act_b2 === 1'b1) begin
+                Act_b2 = 1'b0;
+                Pc_b2 = 1'b1;
+                RP_chk2 = $time;
+
+                // Activate to Precharge
+                if ($time - RAS_chk2 < tRAS) begin
+                    $display ("%m : at time %t ERROR: tRAS violation during Precharge", $time);
+                end
+
+                // tWR violation check for write
+                if ($time - WR_chkm[2] < tWRm) begin
+                    $display ("%m : at time %t ERROR: tWR violation during Precharge", $time);
+                end
+            end
+
+            // Precharge Bank 3
+            if ((Addr[10] === 1'b1 || (Addr[10] === 1'b0 && Ba === 2'b11)) && Act_b3 === 1'b1) begin
+                Act_b3 = 1'b0;
+                Pc_b3 = 1'b1;
+                RP_chk3 = $time;
+
+                // Activate to Precharge
+                if ($time - RAS_chk3 < tRAS) begin
+                    $display ("%m : at time %t ERROR: tRAS violation during Precharge", $time);
+                end
+
+                // tWR violation check for write
+                if ($time - WR_chkm[3] < tWRm) begin
+                    $display ("%m : at time %t ERROR: tWR violation during Precharge", $time);
+                end
+            end
+
+            // Terminate a Write Immediately (if same bank or all banks)
+            if (Data_in_enable === 1'b1 && (Bank === Ba || Addr[10] === 1'b1)) begin
+                Data_in_enable = 1'b0;
+            end
+
+            // Precharge Command Pipeline for Read (queued so it reaches Command[0] after the CAS latency)
+            if (Cas_latency_3 === 1'b1) begin
+                Command[2] = `PRECH;
+                Bank_precharge[2] = Ba;
+                A10_precharge[2] = Addr[10];
+            end else if (Cas_latency_2 === 1'b1) begin
+                Command[1] = `PRECH;
+                Bank_precharge[1] = Ba;
+                A10_precharge[1] = Addr[10];
+            end
+        end
+        
+        // Burst terminate
+        if (Burst_term === 1'b1) begin
+            // Terminate a Write Immediately
+            if (Data_in_enable == 1'b1) begin
+                Data_in_enable = 1'b0;
+            end
+
+            // Terminate a Read Depend on CAS Latency
+            if (Cas_latency_3 === 1'b1) begin
+                Command[2] = `BST;
+            end else if (Cas_latency_2 == 1'b1) begin
+                Command[1] = `BST;
+            end
+
+            // Display debug message
+            if (Debug) begin
+                $display ("%m : at time %t BST  : Burst Terminate",$time);
+            end
+        end
+        
+        // Read, Write, Column Latch
+        if (Read_enable === 1'b1) begin
+            // Check to see if bank is open (ACT); a bank still flagged precharged cannot be read
+            if ((Ba == 2'b00 && Pc_b0 == 1'b1) || (Ba == 2'b01 && Pc_b1 == 1'b1) ||
+                (Ba == 2'b10 && Pc_b2 == 1'b1) || (Ba == 2'b11 && Pc_b3 == 1'b1)) begin
+                $display("%m : at time %t ERROR: Bank is not Activated for Read", $time);
+            end
+
+            // Activate to Read (tRCD)
+            if ((Ba == 2'b00) && ($time - RCD_chk0 < tRCD) ||
+                (Ba == 2'b01) && ($time - RCD_chk1 < tRCD) ||
+                (Ba == 2'b10) && ($time - RCD_chk2 < tRCD) ||
+                (Ba == 2'b11) && ($time - RCD_chk3 < tRCD)) begin
+                $display("%m : at time %t ERROR: tRCD violation during Read", $time);
+            end
+
+            // CAS Latency pipeline
+            if (Cas_latency_3 == 1'b1) begin
+                Command[2] = `READ;
+                Col_addr[2] = Addr;
+                Bank_addr[2] = Ba;
+            end else if (Cas_latency_2 == 1'b1) begin
+                Command[1] = `READ;
+                Col_addr[1] = Addr;
+                Bank_addr[1] = Ba;
+            end
+
+            // Read interrupt Write (terminate Write immediately)
+            if (Data_in_enable == 1'b1) begin
+                Data_in_enable = 1'b0;
+
+                // Interrupting a Write with Autoprecharge
+                if (Auto_precharge[RW_interrupt_bank] == 1'b1 && Write_precharge[RW_interrupt_bank] == 1'b1) begin
+                    RW_interrupt_write[RW_interrupt_bank] = 1'b1;
+                    RW_interrupt_counter[RW_interrupt_bank] = 0;
+
+                    // Display debug message
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Read interrupt Write with Autoprecharge", $time);
+                    end
+                end
+            end
+
+            // Read with Auto Precharge (A10 high on the Read command)
+            if (Addr[10] == 1'b1) begin
+                Auto_precharge[Ba] = 1'b1;
+                Count_precharge[Ba] = 0;
+                RW_interrupt_bank = Ba;
+                Read_precharge[Ba] = 1'b1;
+            end
+        end
+
+        // Write Command
+        if (Write_enable == 1'b1) begin
+            // Check to see if bank is open (ACT); a bank still flagged precharged cannot be written
+            if ((Ba == 2'b00 && Pc_b0 == 1'b1) || (Ba == 2'b01 && Pc_b1 == 1'b1) ||
+                (Ba == 2'b10 && Pc_b2 == 1'b1) || (Ba == 2'b11 && Pc_b3 == 1'b1)) begin
+                $display("%m : at time %t ERROR: Bank is not Activated for Write", $time);
+            end
+
+            // Activate to Write (tRCD)
+            if ((Ba == 2'b00) && ($time - RCD_chk0 < tRCD) ||
+                (Ba == 2'b01) && ($time - RCD_chk1 < tRCD) ||
+                (Ba == 2'b10) && ($time - RCD_chk2 < tRCD) ||
+                (Ba == 2'b11) && ($time - RCD_chk3 < tRCD)) begin
+                $display("%m : at time %t ERROR: tRCD violation during Write", $time);
+            end
+
+            // Latch Write command, Bank, and Column (slot 0: a write is executed in this same clock)
+            Command[0] = `WRITE;
+            Col_addr[0] = Addr;
+            Bank_addr[0] = Ba;
+
+            // Write interrupt Write (terminate Write immediately)
+            if (Data_in_enable == 1'b1) begin
+                Data_in_enable = 1'b0;
+
+                // Interrupting a Write with Autoprecharge
+                if (Auto_precharge[RW_interrupt_bank] == 1'b1 && Write_precharge[RW_interrupt_bank] == 1'b1) begin
+                    RW_interrupt_write[RW_interrupt_bank] = 1'b1;   // NOTE(review): RW_interrupt_counter is not reset here, unlike the Read-interrupt-Write path -- confirm intent
+
+                    // Display debug message
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Write Bank %d interrupt Write Bank %d with Autoprecharge", $time, Ba, RW_interrupt_bank);
+                    end
+                end
+            end
+
+            // Write interrupt Read (terminate Read immediately)
+            if (Data_out_enable == 1'b1) begin
+                Data_out_enable = 1'b0;
+                
+                // Interrupting a Read with Autoprecharge
+                if (Auto_precharge[RW_interrupt_bank] == 1'b1 && Read_precharge[RW_interrupt_bank] == 1'b1) begin
+                    RW_interrupt_read[RW_interrupt_bank] = 1'b1;
+
+                    // Display debug message
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Write Bank %d interrupt Read Bank %d with Autoprecharge", $time, Ba, RW_interrupt_bank);
+                    end
+                end
+            end
+
+            // Write with Auto Precharge (A10 high on the Write command)
+            if (Addr[10] == 1'b1) begin
+                Auto_precharge[Ba] = 1'b1;
+                Count_precharge[Ba] = 0;
+                RW_interrupt_bank = Ba;
+                Write_precharge[Ba] = 1'b1;
+            end
+        end
+
+        /*
+            Write with Auto Precharge Calculation
+                The device start internal precharge when:
+                    1.  Meet minimum tRAS requirement
+                and 2.  tWR cycle(s) after last valid data
+                 or 3.  Interrupt by a Read or Write (with or without Auto Precharge)
+
+            Note: Model is starting the internal precharge 1 cycle after they meet all the
+                  requirement but tRP will be compensate for the time after the 1 cycle.
+        */
+        if ((Auto_precharge[0] == 1'b1) && (Write_precharge[0] == 1'b1)) begin
+            if ((($time - RAS_chk0 >= tRAS) &&                                                          // Case 1
+               (((Burst_length_1 == 1'b1 || Write_burst_mode == 1'b1) && Count_precharge [0] >= 1) ||   // Case 2
+                 (Burst_length_2 == 1'b1                              && Count_precharge [0] >= 2) ||
+                 (Burst_length_4 == 1'b1                              && Count_precharge [0] >= 4) ||
+                 (Burst_length_8 == 1'b1                              && Count_precharge [0] >= 8))) ||
+                 (RW_interrupt_write[0] == 1'b1 && RW_interrupt_counter[0] >= 1)) begin                 // Case 3
+                    Auto_precharge[0] = 1'b0;
+                    Write_precharge[0] = 1'b0;
+                    RW_interrupt_write[0] = 1'b0;
+                    Pc_b0 = 1'b1;
+                    Act_b0 = 1'b0;
+                    RP_chk0 = $time + tWRa;
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 0", $time);
+                    end
+            end
+        end
+        if ((Auto_precharge[1] == 1'b1) && (Write_precharge[1] == 1'b1)) begin
+            if ((($time - RAS_chk1 >= tRAS) &&                                                          // Case 1
+               (((Burst_length_1 == 1'b1 || Write_burst_mode == 1'b1) && Count_precharge [1] >= 1) ||   // Case 2
+                 (Burst_length_2 == 1'b1                              && Count_precharge [1] >= 2) ||
+                 (Burst_length_4 == 1'b1                              && Count_precharge [1] >= 4) ||
+                 (Burst_length_8 == 1'b1                              && Count_precharge [1] >= 8))) ||
+                 (RW_interrupt_write[1] == 1'b1 && RW_interrupt_counter[1] >= 1)) begin                 // Case 3
+                    Auto_precharge[1] = 1'b0;
+                    Write_precharge[1] = 1'b0;
+                    RW_interrupt_write[1] = 1'b0;
+                    Pc_b1 = 1'b1;
+                    Act_b1 = 1'b0;
+                    RP_chk1 = $time + tWRa;
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 1", $time);
+                    end
+            end
+        end
+        if ((Auto_precharge[2] == 1'b1) && (Write_precharge[2] == 1'b1)) begin
+            if ((($time - RAS_chk2 >= tRAS) &&                                                          // Case 1
+               (((Burst_length_1 == 1'b1 || Write_burst_mode == 1'b1) && Count_precharge [2] >= 1) ||   // Case 2
+                 (Burst_length_2 == 1'b1                              && Count_precharge [2] >= 2) ||
+                 (Burst_length_4 == 1'b1                              && Count_precharge [2] >= 4) ||
+                 (Burst_length_8 == 1'b1                              && Count_precharge [2] >= 8))) ||
+                 (RW_interrupt_write[2] == 1'b1 && RW_interrupt_counter[2] >= 1)) begin                 // Case 3
+                    Auto_precharge[2] = 1'b0;
+                    Write_precharge[2] = 1'b0;
+                    RW_interrupt_write[2] = 1'b0;
+                    Pc_b2 = 1'b1;
+                    Act_b2 = 1'b0;
+                    RP_chk2 = $time + tWRa;
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 2", $time);
+                    end
+            end
+        end
+        if ((Auto_precharge[3] == 1'b1) && (Write_precharge[3] == 1'b1)) begin
+            if ((($time - RAS_chk3 >= tRAS) &&                                                          // Case 1
+               (((Burst_length_1 == 1'b1 || Write_burst_mode == 1'b1) && Count_precharge [3] >= 1) ||   // Case 2
+                 (Burst_length_2 == 1'b1                              && Count_precharge [3] >= 2) ||
+                 (Burst_length_4 == 1'b1                              && Count_precharge [3] >= 4) ||
+                 (Burst_length_8 == 1'b1                              && Count_precharge [3] >= 8))) ||
+                 (RW_interrupt_write[3] == 1'b1 && RW_interrupt_counter[3] >= 1)) begin                 // Case 3
+                    Auto_precharge[3] = 1'b0;
+                    Write_precharge[3] = 1'b0;
+                    RW_interrupt_write[3] = 1'b0;
+                    Pc_b3 = 1'b1;
+                    Act_b3 = 1'b0;
+                    RP_chk3 = $time + tWRa;
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 3", $time);
+                    end
+            end
+        end
+
+        //  Read with Auto Precharge Calculation
+        //      The device start internal precharge:
+        //          1.  Meet minimum tRAS requirement
+        //      and 2.  CAS Latency - 1 cycles before last burst
+        //       or 3.  Interrupt by a Read or Write (with or without AutoPrecharge)
+        if ((Auto_precharge[0] == 1'b1) && (Read_precharge[0] == 1'b1)) begin
+            if ((($time - RAS_chk0 >= tRAS) &&                                                      // Case 1
+                ((Burst_length_1 == 1'b1 && Count_precharge[0] >= 1) ||                             // Case 2
+                 (Burst_length_2 == 1'b1 && Count_precharge[0] >= 2) ||
+                 (Burst_length_4 == 1'b1 && Count_precharge[0] >= 4) ||
+                 (Burst_length_8 == 1'b1 && Count_precharge[0] >= 8))) ||
+                 (RW_interrupt_read[0] == 1'b1)) begin                                              // Case 3
+                    Pc_b0 = 1'b1;
+                    Act_b0 = 1'b0;
+                    RP_chk0 = $time;
+                    Auto_precharge[0] = 1'b0;
+                    Read_precharge[0] = 1'b0;
+                    RW_interrupt_read[0] = 1'b0;
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 0", $time);
+                    end
+            end
+        end
+        if ((Auto_precharge[1] == 1'b1) && (Read_precharge[1] == 1'b1)) begin
+            if ((($time - RAS_chk1 >= tRAS) &&
+                ((Burst_length_1 == 1'b1 && Count_precharge[1] >= 1) || 
+                 (Burst_length_2 == 1'b1 && Count_precharge[1] >= 2) ||
+                 (Burst_length_4 == 1'b1 && Count_precharge[1] >= 4) ||
+                 (Burst_length_8 == 1'b1 && Count_precharge[1] >= 8))) ||
+                 (RW_interrupt_read[1] == 1'b1)) begin
+                    Pc_b1 = 1'b1;
+                    Act_b1 = 1'b0;
+                    RP_chk1 = $time;
+                    Auto_precharge[1] = 1'b0;
+                    Read_precharge[1] = 1'b0;
+                    RW_interrupt_read[1] = 1'b0;
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 1", $time);
+                    end
+            end
+        end
+        if ((Auto_precharge[2] == 1'b1) && (Read_precharge[2] == 1'b1)) begin
+            if ((($time - RAS_chk2 >= tRAS) &&
+                ((Burst_length_1 == 1'b1 && Count_precharge[2] >= 1) || 
+                 (Burst_length_2 == 1'b1 && Count_precharge[2] >= 2) ||
+                 (Burst_length_4 == 1'b1 && Count_precharge[2] >= 4) ||
+                 (Burst_length_8 == 1'b1 && Count_precharge[2] >= 8))) ||
+                 (RW_interrupt_read[2] == 1'b1)) begin
+                    Pc_b2 = 1'b1;
+                    Act_b2 = 1'b0;
+                    RP_chk2 = $time;
+                    Auto_precharge[2] = 1'b0;
+                    Read_precharge[2] = 1'b0;
+                    RW_interrupt_read[2] = 1'b0;
+                    if (Debug) begin
+                        $display ("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 2", $time);
+                    end
+            end
+        end
+        if ((Auto_precharge[3] == 1'b1) && (Read_precharge[3] == 1'b1)) begin
+            if ((($time - RAS_chk3 >= tRAS) &&
+                ((Burst_length_1 == 1'b1 && Count_precharge[3] >= 1) || 
+                 (Burst_length_2 == 1'b1 && Count_precharge[3] >= 2) ||
+                 (Burst_length_4 == 1'b1 && Count_precharge[3] >= 4) ||
+                 (Burst_length_8 == 1'b1 && Count_precharge[3] >= 8))) ||
+                 (RW_interrupt_read[3] == 1'b1)) begin
+                    Pc_b3 = 1'b1;
+                    Act_b3 = 1'b0;
+                    RP_chk3 = $time;
+                    Auto_precharge[3] = 1'b0;
+                    Read_precharge[3] = 1'b0;
+                    RW_interrupt_read[3] = 1'b0;
+                    if (Debug) begin
+                        $display("%m : at time %t NOTE : Start Internal Auto Precharge for Bank 3", $time);
+                    end
+            end
+        end
+
+        // Internal Precharge or Bst
+        if (Command[0] == `PRECH) begin                         // Precharge terminate a read with same bank or all banks
+            if (Bank_precharge[0] == Bank || A10_precharge[0] == 1'b1) begin
+                if (Data_out_enable == 1'b1) begin
+                    Data_out_enable = 1'b0;
+                end
+            end
+        end else if (Command[0] == `BST) begin                  // BST terminate a read to current bank
+            if (Data_out_enable == 1'b1) begin
+                Data_out_enable = 1'b0;
+            end
+        end
+
+        if (Data_out_enable == 1'b0) begin                      // No read in progress: schedule Hi-Z on DQ after tOH
+            Dq_reg <= #tOH {data_bits{1'bz}};
+        end
+
+        // Detect Read or Write command
+        if (Command[0] == `READ) begin
+            Bank = Bank_addr[0];
+            Col = Col_addr[0];
+            Col_brst = Col_addr[0];
+            case (Bank_addr[0])
+                2'b00 : Row = B0_row_addr;
+                2'b01 : Row = B1_row_addr;
+                2'b10 : Row = B2_row_addr;
+                2'b11 : Row = B3_row_addr;
+            endcase
+            Burst_counter = 0;
+            Data_in_enable = 1'b0;
+            Data_out_enable = 1'b1;
+        end else if (Command[0] == `WRITE) begin
+            Bank = Bank_addr[0];
+            Col = Col_addr[0];
+            Col_brst = Col_addr[0];
+            case (Bank_addr[0])
+                2'b00 : Row = B0_row_addr;
+                2'b01 : Row = B1_row_addr;
+                2'b10 : Row = B2_row_addr;
+                2'b11 : Row = B3_row_addr;
+            endcase
+            Burst_counter = 0;
+            Data_in_enable = 1'b1;
+            Data_out_enable = 1'b0;
+        end
+
+        // DQ buffer (Driver/Receiver)
+        if (Data_in_enable == 1'b1) begin                                   // Writing Data to Memory
+            // Array buffer (read-modify-write so masked bytes keep their stored value)
+            case (Bank)
+                2'b00 : Dq_dqm = Bank0 [{Row, Col}];
+                2'b01 : Dq_dqm = Bank1 [{Row, Col}];
+                2'b10 : Dq_dqm = Bank2 [{Row, Col}];
+                2'b11 : Dq_dqm = Bank3 [{Row, Col}];
+            endcase
+
+            // Dqm operation (a low Dqm bit lets the matching DQ byte through)
+            if (Dqm[0] == 1'b0) begin
+                Dq_dqm [ 7 : 0] = Dq [ 7 : 0];
+            end
+            if (Dqm[1] == 1'b0) begin
+                Dq_dqm [15 : 8] = Dq [15 : 8];
+            end
+
+            // Write to memory
+            case (Bank)
+                2'b00 : Bank0 [{Row, Col}] = Dq_dqm;
+                2'b01 : Bank1 [{Row, Col}] = Dq_dqm;
+                2'b10 : Bank2 [{Row, Col}] = Dq_dqm;
+                2'b11 : Bank3 [{Row, Col}] = Dq_dqm;
+            endcase
+
+            // Record the write time and display debug message unless both bytes are masked
+            if (Dqm !== 2'b11) begin
+                // Record tWR for manual precharge
+                WR_chkm [Bank] = $time;
+
+                if (Debug) begin
+                    $display("%m : at time %t WRITE: Bank = %d Row = %d, Col = %d, Data = %d", $time, Bank, Row, Col, Dq_dqm);
+                end
+            end else begin
+                if (Debug) begin
+                    $display("%m : at time %t WRITE: Bank = %d Row = %d, Col = %d, Data = Hi-Z due to DQM", $time, Bank, Row, Col);
+                end
+            end
+
+            // Advance burst counter subroutine (called after a tHZ delay)
+            #tHZ Burst_decode;
+
+        end else if (Data_out_enable == 1'b1) begin                         // Reading Data from Memory
+            // Array buffer
+            case (Bank)
+                2'b00 : Dq_dqm = Bank0[{Row, Col}];
+                2'b01 : Dq_dqm = Bank1[{Row, Col}];
+                2'b10 : Dq_dqm = Bank2[{Row, Col}];
+                2'b11 : Dq_dqm = Bank3[{Row, Col}];
+            endcase
+
+            // Dqm operation (a high pipelined Dqm bit tri-states the matching byte)
+            if (Dqm_reg0 [0] == 1'b1) begin
+                Dq_dqm [ 7 : 0] = 8'bz;
+            end
+            if (Dqm_reg0 [1] == 1'b1) begin
+                Dq_dqm [15 : 8] = 8'bz;
+            end
+
+            // Drive DQ after tAC, or Hi-Z after tHZ when both bytes are masked; then display debug message
+            if (Dqm_reg0 !== 2'b11) begin
+                Dq_reg = #tAC Dq_dqm;
+                if (Debug) begin
+                    $display("%m : at time %t READ : Bank = %d Row = %d, Col = %d, Data = %d", $time, Bank, Row, Col, Dq_reg);
+                end
+            end else begin
+                Dq_reg = #tHZ {data_bits{1'bz}};
+                if (Debug) begin
+                    $display("%m : at time %t READ : Bank = %d Row = %d, Col = %d, Data = Hi-Z due to DQM", $time, Bank, Row, Col);
+                end
+            end
+
+            // Advance burst counter subroutine
+            Burst_decode;
+        end
+    end
+
+    // Burst_decode: advance the current burst by one beat (next column, end-of-burst check)
+    task Burst_decode;
+        begin
+            // Count the beat that has just been transferred
+            Burst_counter = Burst_counter + 1;
+
+            // Burst Type (Mode_reg[3]): compute the candidate next column in Col_temp
+            if (Mode_reg[3] == 1'b0) begin                                  // Sequential Burst
+                Col_temp = Col + 1;
+            end else if (Mode_reg[3] == 1'b1) begin                         // Interleaved Burst
+                Col_temp[2] =  Burst_counter[2] ^  Col_brst[2];             // beat count XOR Col_brst
+                Col_temp[1] =  Burst_counter[1] ^  Col_brst[1];
+                Col_temp[0] =  Burst_counter[0] ^  Col_brst[0];
+            end
+
+            // Burst Length: only the low column bits are replaced, so Col wraps inside the burst
+            if (Burst_length_2) begin                                       // Burst Length = 2
+                Col [0] = Col_temp [0];
+            end else if (Burst_length_4) begin                              // Burst Length = 4
+                Col [1 : 0] = Col_temp [1 : 0];
+            end else if (Burst_length_8) begin                              // Burst Length = 8
+                Col [2 : 0] = Col_temp [2 : 0];
+            end else begin                                                  // Any other length (incl. FULL)
+                Col = Col_temp;
+            end
+
+            // Burst Read Single Write: with Write_burst_mode set, data-in stops after this beat
+            if (Write_burst_mode == 1'b1) begin
+                Data_in_enable = 1'b0;
+            end
+
+            // Data Counter: end the burst once Burst_counter reaches the programmed length
+            if (Burst_length_1 == 1'b1) begin
+                if (Burst_counter >= 1) begin
+                    Data_in_enable = 1'b0;
+                    Data_out_enable = 1'b0;
+                end
+            end else if (Burst_length_2 == 1'b1) begin
+                if (Burst_counter >= 2) begin
+                    Data_in_enable = 1'b0;
+                    Data_out_enable = 1'b0;
+                end
+            end else if (Burst_length_4 == 1'b1) begin
+                if (Burst_counter >= 4) begin
+                    Data_in_enable = 1'b0;
+                    Data_out_enable = 1'b0;
+                end
+            end else if (Burst_length_8 == 1'b1) begin
+                if (Burst_counter >= 8) begin
+                    Data_in_enable = 1'b0;
+                    Data_out_enable = 1'b0;
+                end
+            end                                                             // no limit applied here otherwise
+        end
+    endtask
+
+    // Timing parameters and checks for -7E (133 MHz @ CL2); units presumably ns -- confirm against the timescale
+    specify
+        specparam
+            tAH  =  0.8,                                        // Addr, Ba Hold Time
+            tAS  =  1.5,                                        // Addr, Ba Setup Time
+            tCH  =  2.5,                                        // Clock High-Level Width
+            tCL  =  2.5,                                        // Clock Low-Level Width
+            tCK  =  7.0,                                        // Clock Cycle Time
+            tDH  =  0.8,                                        // Data-in Hold Time
+            tDS  =  1.5,                                        // Data-in Setup Time
+            tCKH =  0.8,                                        // CKE Hold  Time
+            tCKS =  1.5,                                        // CKE Setup Time
+            tCMH =  0.8,                                        // CS#, RAS#, CAS#, WE#, DQM# Hold  Time
+            tCMS =  1.5;                                        // CS#, RAS#, CAS#, WE#, DQM# Setup Time
+        $width    (posedge Clk,           tCH);                 // minimum Clk high pulse width
+        $width    (negedge Clk,           tCL);                 // minimum Clk low pulse width
+        $period   (negedge Clk,           tCK);                 // minimum period between falling edges
+        $period   (posedge Clk,           tCK);                 // minimum period between rising edges
+        $setuphold(posedge Clk,    Cke,   tCKS, tCKH);          // CKE setup/hold around rising Clk
+        $setuphold(posedge Clk,    Cs_n,  tCMS, tCMH);          // command pins setup/hold around rising Clk
+        $setuphold(posedge Clk,    Cas_n, tCMS, tCMH);
+        $setuphold(posedge Clk,    Ras_n, tCMS, tCMH);
+        $setuphold(posedge Clk,    We_n,  tCMS, tCMH);
+        $setuphold(posedge Clk,    Addr,  tAS,  tAH);           // address setup/hold around rising Clk
+        $setuphold(posedge Clk,    Ba,    tAS,  tAH);           // bank address setup/hold around rising Clk
+        $setuphold(posedge Clk,    Dqm,   tCMS, tCMH);          // DQM uses the command setup/hold values
+        $setuphold(posedge Dq_chk, Dq,    tDS,  tDH);           // data-in is checked against Dq_chk, not Clk
+    endspecify
+
+endmodule
diff --git a/spinal-cocotb/SpinalNet/test/src/verilog/tb_sdram_controller.v b/spinal-cocotb/SpinalNet/test/src/verilog/tb_sdram_controller.v
new file mode 100644
index 0000000..335cc91
--- /dev/null
+++ b/spinal-cocotb/SpinalNet/test/src/verilog/tb_sdram_controller.v
@@ -0,0 +1,138 @@
+
+// Test bench top for the SDRAM controller: connects SdramController to the
+// mt48lc16m16a2 SDRAM device model over a shared tri-state DQ bus and passes
+// the controller's AXI port, io_initDone, clk and reset through to the test.
+module tb_sdram_controller (
+  input               io_axi_aw_valid,
+  output              io_axi_aw_ready,
+  input      [31:0]   io_axi_aw_payload_addr,
+  input      [3:0]    io_axi_aw_payload_id,
+  input      [7:0]    io_axi_aw_payload_len,
+  input      [2:0]    io_axi_aw_payload_size,
+  input      [1:0]    io_axi_aw_payload_burst,
+  input               io_axi_w_valid,
+  output              io_axi_w_ready,
+  input      [31:0]   io_axi_w_payload_data,
+  input      [3:0]    io_axi_w_payload_strb,
+  input               io_axi_w_payload_last,
+  output              io_axi_b_valid,
+  input               io_axi_b_ready,
+  output     [3:0]    io_axi_b_payload_id,
+  output     [1:0]    io_axi_b_payload_resp,
+  input               io_axi_ar_valid,
+  output              io_axi_ar_ready,
+  input      [31:0]   io_axi_ar_payload_addr,
+  input      [3:0]    io_axi_ar_payload_id,
+  input      [7:0]    io_axi_ar_payload_len,
+  input      [2:0]    io_axi_ar_payload_size,
+  input      [1:0]    io_axi_ar_payload_burst,
+  output              io_axi_r_valid,
+  input               io_axi_r_ready,
+  output     [31:0]   io_axi_r_payload_data,
+  output     [3:0]    io_axi_r_payload_id,
+  output     [1:0]    io_axi_r_payload_resp,
+  output              io_axi_r_payload_last,
+  output              io_initDone,
+  input               clk,
+  input               reset
+);
+  wire       [12:0]   controller_io_sdram_ADDR;
+  wire       [1:0]    controller_io_sdram_BA;
+  wire                controller_io_sdram_CASn;
+  wire                controller_io_sdram_CKE;
+  wire                controller_io_sdram_CSn;
+  wire       [1:0]    controller_io_sdram_DQM;
+  wire                controller_io_sdram_RASn;
+  wire                controller_io_sdram_WEn;
+  wire       [15:0]   controller_io_sdram_DQ_write;
+  wire       [15:0]   controller_io_sdram_DQ_writeEnable;
+  wire       [15:0]   sdramDevice_DQ_read;
+
+  SdramController controller (
+    .io_axi_aw_valid                (io_axi_aw_valid                           ), //i
+    .io_axi_aw_ready                (io_axi_aw_ready                           ), //o
+    .io_axi_aw_payload_addr         (io_axi_aw_payload_addr                    ), //i
+    .io_axi_aw_payload_id           (io_axi_aw_payload_id                      ), //i
+    .io_axi_aw_payload_len          (io_axi_aw_payload_len                     ), //i
+    .io_axi_aw_payload_size         (io_axi_aw_payload_size                    ), //i
+    .io_axi_aw_payload_burst        (io_axi_aw_payload_burst                   ), //i
+    .io_axi_w_valid                 (io_axi_w_valid                            ), //i
+    .io_axi_w_ready                 (io_axi_w_ready                            ), //o
+    .io_axi_w_payload_data          (io_axi_w_payload_data                     ), //i
+    .io_axi_w_payload_strb          (io_axi_w_payload_strb                     ), //i
+    .io_axi_w_payload_last          (io_axi_w_payload_last                     ), //i
+    .io_axi_b_valid                 (io_axi_b_valid                            ), //o
+    .io_axi_b_ready                 (io_axi_b_ready                            ), //i
+    .io_axi_b_payload_id            (io_axi_b_payload_id                       ), //o
+    .io_axi_b_payload_resp          (io_axi_b_payload_resp                     ), //o
+    .io_axi_ar_valid                (io_axi_ar_valid                           ), //i
+    .io_axi_ar_ready                (io_axi_ar_ready                           ), //o
+    .io_axi_ar_payload_addr         (io_axi_ar_payload_addr                    ), //i
+    .io_axi_ar_payload_id           (io_axi_ar_payload_id                      ), //i
+    .io_axi_ar_payload_len          (io_axi_ar_payload_len                     ), //i
+    .io_axi_ar_payload_size         (io_axi_ar_payload_size                    ), //i
+    .io_axi_ar_payload_burst        (io_axi_ar_payload_burst                   ), //i
+    .io_axi_r_valid                 (io_axi_r_valid                            ), //o
+    .io_axi_r_ready                 (io_axi_r_ready                            ), //i
+    .io_axi_r_payload_data          (io_axi_r_payload_data                     ), //o
+    .io_axi_r_payload_id            (io_axi_r_payload_id                       ), //o
+    .io_axi_r_payload_resp          (io_axi_r_payload_resp                     ), //o
+    .io_axi_r_payload_last          (io_axi_r_payload_last                     ), //o
+    .io_sdram_ADDR                  (controller_io_sdram_ADDR[12:0]            ), //o
+    .io_sdram_BA                    (controller_io_sdram_BA[1:0]               ), //o
+    .io_sdram_DQ_read               (sdramDevice_DQ_read[15:0]                 ), //i
+    .io_sdram_DQ_write              (controller_io_sdram_DQ_write[15:0]        ), //o
+    .io_sdram_DQ_writeEnable        (controller_io_sdram_DQ_writeEnable[15:0]  ), //o
+    .io_sdram_DQM                   (controller_io_sdram_DQM[1:0]              ), //o
+    .io_sdram_CASn                  (controller_io_sdram_CASn                  ), //o
+    .io_sdram_CKE                   (controller_io_sdram_CKE                   ), //o
+    .io_sdram_CSn                   (controller_io_sdram_CSn                   ), //o
+    .io_sdram_RASn                  (controller_io_sdram_RASn                  ), //o
+    .io_sdram_WEn                   (controller_io_sdram_WEn                   ), //o
+    .io_initDone                    (io_initDone                               ), //o
+    .clk                            (clk                                       ), //i
+    .reset                          (reset                                     )  //i
+  );
+
+  // Bidirectional data bus shared by the controller and the SDRAM model.
+  wire [15:0] io_sdram_DQ;
+  assign sdramDevice_DQ_read = io_sdram_DQ;
+
+  // Drive each DQ bit only while its own writeEnable bit is set; a released bit
+  // floats (Z) so the SDRAM model can drive it.
+  genvar dqBit;
+  generate
+    for (dqBit = 0; dqBit < 16; dqBit = dqBit + 1) begin : gen_dq_tristate
+      assign io_sdram_DQ[dqBit] = controller_io_sdram_DQ_writeEnable[dqBit] ? controller_io_sdram_DQ_write[dqBit] : 1'bz;
+    end
+  endgenerate
+
+  // Alternative device model, kept commented out for reference:
+  // sdram_model_plus #(
+  //   .addr_bits        (12           ),   // address width
+  //   .data_bits        (16           ),   // data width
+  //   .col_bits         (9            ),   // column address width (A0-A8)
+  //   .mem_sizes        (2*1024*1024-1)    // 2M
+  // ) sdramDevice (
+  //   .Debug             (1'b1                                      ), //i
+  // Device model in use; note that it is clocked with the inverted clk.
+  mt48lc16m16a2 sdramDevice (
+    .Clk               (~clk                                      ), //i
+    .Addr              (controller_io_sdram_ADDR[12:0]            ), //i
+    .Ba                (controller_io_sdram_BA[1:0]               ), //i
+    .Dq                (io_sdram_DQ                               ), //io
+    .Dqm               (controller_io_sdram_DQM[1:0]              ), //i
+    .Cas_n             (controller_io_sdram_CASn                  ), //i
+    .Cke               (controller_io_sdram_CKE                   ), //i
+    .Cs_n              (controller_io_sdram_CSn                   ), //i
+    .Ras_n             (controller_io_sdram_RASn                  ), //i
+    .We_n              (controller_io_sdram_WEn                   )  //i
+  );
+
+  // Dump every signal to wave.vcd for waveform inspection.
+  initial begin
+    $dumpfile ("wave.vcd");
+    $dumpvars;
+    #1;
+  end
+endmodule
diff --git a/spinal-cocotb/run.sh b/spinal-cocotb/run.sh
new file mode 100755
index 0000000..f26e1c0
--- /dev/null
+++ b/spinal-cocotb/run.sh
@@ -0,0 +1,35 @@
+#! /bin/sh
+#
+# Build and test the Spinal HDL project: fetch the mill launcher if needed,
+# check formatting, run the tests, then generate the Verilog code for Caravel.
+
+set -o errexit
+set -o nounset
+set -o xtrace
+
+MILL_VERSION=0.9.7
+
+# Download the mill launcher once. The download goes to a temporary file that
+# is only moved into place on success, so a failed or partial download never
+# leaves a broken ./mill behind that would make later runs skip the download.
+if [ ! -f mill ]; then
+  curl --fail -L "https://github.com/com-lihaoyi/mill/releases/download/$MILL_VERSION/$MILL_VERSION" -o mill.tmp
+  chmod +x mill.tmp
+  mv mill.tmp mill
+fi
+
+./mill version
+
+# Check format
+./mill SpinalNet.checkFormat
+./mill SpinalNet.fix --check
+
+
+# Run test and simulation
+./mill SpinalNet.test.testOnly dma.DmaControllerTest
+./mill SpinalNet.test.testOnly sdram.SdramControllerTest
+./mill SpinalNet.test.testOnly udp.UdpTest
+
+# Generate Verilog code for Caravel
+./mill SpinalNet.runMain dma.DmaMem
+
diff --git a/spinal-cocotb/setup.sh b/spinal-cocotb/setup.sh
new file mode 100755
index 0000000..138245d
--- /dev/null
+++ b/spinal-cocotb/setup.sh
@@ -0,0 +1,15 @@
+#! /bin/sh
+
+set -o errexit
+set -o nounset
+set -o xtrace
+
+sudo apt-get update
+sudo apt-get install -y default-jdk iverilog verilator
+
+# Add Spinal HDL CocotbLib
+git submodule add https://github.com/SpinalHDL/CocotbLib.git SpinalNet/test/src/python/cocotblib
+# Local install Cocotb and set PATH env
+pip3 install cocotb
+export PATH="$HOME/.local/bin:$PATH"
+