// blob: 91c457b037b502ca672dc58790c766ee20db5f00
/*
_ _ __ ___ __ _________
| \ | |/ _(_) \ / /__|___ /___ \
| \| | |_| |\ \ / / _ \ |_ \ __) |
| |\ | _| | \ V / __/___) / __/
|_| \_|_| |_| \_/ \___|____/_____|
Copyright 2020 Mohamed Shalan
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Started as a One day project on May 2, 2020 by Mohamed Shalan
NfiVe32 is area optimized RV32IC core with the following features:
* Target clock frequency > 100MHz in 130nm technologies
* CPI ~ 3
* ASIC cell count: < 10K
+ SKY130A (HD): ~8.5K @ CP=5.8ns (DLY3)
* Instruction Cycles (3/4)
+ C0 : Fetch and Decompress,
+ C1 : Fetch cycle 2; optional, only used for unaligned 32-bit instructions
+ C2 : RF read, ALU & Branch,
+ C3 : Memory & RF write-back
* A single AHB-Lite Master interface for both instructions and data
+ Instr: A(C3), I(C0)
+ Data: A(C2), D(C3)
To do:
- [X] Exception Handling + PIC
- [X] Bus wait states
- [X] Some Performance counters (CYCLE and INSTRET)
- [X] Systick timer
- [] Wait for Interrupt Instruction (wfi)
- [] Comprehensive testing
- [X] Add a latch based Register File
- [X] Add hand-crafted ALU
*/
// Simulation time unit and precision.
`timescale 1ns/1ps
// Turn off implicit net creation for everything compiled after this line.
`default_nettype none
// Build options.
//   USE_RF_MODULE : the core instantiates a register-file module instead of
//                   the register array declared inline in NfiVe32.
//   USE_HC_REGF   : together with USE_RF_MODULE, selects the DFFRFile instance.
//   USE_RF_HC     : not referenced in the code visible in this file.
//   USE_ALU_HC    : includes ../rtl/ALU_HC.v and instantiates ALU_HC.
//   DBG           : enables the $display tracing of register-file writes.
`define USE_RF_MODULE
//`define USE_HC_REGF
//`define USE_RF_HC
//`define USE_ALU_HC
//`define DBG
// Macros used by all modules
// SYNC_BEGIN(r, v) opens a register process clocked on HCLK that loads v into
// r while HRESETn is low (asynchronous, active-low); the statements written
// between SYNC_BEGIN and SYNC_END form the body executed when not in reset.
`define SYNC_BEGIN(r, v) always @ (posedge HCLK or negedge HRESETn) if(!HRESETn) r <= v; else begin
`define SYNC_END end
// Bit ranges of the fields of a 32-bit instruction word.
// IR_opcode leaves out the two least-significant opcode bits.
`define IR_rs1 19:15
`define IR_rs2 24:20
`define IR_rd 11:7
`define IR_opcode 6:2
`define IR_funct3 14:12
`define IR_cond 14:12
`define IR_funct7 31:25
`define IR_shamt 24:20
`define IR_csr 31:20
// Major opcode values, compared against the IR_opcode field (bits 6:2).
`define OPCODE_Branch 5'b11_000
`define OPCODE_Load 5'b00_000
`define OPCODE_Store 5'b01_000
`define OPCODE_JALR 5'b11_001
`define OPCODE_JAL 5'b11_011
`define OPCODE_Arith_I 5'b00_100
`define OPCODE_Arith_R 5'b01_100
`define OPCODE_AUIPC 5'b00_101
`define OPCODE_LUI 5'b01_101
`define OPCODE_SYSTEM 5'b11_100
`define OPCODE_Custom 5'b10_001
// funct3 values of the register/immediate arithmetic instructions.
`define F3_ADD 3'b000
`define F3_SLL 3'b001
`define F3_SLT 3'b010
`define F3_SLTU 3'b011
`define F3_XOR 3'b100
`define F3_SRL 3'b101
`define F3_OR 3'b110
`define F3_AND 3'b111
// Branch condition codes (IR_cond field) decoded by the BRANCH module.
`define BR_BEQ 3'b000
`define BR_BNE 3'b001
`define BR_BLT 3'b100
`define BR_BGE 3'b101
`define BR_BLTU 3'b110
`define BR_BGEU 3'b111
//`define OPCODE IR[`IR_opcode]
// ALU function codes produced by RV32_DEC and consumed by ALU.
// In ALU, bit 0 selects the subtracting adder path and bits 1:0 are passed
// to the shifter as its type input.
`define ALU_ADD 4'b00_00
`define ALU_SUB 4'b00_01
`define ALU_PASS 4'b00_11
`define ALU_OR 4'b01_00
`define ALU_AND 4'b01_01
`define ALU_XOR 4'b01_11
`define ALU_SRL 4'b10_00
`define ALU_SRA 4'b10_10
`define ALU_SLL 4'b10_01
`define ALU_SLT 4'b11_01
`define ALU_SLTU 4'b11_11
// funct3 values of SYSTEM-opcode instructions.
// NOTE(review): these SYS_* names are not referenced in the code visible in
// this file; the core compares funct3 against literals instead.
`define SYS_EC_EB 3'b000
`define SYS_CSRRW 3'b001
`define SYS_CSRRS 3'b010
`define SYS_CSRRC 3'b011
`define SYS_CSRRWI 3'b101
`define SYS_CSRRSI 3'b110
`define SYS_CSRRCI 3'b111
// RV32C decompressor.
// Expands one 16-bit compressed instruction into the equivalent 32-bit
// base-ISA encoding. Purely combinational.
//   IRi : 16-bit compressed instruction (IRi[1:0] selects the quadrant)
//   IRo : expanded 32-bit instruction; 32'd0 for every encoding that is not
//         decoded below (including IRi[1:0] == 2'b11)
module RV32_DECOMP (
    input [15:0] IRi,
    output [31:0] IRo
);
    // Major opcodes (bits [6:0]) of the instructions emitted by the expander.
    localparam [6:0] OP_LOAD   = 7'b0000011,
                     OP_IMM    = 7'b0010011,
                     OP_STORE  = 7'b0100011,
                     OP_REG    = 7'b0110011,
                     OP_LUI    = 7'b0110111,
                     OP_BRANCH = 7'b1100011,
                     OP_JALR   = 7'b1100111,
                     OP_JAL    = 7'b1101111,
                     OP_SYSTEM = 7'b1110011;
    // Register numbers hard-wired by some compressed forms.
    localparam [4:0] X0 = 5'b00000,   // zero
                     X1 = 5'b00001,   // link register
                     X2 = 5'b00010;   // stack pointer

    reg  [31:0] Instout;
    wire [15:0] InstIn;
    assign InstIn = IRi;
    assign IRo    = Instout;

    // Fields used to steer the decode.
    wire [1:0] op   = InstIn[1:0];     // quadrant
    wire [2:0] fun3 = InstIn[15:13];
    wire [1:0] fun2 = InstIn[11:10];
    wire [1:0] fun  = InstIn[6:5];
    wire [4:0] Brs1 = InstIn[11:7];    // full-width rd/rs1
    wire [4:0] Brs2 = InstIn[6:2];     // full-width rs2

    // Shared sub-fields.
    wire        sgn   = InstIn[12];               // sign bit of most immediates
    wire [4:0]  rp_hi = {2'b01, InstIn[9:7]};     // 3-bit register field at [9:7] -> x8..x15
    wire [4:0]  rp_lo = {2'b01, InstIn[4:2]};     // 3-bit register field at [4:2] -> x8..x15
    wire [11:0] imm6  = {{6{sgn}}, sgn, InstIn[6:2]};   // sign-extended imm[5:0]
    // Jump offset already arranged in JAL order: imm[20|10:1|11|19:12].
    wire [19:0] jimm  = {sgn, InstIn[8], InstIn[10:9], InstIn[6], InstIn[7],
                         InstIn[2], InstIn[11], InstIn[5:3], sgn, {8{sgn}}};
    // Branch offset split as in the B format: imm[12|10:5] and imm[4:1|11].
    wire [6:0]  bimm_hi = {sgn, {2{sgn}}, sgn, InstIn[6:5], InstIn[2]};
    wire [4:0]  bimm_lo = {InstIn[11:10], InstIn[4:3], sgn};

    always @(*) begin
        Instout = 32'd0;    // default for every encoding not handled below
        case (op)
            2'b00: begin // quadrant C0
                case (fun3)
                    3'b000: // C.ADDI4SPN : addi rd', x2, nzuimm[9:2]
                        Instout = {2'b00, InstIn[10:7], InstIn[12:11], InstIn[5], InstIn[6], 2'b00,
                                   X2, 3'b000, rp_lo, OP_IMM};
                    3'b010: // C.LW : lw rd', offset[6:2](rs1')
                        Instout = {5'd0, InstIn[5], InstIn[12:10], InstIn[6], 2'b00,
                                   rp_hi, 3'b010, rp_lo, OP_LOAD};
                    3'b110: // C.SW : sw rs2', offset[6:2](rs1')
                        Instout = {5'd0, InstIn[5], InstIn[12],
                                   rp_lo, rp_hi, 3'b010,
                                   InstIn[11:10], InstIn[6], 2'b00, OP_STORE};
                endcase
            end

            2'b01: begin // quadrant C1
                case (fun3)
                    3'b000: // C.ADDI : addi rd, rd, nzimm[5:0]
                        Instout = {imm6, Brs1, 3'b000, Brs1, OP_IMM};
                    3'b001: // C.JAL : jal x1, offset[11:1]
                        Instout = {jimm, X1, OP_JAL};
                    3'b010: // C.LI : addi rd, x0, imm[5:0]
                        Instout = {imm6, X0, 3'b000, Brs1, OP_IMM};
                    3'b011: begin // C.ADDI16SP (rd == x2) or C.LUI
                        if (Brs1 == X2)
                            // addi x2, x2, nzimm[9:4]; the immediate is exactly
                            // 12 bits: 2 sign copies, nzimm[9:4], 4 zeros.
                            Instout = {{2{sgn}}, sgn, InstIn[4:3], InstIn[5], InstIn[2], InstIn[6], 4'd0,
                                       Brs1, 3'b000, Brs1, OP_IMM};
                        else
                            // lui rd, nzuimm[17:12]
                            Instout = {{14{sgn}}, sgn, InstIn[6:2], Brs1, OP_LUI};
                    end
                    3'b100: begin // C.SRLI, C.SRAI, C.ANDI and the register-register group
                        case (fun2)
                            2'b00: // C.SRLI : srli rd', rd', shamt
                                Instout = {7'b0000000, InstIn[6:2], rp_hi, 3'b101, rp_hi, OP_IMM};
                            2'b01: // C.SRAI : srai rd', rd', shamt
                                Instout = {7'b0100000, InstIn[6:2], rp_hi, 3'b101, rp_hi, OP_IMM};
                            2'b10: // C.ANDI : andi rd', rd', imm[5:0]
                                Instout = {imm6, rp_hi, 3'b111, rp_hi, OP_IMM};
                            2'b11:
                                // Only the InstIn[12] == 0 group is decoded.
                                if (!InstIn[12]) begin
                                    case (fun)
                                        2'b11: // C.AND : and rd', rd', rs2'
                                            Instout = {7'b0000000, rp_lo, rp_hi, 3'b111, rp_hi, OP_REG};
                                        2'b10: // C.OR  : or rd', rd', rs2'
                                            Instout = {7'b0000000, rp_lo, rp_hi, 3'b110, rp_hi, OP_REG};
                                        2'b01: // C.XOR : xor rd', rd', rs2'
                                            Instout = {7'b0000000, rp_lo, rp_hi, 3'b100, rp_hi, OP_REG};
                                        2'b00: // C.SUB : sub rd', rd', rs2'
                                            Instout = {7'b0100000, rp_lo, rp_hi, 3'b000, rp_hi, OP_REG};
                                    endcase
                                end
                        endcase
                    end
                    3'b101: // C.J : jal x0, offset[11:1]
                        Instout = {jimm, X0, OP_JAL};
                    3'b110: // C.BEQZ : beq rs1', x0, offset[8:1]
                        Instout = {bimm_hi, X0, rp_hi, 3'b000, bimm_lo, OP_BRANCH};
                    3'b111: // C.BNEZ : bne rs1', x0, offset[8:1]
                        Instout = {bimm_hi, X0, rp_hi, 3'b001, bimm_lo, OP_BRANCH};
                endcase
            end

            2'b10: begin // quadrant C2
                case (fun3)
                    3'b000: // C.SLLI : slli rd, rd, shamt
                        Instout = {7'b0000000, InstIn[6:2], Brs1, 3'b001, Brs1, OP_IMM};
                    3'b010: // C.LWSP : lw rd, offset[7:2](x2)
                        Instout = {4'd0, InstIn[3:2], InstIn[12], InstIn[6:4], 2'b00,
                                   X2, 3'b010, Brs1, OP_LOAD};
                    3'b100: begin // C.JR, C.MV, C.EBREAK, C.JALR, C.ADD
                        if (!InstIn[12]) begin
                            if (!Brs2) // C.JR : jalr x0, rs1, 0
                                Instout = {12'd0, Brs1, 3'b000, X0, OP_JALR};
                            else       // C.MV : add rd, x0, rs2
                                Instout = {7'b0000000, Brs2, X0, 3'b000, Brs1, OP_REG};
                        end
                        else begin
                            if (!Brs2 && !Brs1) // C.EBREAK
                                Instout = {12'd1, 5'd0, 3'b000, 5'd0, OP_SYSTEM};
                            else if (!Brs2)     // C.JALR : jalr x1, rs1, 0
                                Instout = {12'd0, Brs1, 3'b000, X1, OP_JALR};
                            else                // C.ADD : add rd, rd, rs2
                                Instout = {7'b0000000, Brs2, Brs1, 3'b000, Brs1, OP_REG};
                        end
                    end
                    3'b110: // C.SWSP : sw rs2, offset[7:2](x2)
                        Instout = {4'd0, InstIn[8:7], InstIn[12],
                                   Brs2, X2, 3'b010,
                                   InstIn[11:9], 2'b00, OP_STORE};
                endcase
            end
        endcase
    end
endmodule
// The ALU and its modules
`ifdef USE_ALU_HC
`include "../rtl/ALU_HC.v"
`else
// Mirroring unit for the shifter (reverses the bit order of a word)
// Bit-order reversal: out[i] = in[31-i] for every bit of the word.
module mirror (input [31:0] in, output reg [31:0] out);
    always @ *
        // Listing the bits LSB-first in the concatenation places in[0] at
        // out[31] and in[31] at out[0].
        out = { in[ 0], in[ 1], in[ 2], in[ 3], in[ 4], in[ 5], in[ 6], in[ 7],
                in[ 8], in[ 9], in[10], in[11], in[12], in[13], in[14], in[15],
                in[16], in[17], in[18], in[19], in[20], in[21], in[22], in[23],
                in[24], in[25], in[26], in[27], in[28], in[29], in[30], in[31] };
endmodule
// Shift Right Unit
// Right shifter built from five conditional stages (1, 2, 4, 8 and 16 bit
// positions), one per bit of shamt.
//   a     : value to shift
//   shamt : shift distance, 0..31
//   ar    : 1 = fill vacated bits with a[31], 0 = fill with zero
//   r     : shifted result
module shr(input [31:0] a, output [31:0] r, input [4:0] shamt, input ar);
    // Bit shifted in from the left, replicated once for the widest stage.
    wire        msb_in = ar ? a[31] : 1'b0;
    wire [15:0] pad    = {16{msb_in}};

    wire [31:0] by1  = shamt[0] ? {pad[0],    a[31:1]}    : a;
    wire [31:0] by2  = shamt[1] ? {pad[1:0],  by1[31:2]}  : by1;
    wire [31:0] by4  = shamt[2] ? {pad[3:0],  by2[31:4]}  : by2;
    wire [31:0] by8  = shamt[3] ? {pad[7:0],  by4[31:8]}  : by4;

    assign r = shamt[4] ? {pad, by8[31:16]} : by8;
endmodule
// The Shifter
// Shifter for all three shift kinds, built around one right shifter.
// A left shift is done by reversing the operand, shifting right, and
// reversing the result back.
//   a     : value to shift
//   shamt : shift distance
//   typ   : 00 = logical right, 10 = arithmetic right, 01 = logical left
//           (typ[0] selects the mirrored path, typ[1] the sign fill)
//   r     : shifted result
module shift(
    input wire [31:0] a,
    input wire [4:0] shamt,
    input wire [1:0] typ,
    output wire [31:0] r
);
    wire        go_left = typ[0];
    wire [31:0] a_rev;        // a with its bit order reversed
    wire [31:0] shr_in;       // operand handed to the right shifter
    wire [31:0] shr_out;      // right shifter result
    wire [31:0] shr_out_rev;  // right shifter result, bit order reversed

    mirror m1 (.in(a),       .out(a_rev));
    mirror m2 (.in(shr_out), .out(shr_out_rev));

    assign shr_in = go_left ? a_rev : a;

    shr sh0 (.a(shr_in), .r(shr_out), .shamt(shamt), .ar(typ[1]));

    assign r = go_left ? shr_out_rev : shr_out;
endmodule
// The ALU
// The ALU.
//   a, b   : operands
//   shamt  : shift distance for the shift operations
//   alufn  : operation select; bit 0 switches the adder to a - b, bits 1:0
//            are forwarded to the shifter as its type
//   r      : result
//   cf, zf, vf, sf : carry, zero, overflow and sign flags taken from the
//            adder output (whatever operation is selected)
module ALU(
    input wire [31:0] a, b,
    input wire [4:0] shamt,
    output reg [31:0] r,
    output wire cf, zf, vf, sf,
    input wire [3:0] alufn
);
    wire [31:0] b_inv  = ~b;
    wire        do_sub = alufn[0];
    wire [31:0] sum;        // adder output: a + b, or a + ~b + 1
    wire [31:0] shifted;    // shifter output

    // 33-bit add so that the carry out lands in cf.
    assign {cf, sum} = do_sub ? (a + b_inv + 1'b1) : (a + b);
    assign zf = (sum == 0);
    assign sf = sum[31];
    // a[31] ^ ~b[31] ^ sum[31] is the carry into bit 31; XOR with the carry
    // out gives the signed-overflow flag of the subtraction.
    assign vf = (a[31] ^ (b_inv[31]) ^ sum[31] ^ cf);

    shift shift0 (
        .a(a),
        .shamt(shamt),
        .typ(alufn[1:0]),
        .r(shifted)
    );

    always @ * begin
        (* full_case, parallel_case *)
        case (alufn)
            // arithmetic
            4'b00_00,
            4'b00_01 : r = sum;
            4'b00_11 : r = b;
            // logic
            4'b01_00 : r = a | b;
            4'b01_01 : r = a & b;
            4'b01_11 : r = a ^ b;
            // shifts
            4'b10_00,
            4'b10_01,
            4'b10_10 : r = shifted;
            // set-less-than: signed uses sign != overflow, unsigned uses borrow
            4'b11_01 : r = {31'b0, (sf != vf)};
            4'b11_11 : r = {31'b0, (~cf)};
            default  : r = sum;
        endcase
    end
endmodule
`endif
// Immediate Generator
// Immediate generator: builds the 32-bit immediate that matches the
// instruction's opcode. Any opcode without its own entry gets the
// I-type immediate.
//   INSTR : 32-bit instruction word
//   IMM   : sign-extended (or upper, for LUI/AUIPC) immediate
module IMMGEN (
    input wire [31:0] INSTR,
    output reg [31:0] IMM
);
    wire sgn = INSTR[31];

    // One candidate per instruction format.
    wire [31:0] imm_i = { {21{sgn}}, INSTR[30:20] };
    wire [31:0] imm_s = { {21{sgn}}, INSTR[30:25], INSTR[11:7] };
    wire [31:0] imm_u = { INSTR[31:12], 12'b0 };
    wire [31:0] imm_j = { {12{sgn}}, INSTR[19:12], INSTR[20], INSTR[30:21], 1'b0 };
    wire [31:0] imm_b = { {20{sgn}}, INSTR[7], INSTR[30:25], INSTR[11:8], 1'b0 };

    always @(*) begin
        case (INSTR[`IR_opcode])
            `OPCODE_Store  : IMM = imm_s;
            `OPCODE_LUI,
            `OPCODE_AUIPC  : IMM = imm_u;
            `OPCODE_JAL    : IMM = imm_j;
            `OPCODE_Branch : IMM = imm_b;
            // Arith_I, JALR and everything else use the I-type form.
            default        : IMM = imm_i;
        endcase
    end
endmodule
// Instruction decoder that generates the ALU operation
// Instruction decoder for the ALU.
//   INSTR       : 32-bit instruction word
//   alu_fn      : ALU function code; ALU_SUB for anything not matched below
//   alu_op2_src : 1 when the instruction has the register-register
//                 arithmetic opcode, 0 otherwise
module RV32_DEC(
    input [31:0] INSTR,
    output reg [3:0] alu_fn,
    output alu_op2_src
);
    wire [4:0] opc = INSTR[`IR_opcode];
    wire [2:0] f3  = INSTR[`IR_funct3];
    wire [6:0] f7  = INSTR[`IR_funct7];

    // Opcode classes.
    wire op_imm   = (opc == `OPCODE_Arith_I);
    wire op_reg   = (opc == `OPCODE_Arith_R);
    wire op_arith = op_imm | op_reg;
    wire op_load  = (opc == `OPCODE_Load);
    wire op_store = (opc == `OPCODE_Store);
    wire op_jalr  = (opc == `OPCODE_JALR);
    wire op_lui   = (opc == `OPCODE_LUI);

    // Individual operations.
    wire is_add  = op_reg & (f3 == `F3_ADD) & ~f7[5];
    wire is_addi = op_imm & (f3 == `F3_ADD);
    wire is_sll  = op_arith && (f3 == `F3_SLL) && (f7 == 7'b0);
    wire is_srl  = op_arith && (f3 == `F3_SRL) && (f7 == 7'b0);
    wire is_sra  = op_arith && (f3 == `F3_SRL) && (f7 != 7'b0);
    wire is_slt  = op_arith && (f3 == `F3_SLT);
    wire is_sltu = op_arith && (f3 == `F3_SLTU);
    wire is_and  = op_arith && (f3 == `F3_AND);
    wire is_or   = op_arith && (f3 == `F3_OR);
    wire is_xor  = op_arith && (f3 == `F3_XOR);

    // Everything that needs a plain addition from the ALU.
    wire use_adder = op_load | is_addi | op_store | is_add | op_jalr;

    assign alu_op2_src = op_reg;

    // Priority chain; the first matching condition wins.
    always @ * begin
        if      (use_adder) alu_fn = `ALU_ADD;
        else if (op_lui)    alu_fn = `ALU_PASS;
        else if (is_sll)    alu_fn = `ALU_SLL;
        else if (is_srl)    alu_fn = `ALU_SRL;
        else if (is_sra)    alu_fn = `ALU_SRA;
        else if (is_slt)    alu_fn = `ALU_SLT;
        else if (is_sltu)   alu_fn = `ALU_SLTU;
        else if (is_and)    alu_fn = `ALU_AND;
        else if (is_or)     alu_fn = `ALU_OR;
        else if (is_xor)    alu_fn = `ALU_XOR;
        else                alu_fn = `ALU_SUB;
    end
endmodule
// Conditional Branching Unit. It checks whether the branch is taken or not
// Conditional branch evaluator.
// Computes R1 - R2 and derives the branch decision from the resulting flags.
//   cond   : branch condition code (one of the BR_* values)
//   R1, R2 : the two register operands being compared
//   taken  : 1 when the condition holds; 0 for condition codes that are not
//            one of the six listed below
module BRANCH (
    input [2:0] cond,
    input [31:0] R1, R2,
    output reg taken    // declared once here; redeclaring a port in the body is rejected by strict tools
);
    wire zf, cf, vf, sf;
    wire [31:0] add, op_b;

    // R1 - R2 as R1 + ~R2 + 1; cf is the carry out (1 = no borrow).
    assign op_b = (~R2);
    assign {cf, add} = (R1 + op_b + 1'b1);
    assign zf = (add == 0);
    assign sf = add[31];
    // Carry into bit 31 XOR carry out of bit 31 = signed overflow.
    assign vf = (R1[31] ^ (op_b[31]) ^ add[31] ^ cf);

    always @ * begin
        (* full_case *)
        case (cond)
            `BR_BEQ  : taken = zf;          // equal
            `BR_BNE  : taken = ~zf;         // not equal
            `BR_BLT  : taken = (sf != vf);  // signed less than
            `BR_BGE  : taken = (sf == vf);  // signed greater or equal
            `BR_BLTU : taken = (~cf);       // unsigned less than (borrow)
            `BR_BGEU : taken = (cf);        // unsigned greater or equal
            default  : taken = 1'b0;
        endcase
    end
endmodule
// Memory data (R) aligner
// Memory read-data aligner.
// Picks the addressed byte or half-word out of the 32-bit bus word and
// extends it to 32 bits.
//   d    : 32-bit word read from the bus
//   size : 0 = byte, 1 = half-word, anything else = full word
//   A    : two low address bits selecting the byte lane / half-word
//   sign : 1 selects ZERO extension, 0 selects SIGN extension. The name is
//          misleading; the core drives it from instruction bit 14.
//   ed   : aligned and extended data
module mrdata_align(
    input wire [31:0] d,
    output wire [31:0] ed,
    input wire [1:0] size,
    input wire [1:0] A,
    input wire sign
);
    wire [31:0] s_ext, u_ext;
    wire [7:0] _byte_;
    wire [15:0] hword;

    assign _byte_ = (A==2'd0) ? d[7:0] :
                    (A==2'd1) ? d[15:8] :
                    (A==2'd2) ? d[23:16] : d[31:24];
    assign hword = (A[1]==0) ? d[15:0] : d[31:16];

    assign u_ext = (size==2'd0) ? {24'd0,_byte_} :
                   (size==2'd1) ? {16'd0,hword} : d;
    // 16 sign copies + 16 data bits = exactly 32 bits.
    assign s_ext = (size==2'd0) ? {{24{_byte_[7]}},_byte_} :
                   (size==2'd1) ? {{16{hword[15]}},hword} : d;

    assign ed = sign ? u_ext : s_ext;
endmodule
// Memory data (W) aligner
// Memory write-data aligner.
// Moves the low byte or half-word of the store data onto the byte lanes
// selected by the address.
//   d    : 32-bit store data
//   size : 0 = byte, 1 = half-word, anything else = full word
//   A    : two low address bits
//   fd   : data as driven on the bus
module mwdata_align(
    input wire [31:0] d,
    output wire [31:0] fd,
    input wire [1:0] size,
    input wire [1:0] A
);
    wire [7:0]  lo8  = d[7:0];
    wire [15:0] lo16 = d[15:0];
    reg  [31:0] lane_data;

    always @* begin
        case (size)
            2'd0:   // byte store
                case (A)
                    2'd0:    lane_data = d;                      // lane 0: word passed through as is
                    2'd1:    lane_data = {16'd0, lo8, 8'd0};
                    2'd2:    lane_data = {8'd0, lo8, 16'd0};
                    default: lane_data = {lo8, 24'd0};
                endcase
            2'd1:   // half-word store
                if (A[1]) lane_data = {lo16, 16'd0};
                else      lane_data = d;                         // lower half: word passed through as is
            default: // word store
                lane_data = d;
        endcase
    end

    assign fd = lane_data;
endmodule
// The Instruction Fetch Unit
// The Instruction Fetch Unit.
// Assembles the raw instruction bits from the fetched word(s) and expands
// compressed instructions to 32 bits.
//   IDATA0 : word currently on the read-data bus
//   IDATA1 : previously captured word (used only when C1 is set)
//   PC     : program counter; only PC[1] is examined
//   C1     : 1 during the second fetch cycle of an unaligned 32-bit instruction
//   INSTR  : 32-bit instruction (expanded when the raw one is 16 bits)
//   IS32   : 1 when the raw instruction's two low bits are both 1
module NfiVe32_FU(
    input wire [31:0] IDATA0,
    input wire [31:0] IDATA1,
    input wire [31:0] PC,
    input wire C1,
    output wire [31:0] INSTR,
    output wire IS32
);
    reg  [31:0] raw;        // instruction bits before decompression
    wire [31:0] expanded;   // decompressor output for raw[15:0]

    always @* begin
        if (C1)
            // Unaligned 32-bit: upper half of the earlier word supplies the
            // low 16 bits, lower half of the current word the high 16 bits.
            raw = {IDATA0[15:0], IDATA1[31:16]};
        else if (PC[1])
            // Instruction starts in the upper half-word.
            raw = {16'h0, IDATA0[31:16]};
        else
            // Aligned 32-bit instruction, or a 16-bit one in the lower half.
            raw = IDATA0;
    end

    wire full_width = raw[0] & raw[1];

    RV32_DECOMP nfive_decomp (.IRi(raw[15:0]), .IRo(expanded));

    assign IS32  = full_width;
    assign INSTR = full_width ? raw : expanded;
endmodule
// Instruction Execution Unit (ALU + next PC generation)
// Instruction Execution Unit (ALU + next PC generation).
//   ALUR  : ALU result
//   NPC   : next program counter
//   PC24  : PC + 4 for a 32-bit instruction, PC + 2 otherwise
//   PCI   : PC + immediate
//   PC    : current program counter
//   INSTR : 32-bit (already expanded) instruction
//   R1,R2 : register operands
//   IS32  : 1 when the instruction occupied 32 bits in memory
module NfiVe32_XU(
    output wire [31:0] ALUR,
    output wire [31:0] NPC,
    output wire [31:0] PC24,
    output wire [31:0] PCI,
    input wire [31:0] PC,
    input wire [31:0] INSTR,
    input wire [31:0] R1,
    input wire [31:0] R2,
    input wire IS32
);
    wire [4:0] opc = INSTR[`IR_opcode];
    wire is_branch = (opc == `OPCODE_Branch);
    wire is_jalr   = (opc == `OPCODE_JALR);
    wire is_jal    = (opc == `OPCODE_JAL);

    wire [31:0] imm;
    wire [3:0]  alu_fn;
    wire        alu_op2_src;
    wire        branch_taken;

    // Candidate next-PC values.
    wire [31:0] pc_plus_imm = PC + imm;
    wire [31:0] pc_seq      = IS32 ? (PC + 32'h4) : (PC + 32'h2);

    // Second ALU operand: register for register-register ops, else immediate.
    wire [31:0] alu_op2   = alu_op2_src ? R2 : imm;
    wire [4:0]  alu_shamt = INSTR[`IR_shamt];

    IMMGEN immgen (.INSTR(INSTR), .IMM(imm));

`ifdef USE_ALU_HC
    ALU_HC
`else
    ALU
`endif
        alu (.a(R1), .b(alu_op2), .shamt(alu_shamt), .r(ALUR), .alufn(alu_fn));

    BRANCH brunint (.cond(INSTR[`IR_cond]), .R1(R1), .R2(R2), .taken(branch_taken));

    RV32_DEC decoder (.INSTR(INSTR), .alu_fn(alu_fn), .alu_op2_src(alu_op2_src));

    // JAL and taken branches go to PC + imm, JALR to the ALU result,
    // everything else falls through to the next sequential instruction.
    wire goto_pc_imm = is_jal | (is_branch & branch_taken);

    assign NPC  = goto_pc_imm ? pc_plus_imm :
                  is_jalr     ? ALUR        : pc_seq;
    assign PC24 = pc_seq;
    assign PCI  = pc_plus_imm;
endmodule
// Register file: 32 x 32-bit, two combinational read ports and one write
// port clocked on the rising edge of HCLK. Register 0 always reads as zero
// and is never written.
//   WR     : write enable
//   RA, RB : read addresses
//   RW     : write address
//   DW     : write data
//   DA, DB : read data for RA and RB
module NfiVe32_RF (
    input HCLK, // System clock
    input WR,
    input [ 4:0] RA,
    input [ 4:0] RB,
    input [ 4:0] RW,
    input [31:0] DW,
    output [31:0] DA,
    output [31:0] DB
);
    reg [31:0] RF [31:0];

    // Reads of register 0 are forced to zero regardless of the array content.
    assign DA = (RA == 5'd0) ? 32'd0 : RF[RA];
    assign DB = (RB == 5'd0) ? 32'd0 : RF[RB];

    always @ (posedge HCLK)
        if (WR && (RW != 5'd0)) begin
            RF[RW] <= DW;
`ifdef DBG
            #1 $display("Write: RF[%d]=0x%X []", RW, RF[RW]);
`endif
        end
endmodule
// The CPU Core
// Values of the cycle register CYC inside NfiVe32.
`define CYC_C0 2'h0
`define CYC_C1 2'h1
`define CYC_C2 2'h2
`define CYC_C3 2'h3
// NfiVe32 CPU core.
// Executes each instruction in 3 or 4 cycles (C0, optional C1, C2, C3) over
// one AHB-Lite master port shared by instruction fetches and data accesses.
//   HCLK / HRESETn : clock and asynchronous active-low reset
//   HADDR, HSIZE, HTRANS, HWDATA, HWRITE, HRDATA, HREADY : AHB-Lite master
//   NMI            : non-maskable interrupt request
//   IRQ / IRQ_NUM  : interrupt request and its number
//   SYSTICKCLK     : enable pulse for the SYSTICK down-counter
//   IRQ_MASK       : content of the IRQ mask CSR
module NfiVe32 (
    input HCLK, // System clock
    input HRESETn, // System Reset, active low
    // AHB-LITE MASTER PORT for Instructions
    output wire [31:0] HADDR, // AHB transaction address
    output wire [ 2:0] HSIZE, // AHB size: _byte_, half-word or word
    output wire [ 1:0] HTRANS, // AHB transfer: non-sequential only
    output wire [31:0] HWDATA, // AHB write-data
    output wire HWRITE, // AHB write control
    input wire [31:0] HRDATA, // AHB read-data
    input wire HREADY, // AHB stall signal
    // MISCELLANEOUS
    input wire NMI, // Non-maskable interrupt input
    input wire IRQ, // Interrupt request line
    input wire [4:0] IRQ_NUM, // Interrupt number from the PIC
    input wire SYSTICKCLK, // SYSTICK clock; ON pulse width is one HCLK period
    output wire [31:0] IRQ_MASK
);
    reg [1:0] CYC, NCYC;
    reg RUN;
    reg IS32;
    reg INEXCEPTION;
    reg [31:0] PC;
    reg [31:0] PCI;
    reg [31:0] IDATA;
    reg [31:0] PC24;
    reg [31:0] INSTR;
    reg [31:0] ALU_R;
    reg [31:0] CSR_CYCLE;
    reg [31:0] CSR_INSTRET;
    reg [31:0] CSR_TIME;
    reg [31:0] CSR_TIMELOAD;
    reg [31:0] CSR_MIE;
    reg [31:0] CSR_IRQMASK;
    // reg [31:0] CSR_MIP;
    reg [31:0] CSR_EPC;

    wire [31:0] instr;
    wire [31:0] hrdata;
    wire [31:0] hwdata;
    wire [31:0] alur;
    wire [31:0] npc;
    wire [31:0] pc24, pci;
    wire is32;
    wire tmr_int;

    // The instruction starts in the upper half-word and is a 32-bit one, so
    // a second fetch is needed.
    wire unaligned = PC[1] & HRDATA[16] & HRDATA[17];

    // One-hot view of the current cycle.
    wire C0 = (CYC==`CYC_C0), C1 = (CYC==`CYC_C1), C2 = (CYC==`CYC_C2), C3 = (CYC==`CYC_C3);

    // Instruction class decode.
    wire instr_i = (INSTR[`IR_opcode] == `OPCODE_Arith_I);
    wire instr_r = (INSTR[`IR_opcode] == `OPCODE_Arith_R);
    wire instr_lui = (INSTR[`IR_opcode] == `OPCODE_LUI);
    wire instr_auipc = (INSTR[`IR_opcode] == `OPCODE_AUIPC);
    wire instr_branch = (INSTR[`IR_opcode] == `OPCODE_Branch);
    wire instr_jalr = (INSTR[`IR_opcode] == `OPCODE_JALR);
    wire instr_jal = (INSTR[`IR_opcode] == `OPCODE_JAL);
    wire instr_store = (INSTR[`IR_opcode] == `OPCODE_Store);
    wire instr_load = (INSTR[`IR_opcode] == `OPCODE_Load);
    wire [11:0] csr_num = INSTR[`IR_csr];
    wire instr_priv = (INSTR[`IR_opcode] == 5'h1C);
    wire instr_rdcsr = instr_priv & (INSTR[`IR_funct3] == 3'd2);
    wire instr_wrcsr = instr_priv & (INSTR[`IR_funct3] == 3'd1);
    wire instr_ecall = instr_priv & (INSTR[`IR_funct3] == 3'b0) & (csr_num == 12'h0);
    wire instr_ebreak= instr_priv & (INSTR[`IR_funct3] == 3'b0) & (csr_num == 12'h1);
    wire instr_mret = instr_priv & (INSTR[`IR_funct3] == 3'b0) & (csr_num == 12'h302);
    wire instr_wfi = instr_priv & (INSTR[`IR_funct3] == 3'b0) & (csr_num == 12'h105);

    // Instructions that write the destination register. CSR reads are
    // included so that the csr value selected by rf_dw actually reaches rd.
    wire rf_wr = instr_load | instr_r | instr_i | instr_jal | instr_jalr | instr_lui | instr_auipc | instr_rdcsr;

    // NOTE(review): pc_ex has a vector for instr_ebreak, but instr_ebreak is
    // not part of this expression, so that vector is never taken on its own.
    // Confirm whether EBREAK is meant to raise an exception.
    wire exception = (CSR_MIE[0] & ((tmr_int & CSR_MIE[1]) | (IRQ & CSR_MIE[2]))) | NMI | instr_ecall;
    wire [31:0] pc_ex = instr_ecall ? 32'd12 :
                        instr_ebreak? 32'd16 :
                        NMI ? 32'd4 :
                        tmr_int ? 32'd8 :
                        IRQ ? (32'd64+(IRQ_NUM<<2)) : 32'd60;

    assign IRQ_MASK = CSR_IRQMASK;

    // CSR read multiplexer. Declared ahead of rf_dw because implicit nets
    // are disabled and a net must exist before it is referenced.
    wire [31:0] csr = (csr_num==12'hC00) ? CSR_CYCLE :
                      (csr_num==12'hC01) ? CSR_TIME :
                      (csr_num==12'hC02) ? CSR_INSTRET :
                      (csr_num==12'hC03) ? CSR_TIMELOAD :
                      (csr_num==12'h304) ? CSR_MIE :
                      (csr_num==12'h310) ? CSR_IRQMASK :
                      32'hBAAAAAAD;

    // The Register File
    wire [4:0] rs1 = INSTR[`IR_rs1];
    wire [4:0] rs2 = INSTR[`IR_rs2];
    wire [4:0] rd = INSTR[`IR_rd];
    wire [31:0] r1, r2;
    // Write-back data: link address, PC + imm, load data, CSR value or ALU result.
    wire [31:0] rf_dw = (instr_jal | instr_jalr) ? PC24 :
                        (instr_auipc) ? PCI :
                        (instr_load) ? hrdata :
                        (instr_rdcsr) ? csr : alur;
`ifdef USE_RF_MODULE
`ifdef USE_HC_REGF
    DFFRFile RF (
        .R1(rs1), .R2(rs2), .RW(rd),
        .DW(rf_dw),
        .D1(r1), .D2(r2),
        .CLK(HCLK),
        .WE(rf_wr&C3)
    );
`else
    NfiVe32_RF RF (
        .HCLK(HCLK),
        .WR(rf_wr & C3),
        .RA(rs1),
        .RB(rs2),
        .RW(rd),
        .DW(rf_dw),
        .DA(r1),
        .DB(r2)
    );
`endif
`ifdef DBG
    always @(posedge HCLK)
        if(rd != 5'd0)
            if(rf_wr & C3) begin
                $display("RF[%02d]=%X (%d)", rd, rf_dw, rf_dw);
                if(rd == 10) $display("<===");
            end
`endif
`else
    reg [31:0] RF[31:0];
    assign r1 = RF[rs1] & {32{~(rs1==5'd0)}};
    assign r2 = RF[rs2] & {32{~(rs2==5'd0)}};
    always @(posedge HCLK)
        if(rd != 5'd0)
            if(rf_wr & C3) begin
                RF[rd] <= rf_dw;
`ifdef DBG
                #1 $display("Write: RF[%d]=0x%X [PC=0x%X, INSTR=0x%X]", rd, RF[rd], PC, INSTR);
`endif
            end
`endif

    // Bus address: 0 before RUN, the word holding PC in C3 (instruction
    // fetch), the following word in C0 (second half of an unaligned
    // instruction) and the ALU result in C2 (load/store).
    assign HADDR = ~RUN ? 32'h0 : C3 ? {PC[31:2],2'b0} : C0 ? ({PC[31:2],2'b0}+32'h4) : C2 ? alur : 32'd0;
    assign HTRANS[0] = 1'h0;
    assign HTRANS[1] = ~RUN | C3 | (C0 & unaligned) | (C2 & (instr_load | instr_store));
    assign HWRITE = C2 & instr_store;
    // NOTE(review): HSIZE follows INSTR[13:12] in every cycle, including the
    // instruction-fetch address phases; confirm the attached slaves ignore
    // HSIZE on reads.
    assign HSIZE = {1'b0,INSTR[13:12]};
    assign HWDATA = (C3 & instr_store) ? hwdata : 32'd0;

    mrdata_align mralign(
        .d(HRDATA),
        .ed(hrdata),
        .size(HSIZE[1:0]),
        //.A(alur[1:0]),
        .A(ALU_R[1:0]),
        .sign(INSTR[14])
    );
    mwdata_align mwalign(
        .d(r2),
        .fd(hwdata),
        .size(HSIZE[1:0]),
        .A(alur[1:0])
    );
    NfiVe32_FU fetch_unit(
        .IDATA0(HRDATA),
        .IDATA1(IDATA),
        .PC(PC),
        .C1(C1),
        .INSTR(instr),
        .IS32(is32)
    );
    NfiVe32_XU exec_unit(
        .ALUR(alur),
        .NPC(npc),
        .PC24(pc24),
        .PCI(pci),
        .PC(PC),
        .INSTR(INSTR),
        .R1(r1),
        .R2(r2),
        .IS32(IS32)
    );

    // CPU Cycle: next-state logic. C0, C1 and C3 wait for HREADY; C2 always
    // lasts one clock.
    always @*
        case (CYC)
            `CYC_C0: if(HREADY) begin
                         if(~PC[1]) NCYC = `CYC_C2; // Aligned
                         else if(HRDATA[16]&HRDATA[17]) NCYC = `CYC_C1; // Not aligned and 32-bit instruction
                         else NCYC = `CYC_C2; // Not aligned but 16-bit instruction
                     end
                     else
                         NCYC = `CYC_C0;
            `CYC_C1: if(HREADY)
                         NCYC = `CYC_C2;
                     else
                         NCYC = `CYC_C1;
            `CYC_C2: NCYC = `CYC_C3;
            `CYC_C3: if(HREADY)
                         NCYC = `CYC_C0;
                     else
                         NCYC = `CYC_C3;
            default: NCYC = `CYC_C0;
        endcase

    // The registers: 4 x 32 + 2 x 1 + 1 x 2 = 132 Bits
    // Synthesized into 118 bits only:
    // + CYC is expanded from 2 to 4 (OHE FSM)
    // + IDATA lower 16 bits are not used -> removed during optimization
    always @(posedge HCLK or negedge HRESETn)
        if(!HRESETn) RUN <= 0;
        else RUN <= 1;
    /*
    `SYNC_BEGIN(RUN, 1'h0)
    //if(~RUN)
    RUN <= 1'b1;
    `SYNC_END
    */
    `SYNC_BEGIN(CYC, 2'h0)
        if(RUN) CYC <= NCYC;
    `SYNC_END

    `SYNC_BEGIN(INEXCEPTION, 1'h0)
        if(exception & C3 & !INEXCEPTION) INEXCEPTION <= 1'h1;
        else if(instr_mret & C3) INEXCEPTION <= 1'h0;
    `SYNC_END

    // Word seen in C0, kept for assembling an unaligned 32-bit instruction.
    `SYNC_BEGIN(IDATA, 32'h0)
        if(C0)
            IDATA <= HRDATA;
    `SYNC_END
    /*
    `SYNC_BEGIN(ALUR, 32'h0)
    if(C2)
    ALUR <= alur;
    `SYNC_END
    */
    // ALU result of C2, kept so the load aligner still has the address bits in C3.
    always @(posedge HCLK or negedge HRESETn)
        if(!HRESETn) ALU_R <= 0;
        else if(C2) ALU_R <= alur;

    `SYNC_BEGIN(INSTR, 32'h0)
        if(C0 | C1)
            INSTR <= instr;
    `SYNC_END

    `SYNC_BEGIN(IS32, 1'h0)
        if(C0 | C1) IS32 <= is32;
    `SYNC_END

    `SYNC_BEGIN(PC24, 32'h0)
        if(C2)
            PC24 <= pc24;
    `SYNC_END

    `SYNC_BEGIN(PCI, 32'h0)
        if(C2)
            PCI <= pci;
    `SYNC_END

    // PC is updated at the end of C2: return address on MRET, exception
    // vector on a newly accepted exception, otherwise the computed next PC.
    `SYNC_BEGIN(PC, 32'h0)
        if(C2 & instr_mret)
            PC <= CSR_EPC;
        else if(C2 & exception & !INEXCEPTION)
            PC <= pc_ex;
        else if(C2)
            PC <= npc;
        //#1 $display ("PC=%x", PC);
    `SYNC_END

    // Counters and Special function Registers (CSRs)
    // Retired Instruction: counted once per instruction, on the final C3
    // clock (C3 is held while HREADY is low).
    `SYNC_BEGIN(CSR_INSTRET, 32'h0)
        if(C3 & HREADY)
            CSR_INSTRET <= CSR_INSTRET + 32'h1;
    `SYNC_END

    // Number of CPU cycles
    `SYNC_BEGIN(CSR_CYCLE, 32'h0)
        if(RUN) CSR_CYCLE <= CSR_CYCLE + 32'h1;
    `SYNC_END

    // SYSTICK Timer: counts down on SYSTICKCLK, reloads from CSR_TIMELOAD at
    // zero; the timer interrupt is asserted while the counter is zero.
    wire csr_time_zero = (CSR_TIME == 32'h0);
    assign tmr_int = csr_time_zero;
    `SYNC_BEGIN(CSR_TIME, 32'hFFFF_FFFF)
        if(SYSTICKCLK)
            if(csr_time_zero)
                CSR_TIME <= CSR_TIMELOAD;
            else
                CSR_TIME <= CSR_TIME - 32'h1;
    `SYNC_END

    // SYSTICK TimeLoad register
    `SYNC_BEGIN(CSR_TIMELOAD, 32'hFFFF_FFFF)
        if(instr_wrcsr & (csr_num == 12'hC03))
            CSR_TIMELOAD <= r1;
    `SYNC_END

    // Non Standard Machine Interrupt Enable CSR
    // Bit 0: Global Int En
    // Bit 1: Timer Int En
    // Bit 2: External Int En
    `SYNC_BEGIN(CSR_MIE, 32'h0)
        if(instr_wrcsr & (csr_num == 12'h304))
            CSR_MIE <= r1;
    `SYNC_END

    // Non standard IRQ MASK CSR
    `SYNC_BEGIN(CSR_IRQMASK, 32'h0)
        if(instr_wrcsr & (csr_num == 12'h310))
            CSR_IRQMASK <= r1;
    `SYNC_END

    // Exception PC CSR: address to resume at after MRET.
    `SYNC_BEGIN(CSR_EPC, 32'h0)
        if(exception & C2 & !INEXCEPTION)
            CSR_EPC <= npc;
    `SYNC_END
endmodule
// A very simple Programmable Interrupts Controller
// A very simple Programmable Interrupts Controller.
// Reports whether any unmasked request line is active and the number of the
// highest-numbered such line.
//   IRQ      : 32 request lines
//   IRQ_MASK : per-line enable; a line counts only when its mask bit is 1
//   irq      : 1 when at least one enabled line is active
//   IRQ_NUM  : index of the highest enabled active line, 0 when none
module NfiVe32_PIC(
    input wire [31:0] IRQ,
    output reg irq,
    output wire [4:0] IRQ_NUM,
    input wire [31:0] IRQ_MASK
);
    reg [4:0] winner;
    integer   k;

    assign IRQ_NUM = winner;

    always @ * begin
        irq    = 1'b0;
        winner = 5'd0;
        // Scan from the top line down and keep the first hit.
        for (k = 31; k >= 0; k = k - 1)
            if (!irq && (IRQ_MASK[k] & IRQ[k])) begin
                irq    = 1'b1;
                winner = k;
            end
    end
endmodule
/*
NfiVe Top Level Integration
NfiVe CPU + PIC + SYSTICK
*/
// Top-level integration: NfiVe32 core + interrupt controller + SYSTICK
// pulse generator.
//   SYSTICKCLKDIV : the SYSTICK enable pulse is produced one clock after an
//                   8-bit free-running counter equals this value; the
//                   counter restarts from zero at that point
// All other ports are passed straight to the core or the PIC.
module NfiVe32_SYS (
`ifdef USE_POWER_PINS
    input VPWR,
    input VGND,
`endif
    input HCLK, // System clock
    input HRESETn, // System Reset, active low
    // AHB-LITE MASTER PORT for Instructions
    output wire [31:0] HADDR, // AHB transaction address
    output wire [ 2:0] HSIZE, // AHB size: _byte_, half-word or word
    output wire [ 1:0] HTRANS, // AHB transfer: non-sequential only
    output wire [31:0] HWDATA, // AHB write-data
    output wire HWRITE, // AHB write control
    input wire [31:0] HRDATA, // AHB read-data
    input wire HREADY, // AHB stall signal
    // MISCELLANEOUS
    input wire NMI, // Non-maskable interrupt input
    input wire [31:0] IRQ, // 32 IRQ Line
    input wire [7:0] SYSTICKCLKDIV
);
    // Core <-> PIC wiring.
    wire        irq;
    wire [4:0]  irq_num;
    wire [31:0] irq_mask;

    // SYSTICK divider state.
    reg  [7:0]  clkdiv;
    reg         systickclk;
    wire        div;

    NfiVe32 N5 (
        .HCLK       (HCLK),
        .HRESETn    (HRESETn),
        // AHB-LITE MASTER PORT for Instructions and Data
        .HADDR      (HADDR),
        .HSIZE      (HSIZE),
        .HTRANS     (HTRANS),
        .HWDATA     (HWDATA),
        .HWRITE     (HWRITE),
        .HRDATA     (HRDATA),
        .HREADY     (HREADY),
        // MISCELLANEOUS
        .NMI        (NMI),
        .IRQ        (irq),
        .IRQ_NUM    (irq_num),
        .SYSTICKCLK (systickclk),
        .IRQ_MASK   (irq_mask)
    );

    NfiVe32_PIC PIC (
        .IRQ      (IRQ),
        .irq      (irq),
        .IRQ_NUM  (irq_num),
        .IRQ_MASK (irq_mask)
    );

    // Terminal count of the divider.
    assign div = (clkdiv == SYSTICKCLKDIV);

    // Free-running divider counter, restarted at the terminal count.
    always @ (posedge HCLK or negedge HRESETn)
        if (!HRESETn)
            clkdiv <= 8'b0;
        else begin
            if (div)
                clkdiv <= 8'h0;
            else
                clkdiv <= clkdiv + 8'h1;
        end

    // Registered copy of the terminal count: a one-clock enable pulse.
    always @ (posedge HCLK or negedge HRESETn)
        if (!HRESETn)
            systickclk <= 1'b0;
        else begin
            if (div)
                systickclk <= 1'b1;
            else
                systickclk <= 1'b0;
        end
endmodule