Add fpu verilog files
diff --git a/verilog/rtl/ips/fpu b/verilog/rtl/ips/fpu
deleted file mode 160000
index 42b9437..0000000
--- a/verilog/rtl/ips/fpu
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 42b943772a6467ac91b58b051c6f5798b204173b
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/control_tp.v b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/control_tp.v
new file mode 100644
index 0000000..0f32b98
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/control_tp.v
@@ -0,0 +1,599 @@
+module control_tp // sequencing and operand steering for the iterative divide / square-root cells
+#(
+   parameter   Precision_ctl_Enable_S = 1 // 1: use Precision_ctl_SI; any other value: fixed code 5'b10111
+)
+(
+	Clk_CI, // in : clock, registers update on the rising edge
+	Rst_RBI, // in : asynchronous reset, active low
+	Div_start_SI, // in : division start strobe
+	Sqrt_start_SI, // in : square-root start strobe
+	Start_SI, // in : captures the precision code, clears Done_SO and Ready_SO
+	Precision_ctl_SI, // in : precision code, sampled while Start_SI is high
+	Numerator_DI, // in : dividend word; also the square-root operand
+	Exp_num_DI, // in : exponent paired with Numerator_DI (feeds the exponent adder)
+	Denominator_DI, // in : divisor word
+	Exp_den_DI, // in : exponent paired with Denominator_DI (feeds the exponent adder)
+	First_iteration_cell_sum_DI, // in : sum word returned by iteration cell 1
+	First_iteration_cell_carry_DI, // in : carry returned by iteration cell 1 (shifted into Quotient_DP)
+	Sqrt_Da0, // in : two bits appended to the cell-1 sum to form Sqrt_R1
+	Sec_iteration_cell_sum_DI, // in : sum word returned by iteration cell 2
+	Sec_iteration_cell_carry_DI, // in : carry returned by iteration cell 2
+	Sqrt_Da1, // in : two bits appended to the cell-2 sum to form Sqrt_R2
+	Thi_iteration_cell_sum_DI, // in : sum word returned by iteration cell 3
+	Thi_iteration_cell_carry_DI, // in : carry returned by iteration cell 3
+	Sqrt_Da2, // in : two bits appended to the cell-3 sum to form Sqrt_R3
+	Fou_iteration_cell_sum_DI, // in : sum word returned by iteration cell 4
+	Fou_iteration_cell_carry_DI, // in : carry returned by iteration cell 4
+	Sqrt_Da3, // in : two bits appended to the cell-4 sum to form Sqrt_R4
+	Div_start_dly_SO, // out: Div_start_SI delayed by one clock
+	Sqrt_start_dly_SO, // out: Sqrt_start_SI delayed by one clock
+	Div_enable_SO, // out: set by Div_start_SI, cleared on an edge where Done_SO is high
+	Sqrt_enable_SO, // out: set by Sqrt_start_SI, cleared on an edge where Done_SO is high
+	Sqrt_D0, // out: 2-bit slice of the square-root operand chosen by the iteration counter
+	Sqrt_D1, // out: next lower 2-bit slice
+	Sqrt_D2, // out: next lower 2-bit slice
+	Sqrt_D3, // out: next lower 2-bit slice
+	First_iteration_cell_a_DO, // out: operand a for iteration cell 1
+	First_iteration_cell_b_DO, // out: operand b for iteration cell 1
+	Sec_iteration_cell_a_DO, // out: operand a for iteration cell 2
+	Sec_iteration_cell_b_DO, // out: operand b for iteration cell 2
+	Thi_iteration_cell_a_DO, // out: operand a for iteration cell 3
+	Thi_iteration_cell_b_DO, // out: operand b for iteration cell 3
+	Fou_iteration_cell_a_DO, // out: operand a for iteration cell 4
+	Fou_iteration_cell_b_DO, // out: operand b for iteration cell 4
+	Ready_SO, // out: 1 after reset, 0 from Start_SI until the final count is reached
+	Done_SO, // out: registered flag, high for the clock after the final count is reached
+	Mant_result_prenorm_DO, // out: result mantissa before normalisation
+	Exp_result_prenorm_DO // out: result exponent before normalisation
+);
+parameter C_DIV_RM           = 2; // width of the rounding-mode field
+parameter C_DIV_RM_NEAREST   = 2'h0;
+parameter C_DIV_RM_TRUNC     = 2'h1;
+parameter C_DIV_RM_PLUSINF   = 2'h2;
+parameter C_DIV_RM_MINUSINF  = 2'h3;
+parameter C_DIV_PC           = 5; // width of Precision_ctl_SI / Precision_ctl_S
+parameter C_DIV_OP           = 32;
+parameter C_DIV_MANT         = 23; // mantissa buses are [C_DIV_MANT:0], iteration words [C_DIV_MANT+1:0]
+parameter C_DIV_EXP          = 8; // exponent inputs are [C_DIV_EXP:0], the result exponent [C_DIV_EXP+1:0]
+parameter C_DIV_BIAS         = 127;
+parameter C_DIV_BIAS_AONE    = 8'h80; // constant added to the exponent sum for division
+parameter C_DIV_HALF_BIAS    = 63; // constant added to the exponent sum for square root
+parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1;
+parameter C_DIV_EXP_ZERO     = 8'h00; // zero padding used in the square-root exponent operand
+parameter C_DIV_EXP_ONE      = 8'h01;
+parameter C_DIV_EXP_INF      = 8'hff;
+parameter C_DIV_MANT_ZERO    = 23'h0;
+parameter C_DIV_MANT_NAN     = 23'h400000;
+// Of the C_DIV_* group above, this module's logic references C_DIV_PC, C_DIV_MANT,
+// C_DIV_EXP, C_DIV_BIAS_AONE, C_DIV_HALF_BIAS and C_DIV_EXP_ZERO.
+// The C_* / C_FPU_* / F_QNAN constants below are not referenced by any logic in this module.
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+// Command encodings and derived widths (none of them is used by this module's logic).
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+parameter C_FFLAG         = 5;
+	// Precision_ctl_Enable_S is declared in the module's parameter port list, not here.
+	input wire Clk_CI; // clock
+	input wire Rst_RBI; // asynchronous reset, active low
+	input wire Div_start_SI; // division start strobe
+	input wire Sqrt_start_SI; // square-root start strobe
+	input wire Start_SI; // captures the precision code, clears Done_SO / Ready_SO
+	input wire [C_DIV_PC - 1:0] Precision_ctl_SI; // precision code
+	input wire [C_DIV_MANT:0] Numerator_DI; // dividend word; also the square-root operand
+	input wire [C_DIV_EXP:0] Exp_num_DI; // exponent paired with Numerator_DI
+	input wire [C_DIV_MANT:0] Denominator_DI; // divisor word
+	input wire [C_DIV_EXP:0] Exp_den_DI; // exponent paired with Denominator_DI
+	input wire [C_DIV_MANT + 1:0] First_iteration_cell_sum_DI; // cell 1 sum
+	input wire First_iteration_cell_carry_DI; // cell 1 carry
+	input wire [1:0] Sqrt_Da0; // appended to the cell-1 sum in Sqrt_R1
+	input wire [C_DIV_MANT + 1:0] Sec_iteration_cell_sum_DI; // cell 2 sum
+	input wire Sec_iteration_cell_carry_DI; // cell 2 carry
+	input wire [1:0] Sqrt_Da1; // appended to the cell-2 sum in Sqrt_R2
+	input wire [C_DIV_MANT + 1:0] Thi_iteration_cell_sum_DI; // cell 3 sum
+	input wire Thi_iteration_cell_carry_DI; // cell 3 carry
+	input wire [1:0] Sqrt_Da2; // appended to the cell-3 sum in Sqrt_R3
+	input wire [C_DIV_MANT + 1:0] Fou_iteration_cell_sum_DI; // cell 4 sum
+	input wire Fou_iteration_cell_carry_DI; // cell 4 carry
+	input wire [1:0] Sqrt_Da3; // appended to the cell-4 sum in Sqrt_R4
+	output wire Div_start_dly_SO; // Div_start_SI delayed by one clock
+	output wire Sqrt_start_dly_SO; // Sqrt_start_SI delayed by one clock
+	output reg Div_enable_SO; // registered: a division is in progress
+	output reg Sqrt_enable_SO; // registered: a square root is in progress
+	output reg [1:0] Sqrt_D0; // combinational: operand slice for this cycle
+	output reg [1:0] Sqrt_D1; // combinational: operand slice for this cycle
+	output reg [1:0] Sqrt_D2; // combinational: operand slice for this cycle
+	output reg [1:0] Sqrt_D3; // combinational: operand slice for this cycle
+	output wire [C_DIV_MANT + 1:0] First_iteration_cell_a_DO; // cell 1 operand a
+	output wire [C_DIV_MANT + 1:0] First_iteration_cell_b_DO; // cell 1 operand b
+	output wire [C_DIV_MANT + 1:0] Sec_iteration_cell_a_DO; // cell 2 operand a
+	output wire [C_DIV_MANT + 1:0] Sec_iteration_cell_b_DO; // cell 2 operand b
+	output wire [C_DIV_MANT + 1:0] Thi_iteration_cell_a_DO; // cell 3 operand a
+	output wire [C_DIV_MANT + 1:0] Thi_iteration_cell_b_DO; // cell 3 operand b
+	output wire [C_DIV_MANT + 1:0] Fou_iteration_cell_a_DO; // cell 4 operand a
+	output wire [C_DIV_MANT + 1:0] Fou_iteration_cell_b_DO; // cell 4 operand b
+	output reg Ready_SO; // registered; reset value is 1
+	output reg Done_SO; // registered; reset value is 0
+	output wire [C_DIV_MANT:0] Mant_result_prenorm_DO; // result mantissa before normalisation
+	output wire [C_DIV_EXP + 1:0] Exp_result_prenorm_DO; // result exponent before normalisation
+	// Remainder / quotient state: *_DN is the next value, *_DP the registered one.
+	reg [C_DIV_MANT + 1:0] Partial_remainder_DN, Partial_remainder_DP;
+	reg [C_DIV_MANT:0] Quotient_DP;
+	// Division operands widened by one leading zero bit; _DB is the bitwise
+	// inverse of the widened denominator (so its top bit is 1).
+	wire [C_DIV_MANT + 1:0] Numerator_se_D = {1'b0, Numerator_DI};
+	wire [C_DIV_MANT + 1:0] Denominator_se_D = {1'b0, Denominator_DI};
+	wire [C_DIV_MANT + 1:0] Denominator_se_DB = {1'b1, ~Denominator_DI};
+	// Square-root operand: sits one bit lower in the word when Exp_num_DI[0] is 1.
+	wire [C_DIV_MANT + 1:0] Mant_D_sqrt_Norm =
+		Exp_num_DI[0] ? {1'b0, Numerator_DI} : {Numerator_DI, 1'b0};
+	// Precision code in force for the current operation.
+	// Loaded while Start_SI is high: Precision_ctl_SI when precision control is
+	// enabled, the fixed code 5'b10111 otherwise. Holds its value at other times.
+	reg [C_DIV_PC - 1:0] Precision_ctl_S;
+	always @(posedge Clk_CI or negedge Rst_RBI) begin
+		if (!Rst_RBI)
+			Precision_ctl_S <= {C_DIV_PC{1'b0}};
+		else if (Start_SI)
+			Precision_ctl_S <= (Precision_ctl_Enable_S == 1) ? Precision_ctl_SI
+			                                                  : 5'b10111;
+		// no trailing else: a clocked register keeps its value when not assigned
+	end
+	reg [2:0] State_ctl_S; // Crtl_cnt_S value at which the iteration ends, decoded from the precision code
+	always @(*) // combinational decode, so blocking assignments are used throughout
+		if (Precision_ctl_Enable_S == 1)
+			case (Precision_ctl_S)
+				5'b01000, 5'b01001, 5'b01010, 5'b01011: State_ctl_S = 3'b010;
+				5'b01100, 5'b01101, 5'b01110, 5'b01111: State_ctl_S = 3'b011;
+				5'b10000, 5'b10001, 5'b10010, 5'b10011: State_ctl_S = 3'b100;
+				5'b10100, 5'b10101, 5'b10110, 5'b10111: State_ctl_S = 3'b101;
+				default: State_ctl_S = 3'b101; // any other code runs the longest sequence
+			endcase
+		else
+			State_ctl_S = 3'b101; // precision control disabled: always the longest sequence
+	// Start strobes delayed by one clock: each register is set on a rising
+	// edge where its strobe is high and cleared on every other edge.
+	reg Div_start_dly_S;
+	reg Sqrt_start_dly_S;
+	always @(posedge Clk_CI or negedge Rst_RBI) begin
+		if (!Rst_RBI) begin
+			Div_start_dly_S <= 1'b0;
+			Sqrt_start_dly_S <= 1'b0;
+		end
+		else begin
+			if (Div_start_SI)
+				Div_start_dly_S <= 1'b1;
+			else
+				Div_start_dly_S <= 1'b0;
+			if (Sqrt_start_SI)
+				Sqrt_start_dly_S <= 1'b1;
+			else
+				Sqrt_start_dly_S <= 1'b0;
+		end
+	end
+	assign Div_start_dly_SO = Div_start_dly_S;
+	assign Sqrt_start_dly_SO = Sqrt_start_dly_S;
+	// Operation-type flags: raised by the matching start strobe, lowered on an
+	// edge where Done_SO is high (and the strobe is not), held otherwise.
+	always @(posedge Clk_CI or negedge Rst_RBI) begin
+		if (!Rst_RBI) begin
+			Div_enable_SO <= 1'b0;
+			Sqrt_enable_SO <= 1'b0;
+		end
+		else begin
+			if (Div_start_SI) Div_enable_SO <= 1'b1;
+			else if (Done_SO) Div_enable_SO <= 1'b0;
+			if (Sqrt_start_SI) Sqrt_enable_SO <= 1'b1;
+			else if (Done_SO) Sqrt_enable_SO <= 1'b0;
+		end
+	end
+	// Iteration counter: counts up while Fsm_enable_S (delayed start strobe, or
+	// counter non-zero) is high and returns to zero once it equals State_ctl_S.
+	reg [2:0] Crtl_cnt_S;
+	wire Start_dly_S = Div_start_dly_S | Sqrt_start_dly_S;
+	wire Fsm_enable_S = Start_dly_S | (|Crtl_cnt_S);
+	wire Final_state_S = (Crtl_cnt_S == State_ctl_S);
+	always @(posedge Clk_CI or negedge Rst_RBI) begin
+		if (!Rst_RBI)
+			Crtl_cnt_S <= 3'b000;
+		else if (Final_state_S)
+			Crtl_cnt_S <= 3'b000;
+		else if (Fsm_enable_S)
+			Crtl_cnt_S <= Crtl_cnt_S + 3'd1;
+		else
+			Crtl_cnt_S <= 3'b000;
+	end
+	// Handshake flags. Start_SI has priority over Final_state_S for both flags;
+	// Done_SO falls back to 0 on every other edge, Ready_SO holds its value.
+	always @(posedge Clk_CI or negedge Rst_RBI) begin
+		if (!Rst_RBI) begin
+			Done_SO <= 1'b0;
+			Ready_SO <= 1'b1;
+		end
+		else if (Start_SI) begin
+			Done_SO <= 1'b0;
+			Ready_SO <= 1'b0;
+		end
+		else if (Final_state_S) begin
+			Done_SO <= 1'b1;
+			Ready_SO <= 1'b1;
+		end
+		else
+			Done_SO <= 1'b0;
+	end
+	// Square-root remainder words: R0 feeds cell 1, R1..R3 feed cells 2..4 and
+	// R4 is the candidate for the stored partial remainder.
+	wire [C_DIV_MANT + 1:0] Sqrt_R0,
+	                        Sqrt_R1,
+	                        Sqrt_R2,
+	                        Sqrt_R3,
+	                        Sqrt_R4;
+	// QcntN: low bits of Quotient_DP followed by the three new bits; Q_cnt_cmp_N
+	// is its bitwise inverse. N is the Crtl_cnt_S value that uses the pair.
+	wire [3:0] Qcnt0, Q_cnt_cmp_0;
+	wire [6:0] Qcnt1, Q_cnt_cmp_1;
+	wire [10:0] Qcnt2, Q_cnt_cmp_2;
+	wire [14:0] Qcnt3, Q_cnt_cmp_3;
+	wire [18:0] Qcnt4, Q_cnt_cmp_4;
+	wire [22:0] Qcnt5, Q_cnt_cmp_5;
+	// b operands for the four cells in square-root mode.
+	reg [C_DIV_MANT + 1:0] Sqrt_Q0, Sqrt_Q1, Sqrt_Q2, Sqrt_Q3;
+	wire [C_DIV_MANT + 1:0] Sqrt_Q4;
+	// Zero-padded (Q_sqrt*) and one-padded inverted (Q_sqrt_com_*) candidates.
+	reg [C_DIV_MANT + 1:0] Q_sqrt0,
+	                       Q_sqrt1,
+	                       Q_sqrt2,
+	                       Q_sqrt3,
+	                       Q_sqrt4;
+	reg [C_DIV_MANT + 1:0] Q_sqrt_com_0,
+	                       Q_sqrt_com_1,
+	                       Q_sqrt_com_2,
+	                       Q_sqrt_com_3,
+	                       Q_sqrt_com_4;
+	// New bits this cycle: inverted bit 24 of the sums of cells 1, 2 and 3.
+	wire [2:0] Sqrt_new_bits_S = {~First_iteration_cell_sum_DI[24],
+	                              ~Sec_iteration_cell_sum_DI[24], ~Thi_iteration_cell_sum_DI[24]};
+	assign Qcnt0 = {1'b0, Sqrt_new_bits_S};
+	assign Qcnt1 = {Quotient_DP[3:0], Sqrt_new_bits_S};
+	assign Qcnt2 = {Quotient_DP[7:0], Sqrt_new_bits_S};
+	assign Qcnt3 = {Quotient_DP[11:0], Sqrt_new_bits_S};
+	assign Qcnt4 = {Quotient_DP[15:0], Sqrt_new_bits_S};
+	assign Qcnt5 = {Quotient_DP[19:0], Sqrt_new_bits_S};
+	assign Q_cnt_cmp_0 = ~Qcnt0;
+	assign Q_cnt_cmp_1 = ~Qcnt1;
+	assign Q_cnt_cmp_2 = ~Qcnt2;
+	assign Q_cnt_cmp_3 = ~Qcnt3;
+	assign Q_cnt_cmp_4 = ~Qcnt4;
+	assign Q_cnt_cmp_5 = ~Qcnt5;
+	always @(*) begin // per-count selection of the square-root operand slices and cell b operands
+		Q_sqrt4 = 1'sb0; Q_sqrt_com_4 = 1'sb0; // unused stage-4 words: driven on every path so no latch is inferred
+		case (Crtl_cnt_S)
+			3'b000: begin
+				Sqrt_D0 = Mant_D_sqrt_Norm[C_DIV_MANT + 1:C_DIV_MANT];
+				Sqrt_D1 = Mant_D_sqrt_Norm[C_DIV_MANT - 1:C_DIV_MANT - 2];
+				Sqrt_D2 = Mant_D_sqrt_Norm[C_DIV_MANT - 3:C_DIV_MANT - 4];
+				Sqrt_D3 = Mant_D_sqrt_Norm[C_DIV_MANT - 5:C_DIV_MANT - 6];
+				Q_sqrt0 = {24'h000000, Qcnt0[3]};
+				Q_sqrt1 = {23'h000000, Qcnt0[3:2]};
+				Q_sqrt2 = {22'h000000, Qcnt0[3:1]};
+				Q_sqrt3 = {21'h000000, Qcnt0[3:0]};
+				Q_sqrt_com_0 = {24'hffffff, Q_cnt_cmp_0[3]};
+				Q_sqrt_com_1 = {23'h7fffff, Q_cnt_cmp_0[3:2]};
+				Q_sqrt_com_2 = {22'h3fffff, Q_cnt_cmp_0[3:1]};
+				Q_sqrt_com_3 = {21'h1fffff, Q_cnt_cmp_0[3:0]};
+				Sqrt_Q0 = Q_sqrt_com_0; // first step always uses the inverted word
+				Sqrt_Q1 = (First_iteration_cell_sum_DI[24] ? Q_sqrt1 : Q_sqrt_com_1);
+				Sqrt_Q2 = (Sec_iteration_cell_sum_DI[24] ? Q_sqrt2 : Q_sqrt_com_2);
+				Sqrt_Q3 = (Thi_iteration_cell_sum_DI[24] ? Q_sqrt3 : Q_sqrt_com_3);
+			end
+			3'b001: begin
+				Sqrt_D0 = Mant_D_sqrt_Norm[C_DIV_MANT - 7:C_DIV_MANT - 8];
+				Sqrt_D1 = Mant_D_sqrt_Norm[C_DIV_MANT - 9:C_DIV_MANT - 10];
+				Sqrt_D2 = Mant_D_sqrt_Norm[C_DIV_MANT - 11:C_DIV_MANT - 12];
+				Sqrt_D3 = Mant_D_sqrt_Norm[C_DIV_MANT - 13:C_DIV_MANT - 14];
+				Q_sqrt0 = {21'h000000, Qcnt1[6:3]};
+				Q_sqrt1 = {20'h00000, Qcnt1[6:2]};
+				Q_sqrt2 = {19'h00000, Qcnt1[6:1]};
+				Q_sqrt3 = {18'h00000, Qcnt1[6:0]};
+				Q_sqrt_com_0 = {21'h1fffff, Q_cnt_cmp_1[6:3]};
+				Q_sqrt_com_1 = {20'hfffff, Q_cnt_cmp_1[6:2]};
+				Q_sqrt_com_2 = {19'h7ffff, Q_cnt_cmp_1[6:1]};
+				Q_sqrt_com_3 = {18'h3ffff, Q_cnt_cmp_1[6:0]};
+				Sqrt_Q0 = (Quotient_DP[0] ? Q_sqrt_com_0 : Q_sqrt0);
+				Sqrt_Q1 = (First_iteration_cell_sum_DI[24] ? Q_sqrt1 : Q_sqrt_com_1);
+				Sqrt_Q2 = (Sec_iteration_cell_sum_DI[24] ? Q_sqrt2 : Q_sqrt_com_2);
+				Sqrt_Q3 = (Thi_iteration_cell_sum_DI[24] ? Q_sqrt3 : Q_sqrt_com_3);
+			end
+			3'b010: begin
+				Sqrt_D0 = Mant_D_sqrt_Norm[C_DIV_MANT - 15:C_DIV_MANT - 16];
+				Sqrt_D1 = Mant_D_sqrt_Norm[C_DIV_MANT - 17:C_DIV_MANT - 18];
+				Sqrt_D2 = Mant_D_sqrt_Norm[C_DIV_MANT - 19:C_DIV_MANT - 20];
+				Sqrt_D3 = Mant_D_sqrt_Norm[C_DIV_MANT - 21:C_DIV_MANT - 22];
+				Q_sqrt0 = {17'h00000, Qcnt2[10:3]};
+				Q_sqrt1 = {16'h0000, Qcnt2[10:2]};
+				Q_sqrt2 = {15'h0000, Qcnt2[10:1]};
+				Q_sqrt3 = {14'h0000, Qcnt2[10:0]};
+				Q_sqrt_com_0 = {17'h1ffff, Q_cnt_cmp_2[10:3]};
+				Q_sqrt_com_1 = {16'hffff, Q_cnt_cmp_2[10:2]};
+				Q_sqrt_com_2 = {15'h7fff, Q_cnt_cmp_2[10:1]};
+				Q_sqrt_com_3 = {14'h3fff, Q_cnt_cmp_2[10:0]};
+				Sqrt_Q0 = (Quotient_DP[0] ? Q_sqrt_com_0 : Q_sqrt0);
+				Sqrt_Q1 = (First_iteration_cell_sum_DI[24] ? Q_sqrt1 : Q_sqrt_com_1);
+				Sqrt_Q2 = (Sec_iteration_cell_sum_DI[24] ? Q_sqrt2 : Q_sqrt_com_2);
+				Sqrt_Q3 = (Thi_iteration_cell_sum_DI[24] ? Q_sqrt3 : Q_sqrt_com_3);
+			end
+			3'b011: begin
+				Sqrt_D0 = {Mant_D_sqrt_Norm[0], 1'b0}; // last operand bit, padded with a zero
+				Sqrt_D1 = 1'sb0;
+				Sqrt_D2 = 1'sb0;
+				Sqrt_D3 = 1'sb0;
+				Q_sqrt0 = {13'h0000, Qcnt3[14:3]};
+				Q_sqrt1 = {12'h000, Qcnt3[14:2]};
+				Q_sqrt2 = {11'h000, Qcnt3[14:1]};
+				Q_sqrt3 = {10'h000, Qcnt3[14:0]};
+				Q_sqrt_com_0 = {13'h1fff, Q_cnt_cmp_3[14:3]};
+				Q_sqrt_com_1 = {12'hfff, Q_cnt_cmp_3[14:2]};
+				Q_sqrt_com_2 = {11'h7ff, Q_cnt_cmp_3[14:1]};
+				Q_sqrt_com_3 = {10'h3ff, Q_cnt_cmp_3[14:0]};
+				Sqrt_Q0 = (Quotient_DP[0] ? Q_sqrt_com_0 : Q_sqrt0);
+				Sqrt_Q1 = (First_iteration_cell_sum_DI[24] ? Q_sqrt1 : Q_sqrt_com_1);
+				Sqrt_Q2 = (Sec_iteration_cell_sum_DI[24] ? Q_sqrt2 : Q_sqrt_com_2);
+				Sqrt_Q3 = (Thi_iteration_cell_sum_DI[24] ? Q_sqrt3 : Q_sqrt_com_3);
+			end
+			3'b100: begin
+				Sqrt_D0 = 1'sb0;
+				Sqrt_D1 = 1'sb0;
+				Sqrt_D2 = 1'sb0;
+				Sqrt_D3 = 1'sb0;
+				Q_sqrt0 = {9'h000, Qcnt4[18:3]};
+				Q_sqrt1 = {8'h00, Qcnt4[18:2]};
+				Q_sqrt2 = {7'h00, Qcnt4[18:1]};
+				Q_sqrt3 = {6'h00, Qcnt4[18:0]};
+				Q_sqrt_com_0 = {9'h1ff, Q_cnt_cmp_4[18:3]};
+				Q_sqrt_com_1 = {8'hff, Q_cnt_cmp_4[18:2]};
+				Q_sqrt_com_2 = {7'h7f, Q_cnt_cmp_4[18:1]};
+				Q_sqrt_com_3 = {6'h3f, Q_cnt_cmp_4[18:0]};
+				Sqrt_Q0 = (Quotient_DP[0] ? Q_sqrt_com_0 : Q_sqrt0);
+				Sqrt_Q1 = (First_iteration_cell_sum_DI[24] ? Q_sqrt1 : Q_sqrt_com_1);
+				Sqrt_Q2 = (Sec_iteration_cell_sum_DI[24] ? Q_sqrt2 : Q_sqrt_com_2);
+				Sqrt_Q3 = (Thi_iteration_cell_sum_DI[24] ? Q_sqrt3 : Q_sqrt_com_3);
+			end
+			3'b101: begin
+				Sqrt_D0 = 1'sb0;
+				Sqrt_D1 = 1'sb0;
+				Sqrt_D2 = 1'sb0;
+				Sqrt_D3 = 1'sb0;
+				Q_sqrt0 = {5'h00, Qcnt5[22:3]};
+				Q_sqrt1 = {4'h0, Qcnt5[22:2]};
+				Q_sqrt2 = {3'h0, Qcnt5[22:1]};
+				Q_sqrt3 = {2'h0, Qcnt5[22:0]};
+				Q_sqrt_com_0 = {5'h1f, Q_cnt_cmp_5[22:3]};
+				Q_sqrt_com_1 = {4'hf, Q_cnt_cmp_5[22:2]};
+				Q_sqrt_com_2 = {3'h7, Q_cnt_cmp_5[22:1]};
+				Q_sqrt_com_3 = {2'h3, Q_cnt_cmp_5[22:0]};
+				Sqrt_Q0 = (Quotient_DP[0] ? Q_sqrt_com_0 : Q_sqrt0);
+				Sqrt_Q1 = (First_iteration_cell_sum_DI[24] ? Q_sqrt1 : Q_sqrt_com_1);
+				Sqrt_Q2 = (Sec_iteration_cell_sum_DI[24] ? Q_sqrt2 : Q_sqrt_com_2);
+				Sqrt_Q3 = (Thi_iteration_cell_sum_DI[24] ? Q_sqrt3 : Q_sqrt_com_3);
+			end
+			default: begin // counter values 6 and 7: drive everything to zero
+				Sqrt_D0 = 1'sb0;
+				Sqrt_D1 = 1'sb0;
+				Sqrt_D2 = 1'sb0;
+				Sqrt_D3 = 1'sb0;
+				Q_sqrt0 = 1'sb0;
+				Q_sqrt1 = 1'sb0;
+				Q_sqrt2 = 1'sb0;
+				Q_sqrt3 = 1'sb0;
+				Q_sqrt_com_0 = 1'sb0;
+				Q_sqrt_com_1 = 1'sb0;
+				Q_sqrt_com_2 = 1'sb0;
+				Q_sqrt_com_3 = 1'sb0;
+				Sqrt_Q0 = 1'sb0;
+				Sqrt_Q1 = 1'sb0;
+				Sqrt_Q2 = 1'sb0;
+				Sqrt_Q3 = 1'sb0;
+			end
+		endcase
+	end
+	// Square-root remainder chain: R0 enters cell 1 (all zeros on the delayed
+	// start strobe, else the stored remainder); R1..R4 take bit 24 and bits
+	// 21:0 of a cell's sum and append that cell's two Sqrt_Da bits.
+	assign Sqrt_R0 = Sqrt_start_dly_S ? {(C_DIV_MANT + 2){1'b0}} : Partial_remainder_DP;
+	assign Sqrt_R1 = {First_iteration_cell_sum_DI[24], First_iteration_cell_sum_DI[21:0], Sqrt_Da0};
+	assign Sqrt_R2 = {Sec_iteration_cell_sum_DI[24], Sec_iteration_cell_sum_DI[21:0], Sqrt_Da1};
+	assign Sqrt_R3 = {Thi_iteration_cell_sum_DI[24], Thi_iteration_cell_sum_DI[21:0], Sqrt_Da2};
+	assign Sqrt_R4 = {Fou_iteration_cell_sum_DI[24], Fou_iteration_cell_sum_DI[21:0], Sqrt_Da3};
+	// Cell 1. Division: a = numerator on the delayed start strobe, afterwards the
+	// low bits of the stored remainder with Quotient_DP[0] appended; b = inverted
+	// denominator when Sel_b_for_first_S is 1, plain denominator otherwise.
+	wire [C_DIV_MANT + 1:0] First_iteration_cell_div_a_D =
+		Div_start_dly_S ? Numerator_se_D : {Partial_remainder_DP[C_DIV_MANT:0], Quotient_DP[0]};
+	wire Sel_b_for_first_S = Div_start_dly_S ? 1'b1 : Quotient_DP[0];
+	wire [C_DIV_MANT + 1:0] First_iteration_cell_div_b_D = Sel_b_for_first_S ? Denominator_se_DB : Denominator_se_D;
+	assign First_iteration_cell_a_DO = Sqrt_enable_SO ? Sqrt_R0 : First_iteration_cell_div_a_D;
+	assign First_iteration_cell_b_DO = Sqrt_enable_SO ? Sqrt_Q0 : First_iteration_cell_div_b_D;
+	// Cell 2. Division: a = low bits of the cell-1 sum with the cell-1 carry
+	// appended; that same carry selects the inverted or plain denominator.
+	wire [C_DIV_MANT + 1:0] Sec_iteration_cell_div_a_D = {First_iteration_cell_sum_DI[C_DIV_MANT:0], First_iteration_cell_carry_DI};
+	wire Sel_b_for_sec_S = First_iteration_cell_carry_DI;
+	wire [C_DIV_MANT + 1:0] Sec_iteration_cell_div_b_D = Sel_b_for_sec_S ? Denominator_se_DB : Denominator_se_D;
+	assign Sec_iteration_cell_a_DO = Sqrt_enable_SO ? Sqrt_R1 : Sec_iteration_cell_div_a_D;
+	assign Sec_iteration_cell_b_DO = Sqrt_enable_SO ? Sqrt_Q1 : Sec_iteration_cell_div_b_D;
+	// Cell 3: same scheme, fed from the cell-2 sum and carry.
+	wire [C_DIV_MANT + 1:0] Thi_iteration_cell_div_a_D = {Sec_iteration_cell_sum_DI[C_DIV_MANT:0], Sec_iteration_cell_carry_DI};
+	wire Sel_b_for_thi_S = Sec_iteration_cell_carry_DI;
+	wire [C_DIV_MANT + 1:0] Thi_iteration_cell_div_b_D = Sel_b_for_thi_S ? Denominator_se_DB : Denominator_se_D;
+	assign Thi_iteration_cell_a_DO = Sqrt_enable_SO ? Sqrt_R2 : Thi_iteration_cell_div_a_D;
+	assign Thi_iteration_cell_b_DO = Sqrt_enable_SO ? Sqrt_Q2 : Thi_iteration_cell_div_b_D;
+	// Cell 4: same scheme, fed from the cell-3 sum and carry.
+	wire [C_DIV_MANT + 1:0] Fou_iteration_cell_div_a_D = {Thi_iteration_cell_sum_DI[C_DIV_MANT:0], Thi_iteration_cell_carry_DI};
+	wire Sel_b_for_fou_S = Thi_iteration_cell_carry_DI;
+	wire [C_DIV_MANT + 1:0] Fou_iteration_cell_div_b_D = Sel_b_for_fou_S ? Denominator_se_DB : Denominator_se_D;
+	assign Fou_iteration_cell_a_DO = Sqrt_enable_SO ? Sqrt_R3 : Fou_iteration_cell_div_a_D;
+	assign Fou_iteration_cell_b_DO = Sqrt_enable_SO ? Sqrt_Q3 : Fou_iteration_cell_div_b_D;
+	reg [C_DIV_MANT:0] Quotient_DN;
+	// Next-state logic: hold by default; while Fsm_enable_S is high take the new
+	// remainder and shift the four cell carries into the quotient (cell 1 first).
+	always @(*) begin
+		Partial_remainder_DN = Partial_remainder_DP;
+		Quotient_DN = Quotient_DP;
+		if (Fsm_enable_S) begin
+			Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R4 : Fou_iteration_cell_sum_DI;
+			Quotient_DN = {Quotient_DP[C_DIV_MANT - 4:0], First_iteration_cell_carry_DI, Sec_iteration_cell_carry_DI, Thi_iteration_cell_carry_DI, Fou_iteration_cell_carry_DI};
+		end
+	end
+	always @(posedge Clk_CI or negedge Rst_RBI) begin
+		if (!Rst_RBI) begin
+			Partial_remainder_DP <= {(C_DIV_MANT + 2){1'b0}};
+			Quotient_DP <= {(C_DIV_MANT + 1){1'b0}};
+		end
+		else begin
+			Partial_remainder_DP <= Partial_remainder_DN;
+			Quotient_DP <= Quotient_DN;
+		end
+	end
+	wire Msc_D; // carry out of the extra addition below
+	wire [C_DIV_MANT + 1:0] Sum_msc_D; // sum of the extra addition (not read anywhere in this module)
+	assign {Msc_D, Sum_msc_D} = First_iteration_cell_div_a_D + First_iteration_cell_div_b_D; // adds the division-mode operands of cell 1
+	reg [C_DIV_MANT:0] Mant_result_prenorm_noncorrect_D; // kept quotient bits, left-aligned and zero padded
+	reg [C_DIV_MANT:0] Msc_forcorrect_D; // single correction bit aligned with the least significant kept bit
+	wire [C_DIV_MANT + 1:0] Mant_result_prenorm_correct_D; // one bit wider so the carry of the correction is visible
+	always @(*) // for each precision code: which quotient bits are kept and which bit is the correction
+		if (Precision_ctl_Enable_S == 1)
+			case (Precision_ctl_S)
+				5'b01000: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 12:3], 15'b000000000000000};
+					Msc_forcorrect_D = {8'b00000000, Quotient_DP[2], 15'b000000000000000};
+				end
+				5'b01001: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 12:2], 14'b00000000000000};
+					Msc_forcorrect_D = {9'b000000000, Quotient_DP[1], 14'b00000000000000};
+				end
+				5'b01010: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 12:1], 13'b0000000000000};
+					Msc_forcorrect_D = {10'b0000000000, Quotient_DP[0], 13'b0000000000000};
+				end
+				5'b01011: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 12:0], 12'b000000000000};
+					Msc_forcorrect_D = {11'b00000000000, Msc_D, 12'b000000000000}; // all stored bits kept: correction comes from Msc_D
+				end
+				5'b01100: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 8:3], 11'b00000000000};
+					Msc_forcorrect_D = {12'b000000000000, Quotient_DP[2], 11'b00000000000};
+				end
+				5'b01101: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 8:2], 10'b0000000000};
+					Msc_forcorrect_D = {13'b0000000000000, Quotient_DP[1], 10'b0000000000};
+				end
+				5'b01110: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 8:1], 9'b000000000};
+					Msc_forcorrect_D = {14'b00000000000000, Quotient_DP[0], 9'b000000000};
+				end
+				5'b01111: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 8:0], 8'b00000000};
+					Msc_forcorrect_D = {15'b000000000000000, Msc_D, 8'b00000000};
+				end
+				5'b10000: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 4:3], 7'b0000000};
+					Msc_forcorrect_D = {16'b0000000000000000, Quotient_DP[2], 7'b0000000};
+				end
+				5'b10001: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 4:2], 6'b000000};
+					Msc_forcorrect_D = {17'b00000000000000000, Quotient_DP[1], 6'b000000};
+				end
+				5'b10010: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 4:1], 5'b00000};
+					Msc_forcorrect_D = {18'b000000000000000000, Quotient_DP[0], 5'b00000};
+				end
+				5'b10011: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT - 4:0], 4'b0000};
+					Msc_forcorrect_D = {19'b0000000000000000000, Msc_D, 4'b0000};
+				end
+				5'b10100: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT:3], 3'b000};
+					Msc_forcorrect_D = {20'b00000000000000000000, Quotient_DP[2], 3'b000};
+				end
+				5'b10101: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT:2], 2'b00};
+					Msc_forcorrect_D = {21'b000000000000000000000, Quotient_DP[1], 2'b00};
+				end
+				5'b10110: begin
+					Mant_result_prenorm_noncorrect_D = {Quotient_DP[C_DIV_MANT:1], 1'b0};
+					Msc_forcorrect_D = {22'b0000000000000000000000, Quotient_DP[0], 1'b0};
+				end
+				5'b10111: begin
+					Mant_result_prenorm_noncorrect_D = Quotient_DP[C_DIV_MANT:0]; // full quotient register
+					Msc_forcorrect_D = {23'b00000000000000000000000, Msc_D};
+				end
+				default: begin // any other code is treated like 5'b10111
+					Mant_result_prenorm_noncorrect_D = Quotient_DP[C_DIV_MANT:0];
+					Msc_forcorrect_D = {23'b00000000000000000000000, Msc_D};
+				end
+			endcase
+		else begin // precision control disabled: same selection as code 5'b10111
+			Mant_result_prenorm_noncorrect_D = Quotient_DP[C_DIV_MANT:0];
+			Msc_forcorrect_D = {23'b00000000000000000000000, Msc_D};
+		end
+	assign Mant_result_prenorm_correct_D = Mant_result_prenorm_noncorrect_D + {(Div_enable_SO ? Msc_forcorrect_D : 24'b000000000000000000000000)}; // correction bit is added only while Div_enable_SO is high
+	assign Mant_result_prenorm_DO = (Mant_result_prenorm_correct_D[C_DIV_MANT + 1] ? Mant_result_prenorm_noncorrect_D : Mant_result_prenorm_correct_D[C_DIV_MANT:0]); // if the addition carries out of the mantissa, output the uncorrected value
+	// Result exponent: a three-operand sum captured while a delayed start strobe is high, held otherwise.
+	// Division adds Exp_num, inverted Exp_den and C_DIV_BIAS_AONE; square root adds Exp_num[8:1], Exp_num[0] and C_DIV_HALF_BIAS.
+	reg [C_DIV_EXP + 1:0] Exp_result_prenorm_DP;
+	wire [C_DIV_EXP + 1:0] Exp_add_a_D = Sqrt_start_dly_S ? {{3{Exp_num_DI[C_DIV_EXP]}}, Exp_num_DI[C_DIV_EXP:1]}
+	                                                      : {{2{Exp_num_DI[C_DIV_EXP]}}, Exp_num_DI};
+	wire [C_DIV_EXP + 1:0] Exp_add_b_D = Sqrt_start_dly_S ? {1'b0, C_DIV_EXP_ZERO, Exp_num_DI[0]}
+	                                                      : {{2{~Exp_den_DI[C_DIV_EXP]}}, ~Exp_den_DI};
+	wire [C_DIV_EXP + 1:0] Exp_add_c_D = Div_start_dly_S ? {2'b00, C_DIV_BIAS_AONE}
+	                                                     : {2'b00, C_DIV_HALF_BIAS};
+	wire [C_DIV_EXP + 1:0] Exp_result_prenorm_DN =
+		Start_dly_S ? (Exp_add_a_D + Exp_add_b_D) + Exp_add_c_D : Exp_result_prenorm_DP;
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (!Rst_RBI) Exp_result_prenorm_DP <= {(C_DIV_EXP + 2){1'b0}};
+		else Exp_result_prenorm_DP <= Exp_result_prenorm_DN;
+	assign Exp_result_prenorm_DO = Exp_result_prenorm_DP;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/div_sqrt_top_tp.v b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/div_sqrt_top_tp.v
new file mode 100644
index 0000000..b5353d3
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/div_sqrt_top_tp.v
@@ -0,0 +1,139 @@
+// Top level of the divide / square-root unit: wires preprocess -> nrbd_nrsc_tp -> fpu_norm_div_sqrt.
+module div_sqrt_top_tp 
+#(
+   parameter   Precision_ctl_Enable_S = 1 // forwarded unchanged to nrbd_nrsc_tp
+)
+(
+	Clk_CI,
+	Rst_RBI,
+	Div_start_SI,
+	Sqrt_start_SI,
+	Operand_a_DI,
+	Operand_b_DI,
+	RM_SI,
+	Precision_ctl_SI,
+	Result_DO,
+	Exp_OF_SO,
+	Exp_UF_SO,
+	Div_zero_SO,
+	Ready_SO,
+	Done_SO
+);
+parameter C_DIV_RM           = 2;
+parameter C_DIV_RM_NEAREST   = 2'h0;
+parameter C_DIV_RM_TRUNC     = 2'h1;
+parameter C_DIV_RM_PLUSINF   = 2'h2;
+parameter C_DIV_RM_MINUSINF  = 2'h3;
+parameter C_DIV_PC           = 5;
+parameter C_DIV_OP           = 32;
+parameter C_DIV_MANT         = 23;
+parameter C_DIV_EXP          = 8;
+parameter C_DIV_BIAS         = 127;
+parameter C_DIV_BIAS_AONE    = 8'h80;
+parameter C_DIV_HALF_BIAS    = 63;
+parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1;
+parameter C_DIV_EXP_ZERO     = 8'h00;
+parameter C_DIV_EXP_ONE      = 8'h01;
+parameter C_DIV_EXP_INF      = 8'hff;
+parameter C_DIV_MANT_ZERO    = 23'h0;
+parameter C_DIV_MANT_NAN     = 23'h400000;
+	// Precision_ctl_Enable_S is declared in the module's parameter port list, not here.
+	input wire Clk_CI; // clock
+	input wire Rst_RBI; // reset, passed to both clocked sub-blocks
+	input wire Div_start_SI; // division start strobe
+	input wire Sqrt_start_SI; // square-root start strobe
+	input wire [C_DIV_OP - 1:0] Operand_a_DI;
+	input wire [C_DIV_OP - 1:0] Operand_b_DI;
+	input wire [C_DIV_RM - 1:0] RM_SI; // rounding mode
+	input wire [C_DIV_PC - 1:0] Precision_ctl_SI; // precision code
+	output wire [31:0] Result_DO; // {sign, exponent, mantissa}
+	output wire Exp_OF_SO;
+	output wire Exp_UF_SO;
+	output wire Div_zero_SO;
+	output wire Ready_SO;
+	output wire Done_SO;
+	wire [C_DIV_MANT - 1:0] Mant_res_DO;
+	wire [C_DIV_EXP - 1:0] Exp_res_DO;
+	wire Sign_res_DO;
+	assign Result_DO = {Sign_res_DO, Exp_res_DO, Mant_res_DO};
+	wire [C_DIV_EXP:0] Exp_a_D;
+	wire [C_DIV_EXP:0] Exp_b_D;
+	wire [C_DIV_MANT:0] Mant_a_D;
+	wire [C_DIV_MANT:0] Mant_b_D;
+	wire [C_DIV_EXP + 1:0] Exp_z_D;
+	wire [C_DIV_MANT:0] Mant_z_D;
+	wire Sign_z_D;
+	wire Start_S;
+	wire [C_DIV_RM - 1:0] RM_dly_S;
+	wire Div_enable_S;
+	wire Sqrt_enable_S;
+	wire Inf_a_S;
+	wire Inf_b_S;
+	wire Zero_a_S;
+	wire Zero_b_S;
+	wire NaN_a_S;
+	wire NaN_b_S;
+	// Stage 1: drives exponents, mantissas, result sign, delayed rounding mode and the special-value flags.
+	preprocess precess_U0(
+		.Clk_CI(Clk_CI),
+		.Rst_RBI(Rst_RBI),
+		.Div_start_SI(Div_start_SI),
+		.Sqrt_start_SI(Sqrt_start_SI),
+		.Operand_a_DI(Operand_a_DI),
+		.Operand_b_DI(Operand_b_DI),
+		.RM_SI(RM_SI),
+		.Start_SO(Start_S),
+		.Exp_a_DO_norm(Exp_a_D),
+		.Exp_b_DO_norm(Exp_b_D),
+		.Mant_a_DO_norm(Mant_a_D),
+		.Mant_b_DO_norm(Mant_b_D),
+		.RM_dly_SO(RM_dly_S),
+		.Sign_z_DO(Sign_z_D),
+		.Inf_a_SO(Inf_a_S),
+		.Inf_b_SO(Inf_b_S),
+		.Zero_a_SO(Zero_a_S),
+		.Zero_b_SO(Zero_b_S),
+		.NaN_a_SO(NaN_a_S),
+		.NaN_b_SO(NaN_b_S)
+	);
+	// Stage 2: iterative core; drives Ready_SO / Done_SO and the pre-normalisation exponent and mantissa.
+	nrbd_nrsc_tp #(Precision_ctl_Enable_S) nrbd_nrsc_U0(
+		.Clk_CI(Clk_CI),
+		.Rst_RBI(Rst_RBI),
+		.Div_start_SI(Div_start_SI),
+		.Sqrt_start_SI(Sqrt_start_SI),
+		.Start_SI(Start_S),
+		.Div_enable_SO(Div_enable_S),
+		.Sqrt_enable_SO(Sqrt_enable_S),
+		.Precision_ctl_SI(Precision_ctl_SI),
+		.Exp_a_DI(Exp_a_D),
+		.Exp_b_DI(Exp_b_D),
+		.Mant_a_DI(Mant_a_D),
+		.Mant_b_DI(Mant_b_D),
+		.Ready_SO(Ready_SO),
+		.Done_SO(Done_SO),
+		.Exp_z_DO(Exp_z_D),
+		.Mant_z_DO(Mant_z_D)
+	);
+	// Stage 3: produces the result fields and the overflow / underflow / divide-by-zero flags.
+	fpu_norm_div_sqrt fpu_norm_U0(
+		.Mant_in_DI(Mant_z_D),
+		.Exp_in_DI(Exp_z_D),
+		.Sign_in_DI(Sign_z_D),
+		.Div_enable_SI(Div_enable_S),
+		.Sqrt_enable_SI(Sqrt_enable_S),
+		.Inf_a_SI(Inf_a_S),
+		.Inf_b_SI(Inf_b_S),
+		.Zero_a_SI(Zero_a_S),
+		.Zero_b_SI(Zero_b_S),
+		.NaN_a_SI(NaN_a_S),
+		.NaN_b_SI(NaN_b_S),
+		.RM_SI(RM_dly_S),
+		.Mant_res_DO(Mant_res_DO),
+		.Exp_res_DO(Exp_res_DO),
+		.Sign_res_DO(Sign_res_DO),
+		.Exp_OF_SO(Exp_OF_SO),
+		.Exp_UF_SO(Exp_UF_SO),
+		.Div_zero_SO(Div_zero_SO)
+	);
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/fpu_defs_div_sqrt_tp.sv b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/fpu_defs_div_sqrt_tp.sv
new file mode 100644
index 0000000..9193c40
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/fpu_defs_div_sqrt_tp.sv
@@ -0,0 +1,39 @@
+// Copyright 2017 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+///////////////////////////////////////////////////////////////////////////////
+// This file contains all div_sqrt_top parameters                            //
+//                                                                           //
+// Authors    : Lei Li  (lile@iis.ee.ethz.ch)                                //
+// Copyright (c) 2017 Integrated Systems Laboratory, ETH Zurich              //
+///////////////////////////////////////////////////////////////////////////////
+
+
+package fpu_defs_div_sqrt_tp;
+   // Widths, rounding-mode codes and special field values for the div/sqrt unit.
+   localparam C_DIV_RM           = 2;           // rounding-mode field width
+   localparam C_DIV_RM_NEAREST   = 2'h0;
+   localparam C_DIV_RM_TRUNC     = 2'h1;
+   localparam C_DIV_RM_PLUSINF   = 2'h2;
+   localparam C_DIV_RM_MINUSINF  = 2'h3;
+   localparam C_DIV_PC           = 5;           // precision-control field width
+   localparam C_DIV_OP           = 32;          // operand width
+   localparam C_DIV_MANT         = 23;          // mantissa width
+   localparam C_DIV_EXP          = 8;           // exponent width
+   localparam C_DIV_BIAS         = 127;
+   localparam C_DIV_BIAS_AONE    = 8'h80;       // C_DIV_BIAS + 1
+   localparam C_DIV_HALF_BIAS    = 63;
+   localparam C_DIV_MANT_PRENORM = C_DIV_MANT+1;
+   localparam C_DIV_EXP_ZERO     = 8'h00;
+   localparam C_DIV_EXP_ONE      = 8'h01;
+   localparam C_DIV_EXP_INF      = 8'hff;
+   localparam C_DIV_MANT_ZERO    = 23'h0;
+   localparam C_DIV_MANT_NAN     = 23'h400000;
+   // Package constants cannot be overridden, hence localparam.
+endpackage : fpu_defs_div_sqrt_tp
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/fpu_norm_div_sqrt.v b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/fpu_norm_div_sqrt.v
new file mode 100644
index 0000000..4c91f98
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/fpu_norm_div_sqrt.v
@@ -0,0 +1,283 @@
+module fpu_norm_div_sqrt ( // Final div/sqrt stage: resolves special cases, denormalizes small results and rounds the mantissa (purely combinational).
+	Mant_in_DI,
+	Exp_in_DI,
+	Sign_in_DI,
+	Div_enable_SI,
+	Sqrt_enable_SI,
+	Inf_a_SI,
+	Inf_b_SI,
+	Zero_a_SI,
+	Zero_b_SI,
+	NaN_a_SI,
+	NaN_b_SI,
+	RM_SI,
+	Mant_res_DO,
+	Exp_res_DO,
+	Sign_res_DO,
+	Exp_OF_SO,
+	Exp_UF_SO,
+	Div_zero_SO
+);
+parameter C_DIV_RM           = 2; // These 18 parameters repeat the values of package fpu_defs_div_sqrt_tp.
+parameter C_DIV_RM_NEAREST   = 2'h0;
+parameter C_DIV_RM_TRUNC     = 2'h1;
+parameter C_DIV_RM_PLUSINF   = 2'h2;
+parameter C_DIV_RM_MINUSINF  = 2'h3;
+parameter C_DIV_PC           = 5;
+parameter C_DIV_OP           = 32;
+parameter C_DIV_MANT         = 23;
+parameter C_DIV_EXP          = 8;
+parameter C_DIV_BIAS         = 127;
+parameter C_DIV_BIAS_AONE    = 8'h80;
+parameter C_DIV_HALF_BIAS    = 63;
+parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1;
+parameter C_DIV_EXP_ZERO     = 8'h00;
+parameter C_DIV_EXP_ONE      = 8'h01;
+parameter C_DIV_EXP_INF      = 8'hff;
+parameter C_DIV_MANT_ZERO    = 23'h0;
+parameter C_DIV_MANT_NAN     = 23'h400000;
+	input wire [C_DIV_MANT_PRENORM - 1:0] Mant_in_DI; // raw mantissa; its MSB is tested below as the leading (hidden) bit
+	input wire signed [C_DIV_EXP + 1:0] Exp_in_DI; // raw exponent, two bits wider than the result exponent; MSB set means negative
+	input wire Sign_in_DI; // sign of the raw result
+	input wire Div_enable_SI; // enables the division-only special cases
+	input wire Sqrt_enable_SI; // enables the square-root-only special case (negative operand)
+	input wire Inf_a_SI; // operand a is infinity
+	input wire Inf_b_SI; // operand b is infinity
+	input wire Zero_a_SI; // operand a is zero
+	input wire Zero_b_SI; // operand b is zero
+	input wire NaN_a_SI; // operand a is NaN
+	input wire NaN_b_SI; // operand b is NaN
+	input wire [C_DIV_RM - 1:0] RM_SI; // rounding mode, compared against the C_DIV_RM_* codes
+	output wire [C_DIV_MANT - 1:0] Mant_res_DO; // rounded fraction without the hidden bit
+	output wire [C_DIV_EXP - 1:0] Exp_res_DO; // result exponent after the rounding carry
+	output reg Sign_res_DO; // result sign (forced to 0 for every NaN result)
+	output reg Exp_OF_SO; // overflow flag
+	output reg Exp_UF_SO; // underflow flag
+	output reg Div_zero_SO; // raised by the division branches with a zero divisor
+	reg [C_DIV_MANT:0] Mant_res_norm_D; // selected mantissa before rounding, hidden bit included
+	reg [C_DIV_EXP - 1:0] Exp_res_norm_D; // selected exponent before the rounding carry
+	wire [C_DIV_EXP + 1:0] Exp_Max_RS_D;
+	assign Exp_Max_RS_D = (Exp_in_DI[C_DIV_EXP:0] + C_DIV_MANT) + 1; // low C_DIV_EXP+1 exponent bits + C_DIV_MANT + 1; the MSB is the carry of that sum
+	wire [C_DIV_EXP + 1:0] Num_RS_D;
+	assign Num_RS_D = ~Exp_in_DI + 2; // equals 1 - Exp_in_DI in two's complement: right-shift distance for negative exponents
+	wire [C_DIV_MANT_PRENORM + 1:0] Mant_RS_D; // right-shifted mantissa plus two extra low bits
+	wire [C_DIV_MANT - 2:0] Mant_forsticky_D; // bits shifted out below Mant_RS_D
+	assign {Mant_RS_D, Mant_forsticky_D} = {Mant_in_DI, 1'b0, 1'b0, 22'h000000} >> Num_RS_D; // NOTE(review): the 22'h literal hard-codes C_DIV_MANT-1 = 22
+	wire Mant_sticky_D;
+	assign Mant_sticky_D = (Exp_in_DI[C_DIV_EXP + 1] && Exp_Max_RS_D[C_DIV_EXP + 1]) && |Mant_forsticky_D; // sticky only counts when the right-shift branch below is the one taken
+	reg [1:0] Mant_lower_D; // two bits directly below the mantissa LSB, consumed by the rounding logic
+	always @(*) // Priority-ordered result selection; every branch assigns all seven targets, so no latch is inferred.
+		if (NaN_a_SI) begin // NaN operand a -> NaN result
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = {1'b0, C_DIV_MANT_NAN};
+			Exp_res_norm_D = 1'sb1; // signed 1-bit literal: sign-extends to an all-ones exponent
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = 1'b0;
+		end
+		else if (NaN_b_SI) begin // NaN operand b -> NaN result
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = {1'b0, C_DIV_MANT_NAN};
+			Exp_res_norm_D = 1'sb1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = 1'b0;
+		end
+		else if (Inf_a_SI) begin
+			if (Div_enable_SI && Inf_b_SI) begin // Inf / Inf -> NaN
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = {1'b0, C_DIV_MANT_NAN};
+				Exp_res_norm_D = 1'sb1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = 1'b0;
+			end
+			else begin // Inf operand a otherwise -> signed Inf (all-ones exponent, zero fraction)
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b1; // NOTE(review): overflow is flagged although the Inf result is exact -- confirm this is intended
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+			end
+		end
+		else if (Div_enable_SI && Inf_b_SI) begin // x / Inf -> signed zero
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b1; // NOTE(review): overflow is flagged on a zero result -- confirm this is intended
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = 1'sb0;
+			Exp_res_norm_D = 1'sb0;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+		end
+		else if (Zero_a_SI) begin
+			if (Div_enable_SI && Zero_b_SI) begin // 0 / 0 -> NaN, Div_zero_SO raised
+				Div_zero_SO = 1'b1;
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = {1'b0, C_DIV_MANT_NAN};
+				Exp_res_norm_D = 1'sb1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = 1'b0;
+			end
+			else begin // zero operand a otherwise -> signed zero
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+			end
+		end
+		else if (Div_enable_SI && Zero_b_SI) begin // x / 0 -> signed Inf, Div_zero_SO raised
+			Div_zero_SO = 1'b1;
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = 1'sb0;
+			Exp_res_norm_D = 1'sb1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+		end
+		else if (Sign_in_DI && Sqrt_enable_SI) begin // square root with the sign set -> NaN
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = {1'b0, C_DIV_MANT_NAN};
+			Exp_res_norm_D = 1'sb1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = 1'b0;
+		end
+		else if (Exp_in_DI[C_DIV_EXP:0] == {(C_DIV_EXP >= 0 ? C_DIV_EXP + 1 : 1 - C_DIV_EXP) {1'sb0}}) begin // low C_DIV_EXP+1 exponent bits are all zero
+			if (Mant_in_DI != {C_DIV_MANT_PRENORM {1'sb0}}) begin // non-zero mantissa: shift right by one, exponent field 0, underflow
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b1;
+				Mant_res_norm_D = {2'b00, Mant_in_DI[C_DIV_MANT_PRENORM - 1:1]}; // RHS is one bit wider than the target; the top zero is dropped on assignment
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = {Mant_in_DI[0], 1'b0}; // the shifted-out LSB becomes the upper rounding bit
+				Sign_res_DO = Sign_in_DI;
+			end
+			else begin // zero mantissa -> signed zero
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+			end
+		end
+		else if ((Exp_in_DI[C_DIV_EXP:0] == C_DIV_EXP_ONE) && ~Mant_in_DI[C_DIV_MANT_PRENORM - 1]) begin // exponent one with the leading bit clear: mantissa kept, exponent field 0, underflow
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b1;
+			Mant_res_norm_D = Mant_in_DI[C_DIV_MANT_PRENORM - 1:0];
+			Exp_res_norm_D = 1'sb0;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+		end
+		else if (Exp_in_DI[C_DIV_EXP + 1]) begin // negative exponent
+			if (~Exp_Max_RS_D[C_DIV_EXP + 1]) begin // no carry in Exp_Max_RS_D: result forced to zero
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b1; // NOTE(review): a too-small result raises the overflow flag, not underflow -- confirm this is intended
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+			end
+			else begin // otherwise use the right-shifted mantissa with exponent field 0, underflow
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b1;
+				Mant_res_norm_D = {1'b0, Mant_RS_D[C_DIV_MANT + 1:2]};
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = Mant_RS_D[1:0];
+				Sign_res_DO = Sign_in_DI;
+			end
+		end
+		else if (Exp_in_DI[C_DIV_EXP]) begin // non-negative exponent with bit C_DIV_EXP set: too large -> signed Inf, overflow
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b1;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = 1'sb0;
+			Exp_res_norm_D = 1'sb1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+		end
+		else if (Exp_in_DI[C_DIV_EXP - 1:0] == {C_DIV_EXP {1'sb1}}) begin // exponent field is all ones
+			if (~Mant_in_DI[C_DIV_MANT_PRENORM - 1]) begin // leading bit clear: shift left by one and decrement the exponent
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = {Mant_in_DI[C_DIV_MANT_PRENORM - 2:0], 1'b0};
+				Exp_res_norm_D = Exp_in_DI[C_DIV_EXP - 1:0] - 1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+			end
+			else if (Mant_in_DI != {C_DIV_MANT_PRENORM {1'sb0}}) begin // holds whenever the leading bit is set -> signed Inf, overflow
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b1;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+			end
+			else begin // same assignments as the branch above; not reachable for known (non-X) inputs
+				Div_zero_SO = 1'b0;
+				Exp_OF_SO = 1'b1;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+			end
+		end
+		else if (Mant_in_DI[C_DIV_MANT_PRENORM - 1]) begin // in-range exponent, leading bit set: pass through unchanged
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = Mant_in_DI[C_DIV_MANT_PRENORM - 1:0];
+			Exp_res_norm_D = Exp_in_DI[C_DIV_EXP - 1:0];
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+		end
+		else begin // in-range exponent, leading bit clear: shift left by one and decrement the exponent
+			Div_zero_SO = 1'b0;
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = {Mant_in_DI[C_DIV_MANT_PRENORM - 2:0], 1'b0};
+			Exp_res_norm_D = Exp_in_DI[C_DIV_EXP - 1:0] - 1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+		end
+	wire [C_DIV_MANT:0] Mant_upper_D; // mantissa that receives the rounding increment
+	wire [C_DIV_MANT + 1:0] Mant_upperRounded_D; // one bit wider to catch the rounding carry
+	reg Mant_roundUp_S; // 1 when the mantissa must be incremented
+	wire Mant_rounded_S; // 1 when any discarded bit is set (result inexact)
+	assign Mant_upper_D = Mant_res_norm_D;
+	assign Mant_rounded_S = |Mant_lower_D | Mant_sticky_D;
+	always @(*) begin // rounding decision per mode
+		Mant_roundUp_S = 1'b0;
+		case (RM_SI)
+			C_DIV_RM_NEAREST: Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_D) || Mant_upper_D[0]); // above half, or exactly half with an odd LSB (ties to even)
+			C_DIV_RM_TRUNC: Mant_roundUp_S = 0;
+			C_DIV_RM_PLUSINF: Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI; // inexact and sign clear
+			C_DIV_RM_MINUSINF: Mant_roundUp_S = Mant_rounded_S & Sign_in_DI; // inexact and sign set
+			default: Mant_roundUp_S = 0;
+		endcase
+	end
+	wire Mant_renorm_S; // rounding carried out of the mantissa MSB
+	assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_S;
+	assign Mant_renorm_S = Mant_upperRounded_D[C_DIV_MANT + 1];
+	wire Rounded_SO; // NOTE(review): local wire, not a port -- the inexact indication never leaves this module
+	assign Mant_res_DO = (Mant_renorm_S ? Mant_upperRounded_D[C_DIV_MANT:1] : Mant_upperRounded_D[C_DIV_MANT - 1:0]); // on carry take the fraction one position higher
+	assign Exp_res_DO = Exp_res_norm_D + Mant_renorm_S; // the carry increments the exponent
+	assign Rounded_SO = Mant_rounded_S;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/iteration_div_sqrt.v b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/iteration_div_sqrt.v
new file mode 100644
index 0000000..4254739
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/iteration_div_sqrt.v
@@ -0,0 +1,46 @@
+module iteration_div_sqrt #(
+	parameter C_DIV_RM           = 2,
+	parameter C_DIV_RM_NEAREST   = 2'h0,
+	parameter C_DIV_RM_TRUNC     = 2'h1,
+	parameter C_DIV_RM_PLUSINF   = 2'h2,
+	parameter C_DIV_RM_MINUSINF  = 2'h3,
+	parameter C_DIV_PC           = 5,
+	parameter C_DIV_OP           = 32,
+	parameter C_DIV_MANT         = 23,
+	parameter C_DIV_EXP          = 8,
+	parameter C_DIV_BIAS         = 127,
+	parameter C_DIV_BIAS_AONE    = 8'h80,
+	parameter C_DIV_HALF_BIAS    = 63,
+	parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1,
+	parameter C_DIV_EXP_ZERO     = 8'h00,
+	parameter C_DIV_EXP_ONE      = 8'h01,
+	parameter C_DIV_EXP_INF      = 8'hff,
+	parameter C_DIV_MANT_ZERO    = 23'h0,
+	parameter C_DIV_MANT_NAN     = 23'h400000
+) (
+	input  wire [C_DIV_MANT + 1:0] A_DI,
+	input  wire [C_DIV_MANT + 1:0] B_DI,
+	input  wire                    Div_enable_SI,
+	input  wire                    Sqrt_enable_SI,
+	input  wire [1:0]              D_DI,
+	output wire [1:0]              D_DO,
+	output wire [C_DIV_MANT + 1:0] Sum_DO,
+	output wire                    Carry_out_DO
+);
+	// Single add cell of the shared divide / square-root datapath. It holds no
+	// state: every output is a combinational function of the inputs.
+
+	// D_DO: bit 0 is the inverse of D_DI[0]; bit 1 is the XNOR of both D_DI bits.
+	assign D_DO = {D_DI[1] ~^ D_DI[0], ~D_DI[0]};
+
+	// Carry-in of the adder. It can only be raised in square-root mode, when
+	// D_DI is non-zero, and it is always held at zero while Div_enable_SI is set.
+	wire sqrt_carry_in = Sqrt_enable_SI & (|D_DI);
+	wire adder_cin     = ~Div_enable_SI & sqrt_carry_in;
+
+	// The sum is one bit wider than the operands; that extra MSB is exported
+	// separately as Carry_out_DO.
+	assign {Carry_out_DO, Sum_DO} = A_DI + B_DI + adder_cin;
+
+	// Only C_DIV_MANT is used here; the other parameters keep the override list unchanged.
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/iteration_div_sqrt_first.v b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/iteration_div_sqrt_first.v
new file mode 100644
index 0000000..e108aa3
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/iteration_div_sqrt_first.v
@@ -0,0 +1,48 @@
+module iteration_div_sqrt_first #(
+	parameter C_DIV_RM           = 2,
+	parameter C_DIV_RM_NEAREST   = 2'h0,
+	parameter C_DIV_RM_TRUNC     = 2'h1,
+	parameter C_DIV_RM_PLUSINF   = 2'h2,
+	parameter C_DIV_RM_MINUSINF  = 2'h3,
+	parameter C_DIV_PC           = 5,
+	parameter C_DIV_OP           = 32,
+	parameter C_DIV_MANT         = 23,
+	parameter C_DIV_EXP          = 8,
+	parameter C_DIV_BIAS         = 127,
+	parameter C_DIV_BIAS_AONE    = 8'h80,
+	parameter C_DIV_HALF_BIAS    = 63,
+	parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1,
+	parameter C_DIV_EXP_ZERO     = 8'h00,
+	parameter C_DIV_EXP_ONE      = 8'h01,
+	parameter C_DIV_EXP_INF      = 8'hff,
+	parameter C_DIV_MANT_ZERO    = 23'h0,
+	parameter C_DIV_MANT_NAN     = 23'h400000
+) (
+	input  wire [C_DIV_MANT + 1:0] A_DI,
+	input  wire [C_DIV_MANT + 1:0] B_DI,
+	input  wire                    Div_enable_SI,
+	input  wire                    Div_start_dly_SI,
+	input  wire                    Sqrt_enable_SI,
+	input  wire [1:0]              D_DI,
+	output wire [1:0]              D_DO,
+	output wire [C_DIV_MANT + 1:0] Sum_DO,
+	output wire                    Carry_out_DO
+);
+	// First add cell of the divide / square-root chain. It differs from
+	// iteration_div_sqrt only in the carry-in used while dividing, which
+	// follows Div_start_dly_SI instead of being tied to zero.
+
+	// D_DO: bit 0 is the inverse of D_DI[0]; bit 1 is the XNOR of both D_DI bits.
+	assign D_DO = {D_DI[1] ~^ D_DI[0], ~D_DI[0]};
+
+	// Square-root carry request: sqrt mode active and D_DI non-zero.
+	wire sqrt_carry_in = Sqrt_enable_SI & (|D_DI);
+
+	// While dividing the carry-in is Div_start_dly_SI; otherwise the
+	// square-root request is used.
+	wire adder_cin = Div_enable_SI ? Div_start_dly_SI : sqrt_carry_in;
+
+	// The sum is one bit wider than the operands; the extra MSB is the carry out.
+	assign {Carry_out_DO, Sum_DO} = A_DI + B_DI + adder_cin;
+
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/nrbd_nrsc_tp.v b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/nrbd_nrsc_tp.v
new file mode 100644
index 0000000..7b3b0cf
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/nrbd_nrsc_tp.v
@@ -0,0 +1,169 @@
+module nrbd_nrsc_tp // Structural wrapper: one control_tp block wired to four add cells (one iteration_div_sqrt_first, three iteration_div_sqrt).
+#(
+   parameter   Precision_ctl_Enable_S = 1 // forwarded unchanged to control_tp
+)
+(
+	Clk_CI,
+	Rst_RBI,
+	Div_start_SI,
+	Sqrt_start_SI,
+	Start_SI,
+	Precision_ctl_SI,
+	Mant_a_DI,
+	Mant_b_DI,
+	Exp_a_DI,
+	Exp_b_DI,
+	Div_enable_SO,
+	Sqrt_enable_SO,
+	Ready_SO,
+	Done_SO,
+	Mant_z_DO,
+	Exp_z_DO
+);
+parameter C_DIV_RM           = 2; // These 18 parameters repeat the values of package fpu_defs_div_sqrt_tp.
+parameter C_DIV_RM_NEAREST   = 2'h0;
+parameter C_DIV_RM_TRUNC     = 2'h1;
+parameter C_DIV_RM_PLUSINF   = 2'h2;
+parameter C_DIV_RM_MINUSINF  = 2'h3;
+parameter C_DIV_PC           = 5;
+parameter C_DIV_OP           = 32;
+parameter C_DIV_MANT         = 23;
+parameter C_DIV_EXP          = 8;
+parameter C_DIV_BIAS         = 127;
+parameter C_DIV_BIAS_AONE    = 8'h80;
+parameter C_DIV_HALF_BIAS    = 63;
+parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1;
+parameter C_DIV_EXP_ZERO     = 8'h00;
+parameter C_DIV_EXP_ONE      = 8'h01;
+parameter C_DIV_EXP_INF      = 8'hff;
+parameter C_DIV_MANT_ZERO    = 23'h0;
+parameter C_DIV_MANT_NAN     = 23'h400000;
+	//parameter Precision_ctl_Enable_S = 1; (now declared in the parameter port list at the top of the module)
+	input wire Clk_CI; // clock, passed to control_tp
+	input wire Rst_RBI; // reset, passed to control_tp
+	input wire Div_start_SI; // division start request
+	input wire Sqrt_start_SI; // square-root start request
+	input wire Start_SI; // common start signal
+	input wire [C_DIV_PC - 1:0] Precision_ctl_SI; // precision control word
+	input wire [C_DIV_MANT:0] Mant_a_DI; // mantissa of operand a, connected to control_tp.Numerator_DI
+	input wire [C_DIV_MANT:0] Mant_b_DI; // mantissa of operand b, connected to control_tp.Denominator_DI
+	input wire [C_DIV_EXP:0] Exp_a_DI; // exponent of operand a
+	input wire [C_DIV_EXP:0] Exp_b_DI; // exponent of operand b
+	output wire Div_enable_SO; // also fed back into every add cell
+	output wire Sqrt_enable_SO; // also fed back into every add cell
+	output wire Ready_SO;
+	output wire Done_SO;
+	output wire [C_DIV_MANT:0] Mant_z_DO; // control_tp.Mant_result_prenorm_DO
+	output wire [C_DIV_EXP + 1:0] Exp_z_DO; // control_tp.Exp_result_prenorm_DO
+	wire [C_DIV_MANT + 1:0] First_iteration_cell_sum_D; // cell results returned to control_tp
+	wire [C_DIV_MANT + 1:0] Sec_iteration_cell_sum_D;
+	wire [C_DIV_MANT + 1:0] Thi_iteration_cell_sum_D;
+	wire [C_DIV_MANT + 1:0] Fou_iteration_cell_sum_D;
+	wire First_iteration_cell_carry_D;
+	wire Sec_iteration_cell_carry_D;
+	wire Thi_iteration_cell_carry_D;
+	wire Fou_iteration_cell_carry_D;
+	wire [1:0] Sqrt_Da0; // D_DO of each cell, returned to control_tp
+	wire [1:0] Sqrt_Da1;
+	wire [1:0] Sqrt_Da2;
+	wire [1:0] Sqrt_Da3;
+	wire [1:0] Sqrt_D0; // D_DI of each cell, driven by control_tp
+	wire [1:0] Sqrt_D1;
+	wire [1:0] Sqrt_D2;
+	wire [1:0] Sqrt_D3;
+	wire [C_DIV_MANT + 1:0] First_iteration_cell_a_D; // adder operands of each cell, driven by control_tp
+	wire [C_DIV_MANT + 1:0] First_iteration_cell_b_D;
+	wire [C_DIV_MANT + 1:0] Sec_iteration_cell_a_D;
+	wire [C_DIV_MANT + 1:0] Sec_iteration_cell_b_D;
+	wire [C_DIV_MANT + 1:0] Thi_iteration_cell_a_D;
+	wire [C_DIV_MANT + 1:0] Thi_iteration_cell_b_D;
+	wire [C_DIV_MANT + 1:0] Fou_iteration_cell_a_D;
+	wire [C_DIV_MANT + 1:0] Fou_iteration_cell_b_D;
+	wire Div_start_dly_S; // consumed by the first cell only
+	wire Sqrt_start_dly_S; // driven by control_tp but not read anywhere in this module
+	control_tp #(Precision_ctl_Enable_S) control_U0( // sequencing and operand selection; the cells themselves are stateless
+		.Clk_CI(Clk_CI),
+		.Rst_RBI(Rst_RBI),
+		.Div_start_SI(Div_start_SI),
+		.Sqrt_start_SI(Sqrt_start_SI),
+		.Start_SI(Start_SI),
+		.Precision_ctl_SI(Precision_ctl_SI),
+		.Numerator_DI(Mant_a_DI),
+		.Exp_num_DI(Exp_a_DI),
+		.Denominator_DI(Mant_b_DI),
+		.Exp_den_DI(Exp_b_DI),
+		.First_iteration_cell_sum_DI(First_iteration_cell_sum_D),
+		.First_iteration_cell_carry_DI(First_iteration_cell_carry_D),
+		.Sqrt_Da0(Sqrt_Da0),
+		.Sec_iteration_cell_sum_DI(Sec_iteration_cell_sum_D),
+		.Sec_iteration_cell_carry_DI(Sec_iteration_cell_carry_D),
+		.Sqrt_Da1(Sqrt_Da1),
+		.Thi_iteration_cell_sum_DI(Thi_iteration_cell_sum_D),
+		.Thi_iteration_cell_carry_DI(Thi_iteration_cell_carry_D),
+		.Sqrt_Da2(Sqrt_Da2),
+		.Fou_iteration_cell_sum_DI(Fou_iteration_cell_sum_D),
+		.Fou_iteration_cell_carry_DI(Fou_iteration_cell_carry_D),
+		.Sqrt_Da3(Sqrt_Da3),
+		.Div_start_dly_SO(Div_start_dly_S),
+		.Sqrt_start_dly_SO(Sqrt_start_dly_S),
+		.Div_enable_SO(Div_enable_SO),
+		.Sqrt_enable_SO(Sqrt_enable_SO),
+		.Sqrt_D0(Sqrt_D0),
+		.Sqrt_D1(Sqrt_D1),
+		.Sqrt_D2(Sqrt_D2),
+		.Sqrt_D3(Sqrt_D3),
+		.First_iteration_cell_a_DO(First_iteration_cell_a_D),
+		.First_iteration_cell_b_DO(First_iteration_cell_b_D),
+		.Sec_iteration_cell_a_DO(Sec_iteration_cell_a_D),
+		.Sec_iteration_cell_b_DO(Sec_iteration_cell_b_D),
+		.Thi_iteration_cell_a_DO(Thi_iteration_cell_a_D),
+		.Thi_iteration_cell_b_DO(Thi_iteration_cell_b_D),
+		.Fou_iteration_cell_a_DO(Fou_iteration_cell_a_D),
+		.Fou_iteration_cell_b_DO(Fou_iteration_cell_b_D),
+		.Ready_SO(Ready_SO),
+		.Done_SO(Done_SO),
+		.Mant_result_prenorm_DO(Mant_z_DO),
+		.Exp_result_prenorm_DO(Exp_z_DO)
+	);
+	iteration_div_sqrt_first iteration_unit_U0( // first cell: the only one with the Div_start_dly_SI input
+		.A_DI(First_iteration_cell_a_D),
+		.B_DI(First_iteration_cell_b_D),
+		.Div_enable_SI(Div_enable_SO),
+		.Div_start_dly_SI(Div_start_dly_S),
+		.Sqrt_enable_SI(Sqrt_enable_SO),
+		.D_DI(Sqrt_D0),
+		.D_DO(Sqrt_Da0),
+		.Sum_DO(First_iteration_cell_sum_D),
+		.Carry_out_DO(First_iteration_cell_carry_D)
+	);
+	iteration_div_sqrt iteration_unit_U1( // second cell
+		.A_DI(Sec_iteration_cell_a_D),
+		.B_DI(Sec_iteration_cell_b_D),
+		.Div_enable_SI(Div_enable_SO),
+		.Sqrt_enable_SI(Sqrt_enable_SO),
+		.D_DI(Sqrt_D1),
+		.D_DO(Sqrt_Da1),
+		.Sum_DO(Sec_iteration_cell_sum_D),
+		.Carry_out_DO(Sec_iteration_cell_carry_D)
+	);
+	iteration_div_sqrt iteration_unit_U2( // third cell
+		.A_DI(Thi_iteration_cell_a_D),
+		.B_DI(Thi_iteration_cell_b_D),
+		.Div_enable_SI(Div_enable_SO),
+		.Sqrt_enable_SI(Sqrt_enable_SO),
+		.D_DI(Sqrt_D2),
+		.D_DO(Sqrt_Da2),
+		.Sum_DO(Thi_iteration_cell_sum_D),
+		.Carry_out_DO(Thi_iteration_cell_carry_D)
+	);
+	iteration_div_sqrt iteration_unit_U3( // fourth cell
+		.A_DI(Fou_iteration_cell_a_D),
+		.B_DI(Fou_iteration_cell_b_D),
+		.Div_enable_SI(Div_enable_SO),
+		.Sqrt_enable_SI(Sqrt_enable_SO),
+		.D_DI(Sqrt_D3),
+		.D_DO(Sqrt_Da3),
+		.Sum_DO(Fou_iteration_cell_sum_D),
+		.Carry_out_DO(Fou_iteration_cell_carry_D)
+	);
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/preprocess.v b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/preprocess.v
new file mode 100644
index 0000000..1f6f916
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_div_sqrt_tp_nlp/preprocess.v
@@ -0,0 +1,215 @@
+module preprocess ( // Operand front end of the div/sqrt unit: unpacks both operands, classifies them, normalizes the mantissas and registers everything on a start.
+	Clk_CI,
+	Rst_RBI,
+	Div_start_SI,
+	Sqrt_start_SI,
+	Operand_a_DI,
+	Operand_b_DI,
+	RM_SI,
+	Start_SO,
+	Exp_a_DO_norm,
+	Exp_b_DO_norm,
+	Mant_a_DO_norm,
+	Mant_b_DO_norm,
+	RM_dly_SO,
+	Sign_z_DO,
+	Inf_a_SO,
+	Inf_b_SO,
+	Zero_a_SO,
+	Zero_b_SO,
+	NaN_a_SO,
+	NaN_b_SO
+);
+parameter C_DIV_RM           = 2; // These 18 parameters repeat the values of package fpu_defs_div_sqrt_tp.
+parameter C_DIV_RM_NEAREST   = 2'h0;
+parameter C_DIV_RM_TRUNC     = 2'h1;
+parameter C_DIV_RM_PLUSINF   = 2'h2;
+parameter C_DIV_RM_MINUSINF  = 2'h3;
+parameter C_DIV_PC           = 5;
+parameter C_DIV_OP           = 32;
+parameter C_DIV_MANT         = 23;
+parameter C_DIV_EXP          = 8;
+parameter C_DIV_BIAS         = 127;
+parameter C_DIV_BIAS_AONE    = 8'h80;
+parameter C_DIV_HALF_BIAS    = 63;
+parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1;
+parameter C_DIV_EXP_ZERO     = 8'h00;
+parameter C_DIV_EXP_ONE      = 8'h01;
+parameter C_DIV_EXP_INF      = 8'hff;
+parameter C_DIV_MANT_ZERO    = 23'h0;
+parameter C_DIV_MANT_NAN     = 23'h400000;
+	input wire Clk_CI; // clock for all registers below
+	input wire Rst_RBI; // asynchronous reset, active low
+	input wire Div_start_SI; // start a division (a / b)
+	input wire Sqrt_start_SI; // start a square root of operand a
+	input wire [C_DIV_OP - 1:0] Operand_a_DI; // packed operand: sign | exponent | fraction
+	input wire [C_DIV_OP - 1:0] Operand_b_DI;
+	input wire [C_DIV_RM - 1:0] RM_SI; // rounding mode, sampled on a start
+	output wire Start_SO; // combinational OR of both start inputs (not registered)
+	output wire [C_DIV_EXP:0] Exp_a_DO_norm; // registered exponent of a after normalization (one bit wider than the field)
+	output wire [C_DIV_EXP:0] Exp_b_DO_norm;
+	output wire [C_DIV_MANT:0] Mant_a_DO_norm; // registered, left-normalized mantissa of a
+	output wire [C_DIV_MANT:0] Mant_b_DO_norm;
+	output wire [C_DIV_RM - 1:0] RM_dly_SO; // registered copy of RM_SI
+	output wire Sign_z_DO; // registered result sign
+	output wire Inf_a_SO; // registered operand classification flags
+	output wire Inf_b_SO;
+	output wire Zero_a_SO;
+	output wire Zero_b_SO;
+	output wire NaN_a_SO;
+	output wire NaN_b_SO;
+	wire Hb_a_D; // leading (hidden) mantissa bit of a
+	wire Hb_b_D;
+	wire [C_DIV_EXP - 1:0] Exp_a_D;
+	wire [C_DIV_EXP - 1:0] Exp_b_D;
+	wire [C_DIV_MANT:0] Mant_a_D;
+	wire [C_DIV_MANT:0] Mant_b_D;
+	wire Sign_a_D;
+	wire Sign_b_D;
+	wire Start_S;
+	assign Sign_a_D = Operand_a_DI[C_DIV_OP - 1]; // sign is the top operand bit
+	assign Sign_b_D = Operand_b_DI[C_DIV_OP - 1];
+	assign Exp_a_D = Operand_a_DI[C_DIV_OP - 2:C_DIV_MANT]; // exponent field sits between sign and fraction
+	assign Exp_b_D = Operand_b_DI[C_DIV_OP - 2:C_DIV_MANT];
+	assign Mant_a_D = {Hb_a_D, Operand_a_DI[C_DIV_MANT - 1:0]}; // fraction with the leading bit prepended
+	assign Mant_b_D = {Hb_b_D, Operand_b_DI[C_DIV_MANT - 1:0]};
+	assign Hb_a_D = |Exp_a_D; // leading bit is 1 unless the exponent field is all zero
+	assign Hb_b_D = |Exp_b_D;
+	assign Start_S = Div_start_SI | Sqrt_start_SI;
+	wire Mant_a_prenorm_zero_S; // fraction field is all zero
+	wire Mant_b_prenorm_zero_S;
+	assign Mant_a_prenorm_zero_S = Operand_a_DI[C_DIV_MANT - 1:0] == C_DIV_MANT_ZERO;
+	assign Mant_b_prenorm_zero_S = Operand_b_DI[C_DIV_MANT - 1:0] == C_DIV_MANT_ZERO;
+	wire Exp_a_prenorm_zero_S; // exponent field is all zero
+	wire Exp_b_prenorm_zero_S;
+	assign Exp_a_prenorm_zero_S = Exp_a_D == C_DIV_EXP_ZERO;
+	assign Exp_b_prenorm_zero_S = Exp_b_D == C_DIV_EXP_ZERO;
+	wire Exp_a_prenorm_Inf_NaN_S; // exponent field is all ones
+	wire Exp_b_prenorm_Inf_NaN_S;
+	assign Exp_a_prenorm_Inf_NaN_S = Exp_a_D == C_DIV_EXP_INF;
+	assign Exp_b_prenorm_Inf_NaN_S = Exp_b_D == C_DIV_EXP_INF;
+	wire Zero_a_SN; // *_SN = next value, *_SP = registered value
+	reg Zero_a_SP;
+	wire Zero_b_SN;
+	reg Zero_b_SP;
+	wire Inf_a_SN;
+	reg Inf_a_SP;
+	wire Inf_b_SN;
+	reg Inf_b_SP;
+	wire NaN_a_SN;
+	reg NaN_a_SP;
+	wire NaN_b_SN;
+	reg NaN_b_SP;
+	assign Zero_a_SN = (Start_S ? Exp_a_prenorm_zero_S && Mant_a_prenorm_zero_S : Zero_a_SP); // zero: exponent and fraction both zero; held when no start is pending
+	assign Zero_b_SN = (Start_S ? Exp_b_prenorm_zero_S && Mant_b_prenorm_zero_S : Zero_b_SP);
+	assign Inf_a_SN = (Start_S ? Exp_a_prenorm_Inf_NaN_S && Mant_a_prenorm_zero_S : Inf_a_SP); // infinity: all-ones exponent with a zero fraction
+	assign Inf_b_SN = (Start_S ? Exp_b_prenorm_Inf_NaN_S && Mant_b_prenorm_zero_S : Inf_b_SP);
+	assign NaN_a_SN = (Start_S ? Exp_a_prenorm_Inf_NaN_S && ~Mant_a_prenorm_zero_S : NaN_a_SP); // NaN: all-ones exponent with a non-zero fraction
+	assign NaN_b_SN = (Start_S ? Exp_b_prenorm_Inf_NaN_S && ~Mant_b_prenorm_zero_S : NaN_b_SP);
+	always @(posedge Clk_CI or negedge Rst_RBI) // classification flag registers
+		if (~Rst_RBI) begin
+			Zero_a_SP <= 1'sb0;
+			Zero_b_SP <= 1'sb0;
+			Inf_a_SP <= 1'sb0;
+			Inf_b_SP <= 1'sb0;
+			NaN_a_SP <= 1'sb0;
+			NaN_b_SP <= 1'sb0;
+		end
+		else begin
+			Inf_a_SP <= Inf_a_SN;
+			Inf_b_SP <= Inf_b_SN;
+			Zero_a_SP <= Zero_a_SN;
+			Zero_b_SP <= Zero_b_SN;
+			NaN_a_SP <= NaN_a_SN;
+			NaN_b_SP <= NaN_b_SN;
+		end
+	reg Sign_z_DN;
+	reg Sign_z_DP;
+	always @(*) // next result sign; note that the reset also gates this combinational path
+		if (~Rst_RBI)
+			Sign_z_DN = 1'sb0;
+		else if (Div_start_SI)
+			Sign_z_DN = Sign_a_D ^ Sign_b_D; // quotient sign is the XOR of the operand signs
+		else if (Sqrt_start_SI)
+			Sign_z_DN = Sign_a_D; // square root keeps the sign of operand a
+		else
+			Sign_z_DN = Sign_z_DP; // hold
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (~Rst_RBI)
+			Sign_z_DP <= 1'sb0;
+		else
+			Sign_z_DP <= Sign_z_DN;
+	reg [C_DIV_RM - 1:0] RM_DN;
+	reg [C_DIV_RM - 1:0] RM_DP;
+	always @(*) // next rounding mode: sampled on a start, held otherwise
+		if (~Rst_RBI)
+			RM_DN = 1'sb0;
+		else if (Start_S)
+			RM_DN = RM_SI;
+		else
+			RM_DN = RM_DP;
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (~Rst_RBI)
+			RM_DP <= 1'sb0;
+		else
+			RM_DP <= RM_DN;
+	assign RM_dly_SO = RM_DP;
+	wire [4:0] Mant_leadingOne_a; // NOTE(review): used below as a left-shift count, so presumably the number of leading zeros -- confirm against fpu_ff
+	wire [4:0] Mant_leadingOne_b;
+	wire Mant_zero_S_a; // connected to fpu_ff but not read in this module
+	wire Mant_zero_S_b;
+	fpu_ff #(.LEN(C_DIV_MANT + 1)) LOD_Ua( // first-one detector on mantissa a (fpu_ff is defined outside this file)
+		.in_i(Mant_a_D),
+		.first_one_o(Mant_leadingOne_a),
+		.no_ones_o(Mant_zero_S_a)
+	);
+	wire [C_DIV_MANT:0] Mant_a_norm_DN;
+	reg [C_DIV_MANT:0] Mant_a_norm_DP;
+	assign Mant_a_norm_DN = (Start_S ? Mant_a_D << Mant_leadingOne_a : Mant_a_norm_DP); // left-shift by the detector output on a start, hold otherwise
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (~Rst_RBI)
+			Mant_a_norm_DP <= 1'sb0;
+		else
+			Mant_a_norm_DP <= Mant_a_norm_DN;
+	wire [C_DIV_EXP:0] Exp_a_norm_DN;
+	reg [C_DIV_EXP:0] Exp_a_norm_DP;
+	assign Exp_a_norm_DN = (Start_S ? (Exp_a_D - Mant_leadingOne_a) + |Mant_leadingOne_a : Exp_a_norm_DP); // exponent minus the shift count, plus one whenever a shift was applied
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (~Rst_RBI)
+			Exp_a_norm_DP <= 1'sb0;
+		else
+			Exp_a_norm_DP <= Exp_a_norm_DN;
+	fpu_ff #(.LEN(C_DIV_MANT + 1)) LOD_Ub( // first-one detector on mantissa b
+		.in_i(Mant_b_D),
+		.first_one_o(Mant_leadingOne_b),
+		.no_ones_o(Mant_zero_S_b)
+	);
+	wire [C_DIV_MANT:0] Mant_b_norm_DN;
+	reg [C_DIV_MANT:0] Mant_b_norm_DP;
+	assign Mant_b_norm_DN = (Start_S ? Mant_b_D << Mant_leadingOne_b : Mant_b_norm_DP); // same normalization as for operand a
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (~Rst_RBI)
+			Mant_b_norm_DP <= 1'sb0;
+		else
+			Mant_b_norm_DP <= Mant_b_norm_DN;
+	wire [C_DIV_EXP:0] Exp_b_norm_DN;
+	reg [C_DIV_EXP:0] Exp_b_norm_DP;
+	assign Exp_b_norm_DN = (Start_S ? (Exp_b_D - Mant_leadingOne_b) + |Mant_leadingOne_b : Exp_b_norm_DP); // same exponent adjustment as for operand a
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (~Rst_RBI)
+			Exp_b_norm_DP <= 1'sb0;
+		else
+			Exp_b_norm_DP <= Exp_b_norm_DN;
+	assign Start_SO = Start_S; // every output below is driven from a register; only Start_SO is combinational
+	assign Exp_a_DO_norm = Exp_a_norm_DP;
+	assign Exp_b_DO_norm = Exp_b_norm_DP;
+	assign Mant_a_DO_norm = Mant_a_norm_DP;
+	assign Mant_b_DO_norm = Mant_b_norm_DP;
+	assign Sign_z_DO = Sign_z_DP;
+	assign Inf_a_SO = Inf_a_SP;
+	assign Inf_b_SO = Inf_b_SP;
+	assign Zero_a_SO = Zero_a_SP;
+	assign Zero_b_SO = Zero_b_SP;
+	assign NaN_a_SO = NaN_a_SP;
+	assign NaN_b_SO = NaN_b_SP;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/CSA.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/CSA.v
new file mode 100644
index 0000000..3d16722
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/CSA.v
@@ -0,0 +1,25 @@
+module CSA #(
+	parameter n = 49	// width of every operand and result vector
+) (
+	input  wire [n - 1:0] A_DI,
+	input  wire [n - 1:0] B_DI,
+	input  wire [n - 1:0] C_DI,
+	output wire [n - 1:0] Sum_DO,
+	output wire [n - 1:0] Carry_DO
+);
+	// Bit-sliced 3:2 carry-save compressor: each bit position is an
+	// independent full adder, so no carry ripples between positions.
+	//
+	// Carry_DO[i] is the carry produced at position i and is NOT shifted here;
+	// any weighting by two has to be applied by the instantiating module.
+
+	// Sum bit: parity of the three input bits.
+	assign Sum_DO = A_DI ^ B_DI ^ C_DI;
+
+	// Carry bit: majority of the three input bits
+	// (A&B | A&C | B&C, with the two C terms factored).
+	assign Carry_DO = (A_DI & B_DI) | (C_DI & (A_DI | B_DI));
+
+	// Both outputs are plain nets driven by continuous assignments and are
+	// therefore valid from time zero in simulation.
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/LZA.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/LZA.v
new file mode 100644
index 0000000..a3d872f
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/LZA.v
@@ -0,0 +1,67 @@
+module LZA #(
+	parameter C_WIDTH = 74	// operand width in bits
+) (
+	A_DI,
+	B_DI,
+	Leading_one_DO,
+	No_one_SO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+	// Only C_WIDTH and C_LEADONE_WIDTH are used by the logic below.
+	input  wire [C_WIDTH - 1:0]         A_DI;
+	input  wire [C_WIDTH - 1:0]         B_DI;
+	output wire [C_LEADONE_WIDTH - 1:0] Leading_one_DO;
+	output wire                         No_one_SO;
+
+	// Per-bit classification of the operand pair:
+	//   bits_differ : exactly one of A/B has the bit set
+	//   both_ones   : both bits are set
+	//   both_zeros  : both bits are clear
+	wire [C_WIDTH - 1:0] bits_differ = A_DI ^ B_DI;
+	wire [C_WIDTH - 1:0] both_ones   = A_DI & B_DI;
+	wire [C_WIDTH - 1:0] both_zeros  = ~(A_DI | B_DI);
+
+	// Indicator vector derived from A_DI and B_DI alone; it is what the first-one detector searches.
+	wire [C_WIDTH - 1:0] indicator;
+
+	// Top bit: only the upper neighbour-free form is possible.
+	assign indicator[C_WIDTH - 1] = ~bits_differ[C_WIDTH - 1] & bits_differ[C_WIDTH - 2];
+
+	// Interior bits combine position k with its neighbours k+1 and k-1.
+	genvar k;
+	generate
+		for (k = 1; k < (C_WIDTH - 1); k = k + 1) begin : genblk2
+			assign indicator[k] =
+				( bits_differ[k + 1] & ((both_ones[k] & ~both_zeros[k - 1]) | (both_zeros[k] & ~both_ones[k - 1]))) |
+				(~bits_differ[k + 1] & ((both_zeros[k] & ~both_zeros[k - 1]) | (both_ones[k] & ~both_ones[k - 1])));
+		end
+	endgenerate
+
+	// Bottom bit: there is no lower neighbour to consult.
+	assign indicator[0] = (bits_differ[1] & both_zeros[0]) | (~bits_differ[1] & (bits_differ[0] | both_ones[0]));
+
+	// First-one detector (fpu_ff is defined outside this file); its outputs drive the module ports directly.
+	fpu_ff #(.LEN(C_WIDTH)) LOD_Ub(
+		.in_i(indicator),
+		.first_one_o(Leading_one_DO),
+		.no_ones_o(No_one_SO)
+	);
+
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/adders.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/adders.v
new file mode 100644
index 0000000..884c174
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/adders.v
@@ -0,0 +1,65 @@
+`include "fpu_defs_fmac.sv"
+
+module adders ( // Split-width adder: a low (2*C_MANT+2)-bit addition plus a high-part increment, each computed in direct and in inverted form.
+	AL_DI,
+	BL_DI,
+	Sub_SI,
+	Sign_cor_SI,
+	Sign_amt_DI,
+	Sft_stop_SI,
+	BH_DI,
+	Sign_postalig_DI,
+	Sum_pos_DO,
+	Sign_out_DO,
+	A_LZA_DO,
+	B_LZA_DO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+	input wire [(2 * C_MANT) + 1:0] AL_DI; // first low-part operand
+	input wire [(2 * C_MANT) + 1:0] BL_DI; // second low-part operand; its top bit only enters through Carry_postcor_D
+	input wire Sub_SI; // appended below BL_DI as the least significant bit of the low addition
+	input wire [2:0] Sign_cor_SI; // correction bits, folded into Carry_postcor_D
+	input wire Sign_amt_DI; // when set, the results are taken from BH_DI alone
+	input wire Sft_stop_SI; // when set, only the low sum is returned
+	input wire [C_MANT + 3:0] BH_DI; // high-part operand
+	input wire Sign_postalig_DI; // incoming sign
+	output wire [(3 * C_MANT) + 4:0] Sum_pos_DO; // selected sum (direct or inverted form)
+	output wire Sign_out_DO; // result sign
+	output wire [(3 * C_MANT) + 4:0] A_LZA_DO; // first operand for the leading-zero anticipator
+	output wire [(3 * C_MANT) + 4:0] B_LZA_DO; // second operand for the leading-zero anticipator
+	wire Carry_postcor_D; // correction bit placed on top of BL_DI in the low addition
+	assign Carry_postcor_D = (Sign_amt_DI ? 1'b0 : {~(|Sign_cor_SI) ^ BL_DI[(2 * C_MANT) + 1]});
+	wire Carry_uninv_LS; // carry out of the direct low addition
+	wire [(2 * C_MANT) + 1:0] Sum_uninv_LD;
+	assign {Carry_uninv_LS, Sum_uninv_LD} = {1'b0, AL_DI} + {Carry_postcor_D, BL_DI[2 * C_MANT:0], Sub_SI};
+	wire Carry_inv_LS; // carry out of the inverted low addition
+	wire [(2 * C_MANT) + 2:0] Sum_inv_LD;
+	assign {Carry_inv_LS, Sum_inv_LD} = ({1'b1, ~AL_DI, 1'b1} + {~Carry_postcor_D, ~BL_DI[2 * C_MANT:0], 2'b11}) + 2;
+	wire [C_MANT + 3:0] BH_inv_D;
+	wire [C_MANT + 3:0] Sum_uninv_HD, Sum_inv_HD;
+	wire Carryout_uninv_HS, Carryout_inv_HS; // explicit declarations: these were implicit nets, which are rejected under `default_nettype none
+	assign BH_inv_D = ~BH_DI;
+	assign {Carryout_uninv_HS, Sum_uninv_HD} = (Carry_uninv_LS ? {BH_DI + 1} : BH_DI); // high part incremented by the low carry
+	assign {Carryout_inv_HS, Sum_inv_HD} = (Carry_inv_LS ? BH_inv_D : {BH_inv_D - 1}); // inverted high part, decremented when the inverted low add has no carry
+	assign Sum_pos_DO = (Sft_stop_SI ? {26'h0000000, Sum_uninv_LD[(2 * C_MANT) + 1:0]} : {(Sign_amt_DI ? {BH_DI[C_MANT + 2:0], 48'b000000000000000000000000000000000000000000000000} : {(Sum_uninv_HD[C_MANT + 3] ? {Sum_inv_HD[C_MANT + 2:0], Sum_inv_LD[(2 * C_MANT) + 2:1]} : {Sum_uninv_HD[C_MANT + 2:0], Sum_uninv_LD})})}); // NOTE(review): the 26/48-bit literals assume C_MANT = 23
+	assign Sign_out_DO = (Sign_amt_DI ? Sign_postalig_DI : Sum_uninv_HD[C_MANT + 3] ^ Sign_postalig_DI); // top bit of the direct high sum flips the sign
+	assign A_LZA_DO = (Sign_amt_DI ? {BH_DI[C_MANT + 2:0], 48'b000000000000000000000000000000000000000000000000} : {BH_DI[C_MANT + 2:0], 48'b000000000000000000000000000000000000000000000000}); // NOTE(review): both branches are identical -- confirm no other value was intended
+	assign B_LZA_DO = (Sign_amt_DI ? 74'h0000000000000000000 : {25'h0000000, Carry_uninv_LS, Sum_uninv_LD}); // NOTE(review): the 74/25-bit literals assume C_MANT = 23
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/aligner.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/aligner.v
new file mode 100644
index 0000000..f1eb93c
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/aligner.v
@@ -0,0 +1,78 @@
+`include "fpu_defs_fmac.sv"
+
+module aligner ( // Aligns the mantissa of operand a against the b*c product: computes the shift distance, the shifted (and optionally inverted) mantissa and the common exponent.
+	Exp_a_DI,
+	Exp_b_DI,
+	Exp_c_DI,
+	Mant_a_DI,
+	Sign_a_DI,
+	Sign_b_DI,
+	Sign_c_DI,
+	Pp_sum_DI,
+	Pp_carry_DI,
+	Sub_SO,
+	Mant_postalig_a_DO,
+	Exp_postalig_DO,
+	Sign_postalig_DO,
+	Sign_amt_DO,
+	Sft_stop_SO,
+	Pp_sum_postcal_DO,
+	Pp_carry_postcal_DO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+	input wire [C_EXP - 1:0] Exp_a_DI; // exponent of operand a
+	input wire [C_EXP - 1:0] Exp_b_DI; // exponents of the two product operands
+	input wire [C_EXP - 1:0] Exp_c_DI;
+	input wire [C_MANT:0] Mant_a_DI; // mantissa of operand a
+	input wire Sign_a_DI;
+	input wire Sign_b_DI;
+	input wire Sign_c_DI;
+	input wire [(2 * C_MANT) + 2:0] Pp_sum_DI; // product in carry-save form (sum vector)
+	input wire [(2 * C_MANT) + 2:0] Pp_carry_DI; // product in carry-save form (carry vector)
+	output wire Sub_SO; // XOR of the three signs
+	output wire [74:0] Mant_postalig_a_DO; // aligned mantissa of a; NOTE(review): the fixed width 75 assumes C_MANT = 23
+	output wire [C_EXP + 1:0] Exp_postalig_DO; // exponent after alignment
+	output wire Sign_postalig_DO; // sign that goes with the selected exponent
+	output wire Sign_amt_DO; // sign bit of the shift distance
+	output wire Sft_stop_SO; // shift distance is non-negative and at least 74
+	output wire [(2 * C_MANT) + 2:0] Pp_sum_postcal_DO; // product sum vector, zeroed when the shift distance is negative
+	output wire [(2 * C_MANT) + 2:0] Pp_carry_postcal_DO; // product carry vector, zeroed when the shift distance is negative
+	wire [C_EXP + 1:0] Exp_dif_D; // NOTE(review): assigned below but never read inside this module
+	wire [C_EXP + 1:0] Sft_amt_D; // right-shift distance for Mant_a_DI in two's complement; the MSB is its sign
+	assign Sub_SO = (Sign_a_DI ^ Sign_b_DI) ^ Sign_c_DI;
+	assign Exp_dif_D = ((Exp_a_DI - Exp_b_DI) - Exp_c_DI) + C_BIAS;
+	assign Sft_amt_D = (((Exp_b_DI + Exp_c_DI) - Exp_a_DI) - C_BIAS) + 27;
+	assign Sign_amt_DO = Sft_amt_D[C_EXP + 1];
+	wire Sft_stop_S;
+	assign Sft_stop_S = ~Sft_amt_D[C_EXP + 1] && (Sft_amt_D[C_EXP:0] >= 74);
+	assign Sft_stop_SO = Sft_stop_S;
+	// Exponent after alignment. For a negative shift distance operand a sets the
+	// exponent; otherwise it is Exp_b + Exp_c - C_BIAS + 27, i.e. Exp_a_DI + Sft_amt_D.
+	// The sum must reach the output at full width: routing it through a 1-bit
+	// cast helper would keep only its LSB and corrupt the exponent.
+	assign Exp_postalig_DO = (Sft_amt_D[C_EXP + 1] ? Exp_a_DI : (((Exp_b_DI + Exp_c_DI) - C_BIAS) + 27));
+	wire [73:0] Mant_postalig_a_D; // right-shifted mantissa of a
+	wire [C_MANT:0] Bit_sftout_D; // bits shifted out at the bottom; not read inside this module
+	assign {Mant_postalig_a_D, Bit_sftout_D} = {Mant_a_DI, 74'h0000000000000000000} >> {(Sft_stop_S ? 0 : Sft_amt_D)};
+	assign Mant_postalig_a_DO = (Sft_amt_D[C_EXP + 1] ? {1'b0, Mant_a_DI, 50'h0000000000000} : {(Sft_stop_S ? 75'h0000000000000000000 : {(Sub_SO ? {1'b1, ~Mant_postalig_a_D} : {1'b0, Mant_postalig_a_D})})}); // negative distance: unshifted; stop: zero; else shifted, inverted when Sub_SO is set
+	assign Sign_postalig_DO = (Sft_amt_D[C_EXP + 1] ? Sign_a_DI : Sign_b_DI ^ Sign_c_DI);
+	assign Pp_sum_postcal_DO = (Sft_amt_D[C_EXP + 1] ? {(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)) {1'sb0}} : Pp_sum_DI);
+	assign Pp_carry_postcal_DO = (Sft_amt_D[C_EXP + 1] ? {(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)) {1'sb0}} : Pp_carry_DI);
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/booth_encoder.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/booth_encoder.v
new file mode 100644
index 0000000..b494d5a
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/booth_encoder.v
@@ -0,0 +1,16 @@
+module booth_encoder (   // Booth recoder for one 3-bit multiplier window: emits 1x / 2x / sign selects
+	Booth_b_DI,
+	Sel_1x_SO,
+	Sel_2x_SO,
+	Sel_sign_SO
+);
+	input wire [2:0] Booth_b_DI;   // the three-bit window being recoded
+	output wire Sel_1x_SO;         // high when the two low bits differ
+	output wire Sel_2x_SO;         // high when the low bits agree but bit 2 differs from bit 1
+	output wire Sel_sign_SO;       // copy of bit 2
+	wire Low_equal_S;              // Booth_b_DI[1] == Booth_b_DI[0]
+	assign Sel_sign_SO = Booth_b_DI[2];
+	assign Low_equal_S = ~(Booth_b_DI[1] ^ Booth_b_DI[0]);
+	assign Sel_1x_SO = ~Low_equal_S;
+	assign Sel_2x_SO = Low_equal_S & (Booth_b_DI[2] ^ Booth_b_DI[1]);
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/booth_selector.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/booth_selector.v
new file mode 100644
index 0000000..17f7944
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/booth_selector.v
@@ -0,0 +1,14 @@
+module booth_selector (   // produces one bit of a Booth partial product
+	Booth_a_DI,
+	Sel_1x_SI,
+	Sel_2x_SI,
+	Sel_sign_SI,
+	Booth_pp_DO
+);
+	input wire [1:0] Booth_a_DI;                     // bit [1] is gated by the 1x select, bit [0] by the 2x select
+	input wire Sel_1x_SI, Sel_2x_SI, Sel_sign_SI;    // select lines (driven by booth_encoder inside pp_generation)
+	output wire Booth_pp_DO;
+	// AND-OR mux picks the operand bit; the sign select then inverts it.
+	wire Picked_bit_S = (Sel_1x_SI & Booth_a_DI[1]) | (Sel_2x_SI & Booth_a_DI[0]);
+	assign Booth_pp_DO = Picked_bit_S ^ Sel_sign_SI;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/fmac.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/fmac.v
new file mode 100644
index 0000000..f4cbe17
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/fmac.v
@@ -0,0 +1,190 @@
+module fmac (   // fmac top level: b and c mantissas feed the multiplier tree, a is the aligned addend
+	Operand_a_DI,
+	Operand_b_DI,
+	Operand_c_DI,
+	RM_SI,
+	Result_DO,
+	Exp_OF_SO,
+	Exp_UF_SO,
+	Exp_NX_SO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+	input wire [C_OP - 1:0] Operand_a_DI;   // addend: its mantissa goes to the aligner
+	input wire [C_OP - 1:0] Operand_b_DI;   // first multiplier operand
+	input wire [C_OP - 1:0] Operand_c_DI;   // second multiplier operand
+	input wire [C_RM - 1:0] RM_SI;          // rounding mode, forwarded to fpu_norm_fmac
+	output wire [31:0] Result_DO;           // packed {sign, exponent, mantissa}
+	output wire Exp_OF_SO;
+	output wire Exp_UF_SO;
+	output wire Exp_NX_SO;                  // driven by fpu_norm_fmac's Flag_Inexact_SO
+	wire [C_MANT - 1:0] Mant_res_DO;
+	wire [C_EXP - 1:0] Exp_res_DO;
+	wire Sign_res_DO;
+	wire DeN_a_S;
+	wire Sub_S;
+	wire Sign_postalig_D;
+	wire Sign_amt_D;
+	wire Sft_stop_S;
+	wire Sign_out_D;
+	assign Result_DO = {Sign_res_DO, Exp_res_DO, Mant_res_DO};
+	// Unpacked operand fields and special-value flags produced by preprocess_fmac.
+	wire Sign_a_D, Sign_b_D, Sign_c_D;
+	wire [C_EXP - 1:0] Exp_a_D;
+	wire [C_EXP - 1:0] Exp_b_D;
+	wire [C_EXP - 1:0] Exp_c_D;
+	wire [C_MANT:0] Mant_a_D;
+	wire [C_MANT:0] Mant_b_D;
+	wire [C_MANT:0] Mant_c_D;
+	wire Inf_a_S, Inf_b_S, Inf_c_S;
+	wire NaN_a_S, NaN_b_S, NaN_c_S;
+	wire Zero_a_S, Zero_b_S, Zero_c_S;
+	preprocess_fmac precess_U0(
+		.Operand_a_DI(Operand_a_DI),
+		.Operand_b_DI(Operand_b_DI),
+		.Operand_c_DI(Operand_c_DI),
+		.Exp_a_DO(Exp_a_D),
+		.Mant_a_DO(Mant_a_D),
+		.Sign_a_DO(Sign_a_D),
+		.Exp_b_DO(Exp_b_D),
+		.Mant_b_DO(Mant_b_D),
+		.Sign_b_DO(Sign_b_D),
+		.Exp_c_DO(Exp_c_D),
+		.Mant_c_DO(Mant_c_D),
+		.Sign_c_DO(Sign_c_D),
+		.DeN_a_SO(DeN_a_S),
+		.DeN_b_SO(),   // not used at this level; left open on purpose
+		.DeN_c_SO(),   // not used at this level; left open on purpose
+		.Inf_a_SO(Inf_a_S),
+		.Inf_b_SO(Inf_b_S),
+		.Inf_c_SO(Inf_c_S),
+		.Zero_a_SO(Zero_a_S),
+		.Zero_b_SO(Zero_b_S),
+		.Zero_c_SO(Zero_c_S),
+		.NaN_a_SO(NaN_a_S),
+		.NaN_b_SO(NaN_b_S),
+		.NaN_c_SO(NaN_c_S)
+	);
+	// Thirteen partial-product rows of (2 * C_MANT) + 3 bits each, packed into one flat vector.
+	wire [(((2 * C_MANT) + 2) >= 0 ? (13 * ((2 * C_MANT) + 3)) - 1 : (13 * (1 - ((2 * C_MANT) + 2))) + ((2 * C_MANT) + 1)):(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2)] Pp_index_D;
+	pp_generation pp_gneration_U0(
+		.Mant_a_DI(Mant_b_D),
+		.Mant_b_DI(Mant_c_D),
+		.Pp_index_DO(Pp_index_D)
+	);
+	wire [(2 * C_MANT) + 2:0] Pp_sum_D;
+	wire [(2 * C_MANT) + 2:0] Pp_carry_D;
+	wire MSB_cor_D;
+	wallace wallace_U0(
+		.Pp_index_DI(Pp_index_D),
+		.Pp_sum_DO(Pp_sum_D),
+		.Pp_carry_DO(Pp_carry_D),
+		.MSB_cor_DO(MSB_cor_D)
+	);
+	// The aligner positions operand a's mantissa against the product and passes the product vectors on.
+	wire [74:0] Mant_postalig_a_D;
+	wire signed [C_EXP + 1:0] Exp_postalig_D;
+	wire [(2 * C_MANT) + 2:0] Pp_sum_postcal_D;
+	wire [(2 * C_MANT) + 2:0] Pp_carry_postcal_D;
+	aligner aligner_U0(
+		.Exp_a_DI(Exp_a_D),
+		.Exp_b_DI(Exp_b_D),
+		.Mant_a_DI(Mant_a_D),
+		.Exp_c_DI(Exp_c_D),
+		.Sign_a_DI(Sign_a_D),
+		.Sign_b_DI(Sign_b_D),
+		.Sign_c_DI(Sign_c_D),
+		.Pp_sum_DI(Pp_sum_D),
+		.Pp_carry_DI(Pp_carry_D),
+		.Sub_SO(Sub_S),
+		.Mant_postalig_a_DO(Mant_postalig_a_D),
+		.Exp_postalig_DO(Exp_postalig_D),
+		.Sign_postalig_DO(Sign_postalig_D),
+		.Sign_amt_DO(Sign_amt_D),
+		.Sft_stop_SO(Sft_stop_S),
+		.Pp_sum_postcal_DO(Pp_sum_postcal_D),
+		.Pp_carry_postcal_DO(Pp_carry_postcal_D)
+	);
+	// Low (2 * C_MANT) + 2 bits of the aligned addend are combined with the two product vectors.
+	wire [(2 * C_MANT) + 1:0] Csa_sum_D;
+	wire [(2 * C_MANT) + 1:0] Csa_carry_D;
+	CSA #((2 * C_MANT) + 2) CSA_U0(
+		.A_DI(Mant_postalig_a_D[(2 * C_MANT) + 1:0]),
+		.B_DI({Pp_sum_postcal_D[(2 * C_MANT) + 1:0]}),
+		.C_DI({Pp_carry_postcal_D[2 * C_MANT:0], 1'b0}),   // carry vector enters shifted left by one
+		.Sum_DO(Csa_sum_D),
+		.Carry_DO(Csa_carry_D)
+	);
+	wire [73:0] Sum_pos_D;
+	wire [(3 * C_MANT) + 4:0] A_LZA_D;
+	wire [(3 * C_MANT) + 4:0] B_LZA_D;
+	adders adders_U0(
+		.AL_DI(Csa_sum_D),
+		.BL_DI(Csa_carry_D),
+		.Sub_SI(Sub_S),
+		.Sign_cor_SI({MSB_cor_D, Pp_carry_postcal_D[(2 * C_MANT) + 2], {Pp_sum_postcal_D[(2 * C_MANT) + 2] && Pp_carry_postcal_D[(2 * C_MANT) + 1]}}),
+		.Sign_amt_DI(Sign_amt_D),
+		.Sft_stop_SI(Sft_stop_S),
+		.BH_DI(Mant_postalig_a_D[(3 * C_MANT) + 5:(2 * C_MANT) + 2]),   // upper part of the aligned addend
+		.Sign_postalig_DI(Sign_postalig_D),
+		.Sum_pos_DO(Sum_pos_D),
+		.Sign_out_DO(Sign_out_D),
+		.A_LZA_DO(A_LZA_D),
+		.B_LZA_DO(B_LZA_D)
+	);
+	wire [C_LEADONE_WIDTH - 1:0] Leading_one_D;
+	wire No_one_S;
+	LZA #((3 * C_MANT) + 5) LZA_U0(
+		.A_DI(A_LZA_D),
+		.B_DI(B_LZA_D),
+		.Leading_one_DO(Leading_one_D),
+		.No_one_SO(No_one_S)
+	);
+	// Normalization, rounding and flag generation; every declared input of fpu_norm_fmac is driven.
+	fpu_norm_fmac fpu_norm_U0(
+		.Mant_in_DI(Sum_pos_D),
+		.Exp_in_DI(Exp_postalig_D),
+		.Sign_in_DI(Sign_out_D),
+		.Leading_one_DI(Leading_one_D),
+		.No_one_SI(No_one_S),
+		.Sign_amt_DI(Sign_amt_D),
+		.Sub_SI(Sub_S),
+		.Exp_a_DI(Operand_a_DI[C_OP - 2:C_MANT]),   // raw exponent field of a, not the pre-processed Exp_a_D
+		.Mant_a_DI(Mant_a_D),
+		.Sign_a_DI(Sign_a_D),
+		.DeN_a_SI(DeN_a_S),
+		.RM_SI(RM_SI),
+		.Stick_one_SI(1'b0),   // declared by fpu_norm_fmac but never read there; tied off so it does not float
+		.Inf_a_SI(Inf_a_S),
+		.Inf_b_SI(Inf_b_S),
+		.Inf_c_SI(Inf_c_S),
+		.Zero_a_SI(Zero_a_S),
+		.Zero_b_SI(Zero_b_S),
+		.Zero_c_SI(Zero_c_S),
+		.NaN_a_SI(NaN_a_S),
+		.NaN_b_SI(NaN_b_S),
+		.NaN_c_SI(NaN_c_S),
+		.Mant_res_DO(Mant_res_DO),
+		.Exp_res_DO(Exp_res_DO),
+		.Sign_res_DO(Sign_res_DO),
+		.Exp_OF_SO(Exp_OF_SO),
+		.Exp_UF_SO(Exp_UF_SO),
+		.Flag_Inexact_SO(Exp_NX_SO)
+	);
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/fpu_defs_fmac.sv b/verilog/rtl/ips/fpu/hdl/fpu_fmac/fpu_defs_fmac.sv
new file mode 100644
index 0000000..e0d5eaf
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/fpu_defs_fmac.sv
@@ -0,0 +1,41 @@
+// Copyright 2017 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+///////////////////////////////////////////////////////////////////////////////
+// This file contains all fmac parameters                                    //
+//                                                                           //
+// Authors    : Lei Li  (lile@iis.ee.ethz.ch)                                //
+//                                                                           //
+//                                                                           //
+// Copyright (c) 2017 Integrated Systems Laboratory, ETH Zurich              //
+///////////////////////////////////////////////////////////////////////////////
+
+
+package fpu_defs_fmac;
+   // Constants shared by the fmac sources: a 32-bit operand with 8 exponent and 23 fraction bits.
+   parameter C_RM            = 2;             // width of the rounding-mode field
+   parameter C_RM_NEAREST    = 2'b00;
+   parameter C_RM_TRUNC      = 2'b01;
+   parameter C_RM_PLUSINF    = 2'b10;
+   parameter C_RM_MINUSINF   = 2'b11;
+   parameter C_PC            = 5;
+   parameter C_OP            = 32;            // operand width in bits
+   parameter C_MANT          = 23;            // stored fraction bits
+   parameter C_EXP           = 8;             // exponent bits
+   parameter C_BIAS          = 127;
+   parameter C_HALF_BIAS     = 63;
+   parameter C_LEADONE_WIDTH = 7;
+   parameter C_MANT_PRENORM  = C_MANT + 1;
+   parameter C_EXP_ZERO      = 8'd0;
+   parameter C_EXP_ONE       = 8'd1;
+   parameter C_EXP_INF       = 8'd255;        // all exponent bits set
+   parameter C_MANT_ZERO     = 23'd0;
+   parameter C_MANT_NAN      = 23'h40_0000;
+   // C_MANT_NAN has only its most significant fraction bit set.
+endpackage : fpu_defs_fmac
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/fpu_norm_fmac.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/fpu_norm_fmac.v
new file mode 100644
index 0000000..00417db
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/fpu_norm_fmac.v
@@ -0,0 +1,280 @@
+module fpu_norm_fmac ( // Normalizes the fmac sum, applies the rounding mode and drives the result fields plus OF/UF/inexact flags
+	Mant_in_DI,
+	Exp_in_DI,
+	Sign_in_DI,
+	Leading_one_DI,
+	No_one_SI,
+	Sign_amt_DI,
+	Sub_SI,
+	Exp_a_DI,
+	Mant_a_DI,
+	Sign_a_DI,
+	DeN_a_SI,
+	RM_SI,
+	Stick_one_SI,
+	Inf_a_SI,
+	Inf_b_SI,
+	Inf_c_SI,
+	Zero_a_SI,
+	Zero_b_SI,
+	Zero_c_SI,
+	NaN_a_SI,
+	NaN_b_SI,
+	NaN_c_SI,
+	Mant_res_DO,
+	Exp_res_DO,
+	Sign_res_DO,
+	Exp_OF_SO,
+	Exp_UF_SO,
+	Flag_Inexact_SO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+	input wire [(3 * C_MANT) + 4:0] Mant_in_DI; // wide sum mantissa to be normalized
+	input wire signed [C_EXP + 1:0] Exp_in_DI; // exponent before normalization; a set top bit selects the right-shift path
+	input wire Sign_in_DI; // sign used for every computed (non-bypass) result
+	input wire [C_LEADONE_WIDTH - 1:0] Leading_one_DI; // left-shift count suggested by the caller
+	input wire No_one_SI; // when set (and no earlier branch hits) the result is all zeros
+	input wire Sign_amt_DI; // when set, operand a is passed through as the result
+	input wire Sub_SI; // only used to detect Inf - Inf
+	input wire [C_EXP - 1:0] Exp_a_DI;
+	input wire [C_MANT:0] Mant_a_DI;
+	input wire Sign_a_DI;
+	input wire DeN_a_SI; // becomes Exp_UF_SO in the operand-a bypass branch
+	input wire [C_RM - 1:0] RM_SI; // rounding mode, compared against the C_RM_* encodings
+	input wire Stick_one_SI; // declared but not read anywhere in this module
+	input wire Inf_a_SI;
+	input wire Inf_b_SI;
+	input wire Inf_c_SI;
+	input wire Zero_a_SI; // not read in this module
+	input wire Zero_b_SI;
+	input wire Zero_c_SI;
+	input wire NaN_a_SI;
+	input wire NaN_b_SI;
+	input wire NaN_c_SI;
+	output wire [C_MANT - 1:0] Mant_res_DO; // rounded fraction, hidden bit dropped
+	output wire [C_EXP - 1:0] Exp_res_DO; // exponent, incremented when rounding carries out
+	output reg Sign_res_DO;
+	output reg Exp_OF_SO;
+	output reg Exp_UF_SO;
+	output wire Flag_Inexact_SO; // set when any discarded bit (two lower bits or sticky) is 1
+	reg [C_MANT:0] Mant_res_norm_D; // selected mantissa before rounding (hidden bit included)
+	reg [C_EXP - 1:0] Exp_res_norm_D; // selected exponent before rounding
+	reg [1:0] Mant_lower_D; // the two bits directly below the kept mantissa
+	wire Stick_one_HD;
+	wire [(3 * C_MANT) + 4:0] Mant_postsft_D; // mantissa after the left (normalizing) shift
+	wire [C_EXP + 1:0] Exp_postsft_D; // exponent matching Mant_postsft_D
+	wire [C_EXP + 1:0] Exp_postsft_addone_D;
+	wire [C_LEADONE_WIDTH - 1:0] Leading_one_D;
+	wire [C_EXP:0] LSt_Mant_D; // left-shift amount actually applied
+	assign Leading_one_D = (Sign_amt_DI | Mant_in_DI[(3 * C_MANT) + 4] ? 0 : Leading_one_DI); // 0 when bypassing a or when the top mantissa bit is already set
+	wire Exp_lg_S;
+	assign Exp_lg_S = Exp_in_DI > Leading_one_D; // mixed signed/unsigned operands, so the comparison is unsigned
+	assign LSt_Mant_D = (Exp_in_DI[C_EXP + 1] ? 0 : (Exp_lg_S ? Leading_one_D : Exp_in_DI[C_EXP:0] - 1)); // no shift for a negative exponent; otherwise capped at Exp_in_DI - 1
+	assign Mant_postsft_D = Mant_in_DI << LSt_Mant_D;
+	assign Exp_postsft_D = (Exp_in_DI[C_EXP + 1] ? 0 : (Exp_lg_S ? Exp_in_DI - Leading_one_D : 1)); // 0, Exp_in_DI - Leading_one_D, or 1 for the same three cases
+	assign Exp_postsft_addone_D = (Exp_in_DI - Leading_one_D) - 1; // used when the top mantissa bit is still clear after the shift
+	wire [C_EXP + 1:0] Exp_Max_RS_D;
+	assign Exp_Max_RS_D = Exp_in_DI[C_EXP:0] + 74; // its top bit is tested in the negative-exponent branch below
+	wire [C_EXP + 1:0] Num_RS_D;
+	assign Num_RS_D = ~Exp_in_DI + 2; // equals -Exp_in_DI + 1 in two's complement: the right-shift distance
+	wire [(3 * C_MANT) + 6:0] Mant_RS_D;
+	assign Mant_RS_D = {Mant_in_DI, 1'b0, 1'b0} >> Num_RS_D; // mantissa with two zero bits appended, shifted right
+	wire [(2 * C_MANT) + 1:0] Mant_StickCh_D; // bits below the kept window; NOTE(review): Exp_postsft_D's top bit appears to be always 0 given its assign above, which would make the Mant_RS_D branch unreachable - confirm
+	assign Mant_StickCh_D = (Exp_postsft_D[C_EXP + 1] ? Mant_RS_D[(2 * C_MANT) + 3:2] : (Exp_postsft_D[C_EXP + 1:0] == {((C_EXP + 1) >= 0 ? C_EXP + 2 : 1 - (C_EXP + 1)) {1'sb0}} ? Mant_postsft_D[(2 * C_MANT) + 2:1] : (Mant_postsft_D[(3 * C_MANT) + 4] | (Exp_postsft_D == 0) ? Mant_postsft_D[(2 * C_MANT) + 1:0] : {Mant_postsft_D[2 * C_MANT:0], 1'b0})));
+	assign Stick_one_HD = |Mant_StickCh_D; // OR-reduction of the selected low bits
+	wire Stick_one_D;
+	assign Stick_one_D = Stick_one_HD;
+	reg Mant_sticky_D;
+	always @(*) // priority chain: the first matching branch fixes mantissa, exponent, sign, flags and rounding bits
+		if (((((NaN_a_SI | NaN_b_SI) | NaN_c_SI) | (Zero_b_SI && Inf_c_SI)) | (Zero_c_SI && Inf_b_SI)) | ((Sub_SI && Inf_a_SI) && (Inf_b_SI | Inf_c_SI))) begin // any NaN input, 0 * Inf, or Inf - Inf
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = {1'b0, C_MANT_NAN};
+			Exp_res_norm_D = 1'sb1; // 1'sb1 sign-extends to all ones
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = 1'b0;
+			Mant_sticky_D = 1'b0;
+		end
+		else if ((Inf_a_SI | Inf_b_SI) | Inf_c_SI) begin // any infinite operand: exponent all ones, zero mantissa, OF raised
+			Exp_OF_SO = 1'b1;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = 1'sb0;
+			Exp_res_norm_D = 1'sb1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+			Mant_sticky_D = 1'b0;
+		end
+		else if (Sign_amt_DI) begin // bypass: operand a is returned unchanged
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = DeN_a_SI;
+			Mant_res_norm_D = Mant_a_DI;
+			Exp_res_norm_D = Exp_a_DI;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_a_DI;
+			Mant_sticky_D = 1'b0;
+		end
+		else if (No_one_SI) begin // all-zero result, no flags
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = 1'sb0;
+			Exp_res_norm_D = 1'sb0;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+			Mant_sticky_D = 1'b0;
+		end
+		else if (Exp_in_DI[C_EXP + 1]) begin // negative exponent: right-shift path
+			if (~Exp_Max_RS_D[C_EXP + 1]) begin // exponent below -74: zero result; NOTE(review): raises Exp_OF_SO rather than Exp_UF_SO - confirm this is intended
+				Exp_OF_SO = 1'b1;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+				Mant_sticky_D = 1'b0;
+			end
+			else begin // result taken from the right-shifted mantissa with exponent field 0, UF raised
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b1;
+				Mant_res_norm_D = {1'b0, Mant_RS_D[(3 * C_MANT) + 6:(2 * C_MANT) + 6]}; // right-hand side is one bit wider than the target, so the leading 1'b0 is truncated
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = Mant_RS_D[(2 * C_MANT) + 5:(2 * C_MANT) + 4];
+				Sign_res_DO = Sign_in_DI;
+				Mant_sticky_D = Stick_one_D;
+			end
+		end
+		else if (((Exp_postsft_D[C_EXP:0] == 256) && ~Mant_postsft_D[(3 * C_MANT) + 4]) && (Mant_postsft_D[(3 * C_MANT) + 3:(2 * C_MANT) + 3] != {(((3 * C_MANT) + 3) >= ((2 * C_MANT) + 3) ? (((3 * C_MANT) + 3) - ((2 * C_MANT) + 3)) + 1 : (((2 * C_MANT) + 3) - ((3 * C_MANT) + 3)) + 1) {1'sb0}})) begin // exponent 256, top bit clear, next bits non-zero: emits the NaN pattern with no flags; NOTE(review): confirm intent
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = {1'b0, C_MANT_NAN};
+			Exp_res_norm_D = 1'sb1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = 1'b0;
+			Mant_sticky_D = 1'b0;
+		end
+		else if (Exp_postsft_D[C_EXP - 1:0] == {C_EXP {1'sb1}}) begin // low exponent bits are all ones
+			if (Mant_postsft_D[(3 * C_MANT) + 4]) begin // top mantissa bit set: OF, exponent all ones, mantissa C_MANT_NAN
+				Exp_OF_SO = 1'b1;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = {1'b0, C_MANT_NAN};
+				Exp_res_norm_D = 1'sb1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+				Mant_sticky_D = 1'b0;
+			end
+			else if (Mant_postsft_D[(3 * C_MANT) + 4:(2 * C_MANT) + 4] == {(((3 * C_MANT) + 4) >= ((2 * C_MANT) + 4) ? (((3 * C_MANT) + 4) - ((2 * C_MANT) + 4)) + 1 : (((2 * C_MANT) + 4) - ((3 * C_MANT) + 4)) + 1) {1'sb0}}) begin // upper mantissa window all zero: OF, exponent all ones, zero mantissa
+				Exp_OF_SO = 1'b1;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = 1'sb0;
+				Exp_res_norm_D = 1'sb1;
+				Mant_lower_D = 2'b00;
+				Sign_res_DO = Sign_in_DI;
+				Mant_sticky_D = 1'b0;
+			end
+			else begin // top bit clear: mantissa read one position lower, exponent 254
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = Mant_postsft_D[(3 * C_MANT) + 3:(2 * C_MANT) + 3];
+				Exp_res_norm_D = 254;
+				Mant_lower_D = Mant_postsft_D[(2 * C_MANT) + 2:(2 * C_MANT) + 1];
+				Sign_res_DO = Sign_in_DI;
+				Mant_sticky_D = Stick_one_D;
+			end
+		end
+		else if (Exp_postsft_D[C_EXP]) begin // exponent bit C_EXP set: OF, exponent all ones, zero mantissa
+			Exp_OF_SO = 1'b1;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = 1'sb0;
+			Exp_res_norm_D = 1'sb1;
+			Mant_lower_D = 2'b00;
+			Sign_res_DO = Sign_in_DI;
+			Mant_sticky_D = 1'b0;
+		end
+		else if (Exp_postsft_D[C_EXP + 1:0] == {((C_EXP + 1) >= 0 ? C_EXP + 2 : 1 - (C_EXP + 1)) {1'sb0}}) begin // shifted exponent is 0: UF, exponent field 0, hidden bit forced to 0
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b1;
+			Mant_res_norm_D = {1'b0, Mant_postsft_D[(3 * C_MANT) + 4:(2 * C_MANT) + 5]};
+			Exp_res_norm_D = 1'sb0;
+			Mant_lower_D = Mant_postsft_D[(2 * C_MANT) + 4:(2 * C_MANT) + 3];
+			Sign_res_DO = Sign_in_DI;
+			Mant_sticky_D = Stick_one_D;
+		end
+		else if (Exp_postsft_D[C_EXP + 1:0] == 1) begin // shifted exponent is 1
+			if (Mant_postsft_D[(3 * C_MANT) + 4]) begin // top bit set: exponent field 1, no flags
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b0;
+				Mant_res_norm_D = {Mant_postsft_D[(3 * C_MANT) + 4:(2 * C_MANT) + 4]};
+				Exp_res_norm_D = 1;
+				Mant_lower_D = Mant_postsft_D[(2 * C_MANT) + 3:(2 * C_MANT) + 2];
+				Sign_res_DO = Sign_in_DI;
+				Mant_sticky_D = Stick_one_D;
+			end
+			else begin // top bit clear: same mantissa window, exponent field 0, UF raised
+				Exp_OF_SO = 1'b0;
+				Exp_UF_SO = 1'b1;
+				Mant_res_norm_D = {Mant_postsft_D[(3 * C_MANT) + 4:(2 * C_MANT) + 4]};
+				Exp_res_norm_D = 1'sb0;
+				Mant_lower_D = Mant_postsft_D[(2 * C_MANT) + 3:(2 * C_MANT) + 2];
+				Sign_res_DO = Sign_in_DI;
+				Mant_sticky_D = Stick_one_D;
+			end
+		end
+		else if (~Mant_postsft_D[(3 * C_MANT) + 4]) begin // top bit still clear: window one position lower, exponent one less
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = Mant_postsft_D[(3 * C_MANT) + 3:(2 * C_MANT) + 3];
+			Exp_res_norm_D = Exp_postsft_addone_D[C_EXP - 1:0];
+			Mant_lower_D = Mant_postsft_D[(2 * C_MANT) + 2:(2 * C_MANT) + 1];
+			Sign_res_DO = Sign_in_DI;
+			Mant_sticky_D = Stick_one_D;
+		end
+		else begin // default: top bit set, upper window and shifted exponent used directly
+			Exp_OF_SO = 1'b0;
+			Exp_UF_SO = 1'b0;
+			Mant_res_norm_D = Mant_postsft_D[(3 * C_MANT) + 4:(2 * C_MANT) + 4];
+			Exp_res_norm_D = Exp_postsft_D[C_EXP - 1:0];
+			Mant_lower_D = Mant_postsft_D[(2 * C_MANT) + 3:(2 * C_MANT) + 2];
+			Sign_res_DO = Sign_in_DI;
+			Mant_sticky_D = Stick_one_D;
+		end
+	wire [C_MANT:0] Mant_upper_D;
+	wire [C_MANT + 1:0] Mant_upperRounded_D; // one extra bit to catch the rounding carry
+	reg Mant_roundUp_S;
+	wire Mant_rounded_S;
+	assign Mant_upper_D = Mant_res_norm_D;
+	assign Flag_Inexact_SO = Mant_rounded_S;
+	assign Mant_rounded_S = |Mant_lower_D | Mant_sticky_D; // any discarded bit set
+	always @(*) begin // decide whether to add one LSB for the selected rounding mode
+		Mant_roundUp_S = 1'b0;
+		case (RM_SI)
+			C_RM_NEAREST: Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_D) || Mant_upper_D[0]); // above half, or exactly half with an odd kept LSB (ties to even)
+			C_RM_TRUNC: Mant_roundUp_S = 0;
+			C_RM_PLUSINF: Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI; // inexact and positive
+			C_RM_MINUSINF: Mant_roundUp_S = Mant_rounded_S & Sign_in_DI; // inexact and negative
+			default: Mant_roundUp_S = 0;
+		endcase
+	end
+	wire Mant_renorm_S;
+	assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_S;
+	assign Mant_renorm_S = Mant_upperRounded_D[C_MANT + 1]; // rounding carried out of the mantissa
+	wire Rounded_SO; // assigned below but not used elsewhere in this module
+	assign Mant_res_DO = (Mant_renorm_S ? Mant_upperRounded_D[C_MANT:1] : Mant_upperRounded_D[C_MANT - 1:0]); // on carry-out take the window one bit higher
+	assign Exp_res_DO = Exp_res_norm_D + Mant_renorm_S;
+	assign Rounded_SO = Mant_rounded_S;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/pp_generation.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/pp_generation.v
new file mode 100644
index 0000000..10f6be5
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/pp_generation.v
@@ -0,0 +1,106 @@
+module pp_generation (   // Builds the 13 Booth partial-product rows for the mantissa multiplier
+	Mant_a_DI,
+	Mant_b_DI,
+	Pp_index_DO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+parameter C_FFLAG         = 5;
+	input wire [C_MANT:0] Mant_a_DI;   // operand whose bits are selected per row
+	input wire [C_MANT:0] Mant_b_DI;   // operand that is Booth-recoded, one 3-bit window per row
+	output wire [(((2 * C_MANT) + 2) >= 0 ? (13 * ((2 * C_MANT) + 3)) - 1 : (13 * (1 - ((2 * C_MANT) + 2))) + ((2 * C_MANT) + 1)):(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2)] Pp_index_DO;
+	localparam C_PP_ROWS   = 13;                // number of partial-product rows
+	localparam C_PP_WIDTH  = (2 * C_MANT) + 3;  // bits per row in Pp_index_DO
+	localparam C_BPP_WIDTH = C_MANT + 2;        // booth_selector bits per row
+	// Recoded operand: two zero bits below and zero padding above, so window i covers bits [2i+1:2i-1].
+	wire [C_MANT + 5:0] Mant_b_D;
+	assign Mant_b_D = {2'b00, Mant_b_DI, 2'b00};
+	wire [C_PP_ROWS - 1:0] Sel_1x_S, Sel_2x_S, Sel_sign_S;   // per-row selects from the encoders
+	genvar i, l, j;
+	generate
+		for (i = 1; i <= C_PP_ROWS; i = i + 1) begin : genblk1
+			booth_encoder booth_encoding(
+				.Booth_b_DI(Mant_b_D[(2 * i) + 1:(2 * i) - 1]),
+				.Sel_1x_SO(Sel_1x_S[i - 1]),
+				.Sel_2x_SO(Sel_2x_S[i - 1]),
+				.Sel_sign_SO(Sel_sign_S[i - 1])
+			);
+		end
+	endgenerate
+	wire [C_MANT + 2:0] Mant_a_D;   // one bit wider than the right-hand side, so the top bit is zero
+	assign Mant_a_D = {Mant_a_DI, 1'b0};
+	wire [(C_PP_ROWS * C_BPP_WIDTH) - 1:0] Booth_pp_D;   // row l-1 occupies bits [(l-1)*C_BPP_WIDTH +: C_BPP_WIDTH]
+	generate
+		for (l = 1; l <= C_PP_ROWS; l = l + 1) begin : genblk2
+			for (j = 1; j <= C_BPP_WIDTH; j = j + 1) begin : genblk1
+				booth_selector booth_selection(
+					.Booth_a_DI(Mant_a_D[j:j - 1]),
+					.Sel_1x_SI(Sel_1x_S[l - 1]),
+					.Sel_2x_SI(Sel_2x_S[l - 1]),
+					.Sel_sign_SI(Sel_sign_S[l - 1]),
+					.Booth_pp_DO(Booth_pp_D[((l - 1) * C_BPP_WIDTH) + (j - 1)])
+				);
+			end
+		end
+	endgenerate
+	// Row k = {constant prefix, ~sign[k], selector bits of row k, 1'b0, sign[k-1], 2*(k-1) zero bits}; rows 0 and 12 differ at the ends.
+	assign Pp_index_DO[(0 * C_PP_WIDTH)+:C_PP_WIDTH] = {21'h000000, ~Sel_sign_S[0], Sel_sign_S[0], Sel_sign_S[0], Booth_pp_D[(0 * C_BPP_WIDTH)+:C_BPP_WIDTH]};
+	assign Pp_index_DO[(1 * C_PP_WIDTH)+:C_PP_WIDTH] = {21'b000000000000000000001, ~Sel_sign_S[1], Booth_pp_D[(1 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[0]};
+	assign Pp_index_DO[(2 * C_PP_WIDTH)+:C_PP_WIDTH] = {19'b0000000000000000001, ~Sel_sign_S[2], Booth_pp_D[(2 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[1], 2'h0};
+	assign Pp_index_DO[(3 * C_PP_WIDTH)+:C_PP_WIDTH] = {17'b00000000000000001, ~Sel_sign_S[3], Booth_pp_D[(3 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[2], 4'h0};
+	assign Pp_index_DO[(4 * C_PP_WIDTH)+:C_PP_WIDTH] = {15'b000000000000001, ~Sel_sign_S[4], Booth_pp_D[(4 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[3], 6'h00};
+	assign Pp_index_DO[(5 * C_PP_WIDTH)+:C_PP_WIDTH] = {13'b0000000000001, ~Sel_sign_S[5], Booth_pp_D[(5 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[4], 8'h00};
+	assign Pp_index_DO[(6 * C_PP_WIDTH)+:C_PP_WIDTH] = {11'b00000000001, ~Sel_sign_S[6], Booth_pp_D[(6 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[5], 10'h000};
+	assign Pp_index_DO[(7 * C_PP_WIDTH)+:C_PP_WIDTH] = {9'b000000001, ~Sel_sign_S[7], Booth_pp_D[(7 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[6], 12'h000};
+	assign Pp_index_DO[(8 * C_PP_WIDTH)+:C_PP_WIDTH] = {7'b0000001, ~Sel_sign_S[8], Booth_pp_D[(8 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[7], 14'h0000};
+	assign Pp_index_DO[(9 * C_PP_WIDTH)+:C_PP_WIDTH] = {5'b00001, ~Sel_sign_S[9], Booth_pp_D[(9 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[8], 16'h0000};
+	assign Pp_index_DO[(10 * C_PP_WIDTH)+:C_PP_WIDTH] = {3'b001, ~Sel_sign_S[10], Booth_pp_D[(10 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[9], 18'h00000};
+	assign Pp_index_DO[(11 * C_PP_WIDTH)+:C_PP_WIDTH] = {1'b1, ~Sel_sign_S[11], Booth_pp_D[(11 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[10], 20'h00000};
+	assign Pp_index_DO[(12 * C_PP_WIDTH)+:C_PP_WIDTH] = {Booth_pp_D[(12 * C_BPP_WIDTH)+:C_BPP_WIDTH], 1'b0, Sel_sign_S[11], 22'h000000};
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/preprocess_fmac.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/preprocess_fmac.v
new file mode 100644
index 0000000..17b20b3
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/preprocess_fmac.v
@@ -0,0 +1,163 @@
+module preprocess_fmac (   // Unpacks three operands into sign / exponent / mantissa and classifies each one
+	Operand_a_DI,
+	Operand_b_DI,
+	Operand_c_DI,
+	Exp_a_DO,
+	Mant_a_DO,
+	Sign_a_DO,
+	Exp_b_DO,
+	Mant_b_DO,
+	Sign_b_DO,
+	Exp_c_DO,
+	Mant_c_DO,
+	Sign_c_DO,
+	Inf_a_SO,
+	Inf_b_SO,
+	Inf_c_SO,
+	Zero_a_SO,
+	Zero_b_SO,
+	Zero_c_SO,
+	NaN_a_SO,
+	NaN_b_SO,
+	NaN_c_SO,
+	DeN_a_SO,
+	DeN_b_SO,
+	DeN_c_SO
+);
+parameter C_DIV_RM           = 2;
+parameter C_DIV_RM_NEAREST   = 2'h0;
+parameter C_DIV_RM_TRUNC     = 2'h1;
+parameter C_DIV_RM_PLUSINF   = 2'h2;
+parameter C_DIV_RM_MINUSINF  = 2'h3;
+parameter C_DIV_PC           = 5;
+parameter C_DIV_OP           = 32;
+parameter C_DIV_MANT         = 23;
+parameter C_DIV_EXP          = 8;
+parameter C_DIV_BIAS         = 127;
+parameter C_DIV_BIAS_AONE    = 8'h80;
+parameter C_DIV_HALF_BIAS    = 63;
+parameter C_DIV_MANT_PRENORM = C_DIV_MANT+1;
+parameter C_DIV_EXP_ZERO     = 8'h00;
+parameter C_DIV_EXP_ONE      = 8'h01;
+parameter C_DIV_EXP_INF      = 8'hff;
+parameter C_DIV_MANT_ZERO    = 23'h0;
+parameter C_DIV_MANT_NAN     = 23'h400000;
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+parameter C_FFLAG         = 5;
+	input wire [C_OP - 1:0] Operand_a_DI;
+	input wire [C_OP - 1:0] Operand_b_DI;
+	input wire [C_OP - 1:0] Operand_c_DI;
+	output wire [C_EXP - 1:0] Exp_a_DO;    // exponent field, replaced by C_EXP_ONE for a denormal
+	output wire [C_MANT:0] Mant_a_DO;      // {hidden bit, fraction}
+	output wire Sign_a_DO;
+	output wire [C_EXP - 1:0] Exp_b_DO;
+	output wire [C_MANT:0] Mant_b_DO;
+	output wire Sign_b_DO;
+	output wire [C_EXP - 1:0] Exp_c_DO;
+	output wire [C_MANT:0] Mant_c_DO;
+	output wire Sign_c_DO;
+	output wire Inf_a_SO;                  // exponent equals C_EXP_INF and fraction is zero
+	output wire Inf_b_SO;
+	output wire Inf_c_SO;
+	output wire Zero_a_SO;                 // exponent and fraction both zero
+	output wire Zero_b_SO;
+	output wire Zero_c_SO;
+	output wire NaN_a_SO;                  // exponent equals C_EXP_INF and fraction is non-zero
+	output wire NaN_b_SO;
+	output wire NaN_c_SO;
+	output wire DeN_a_SO;                  // exponent zero and fraction non-zero
+	output wire DeN_b_SO;
+	output wire DeN_c_SO;
+	// The same group of statements is repeated for operands a, b and c.
+	// Helper nets (Frac_*, Exp_*_nonzero_S, Frac_*_zero_S, Exp_*_max_S) are
+	// local to this module and purely combinational.
+	// ---- operand a: field extraction ----
+	wire [C_MANT - 1:0] Frac_a_D = Operand_a_DI[C_MANT - 1:0];
+	wire Exp_a_nonzero_S = |Operand_a_DI[C_OP - 2:C_MANT];   // doubles as the hidden bit
+	wire Frac_a_zero_S = (Frac_a_D == C_MANT_ZERO);
+	wire Exp_a_max_S = (Exp_a_DO == C_EXP_INF);
+	// classification flags
+	assign Zero_a_SO = ~Exp_a_nonzero_S & Frac_a_zero_S;
+	assign DeN_a_SO = ~Exp_a_nonzero_S & ~Frac_a_zero_S;
+	assign Inf_a_SO = Exp_a_max_S & Frac_a_zero_S;
+	assign NaN_a_SO = Exp_a_max_S & ~Frac_a_zero_S;
+	// unpacked fields
+	assign Sign_a_DO = Operand_a_DI[C_OP - 1];
+	assign Exp_a_DO = (DeN_a_SO ? C_EXP_ONE : Operand_a_DI[C_OP - 2:C_MANT]);
+	assign Mant_a_DO = {Exp_a_nonzero_S, Frac_a_D};
+	// ---- operand b: field extraction ----
+	wire [C_MANT - 1:0] Frac_b_D = Operand_b_DI[C_MANT - 1:0];
+	wire Exp_b_nonzero_S = |Operand_b_DI[C_OP - 2:C_MANT];   // doubles as the hidden bit
+	wire Frac_b_zero_S = (Frac_b_D == C_MANT_ZERO);
+	wire Exp_b_max_S = (Exp_b_DO == C_EXP_INF);
+	// classification flags
+	assign Zero_b_SO = ~Exp_b_nonzero_S & Frac_b_zero_S;
+	assign DeN_b_SO = ~Exp_b_nonzero_S & ~Frac_b_zero_S;
+	assign Inf_b_SO = Exp_b_max_S & Frac_b_zero_S;
+	assign NaN_b_SO = Exp_b_max_S & ~Frac_b_zero_S;
+	// unpacked fields
+	assign Sign_b_DO = Operand_b_DI[C_OP - 1];
+	assign Exp_b_DO = (DeN_b_SO ? C_EXP_ONE : Operand_b_DI[C_OP - 2:C_MANT]);
+	assign Mant_b_DO = {Exp_b_nonzero_S, Frac_b_D};
+	// ---- operand c: field extraction ----
+	wire [C_MANT - 1:0] Frac_c_D = Operand_c_DI[C_MANT - 1:0];
+	wire Exp_c_nonzero_S = |Operand_c_DI[C_OP - 2:C_MANT];   // doubles as the hidden bit
+	wire Frac_c_zero_S = (Frac_c_D == C_MANT_ZERO);
+	wire Exp_c_max_S = (Exp_c_DO == C_EXP_INF);
+	// classification flags
+	assign Zero_c_SO = ~Exp_c_nonzero_S & Frac_c_zero_S;
+	assign DeN_c_SO = ~Exp_c_nonzero_S & ~Frac_c_zero_S;
+	assign Inf_c_SO = Exp_c_max_S & Frac_c_zero_S;
+	assign NaN_c_SO = Exp_c_max_S & ~Frac_c_zero_S;
+	// unpacked fields
+	assign Sign_c_DO = Operand_c_DI[C_OP - 1];
+	assign Exp_c_DO = (DeN_c_SO ? C_EXP_ONE : Operand_c_DI[C_OP - 2:C_MANT]);
+	assign Mant_c_DO = {Exp_c_nonzero_S, Frac_c_D};
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_fmac/wallace.v b/verilog/rtl/ips/fpu/hdl/fpu_fmac/wallace.v
new file mode 100644
index 0000000..f3645a5
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_fmac/wallace.v
@@ -0,0 +1,127 @@
+module wallace ( // Reduction tree: folds 13 partial-product rows into one sum/carry pair using 11 CSA instances
+	Pp_index_DI,
+	Pp_sum_DO,
+	Pp_carry_DO,
+	MSB_cor_DO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23; // the only parameter referenced below; row width is 2*C_MANT+3
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+	input wire [(((2 * C_MANT) + 2) >= 0 ? (13 * ((2 * C_MANT) + 3)) - 1 : (13 * (1 - ((2 * C_MANT) + 2))) + ((2 * C_MANT) + 1)):(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2)] Pp_index_DI; // 13 rows packed back to back; row k occupies bits [k*W +: W], W = 2*C_MANT+3
+	output wire [(2 * C_MANT) + 2:0] Pp_sum_DO; // Sum_DO of the final stage (CSA_U10)
+	output wire [(2 * C_MANT) + 2:0] Pp_carry_DO; // Carry_DO of the final stage (CSA_U10), not shifted here
+	output wire MSB_cor_DO; // OR of carry MSBs dropped inside the tree (see assign at the bottom)
+	wire [(2 * C_MANT) + 2:0] CSA_u0_Sum_DI; // CSA_uN_{Sum,Carry}_DI: outputs of instance CSA_UN
+	wire [(2 * C_MANT) + 2:0] CSA_u0_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u1_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u1_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u2_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u2_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u3_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u3_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u4_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u4_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u5_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u5_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u6_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u6_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u7_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u7_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u8_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u8_Carry_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u9_Sum_DI;
+	wire [(2 * C_MANT) + 2:0] CSA_u9_Carry_DI;
+	CSA #((2 * C_MANT) + 3) CSA_U0( // first level: U0..U3 each take three consecutive input rows (0-2, 3-5, 6-8, 9-11); CSA is defined elsewhere, presumably a 3:2 carry-save adder - confirm
+		.A_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2)+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.B_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.C_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (2 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.Sum_DO(CSA_u0_Sum_DI),
+		.Carry_DO(CSA_u0_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U1(
+		.A_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (3 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.B_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (4 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.C_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (5 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.Sum_DO(CSA_u1_Sum_DI),
+		.Carry_DO(CSA_u1_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U2(
+		.A_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (6 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.B_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (7 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.C_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (8 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.Sum_DO(CSA_u2_Sum_DI),
+		.Carry_DO(CSA_u2_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U3(
+		.A_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (9 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.B_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (10 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.C_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (11 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.Sum_DO(CSA_u3_Sum_DI),
+		.Carry_DO(CSA_u3_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U4( // U4..U9 combine earlier sums with earlier carries; every carry is fed in as {carry[W-2:0], 1'b0}, i.e. shifted left by one with its MSB dropped
+		.A_DI(CSA_u0_Sum_DI),
+		.B_DI({CSA_u0_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.C_DI(CSA_u1_Sum_DI),
+		.Sum_DO(CSA_u4_Sum_DI),
+		.Carry_DO(CSA_u4_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U5(
+		.A_DI({CSA_u1_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.B_DI({CSA_u2_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.C_DI(CSA_u2_Sum_DI),
+		.Sum_DO(CSA_u5_Sum_DI),
+		.Carry_DO(CSA_u5_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U6(
+		.A_DI(CSA_u3_Sum_DI),
+		.B_DI({CSA_u3_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.C_DI(CSA_u4_Sum_DI),
+		.Sum_DO(CSA_u6_Sum_DI),
+		.Carry_DO(CSA_u6_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U7(
+		.A_DI({CSA_u4_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.B_DI({CSA_u5_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.C_DI(CSA_u5_Sum_DI),
+		.Sum_DO(CSA_u7_Sum_DI),
+		.Carry_DO(CSA_u7_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U8(
+		.A_DI(CSA_u6_Sum_DI),
+		.B_DI({CSA_u6_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.C_DI(CSA_u7_Sum_DI),
+		.Sum_DO(CSA_u8_Sum_DI),
+		.Carry_DO(CSA_u8_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U9(
+		.A_DI({CSA_u7_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.B_DI({CSA_u8_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.C_DI(CSA_u8_Sum_DI),
+		.Sum_DO(CSA_u9_Sum_DI),
+		.Carry_DO(CSA_u9_Carry_DI)
+	);
+	CSA #((2 * C_MANT) + 3) CSA_U10( // final stage: adds the last input row (row 12) and drives the module outputs
+		.A_DI(CSA_u9_Sum_DI),
+		.B_DI({CSA_u9_Carry_DI[(2 * C_MANT) + 1:0], 1'b0}),
+		.C_DI(Pp_index_DI[(((2 * C_MANT) + 2) >= 0 ? 0 : (2 * C_MANT) + 2) + (12 * (((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2)))+:(((2 * C_MANT) + 2) >= 0 ? (2 * C_MANT) + 3 : 1 - ((2 * C_MANT) + 2))]),
+		.Sum_DO(Pp_sum_DO),
+		.Carry_DO(Pp_carry_DO)
+	);
+	assign MSB_cor_DO = (((((CSA_u9_Carry_DI[(2 * C_MANT) + 2] | CSA_u8_Carry_DI[(2 * C_MANT) + 2]) | CSA_u7_Carry_DI[(2 * C_MANT) + 2]) | CSA_u6_Carry_DI[(2 * C_MANT) + 2]) | CSA_u5_Carry_DI[(2 * C_MANT) + 2]) | CSA_u4_Carry_DI[(2 * C_MANT) + 2]) | CSA_u3_Carry_DI[(2 * C_MANT) + 2]; // covers the dropped carry MSBs of U3..U9 only; those of U0..U2 are dropped as well but are not part of this OR
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_utils/fpu_ff.v b/verilog/rtl/ips/fpu/hdl/fpu_utils/fpu_ff.v
new file mode 100644
index 0000000..4ed778c
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_utils/fpu_ff.v
@@ -0,0 +1,57 @@
+module fpu_ff // Find-first-one: binary selection tree over in_i, scanning from the most significant bit
+#(
+  parameter LEN = 32
+)
+(
+	in_i,
+	first_one_o,
+	no_ones_o
+);
+	// LEN (declared in the parameter header above) is the width of in_i.
+	input wire [LEN - 1:0] in_i;
+	output wire [$clog2(LEN) - 1:0] first_one_o; // lowest index j with in_i[LEN-1-j] set, so 0 means the MSB of in_i is set
+	output wire no_ones_o; // 1 when no bit of in_i is set
+	localparam NUM_LEVELS = $clog2(LEN); // depth of the tree; also the width of one index
+	wire [(LEN * NUM_LEVELS) - 1:0] index_lut; // flattened table: entry j holds the constant j
+	wire [(2 ** NUM_LEVELS) - 1:0] sel_nodes; // per tree node: some input bit below this node is set (node 0 is the root)
+	wire [((2 ** NUM_LEVELS) * NUM_LEVELS) - 1:0] index_nodes; // per tree node: index chosen among the inputs below it
+	wire [LEN - 1:0] in_flipped; // in_i bit-reversed, so index 0 is the MSB of in_i
+	genvar j;
+	generate
+		for (j = 0; j < LEN; j = j + 1) begin : genblk1
+			assign index_lut[j * NUM_LEVELS+:NUM_LEVELS] = $unsigned(j);
+			assign in_flipped[j] = in_i[(LEN - j) - 1];
+		end
+	endgenerate
+	genvar k;
+	genvar l;
+	genvar level;
+	generate
+		for (level = 0; level < NUM_LEVELS; level = level + 1) begin : genblk2
+			if (level < (NUM_LEVELS - 1)) begin : genblk1 // inner levels: node (2**level - 1 + l) merges children (2**(level+1) - 1 + 2l) and the one after it
+				for (l = 0; l < (2 ** level); l = l + 1) begin : genblk1
+					assign sel_nodes[((2 ** level) - 1) + l] = sel_nodes[((2 ** (level + 1)) - 1) + (l * 2)] | sel_nodes[(((2 ** (level + 1)) - 1) + (l * 2)) + 1];
+					assign index_nodes[(((2 ** level) - 1) + l) * NUM_LEVELS+:NUM_LEVELS] = (sel_nodes[((2 ** (level + 1)) - 1) + (l * 2)] == 1'b1 ? index_nodes[(((2 ** (level + 1)) - 1) + (l * 2)) * NUM_LEVELS+:NUM_LEVELS] : index_nodes[((((2 ** (level + 1)) - 1) + (l * 2)) + 1) * NUM_LEVELS+:NUM_LEVELS]); // the first child wins whenever its select is set
+				end
+			end
+			if (level == (NUM_LEVELS - 1)) begin : genblk2 // last level: each node looks at the input pair in_flipped[2k], in_flipped[2k+1]
+				for (k = 0; k < (2 ** level); k = k + 1) begin : genblk1
+					if ((k * 2) < (LEN - 1)) begin : genblk1 // both inputs of the pair exist
+						assign sel_nodes[((2 ** level) - 1) + k] = in_flipped[k * 2] | in_flipped[(k * 2) + 1];
+						assign index_nodes[(((2 ** level) - 1) + k) * NUM_LEVELS+:NUM_LEVELS] = (in_flipped[k * 2] == 1'b1 ? index_lut[(k * 2) * NUM_LEVELS+:NUM_LEVELS] : index_lut[((k * 2) + 1) * NUM_LEVELS+:NUM_LEVELS]);
+					end
+					if ((k * 2) == (LEN - 1)) begin : genblk2 // only the first input of the pair exists (odd LEN)
+						assign sel_nodes[((2 ** level) - 1) + k] = in_flipped[k * 2];
+						assign index_nodes[(((2 ** level) - 1) + k) * NUM_LEVELS+:NUM_LEVELS] = index_lut[(k * 2) * NUM_LEVELS+:NUM_LEVELS];
+					end
+					if ((k * 2) > (LEN - 1)) begin : genblk3 // padding node beyond LEN: never selected, index tied to zero
+						assign sel_nodes[((2 ** level) - 1) + k] = 1'b0;
+						assign index_nodes[(((2 ** level) - 1) + k) * NUM_LEVELS+:NUM_LEVELS] = 1'sb0;
+					end
+				end
+			end
+		end
+	endgenerate
+	assign first_one_o = index_nodes[0+:NUM_LEVELS]; // result is taken from the root node
+	assign no_ones_o = ~sel_nodes[0];
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/defines_fpu.sv b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/defines_fpu.sv
new file mode 100644
index 0000000..0b0f9e4
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/defines_fpu.sv
@@ -0,0 +1,43 @@
+// Copyright 2017 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lukas Mueller -- lukasmue@student.ethz.ch                  //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    08/10/2014                                                 // 
+// Design Name:    FPU                                                        // 
+// Module Name:    defines_fpu.sv                                             //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Defines for the FPU                                        //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Revision:                                                                  //
+////////////////////////////////////////////////////////////////////////////////
+// RM_* : 2-bit selector codes; the values equal the C_RM_* parameters declared in fpexc / fpu_add / fpu_core.
+`define RM_NEAREST   2'h0
+`define RM_TRUNC     2'h1
+`define RM_PLUSINF   2'h2
+`define RM_MINUSINF  2'h3
+// FP_OP_* : 4-bit operation codes; the values equal C_FPU_ADD_CMD .. C_FPU_F2I_CMD (4'h0 .. 4'h5) in those modules.
+`define FP_OP_ADD    4'h0
+`define FP_OP_SUB    4'h1
+`define FP_OP_MUL    4'h2
+`define FP_OP_DIV    4'h3
+`define FP_OP_ITOF   4'h4
+`define FP_OP_FTOI   4'h5
+
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fp_fma_wrapper.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fp_fma_wrapper.v
new file mode 100644
index 0000000..22d38c3
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fp_fma_wrapper.v
@@ -0,0 +1,126 @@
+module fp_fma_wrapper // Pipelined shell around the fmac unit: registers operands before it and result/status after it
+#(
+  parameter C_MAC_PIPE_REGS = 2, // total register stages: C_MAC_PIPE_REGS-1 in front of fmac plus one behind it
+  parameter RND_WIDTH       = 2, // width of Rnd_i
+  parameter STAT_WIDTH      = 5  // width of Status_o
+)
+(
+	clk_i,
+	rst_ni,
+	En_i,
+	OpA_i,
+	OpB_i,
+	OpC_i,
+	Op_i,
+	Rnd_i,
+	Status_o,
+	Res_o,
+	Valid_o,
+	Ready_o,
+	Ack_i
+);
+	// Index 0 of every *_DP / *_SP array is combinational; indices >= 1 are clocked pipeline stages.
+	input wire clk_i;
+	input wire rst_ni; // asynchronous, active low
+	input wire En_i; // operation valid; when low the stage-0 operands are forced to zero
+	input wire [31:0] OpA_i;
+	input wire [31:0] OpB_i;
+	input wire [31:0] OpC_i;
+	input wire [1:0] Op_i; // Op_i[1] flips the sign bit of OpB_i, Op_i[0] flips the sign bit of OpC_i
+	input wire [RND_WIDTH - 1:0] Rnd_i;
+	output wire [STAT_WIDTH - 1:0] Status_o;
+	output wire [31:0] Res_o;
+	output wire Valid_o; // En_i delayed through all pre and post stages
+	output wire Ready_o; // tied to 1
+	input wire Ack_i; // not used inside this module
+	parameter C_PRE_PIPE_REGS = C_MAC_PIPE_REGS - 1;
+	parameter C_POST_PIPE_REGS = 1;
+	reg [31:0] OpA_DP [0:C_PRE_PIPE_REGS];
+	reg [31:0] OpB_DP [0:C_PRE_PIPE_REGS];
+	reg [31:0] OpC_DP [0:C_PRE_PIPE_REGS];
+	reg En_SP [0:C_PRE_PIPE_REGS];
+	reg [RND_WIDTH - 1:0] Rnd_DP [0:C_PRE_PIPE_REGS];
+	reg EnPost_SP [0:C_POST_PIPE_REGS];
+	reg [31:0] Res_DP [0:C_POST_PIPE_REGS];
+	reg [STAT_WIDTH - 1:0] Status_DP [0:C_POST_PIPE_REGS];
+	wire [7:0] status; // only bits 3 (Exp_UF_SO), 4 (Exp_OF_SO) and 5 (Exp_NX_SO) are driven
+	wire [32:1] sv2v_tmp_29B01;
+	assign sv2v_tmp_29B01 = (En_i ? OpA_i : {32 {1'sb0}});
+	always @(*) OpA_DP[0] = sv2v_tmp_29B01;
+	wire [32:1] sv2v_tmp_0E9BB;
+	assign sv2v_tmp_0E9BB = (En_i ? {OpB_i[31] ^ Op_i[1], OpB_i[30:0]} : {32 {1'sb0}});
+	always @(*) OpB_DP[0] = sv2v_tmp_0E9BB;
+	wire [32:1] sv2v_tmp_8E8DA;
+	assign sv2v_tmp_8E8DA = (En_i ? {OpC_i[31] ^ Op_i[0], OpC_i[30:0]} : {32 {1'sb0}});
+	always @(*) OpC_DP[0] = sv2v_tmp_8E8DA;
+	wire [1:1] sv2v_tmp_1D7A2;
+	assign sv2v_tmp_1D7A2 = En_i;
+	always @(*) En_SP[0] = sv2v_tmp_1D7A2;
+	wire [RND_WIDTH:1] sv2v_tmp_A3B99;
+	assign sv2v_tmp_A3B99 = Rnd_i;
+	always @(*) Rnd_DP[0] = sv2v_tmp_A3B99;
+	wire [1:1] sv2v_tmp_45B84;
+	assign sv2v_tmp_45B84 = En_SP[C_PRE_PIPE_REGS];
+	always @(*) EnPost_SP[0] = sv2v_tmp_45B84;
+	assign Res_o = Res_DP[C_POST_PIPE_REGS];
+	assign Valid_o = EnPost_SP[C_POST_PIPE_REGS];
+	assign Status_o = Status_DP[C_POST_PIPE_REGS];
+	assign Ready_o = 1'b1;
+	wire [STAT_WIDTH:1] sv2v_tmp_04800;
+	assign sv2v_tmp_04800 = {2'b00, status[4], status[3], 1'b0}; // forwards overflow and underflow only; status[5] is not forwarded
+	always @(*) Status_DP[0] = sv2v_tmp_04800;
+	wire [31:0] Res_fmac_D; // an instance output must drive a net, so fmac writes this wire rather than the reg array element
+	always @(*) Res_DP[0] = Res_fmac_D;
+	fmac fp_fma_i( // note the operand order: fmac a <- OpC, b <- OpB, c <- OpA
+		.Operand_a_DI(OpC_DP[C_PRE_PIPE_REGS]),
+		.Operand_b_DI(OpB_DP[C_PRE_PIPE_REGS]),
+		.Operand_c_DI(OpA_DP[C_PRE_PIPE_REGS]),
+		.RM_SI(Rnd_DP[C_PRE_PIPE_REGS]),
+		.Result_DO(Res_fmac_D),
+		.Exp_OF_SO(status[4]),
+		.Exp_UF_SO(status[3]),
+		.Exp_NX_SO(status[5])
+	);
+	genvar i;
+	generate
+		for (i = 1; i <= C_PRE_PIPE_REGS; i = i + 1) begin : g_pre_regs
+			always @(posedge clk_i or negedge rst_ni) begin : p_pre_regs
+				if (~rst_ni) begin
+					En_SP[i] <= 1'sb0;
+					OpA_DP[i] <= 1'sb0;
+					OpB_DP[i] <= 1'sb0;
+					OpC_DP[i] <= 1'sb0;
+					Rnd_DP[i] <= 1'sb0;
+				end
+				else begin
+					En_SP[i] <= En_SP[i - 1];
+					if (En_SP[i - 1]) begin // data registers only update when the previous stage holds a valid operation
+						OpA_DP[i] <= OpA_DP[i - 1];
+						OpB_DP[i] <= OpB_DP[i - 1];
+						OpC_DP[i] <= OpC_DP[i - 1];
+						Rnd_DP[i] <= Rnd_DP[i - 1];
+					end
+				end
+			end
+		end
+	endgenerate
+	genvar j;
+	generate
+		for (j = 1; j <= C_POST_PIPE_REGS; j = j + 1) begin : g_post_regs
+			always @(posedge clk_i or negedge rst_ni) begin : p_post_regs
+				if (~rst_ni) begin
+					EnPost_SP[j] <= 1'sb0;
+					Res_DP[j] <= 1'sb0;
+					Status_DP[j] <= 1'sb0;
+				end
+				else begin
+					EnPost_SP[j] <= EnPost_SP[j - 1];
+					if (EnPost_SP[j - 1]) begin // result/status hold their value while no valid operation arrives
+						Res_DP[j] <= Res_DP[j - 1];
+						Status_DP[j] <= Status_DP[j - 1];
+					end
+				end
+			end
+		end
+	endgenerate
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpexc.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpexc.v
new file mode 100644
index 0000000..a35d275
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpexc.v
@@ -0,0 +1,158 @@
+//`include "fpu_defs.sv"
+//`include "fpu_defs_fmac.sv"
+
+
+
+module fpexc ( // Combinational exception/flag logic: derives the status flags and result overrides from operands and result
+	Mant_a_DI,
+	Mant_b_DI,
+	Exp_a_DI,
+	Exp_b_DI,
+	Sign_a_DI,
+	Sign_b_DI,
+	Mant_norm_DI,
+	Exp_res_DI,
+	Op_SI,
+	Mant_rounded_SI,
+	Exp_OF_SI,
+	Exp_UF_SI,
+	OF_SI,
+	UF_SI,
+	Zero_SI,
+	IX_SI,
+	IV_SI,
+	Inf_SI,
+	Exp_toZero_SO,
+	Exp_toInf_SO,
+	Mant_toZero_SO,
+	OF_SO,
+	UF_SO,
+	Zero_SO,
+	IX_SO,
+	IV_SO,
+	Inf_SO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+// Operation codes compared against Op_SI, followed by further constants (only some are referenced below).
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+	// Port declarations. The *_SI flag inputs are forwarded unchanged when Op_SI selects C_FPU_F2I_CMD.
+	input wire [C_MANT:0] Mant_a_DI; // bit C_MANT sits above the C_MANT fraction bits tested below
+	input wire [C_MANT:0] Mant_b_DI;
+	input wire [C_EXP - 1:0] Exp_a_DI;
+	input wire [C_EXP - 1:0] Exp_b_DI;
+	input wire Sign_a_DI;
+	input wire Sign_b_DI;
+	input wire [C_MANT:0] Mant_norm_DI;
+	input wire [C_EXP - 1:0] Exp_res_DI; // not referenced by the logic below
+	input wire [C_CMD - 1:0] Op_SI;
+	input wire Mant_rounded_SI;
+	input wire Exp_OF_SI;
+	input wire Exp_UF_SI;
+	input wire OF_SI;
+	input wire UF_SI;
+	input wire Zero_SI;
+	input wire IX_SI;
+	input wire IV_SI;
+	input wire Inf_SI;
+	output wire Exp_toZero_SO;
+	output wire Exp_toInf_SO;
+	output wire Mant_toZero_SO;
+	output wire OF_SO;
+	output wire UF_SO;
+	output wire Zero_SO;
+	output wire IX_SO;
+	output reg IV_SO;
+	output wire Inf_SO;
+	wire op_f2i_S = Op_SI == C_FPU_F2I_CMD; // operation decode
+	wire op_i2f_S = Op_SI == C_FPU_I2F_CMD;
+	wire op_mul_S = Op_SI == C_FPU_MUL_CMD;
+	wire op_addsub_S = (Op_SI == C_FPU_ADD_CMD) || (Op_SI == C_FPU_SUB_CMD);
+	// Operand classification from the exponent field and the fraction bits [C_MANT-1:0].
+	wire exp_a_max_S = Exp_a_DI == C_EXP_INF;
+	wire exp_b_max_S = Exp_b_DI == C_EXP_INF;
+	wire frac_a_zero_S = Mant_a_DI[C_MANT - 1:0] == C_MANT_NoHB_ZERO;
+	wire frac_b_zero_S = Mant_b_DI[C_MANT - 1:0] == C_MANT_NoHB_ZERO;
+	wire a_inf_S = exp_a_max_S & frac_a_zero_S;
+	wire b_inf_S = exp_b_max_S & frac_b_zero_S;
+	wire a_nan_S = exp_a_max_S & ~frac_a_zero_S;
+	wire b_nan_S = exp_b_max_S & ~frac_b_zero_S;
+	wire a_zero_S = (Exp_a_DI == C_EXP_ZERO) & (Mant_a_DI == C_MANT_ZERO);
+	wire b_zero_S = (Exp_b_DI == C_EXP_ZERO) & (Mant_b_DI == C_MANT_ZERO);
+	wire res_mant_zero_S = Mant_norm_DI == C_MANT_ZERO;
+	wire any_nan_S = a_nan_S | b_nan_S;
+	wire signs_differ_S = Sign_a_DI ^ Sign_b_DI;
+	wire exp_ovf_S = Exp_OF_SI & ~res_mant_zero_S; // exponent overflow on a non-zero result mantissa
+	reg inf_operand_S; // infinity that follows directly from an infinite operand
+	always @(*) begin // invalid flag and operand-driven infinity, per operation; both default to 0
+		IV_SO = 1'b0;
+		inf_operand_S = 1'b0;
+		if (op_addsub_S) begin
+			if (any_nan_S | ((a_inf_S & b_inf_S) & signs_differ_S))
+				IV_SO = 1'b1;
+			if ((a_inf_S ^ b_inf_S) | ((a_inf_S & b_inf_S) & ~signs_differ_S))
+				inf_operand_S = 1'b1;
+		end
+		else if (op_mul_S) begin
+			if (any_nan_S | ((a_inf_S & b_zero_S) | (b_inf_S & a_zero_S)))
+				IV_SO = 1'b1;
+			if ((a_inf_S & ~b_zero_S) | (b_inf_S & ~a_zero_S))
+				inf_operand_S = 1'b1;
+		end
+		else if (op_f2i_S)
+			IV_SO = IV_SI;
+	end
+	// Flag outputs (F2I forwards the *_SI inputs), then the exponent/mantissa override controls.
+	assign OF_SO = (op_f2i_S ? OF_SI : exp_ovf_S | ((~IV_SO & (a_inf_S ^ b_inf_S)) & ~op_i2f_S));
+	assign UF_SO = (op_f2i_S ? UF_SI : Exp_UF_SI & Mant_rounded_SI);
+	assign Zero_SO = (op_f2i_S ? Zero_SI : res_mant_zero_S & ~IV_SO);
+	assign IX_SO = (op_f2i_S ? IX_SI : Mant_rounded_SI | OF_SO);
+	assign Inf_SO = (op_f2i_S ? Inf_SI : inf_operand_S | exp_ovf_S);
+	assign Exp_toInf_SO = OF_SO | IV_SO;
+	assign Exp_toZero_SO = (op_i2f_S ? a_zero_S & ~Sign_a_DI : Exp_UF_SI | (res_mant_zero_S & ~Exp_toInf_SO));
+	assign Mant_toZero_SO = Inf_SO;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu.v
new file mode 100644
index 0000000..b097343
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu.v
@@ -0,0 +1,130 @@
+module fpu ( // Shell around fpu_core: registers operands, rounding mode and opcode, then forwards the core's result and flags
+	Clk_CI,
+	Rst_RBI,
+	Operand_a_DI,
+	Operand_b_DI,
+	RM_SI,
+	OP_SI,
+	Enable_SI,
+	Stall_SI,
+	Result_DO,
+	OF_SO,
+	UF_SO,
+	Zero_SO,
+	IX_SO,
+	IV_SO,
+	Inf_SO
+);
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+
+parameter C_RM           = 3;
+parameter C_RM_NEAREST   = 3'h0;
+parameter C_RM_TRUNC     = 3'h1;
+parameter C_RM_PLUSINF   = 3'h3;
+parameter C_RM_MINUSINF  = 3'h2;
+parameter C_RM_NEAREST_MAX = 3'h4;
+
+parameter C_PC           = 5;
+
+parameter C_OP           = 32;
+parameter C_MANT         = 23;
+parameter C_EXP          = 8;
+
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_PRENORM = C_MANT*2+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_BIAS         = 10'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_EXP_ZERO     = 8'h00;
+parameter C_EXP_INF      = 8'hff;
+parameter C_MANT_ZERO    = 24'h0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+
+	input wire Clk_CI;
+	input wire Rst_RBI; // asynchronous, active low
+	input wire [C_OP - 1:0] Operand_a_DI;
+	input wire [C_OP - 1:0] Operand_b_DI;
+	input wire [C_RM - 1:0] RM_SI;
+	input wire [C_CMD - 1:0] OP_SI;
+	input wire Enable_SI; // passed to fpu_core unregistered
+	input wire Stall_SI; // when high the input registers below keep their value
+	output wire [C_OP - 1:0] Result_DO;
+	output wire OF_SO;
+	output wire UF_SO;
+	output wire Zero_SO;
+	output wire IX_SO;
+	output wire IV_SO;
+	output wire Inf_SO;
+	reg [C_OP - 1:0] Operand_a_D;
+	reg [C_OP - 1:0] Operand_b_D;
+	reg [C_RM - 1:0] RM_S;
+	reg [C_CMD - 1:0] OP_S;
+	// NOTE(review): RM_S is C_RM=3 bits here; fpu_core's parameter defaults use C_RM=2 with the PLUSINF/MINUSINF codes swapped - confirm the intended mapping.
+	always @(posedge Clk_CI or negedge Rst_RBI)
+		if (~Rst_RBI) begin
+			Operand_a_D <= 1'sb0;
+			Operand_b_D <= 1'sb0;
+			RM_S <= 1'sb0;
+			OP_S <= 1'sb0;
+		end
+		else if (~Stall_SI) begin
+			Operand_a_D <= Operand_a_DI;
+			Operand_b_D <= Operand_b_DI;
+			RM_S <= RM_SI;
+			OP_S <= OP_SI;
+		end
+	wire UF_S;
+	wire OF_S;
+	wire Zero_S;
+	wire IX_S;
+	wire IV_S;
+	wire Inf_S;
+	wire [C_OP - 1:0] Result_D;
+	fpu_core fpcore( // the core's Valid_SO output is left unconnected
+		.Clk_CI(Clk_CI),
+		.Rst_RBI(Rst_RBI),
+		.Enable_SI(Enable_SI),
+		.Operand_a_DI(Operand_a_D),
+		.Operand_b_DI(Operand_b_D),
+		.RM_SI(RM_S),
+		.OP_SI(OP_S),
+		// Stall_SI is not forwarded: fpu_core's port list has no Stall_SI, so stalling only freezes the input registers above.
+		.Result_DO(Result_D),
+		.OF_SO(OF_S),
+		.UF_SO(UF_S),
+		.Zero_SO(Zero_S),
+		.IX_SO(IX_S),
+		.IV_SO(IV_S),
+		.Inf_SO(Inf_S)
+	);
+	assign Result_DO = Result_D;
+	assign OF_SO = OF_S;
+	assign UF_SO = UF_S;
+	assign Zero_SO = Zero_S;
+	assign IX_SO = IX_S;
+	assign IV_SO = IV_S;
+	assign Inf_SO = Inf_S;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_add.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_add.v
new file mode 100644
index 0000000..bb6c6c5
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_add.v
@@ -0,0 +1,158 @@
+module fpu_add ( // Mantissa adder/subtractor: aligns the two operands and produces an un-normalised sign/exponent/mantissa
+	Sign_a_DI,
+	Sign_b_DI,
+	Exp_a_DI,
+	Exp_b_DI,
+	Mant_a_DI,
+	Mant_b_DI,
+	Sign_prenorm_DO,
+	Exp_prenorm_DO,
+	Mant_prenorm_DO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT*2+2; // was C_MANT+1, which truncated the 48-bit value built for Mant_prenorm_D to its low 24 bits
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+	input wire Sign_a_DI;
+	input wire Sign_b_DI;
+	input wire [C_EXP - 1:0] Exp_a_DI;
+	input wire [C_EXP - 1:0] Exp_b_DI;
+	input wire [C_MANT:0] Mant_a_DI; // C_MANT+1 bits
+	input wire [C_MANT:0] Mant_b_DI;
+	output wire Sign_prenorm_DO; // sign of the operand selected as larger (see the Sign_norm_D logic)
+	output wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_DO; // larger input exponent, zero-extended by two bits
+	output wire [C_MANT_PRENORM - 1:0] Mant_prenorm_DO; // {carry, 27-bit adder result, 20 zero bits}
+	wire Sign_a_D;
+	wire Sign_b_D;
+	wire [C_EXP - 1:0] Exp_a_D;
+	wire [C_EXP - 1:0] Exp_b_D;
+	wire [C_MANT:0] Mant_a_D;
+	wire [C_MANT:0] Mant_b_D;
+	reg Sign_norm_D;
+	assign Sign_a_D = Sign_a_DI;
+	assign Sign_b_D = Sign_b_DI;
+	assign Exp_a_D = Exp_a_DI;
+	assign Exp_b_D = Exp_b_DI;
+	assign Mant_a_D = Mant_a_DI;
+	assign Mant_b_D = Mant_b_DI;
+	wire Exp_agtb_S;
+	wire Exp_equal_S;
+	reg [C_EXP - 1:0] Exp_diff_D; // absolute exponent difference = alignment shift distance
+	reg [C_EXP - 1:0] Exp_prenorm_D;
+	assign Exp_agtb_S = Exp_a_D > Exp_b_D;
+	assign Exp_equal_S = Exp_diff_D == 0;
+	always @(*)
+		if (Exp_agtb_S) begin
+			Exp_diff_D = Exp_a_D - Exp_b_D;
+			Exp_prenorm_D = Exp_a_D;
+		end
+		else begin // also taken for equal exponents: b is then the operand that is not shifted
+			Exp_diff_D = Exp_b_D - Exp_a_D;
+			Exp_prenorm_D = Exp_b_D;
+		end
+	wire Mant_agtb_S;
+	wire [C_MANT_SHIFTIN - 1:0] Mant_shiftIn_D;
+	wire [C_MANT_SHIFTED - 1:0] Mant_shifted_D;
+	reg Mant_sticky_D;
+	wire [C_MANT_SHIFTED - 1:0] Mant_unshifted_D;
+	wire [C_MANT_ADDIN - 1:0] Mant_addInA_D;
+	wire [C_MANT_ADDIN - 1:0] Mant_addInB_D;
+	wire [C_MANT_ADDOUT - 1:0] Mant_addOut_D;
+	wire [C_MANT_PRENORM - 1:0] Mant_prenorm_D;
+	wire Mant_addCarryIn_D;
+	reg Mant_invA_S;
+	reg Mant_invB_S;
+	wire Subtract_S;
+	assign Mant_agtb_S = Mant_a_D > Mant_b_D;
+	assign Mant_unshifted_D = {(Exp_agtb_S ? Mant_a_D : Mant_b_D), 3'b000}; // operand with the larger exponent, padded with three low zero bits
+	assign Mant_shiftIn_D = {(Exp_agtb_S ? Mant_b_D : Mant_a_D), 2'b00}; // the other operand, to be shifted right by Exp_diff_D
+	always @(*) begin // sticky = OR of every bit that the right shift below pushes out
+		Mant_sticky_D = 1'b0;
+		if (Exp_diff_D >= (C_MANT + 3))
+			Mant_sticky_D = |Mant_shiftIn_D;
+		else
+			Mant_sticky_D = |(Mant_shiftIn_D << ((C_MANT + 3) - Exp_diff_D));
+	end
+	assign Mant_shifted_D = {Mant_shiftIn_D >> Exp_diff_D, Mant_sticky_D};
+	always @(*) begin // on subtraction exactly one addend is inverted; addend A is the shifted operand, B the unshifted one
+		Mant_invA_S = 1'sb0;
+		Mant_invB_S = 1'sb0;
+		if (Subtract_S)
+			if (Exp_agtb_S)
+				Mant_invA_S = 1'b1;
+			else if (Exp_equal_S) begin
+				if (Mant_agtb_S)
+					Mant_invB_S = 1'b1;
+				else
+					Mant_invA_S = 1'b1;
+			end
+			else
+				Mant_invA_S = 1'b1;
+	end
+	assign Mant_addCarryIn_D = Subtract_S; // +1 completes the two's complement of the inverted addend
+	assign Mant_addInA_D = (Mant_invA_S ? ~Mant_shifted_D : Mant_shifted_D);
+	assign Mant_addInB_D = (Mant_invB_S ? ~Mant_unshifted_D : Mant_unshifted_D);
+	assign Mant_addOut_D = (Mant_addInA_D + Mant_addInB_D) + Mant_addCarryIn_D;
+	assign Mant_prenorm_D = {Mant_addOut_D[C_MANT_ADDOUT - 1] & ~Subtract_S, Mant_addOut_D[C_MANT_ADDOUT - 2:0], 20'b00000000000000000000}; // 1 + 27 + 20 = 48 bits; the adder's top bit is kept only for additions
+	assign Subtract_S = Sign_a_D ^ Sign_b_D;
+	always @(*) begin // result sign: a when a has the larger exponent, or equal exponents and the larger mantissa; otherwise b
+		Sign_norm_D = 1'b0;
+		if (Exp_agtb_S)
+			Sign_norm_D = Sign_a_D;
+		else if (Exp_equal_S) begin
+			if (Mant_agtb_S)
+				Sign_norm_D = Sign_a_D;
+			else
+				Sign_norm_D = Sign_b_D;
+		end
+		else
+			Sign_norm_D = Sign_b_D;
+	end
+	assign Sign_prenorm_DO = Sign_norm_D;
+	assign Exp_prenorm_DO = $signed({2'b00, Exp_prenorm_D});
+	assign Mant_prenorm_DO = Mant_prenorm_D;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_core.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_core.v
new file mode 100644
index 0000000..1a164c0
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_core.v
@@ -0,0 +1,283 @@
+module fpu_core ( // FPU datapath: registers the inputs once, then runs add/sub, mul, int->float and float->int combinationally
+	Clk_CI, // clock for the input register stage
+	Rst_RBI, // asynchronous reset, active low
+	Enable_SI, // request strobe; reappears one cycle later as Valid_SO
+	Operand_a_DI,
+	Operand_b_DI,
+	RM_SI, // rounding mode, forwarded to the normalizer
+	OP_SI, // operation select, compared against the C_FPU_*_CMD codes
+	Result_DO,
+	Valid_SO,
+	OF_SO,
+	UF_SO,
+	Zero_SO,
+	IX_SO,
+	IV_SO,
+	Inf_SO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT*2+2; // was C_MANT+1: the pre-normalisation mantissa must hold the full (C_MANT+1)x(C_MANT+1) product
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+	input wire Clk_CI;
+	input wire Rst_RBI;
+	input wire Enable_SI;
+	input wire [C_OP - 1:0] Operand_a_DI;
+	input wire [C_OP - 1:0] Operand_b_DI;
+	input wire [C_RM - 1:0] RM_SI;
+	input wire [C_CMD - 1:0] OP_SI;
+	output wire [C_OP - 1:0] Result_DO;
+	output wire Valid_SO;
+	output wire OF_SO;
+	output wire UF_SO;
+	output wire Zero_SO;
+	output wire IX_SO;
+	output wire IV_SO;
+	output wire Inf_SO;
+	wire Sign_a_D;
+	wire Sign_b_D;
+	wire [C_EXP - 1:0] Exp_a_D;
+	wire [C_EXP - 1:0] Exp_b_D;
+	wire [C_MANT:0] Mant_a_D;
+	wire [C_MANT:0] Mant_b_D;
+	wire Hb_a_D;
+	wire Hb_b_D;
+	reg signed [C_EXP_PRENORM - 1:0] Exp_prenorm_D;
+	reg [C_MANT_PRENORM - 1:0] Mant_prenorm_D;
+	reg Sign_norm_D;
+	wire [C_EXP - 1:0] Exp_norm_D;
+	wire [C_MANT:0] Mant_norm_D;
+	wire [C_OP - 1:0] Result_D;
+	wire Sign_res_D;
+	reg [C_EXP - 1:0] Exp_res_D;
+	wire [C_MANT:0] Mant_res_D;
+	reg [C_RM - 1:0] RM_SP;
+	reg [C_CMD - 1:0] OP_SP;
+	reg [C_OP - 1:0] Operand_a_DP;
+	reg [C_OP - 1:0] Operand_b_DP;
+	reg Enable_SP;
+	always @(posedge Clk_CI or negedge Rst_RBI) // input register stage
+		if (~Rst_RBI) begin
+			Operand_a_DP <= 1'sb0;
+			Operand_b_DP <= 1'sb0;
+			OP_SP <= 1'sb0;
+			RM_SP <= 1'sb0;
+			Enable_SP <= 1'sb0;
+		end
+		else begin
+			Operand_a_DP <= Operand_a_DI;
+			Operand_b_DP <= Operand_b_DI;
+			OP_SP <= OP_SI;
+			RM_SP <= RM_SI;
+			Enable_SP <= Enable_SI;
+		end
+	assign Valid_SO = Enable_SP; // the result is valid one cycle after the request
+	assign Sign_a_D = Operand_a_DP[C_OP - 1];
+	assign Sign_b_D = (OP_SP == C_FPU_SUB_CMD ? ~Operand_b_DP[C_OP - 1] : Operand_b_DP[C_OP - 1]); // SUB is ADD with the sign of b flipped
+	assign Exp_a_D = Operand_a_DP[C_OP - 2:C_MANT];
+	assign Exp_b_D = Operand_b_DP[C_OP - 2:C_MANT];
+	assign Mant_a_D = {Hb_a_D, Operand_a_DP[C_MANT - 1:0]}; // mantissa with the hidden bit prepended
+	assign Mant_b_D = {Hb_b_D, Operand_b_DP[C_MANT - 1:0]};
+	assign Hb_a_D = |Exp_a_D; // hidden bit is 1 whenever the exponent field is non-zero
+	assign Hb_b_D = |Exp_b_D;
+	wire Sign_prenorm_add_D;
+	wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_add_D;
+	wire [C_MANT_PRENORM - 1:0] Mant_prenorm_add_D;
+	wire EnableAdd_S;
+	assign EnableAdd_S = Enable_SP & ((OP_SP == C_FPU_ADD_CMD) | (OP_SP == C_FPU_SUB_CMD));
+	fpu_add #(.C_MANT_PRENORM(C_MANT_PRENORM)) adder( // width passed down explicitly; assumes fpu_add declares C_MANT_PRENORM as a parameter like its siblings - TODO confirm
+		.Sign_a_DI((EnableAdd_S ? Sign_a_D : 1'b0)), // operands are zeroed when this unit is not selected
+		.Sign_b_DI((EnableAdd_S ? Sign_b_D : 1'b0)),
+		.Exp_a_DI((EnableAdd_S ? Exp_a_D : {C_EXP {1'sb0}})),
+		.Exp_b_DI((EnableAdd_S ? Exp_b_D : {C_EXP {1'sb0}})),
+		.Mant_a_DI((EnableAdd_S ? Mant_a_D : {(C_MANT >= 0 ? C_MANT + 1 : 1 - C_MANT) {1'sb0}})),
+		.Mant_b_DI((EnableAdd_S ? Mant_b_D : {(C_MANT >= 0 ? C_MANT + 1 : 1 - C_MANT) {1'sb0}})),
+		.Sign_prenorm_DO(Sign_prenorm_add_D),
+		.Exp_prenorm_DO(Exp_prenorm_add_D),
+		.Mant_prenorm_DO(Mant_prenorm_add_D)
+	);
+	wire Sign_prenorm_mult_D;
+	wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_mult_D;
+	wire [C_MANT_PRENORM - 1:0] Mant_prenorm_mult_D;
+	wire EnableMult_S;
+	assign EnableMult_S = Enable_SP & (OP_SP == C_FPU_MUL_CMD);
+	fpu_mult #(.C_MANT_PRENORM(C_MANT_PRENORM)) multiplier( // override keeps the full-width product
+		.Sign_a_DI((EnableMult_S ? Sign_a_D : 1'b0)),
+		.Sign_b_DI((EnableMult_S ? Sign_b_D : 1'b0)),
+		.Exp_a_DI((EnableMult_S ? Exp_a_D : {C_EXP {1'sb0}})),
+		.Exp_b_DI((EnableMult_S ? Exp_b_D : {C_EXP {1'sb0}})),
+		.Mant_a_DI((EnableMult_S ? Mant_a_D : {(C_MANT >= 0 ? C_MANT + 1 : 1 - C_MANT) {1'sb0}})),
+		.Mant_b_DI((EnableMult_S ? Mant_b_D : {(C_MANT >= 0 ? C_MANT + 1 : 1 - C_MANT) {1'sb0}})),
+		.Sign_prenorm_DO(Sign_prenorm_mult_D),
+		.Exp_prenorm_DO(Exp_prenorm_mult_D),
+		.Mant_prenorm_DO(Mant_prenorm_mult_D)
+	);
+	wire Sign_prenorm_itof_D;
+	wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_itof_D;
+	wire [C_MANT_PRENORM - 1:0] Mant_prenorm_itof_D;
+	wire EnableITOF_S;
+	assign EnableITOF_S = Enable_SP & (OP_SP == C_FPU_I2F_CMD);
+	fpu_itof #(.C_MANT_PRENORM(C_MANT_PRENORM)) int2fp( // override keeps every integer bit plus the padding
+		.Operand_a_DI((EnableITOF_S ? Operand_a_DP : {C_OP {1'sb0}})),
+		.Sign_prenorm_DO(Sign_prenorm_itof_D),
+		.Exp_prenorm_DO(Exp_prenorm_itof_D),
+		.Mant_prenorm_DO(Mant_prenorm_itof_D)
+	);
+	wire [C_OP - 1:0] Result_ftoi_D;
+	wire UF_ftoi_S;
+	wire OF_ftoi_S;
+	wire Zero_ftoi_S;
+	wire IX_ftoi_S;
+	wire IV_ftoi_S;
+	wire Inf_ftoi_S;
+	wire EnableFTOI_S;
+	assign EnableFTOI_S = Enable_SP & (OP_SP == C_FPU_F2I_CMD);
+	fpu_ftoi fp2int( // float->int produces the final result directly and bypasses the normalizer
+		.Sign_a_DI((EnableFTOI_S ? Sign_a_D : 1'b0)),
+		.Exp_a_DI((EnableFTOI_S ? Exp_a_D : {C_EXP {1'sb0}})),
+		.Mant_a_DI((EnableFTOI_S ? Mant_a_D : {(C_MANT >= 0 ? C_MANT + 1 : 1 - C_MANT) {1'sb0}})),
+		.Result_DO(Result_ftoi_D),
+		.UF_SO(UF_ftoi_S),
+		.OF_SO(OF_ftoi_S),
+		.Zero_SO(Zero_ftoi_S),
+		.IX_SO(IX_ftoi_S),
+		.IV_SO(IV_ftoi_S),
+		.Inf_SO(Inf_ftoi_S)
+	);
+	wire Mant_rounded_S;
+	wire Exp_OF_S;
+	wire Exp_UF_S;
+	always @(*) begin // select which unit feeds the shared normalizer
+		Sign_norm_D = 1'sb0; // defaults cover every opcode not listed in the case below
+		Exp_prenorm_D = 1'sb0;
+		Mant_prenorm_D = 1'sb0;
+		case (OP_SP)
+			C_FPU_ADD_CMD, C_FPU_SUB_CMD: begin
+				Sign_norm_D = Sign_prenorm_add_D;
+				Exp_prenorm_D = Exp_prenorm_add_D;
+				Mant_prenorm_D = Mant_prenorm_add_D;
+			end
+			C_FPU_MUL_CMD: begin
+				Sign_norm_D = Sign_prenorm_mult_D;
+				Exp_prenorm_D = Exp_prenorm_mult_D;
+				Mant_prenorm_D = Mant_prenorm_mult_D;
+			end
+			C_FPU_I2F_CMD: begin
+				Sign_norm_D = Sign_prenorm_itof_D;
+				Exp_prenorm_D = Exp_prenorm_itof_D;
+				Mant_prenorm_D = Mant_prenorm_itof_D;
+			end
+		endcase
+	end
+	fpu_norm #(.C_MANT_PRENORM(C_MANT_PRENORM)) normalizer( // must use the same mantissa width as the producers above
+		.Mant_in_DI(Mant_prenorm_D),
+		.Exp_in_DI(Exp_prenorm_D),
+		.Sign_in_DI(Sign_norm_D),
+		.RM_SI(RM_SP),
+		.OP_SI(OP_SP),
+		.Mant_res_DO(Mant_norm_D),
+		.Exp_res_DO(Exp_norm_D),
+		.Rounded_SO(Mant_rounded_S),
+		.Exp_OF_SO(Exp_OF_S),
+		.Exp_UF_SO(Exp_UF_S)
+	);
+	wire UF_S;
+	wire OF_S;
+	wire Zero_S;
+	wire IX_S;
+	wire IV_S;
+	wire Inf_S;
+	wire Exp_toZero_S;
+	wire Exp_toInf_S;
+	wire Mant_toZero_S;
+	fpexc except( // receives operands, normalizer outputs and the float->int flags; drives the final flags and the result overrides
+		.Mant_a_DI(Mant_a_D),
+		.Mant_b_DI(Mant_b_D),
+		.Exp_a_DI(Exp_a_D),
+		.Exp_b_DI(Exp_b_D),
+		.Sign_a_DI(Sign_a_D),
+		.Sign_b_DI(Sign_b_D),
+		.Mant_norm_DI(Mant_norm_D),
+		.Exp_res_DI(Exp_norm_D),
+		.Op_SI(OP_SP),
+		.UF_SI(UF_ftoi_S),
+		.OF_SI(OF_ftoi_S),
+		.Zero_SI(Zero_ftoi_S),
+		.IX_SI(IX_ftoi_S),
+		.IV_SI(IV_ftoi_S),
+		.Inf_SI(Inf_ftoi_S),
+		.Mant_rounded_SI(Mant_rounded_S),
+		.Exp_OF_SI(Exp_OF_S),
+		.Exp_UF_SI(Exp_UF_S),
+		.Exp_toZero_SO(Exp_toZero_S),
+		.Exp_toInf_SO(Exp_toInf_S),
+		.Mant_toZero_SO(Mant_toZero_S),
+		.UF_SO(UF_S),
+		.OF_SO(OF_S),
+		.Zero_SO(Zero_S),
+		.IX_SO(IX_S),
+		.IV_SO(IV_S),
+		.Inf_SO(Inf_S)
+	);
+	assign Sign_res_D = (Zero_S ? 1'b0 : Sign_norm_D); // a zero result is always given a positive sign
+	always @(*) begin // exponent override; forcing to zero takes priority over forcing to infinity
+		Exp_res_D = Exp_norm_D;
+		if (Exp_toZero_S)
+			Exp_res_D = C_EXP_ZERO;
+		else if (Exp_toInf_S)
+			Exp_res_D = C_EXP_INF;
+	end
+	assign Mant_res_D = (Mant_toZero_S ? C_MANT_ZERO : Mant_norm_D);
+	assign Result_D = (IV_S ? F_QNAN : (OP_SP == C_FPU_F2I_CMD ? Result_ftoi_D : {Sign_res_D, Exp_res_D, Mant_res_D[C_MANT - 1:0]})); // invalid -> F_QNAN; float->int uses its own result; otherwise repack sign/exponent/fraction
+	assign Result_DO = Result_D;
+	assign UF_SO = UF_S;
+	assign OF_SO = OF_S;
+	assign Zero_SO = Zero_S;
+	assign IX_SO = IX_S;
+	assign IV_SO = IV_S;
+	assign Inf_SO = Inf_S;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_defs.sv b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_defs.sv
new file mode 100644
index 0000000..5e49413
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_defs.sv
@@ -0,0 +1,105 @@
+// Copyright 2017 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+///////////////////////////////////////////////////////////////////////////////
+// This file contains all FPU parameters                                     //
+//                                                                           //
+// Authors    : Michael Gautschi  (gautschi@iis.ee.ethz.ch)                  //
+//              Michael Schaffner (schaffner@iis.ee.ethz.ch)                 //
+//              Lei Li            (lile@iis.ee.ethz.ch)                      //
+// Copyright (c) 2015 Integrated Systems Laboratory, ETH Zurich              //
+///////////////////////////////////////////////////////////////////////////////
+
+
+package fpu_defs; // shared FPU constants: opcodes, rounding modes and datapath widths
+
+   // op command
+   parameter C_CMD               = 4; // opcode width in bits
+   parameter C_FPU_ADD_CMD       = 4'h0;
+   parameter C_FPU_SUB_CMD       = 4'h1;
+   parameter C_FPU_MUL_CMD       = 4'h2;
+   parameter C_FPU_DIV_CMD       = 4'h3;
+   parameter C_FPU_I2F_CMD       = 4'h4;
+   parameter C_FPU_F2I_CMD       = 4'h5;
+   parameter C_FPU_SQRT_CMD      = 4'h6;
+   parameter C_FPU_NOP_CMD       = 4'h7;
+   parameter C_FPU_FMADD_CMD     = 4'h8;
+   parameter C_FPU_FMSUB_CMD     = 4'h9;
+   parameter C_FPU_FNMADD_CMD    = 4'hA;
+   parameter C_FPU_FNMSUB_CMD    = 4'hB;
+   
+   parameter C_RM           = 3; // rounding-mode width in bits
+   parameter C_RM_NEAREST   = 3'h0;
+   parameter C_RM_TRUNC     = 3'h1;
+   parameter C_RM_PLUSINF   = 3'h3;
+   parameter C_RM_MINUSINF  = 3'h2;
+   parameter C_RM_NEAREST_MAX = 3'h4;
+
+   parameter C_PC           = 5;
+   
+
+// to be verified if it works in half precision mode!!!
+//`define HALFPREC
+
+`ifdef HALFPREC
+   parameter C_OP           = 16;
+   parameter C_MANT         = 10;
+   parameter C_EXP          = 5;
+
+   parameter C_EXP_PRENORM  = 7;
+   parameter C_MANT_PRENORM = 22;
+   parameter C_MANT_ADDIN   = 14;
+   parameter C_MANT_ADDOUT  = 15;
+   parameter C_MANT_SHIFTIN = 13;
+   parameter C_MANT_SHIFTED = 14;
+   parameter C_MANT_INT     = 15;
+   parameter C_INF          = 32'h7fff;
+   parameter C_MINF         = 32'h8000;
+   parameter C_EXP_SHIFT    = 7;
+   parameter C_SHIFT_BIAS   = 6'd15;
+   parameter C_BIAS         = 7'd15;
+   parameter C_UNKNOWN      = 8'd157;
+   parameter C_PADMANT      = 6'b0;
+   parameter C_EXP_ZERO     = 5'h00;
+   parameter C_EXP_INF      = 5'h1f; // was 5'hff: same value after truncation, now written so it fits in 5 bits
+   parameter C_MANT_ZERO    = 11'h0;
+   parameter C_MANT_NoHB_ZERO   = 10'h0;
+   parameter C_MANT_PRENORM_IND = 5;
+   parameter F_QNAN         =16'h7E00;
+
+`else
+   parameter C_OP           = 32; // operand width
+   parameter C_MANT         = 23; // stored fraction bits (without the hidden bit)
+   parameter C_EXP          = 8; // exponent bits
+
+   parameter C_EXP_PRENORM  = C_EXP+2;
+   parameter C_MANT_PRENORM = C_MANT*2+2; // 48 bits: twice the (C_MANT+1)-bit mantissa
+   parameter C_MANT_ADDIN   = C_MANT+4;
+   parameter C_MANT_ADDOUT  = C_MANT+5;
+   parameter C_MANT_SHIFTIN = C_MANT+3;
+   parameter C_MANT_SHIFTED = C_MANT+4;
+   parameter C_MANT_INT     = C_OP-1;
+   parameter C_INF          = 32'h7fffffff;
+   parameter C_MINF         = 32'h80000000;
+   parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+   parameter C_SHIFT_BIAS   = 9'd127;
+   parameter C_BIAS         = 10'd127;
+   parameter C_UNKNOWN      = 8'd157;
+   parameter C_PADMANT      = 16'b0;
+   parameter C_EXP_ZERO     = 8'h00;
+   parameter C_EXP_INF      = 8'hff;
+   parameter C_MANT_ZERO    = 24'h0;
+   parameter C_MANT_NoHB_ZERO   = 23'h0;
+   parameter C_MANT_PRENORM_IND = 6;
+   parameter F_QNAN         =32'h7FC00000;
+`endif
+
+   parameter C_FFLAG         = 5;
+
+endpackage : fpu_defs
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_ftoi.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_ftoi.v
new file mode 100644
index 0000000..789b85c
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_ftoi.v
@@ -0,0 +1,98 @@
+module fpu_ftoi ( // Combinational float -> signed integer conversion that saturates on overflow
+	Sign_a_DI,
+	Exp_a_DI,
+	Mant_a_DI,
+	Result_DO,
+	OF_SO,
+	UF_SO,
+	Zero_SO,
+	IX_SO,
+	IV_SO,
+	Inf_SO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+	input wire Sign_a_DI;
+	input wire [C_EXP - 1:0] Exp_a_DI;
+	input wire [C_MANT:0] Mant_a_DI; // C_MANT+1 bits; presumably includes the hidden bit - confirm against the instantiating module
+	output wire [C_OP - 1:0] Result_DO;
+	output wire OF_SO;
+	output wire UF_SO;
+	output wire Zero_SO;
+	output wire IX_SO;
+	output wire IV_SO;
+	output wire Inf_SO;
+	wire Sign_a_D;
+	wire [C_EXP - 1:0] Exp_a_D;
+	wire [C_MANT:0] Mant_a_D;
+	wire [C_OP - 1:0] Result_D;
+	assign Sign_a_D = Sign_a_DI;
+	assign Exp_a_D = Exp_a_DI;
+	assign Mant_a_D = Mant_a_DI;
+	wire signed [C_EXP_SHIFT - 1:0] Shift_amount_D;
+	wire [(C_MANT + C_OP) - 2:0] Temp_shift_D;
+	wire [C_OP - 1:0] Temp_twos_D;
+	wire Shift_amount_neg_S;
+	wire Result_zero_S;
+	wire Input_zero_S;
+	assign Shift_amount_D = $signed({1'b0, Exp_a_D}) - $signed(C_SHIFT_BIAS); // exponent with C_SHIFT_BIAS removed
+	assign Shift_amount_neg_S = Shift_amount_D[C_EXP_SHIFT - 1]; // sign bit of the shift amount
+	assign Temp_shift_D = (Shift_amount_neg_S ? {(((C_MANT + C_OP) - 2) >= 0 ? (C_MANT + C_OP) - 1 : 3 - (C_MANT + C_OP)) {1'sb0}} : Mant_a_D << Shift_amount_D); // all zeros for a negative shift, otherwise the mantissa shifted left; bits above C_MANT are the integer part
+	assign Temp_twos_D = ~{1'b0, Temp_shift_D[(C_MANT + C_OP) - 2:C_MANT]} + 1'b1; // two's-complement negation of the integer part
+	assign Result_D = (OF_SO ? (Sign_a_D ? C_MINF : C_INF) : (Sign_a_D ? Temp_twos_D : {Sign_a_D, Temp_shift_D[(C_MANT + C_OP) - 2:C_MANT]})); // saturate to C_MINF/C_INF on overflow, otherwise the signed integer part
+	assign Result_DO = Result_D;
+	assign Result_zero_S = ~|Result_D;
+	assign Input_zero_S = ~|{Exp_a_D, Mant_a_D};
+	assign UF_SO = 1'b0; // underflow is never raised by this unit
+	assign OF_SO = Shift_amount_D > (C_OP - 2); // overflow when the shift amount exceeds C_OP-2
+	assign Zero_SO = Result_zero_S & ~OF_SO;
+	assign IX_SO = ((|Temp_shift_D[C_MANT - 1:0] | Shift_amount_neg_S) | OF_SO) & ~Input_zero_S; // inexact: fraction bits dropped, negative shift, or overflow, and the input is not zero
+	assign IV_SO = &Exp_a_D && |Mant_a_D; // invalid: exponent all ones and any mantissa bit set
+	assign Inf_SO = 1'b0; // infinity flag is never raised by this unit
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_itof.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_itof.v
new file mode 100644
index 0000000..6293c3c
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_itof.v
@@ -0,0 +1,80 @@
+module fpu_itof ( // Combinational signed integer -> pre-normalised float (sign, fixed exponent, magnitude)
+	Operand_a_DI,
+	Sign_prenorm_DO,
+	Exp_prenorm_DO,
+	Mant_prenorm_DO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT*2+2; // was C_MANT+1, which truncated the 1+C_MANT_INT+16 bit concatenation below to its low 24 bits
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+	input wire [C_OP - 1:0] Operand_a_DI; // two's-complement integer operand
+	output wire Sign_prenorm_DO;
+	output wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_DO;
+	output wire [C_MANT_PRENORM - 1:0] Mant_prenorm_DO;
+	wire [C_OP - 1:0] Operand_a_D;
+	wire Sign_int_D;
+	wire Sign_prenorm_D;
+	wire [C_MANT_INT - 1:0] Mant_int_D;
+	wire [C_OP - 1:0] Temp_twos_to_unsigned_D;
+	wire [C_MANT_PRENORM - 1:0] Mant_prenorm_D;
+	wire Hb_a_D; // declared but not used in this module
+	wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_D;
+	assign Operand_a_D = Operand_a_DI;
+	assign Sign_int_D = Operand_a_D[C_OP - 1];
+	assign Mant_int_D = Operand_a_D[C_MANT_INT - 1:0];
+	wire Twos_to_unsigned_zero; // declared but not used in this module
+	assign Temp_twos_to_unsigned_D = ~Operand_a_D + 1'b1; // negation, used as the magnitude of a negative operand
+	wire Twos_to_unsigned_zero_D;
+	assign Twos_to_unsigned_zero_D = ~(|Temp_twos_to_unsigned_D[C_MANT_INT - 1:0]); // low bits all zero; for a negative operand this only happens at the most negative value
+	assign Sign_prenorm_D = Sign_int_D;
+	assign Exp_prenorm_D = $signed({2'd0, C_UNKNOWN}); // fixed exponent C_UNKNOWN; the downstream normalizer is expected to adjust it
+	assign Mant_prenorm_D = (Sign_int_D ? {Twos_to_unsigned_zero_D, Temp_twos_to_unsigned_D[C_MANT_INT - 1:0], C_PADMANT} : {1'b0, Mant_int_D, C_PADMANT}); // magnitude followed by the C_PADMANT zero padding
+	assign Sign_prenorm_DO = Sign_prenorm_D;
+	assign Exp_prenorm_DO = Exp_prenorm_D;
+	assign Mant_prenorm_DO = Mant_prenorm_D;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_mult.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_mult.v
new file mode 100644
index 0000000..e010b3d
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_mult.v
@@ -0,0 +1,90 @@
+module fpu_mult ( // Combinational multiplier core: XOR of signs, biased exponent sum, full mantissa product
+	Sign_a_DI,
+	Sign_b_DI,
+	Exp_a_DI,
+	Exp_b_DI,
+	Mant_a_DI,
+	Mant_b_DI,
+	Sign_prenorm_DO,
+	Exp_prenorm_DO,
+	Mant_prenorm_DO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT*2+2; // was C_MANT+1, which made the product below evaluate at 24 bits and lose its upper half
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+	input wire Sign_a_DI;
+	input wire Sign_b_DI;
+	input wire [C_EXP - 1:0] Exp_a_DI;
+	input wire [C_EXP - 1:0] Exp_b_DI;
+	input wire [C_MANT:0] Mant_a_DI; // C_MANT+1 bits each
+	input wire [C_MANT:0] Mant_b_DI;
+	output wire Sign_prenorm_DO;
+	output wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_DO;
+	output wire [C_MANT_PRENORM - 1:0] Mant_prenorm_DO; // unnormalised product, 2*(C_MANT+1) bits
+	wire Sign_a_D;
+	wire Sign_b_D;
+	wire Sign_prenorm_D;
+	wire [C_EXP - 1:0] Exp_a_D;
+	wire [C_EXP - 1:0] Exp_b_D;
+	wire [C_MANT:0] Mant_a_D;
+	wire [C_MANT:0] Mant_b_D;
+	wire signed [C_EXP_PRENORM - 1:0] Exp_prenorm_D;
+	wire [C_MANT_PRENORM - 1:0] Mant_prenorm_D;
+	assign Sign_a_D = Sign_a_DI;
+	assign Sign_b_D = Sign_b_DI;
+	assign Exp_a_D = Exp_a_DI;
+	assign Exp_b_D = Exp_b_DI;
+	assign Mant_a_D = Mant_a_DI;
+	assign Mant_b_D = Mant_b_DI;
+	assign Sign_prenorm_D = Sign_a_D ^ Sign_b_D; // result is negative when exactly one operand is negative
+	assign Exp_prenorm_D = ($signed({2'b00, Exp_a_D}) + $signed({2'b00, Exp_b_D})) - $signed(C_BIAS); // sum of biased exponents minus one bias, in a signed field two bits wider than C_EXP
+	assign Mant_prenorm_D = Mant_a_D * Mant_b_D; // evaluated at the width of Mant_prenorm_D, so that wire must be 2*(C_MANT+1) bits wide
+	assign Sign_prenorm_DO = Sign_prenorm_D;
+	assign Exp_prenorm_DO = Exp_prenorm_D;
+	assign Mant_prenorm_DO = Mant_prenorm_D;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_norm.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_norm.v
new file mode 100644
index 0000000..3e9b993
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_norm.v
@@ -0,0 +1,144 @@
+module fpu_norm ( // Combinational normaliser and rounder for a pre-normalised sign/exponent/mantissa triple
+	Mant_in_DI,
+	Exp_in_DI,
+	Sign_in_DI,
+	RM_SI,
+	OP_SI,
+	Mant_res_DO,
+	Exp_res_DO,
+	Rounded_SO,
+	Exp_OF_SO,
+	Exp_UF_SO
+);
+parameter C_RM            = 2;
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2; // NOTE(review): fpu_defs.sv encodes PLUSINF as 3 and MINUSINF as 2 - confirm which encoding the callers drive
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT*2+2; // was C_MANT+1; the 6-bit C_MANT_PRENORM_IND index and the 2*(C_MANT+1)-bit mantissa sources need the wide form
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+	input wire [C_MANT_PRENORM - 1:0] Mant_in_DI;
+	input wire signed [C_EXP_PRENORM - 1:0] Exp_in_DI;
+	input wire Sign_in_DI;
+	input wire [C_RM - 1:0] RM_SI;
+	input wire [C_CMD - 1:0] OP_SI;
+	output wire [C_MANT:0] Mant_res_DO;
+	output wire [C_EXP - 1:0] Exp_res_DO;
+	output wire Rounded_SO;
+	output reg Exp_OF_SO;
+	output reg Exp_UF_SO;
+	wire [C_MANT_PRENORM_IND - 1:0] Mant_leadingOne_D;
+	wire Mant_zero_S;
+	reg [C_MANT + 4:0] Mant_norm_D;
+	wire signed [C_EXP_PRENORM - 1:0] Exp_norm_D;
+	wire signed [C_EXP_PRENORM - 1:0] Mant_shAmt_D;
+	wire signed [C_EXP_PRENORM:0] Mant_shAmt2_D;
+	wire [C_EXP - 1:0] Exp_final_D;
+	wire signed [C_EXP_PRENORM - 1:0] Exp_rounded_D;
+	reg Mant_sticky_D;
+	wire Denormal_S;
+	wire Mant_renorm_S;
+	fpu_ff #(.LEN(C_MANT_PRENORM)) LOD( // leading-one detector over the incoming mantissa
+		.in_i(Mant_in_DI),
+		.first_one_o(Mant_leadingOne_D),
+		.no_ones_o(Mant_zero_S)
+	);
+	wire Denormals_shift_add_D;
+	wire Denormals_exp_add_D;
+	assign Denormals_shift_add_D = (~Mant_zero_S & (Exp_in_DI == C_EXP_ZERO)) & ((OP_SI != C_FPU_MUL_CMD) | (~Mant_in_DI[C_MANT_PRENORM - 1] & ~Mant_in_DI[C_MANT_PRENORM - 2]));
+	assign Denormals_exp_add_D = (Mant_in_DI[C_MANT_PRENORM - 2] & (Exp_in_DI == C_EXP_ZERO)) & ((OP_SI == C_FPU_ADD_CMD) | (OP_SI == C_FPU_SUB_CMD));
+	assign Denormal_S = ($signed({{(C_EXP_PRENORM - C_MANT_PRENORM_IND) {Mant_leadingOne_D[C_MANT_PRENORM_IND - 1]}}, Mant_leadingOne_D}) >= Exp_in_DI) || Mant_zero_S; // explicit sign extension replaces the SystemVerilog size cast; NOTE(review): indices with the MSB set extend as negative, as before - confirm intent
+	assign Mant_shAmt_D = (Denormal_S ? Exp_in_DI + Denormals_shift_add_D : Mant_leadingOne_D);
+	assign Mant_shAmt2_D = {Mant_shAmt_D[C_EXP_PRENORM - 1], Mant_shAmt_D} + (C_MANT + 5);
+	function automatic [(((C_MANT_PRENORM + C_MANT) + 4) >= 0 ? (C_MANT_PRENORM + C_MANT) + 5 : 1 - ((C_MANT_PRENORM + C_MANT) + 4)) - 1:0] sv2v_cast_F3481; // widens its argument to C_MANT_PRENORM+C_MANT+5 bits
+		input reg [(((C_MANT_PRENORM + C_MANT) + 4) >= 0 ? (C_MANT_PRENORM + C_MANT) + 5 : 1 - ((C_MANT_PRENORM + C_MANT) + 4)) - 1:0] inp;
+		sv2v_cast_F3481 = inp;
+	endfunction
+	always @(*) begin : sv2v_autoblock_1
+		reg [(C_MANT_PRENORM + C_MANT) + 4:0] temp;
+		temp = sv2v_cast_F3481(Mant_in_DI) << Mant_shAmt2_D; // shift in a widened field so no bits fall off the top
+		Mant_norm_D = temp[(C_MANT_PRENORM + C_MANT) + 4:C_MANT_PRENORM]; // keep the top C_MANT+5 bits
+	end
+	always @(*) begin // sticky bit: OR of the mantissa bits left below the kept window
+		Mant_sticky_D = 1'b0;
+		if (Mant_shAmt2_D <= 0)
+			Mant_sticky_D = |Mant_in_DI;
+		else if (Mant_shAmt2_D <= C_MANT_PRENORM)
+			Mant_sticky_D = |(Mant_in_DI << Mant_shAmt2_D);
+	end
+	assign Exp_norm_D = ((Exp_in_DI - $signed({{(C_EXP_PRENORM - C_MANT_PRENORM_IND) {Mant_leadingOne_D[C_MANT_PRENORM_IND - 1]}}, Mant_leadingOne_D})) + 1) + Denormals_exp_add_D; // same explicit sign extension as in Denormal_S
+	assign Exp_rounded_D = Exp_norm_D + Mant_renorm_S; // bump the exponent when rounding carries out of the mantissa
+	assign Exp_final_D = Exp_rounded_D[C_EXP - 1:0];
+	always @(*) begin // exponent range check after rounding
+		Exp_OF_SO = 1'b0;
+		Exp_UF_SO = 1'b0;
+		if (Exp_rounded_D >= $signed({2'b00, C_EXP_INF}))
+			Exp_OF_SO = 1'b1;
+		else if (Exp_rounded_D <= $signed({2'b00, C_EXP_ZERO}))
+			Exp_UF_SO = 1'b1;
+	end
+	wire [C_MANT:0] Mant_upper_D;
+	wire [3:0] Mant_lower_D;
+	wire [C_MANT + 1:0] Mant_upperRounded_D;
+	reg Mant_roundUp_S;
+	wire Mant_rounded_S;
+	assign Mant_lower_D = Mant_norm_D[3:0]; // the four bits below the result mantissa
+	assign Mant_upper_D = Mant_norm_D[C_MANT + 4:4]; // result mantissa before rounding
+	assign Mant_rounded_S = |Mant_lower_D | Mant_sticky_D; // any discarded bit set means the result is inexact
+	always @(*) begin // rounding decision per mode
+		Mant_roundUp_S = 1'b0;
+		case (RM_SI)
+			C_RM_NEAREST: Mant_roundUp_S = Mant_lower_D[3] && ((|Mant_lower_D[2:0] | Mant_sticky_D) || Mant_upper_D[0]); // round up above half, or at exactly half when the kept LSB is 1
+			C_RM_TRUNC: Mant_roundUp_S = 0;
+			C_RM_PLUSINF: Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI;
+			C_RM_MINUSINF: Mant_roundUp_S = Mant_rounded_S & Sign_in_DI;
+			default: Mant_roundUp_S = 0;
+		endcase
+	end
+	assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_S;
+	assign Mant_renorm_S = Mant_upperRounded_D[C_MANT + 1]; // carry out of the rounding increment
+	assign Mant_res_DO = Mant_upperRounded_D >> (Mant_renorm_S & ~Denormal_S);
+	assign Exp_res_DO = Exp_final_D;
+	assign Rounded_SO = Mant_rounded_S;
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_private.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_private.v
new file mode 100644
index 0000000..1f32e0d
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_private.v
@@ -0,0 +1,189 @@
+module fpu_private ( // 32-bit (C_OP) FPU wrapper: instantiates fpu_core, div_sqrt_top_tp and fp_fma_wrapper and muxes their results onto one output
+	clk_i, // clock, forwarded to all three sub-units
+	rst_ni, // reset, forwarded to all three sub-units (name suggests active-low -- TODO confirm in the sub-units)
+	fpu_en_i, // global enable; gates every per-unit enable below
+	operand_a_i, // first operand
+	operand_b_i, // second operand
+	operand_c_i, // third operand, consumed only by the FMA wrapper
+	rm_i, // rounding mode, forwarded to all three sub-units
+	fpu_op_i, // operation select, compared against the C_FPU_*_CMD codes
+	prec_i, // forwarded only to the div/sqrt unit's Precision_ctl_SI
+	result_o, // result of whichever unit reports valid (all-zero when none does)
+	valid_o, // OR of the three units' valid/done outputs
+	flags_o, // C_FFLAG-bit status flags belonging to the unit selected for result_o
+	divsqrt_busy_o // driven by the div/sqrt unit's Ready_SO port
+);
+parameter C_RM            = 2; // of the constants below, the module body references only C_OP, C_RM, C_CMD, C_PC, C_FFLAG and the C_FPU_*_CMD opcodes (C_FPU_NOP_CMD excepted)
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5; // width of prec_i
+parameter C_OP            = 32; // width of operands and result
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4; // width of fpu_op_i
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7; // matches none of the enable decodes below
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+parameter C_FFLAG         = 5; // width of every flag vector in this module
+	input wire clk_i;
+	input wire rst_ni;
+	input wire fpu_en_i;
+	input wire [C_OP - 1:0] operand_a_i;
+	input wire [C_OP - 1:0] operand_b_i;
+	input wire [C_OP - 1:0] operand_c_i;
+	input wire [C_RM - 1:0] rm_i;
+	input wire [C_CMD - 1:0] fpu_op_i;
+	input wire [C_PC - 1:0] prec_i;
+	output wire [C_OP - 1:0] result_o;
+	output wire valid_o;
+	output wire [C_FFLAG - 1:0] flags_o;
+	output wire divsqrt_busy_o;
+	wire divsqrt_enable; // per-unit enables decoded from fpu_en_i and fpu_op_i
+	wire fpu_enable;
+	wire fma_enable;
+	assign divsqrt_enable = fpu_en_i & ((fpu_op_i == C_FPU_DIV_CMD) | (fpu_op_i == C_FPU_SQRT_CMD)); // DIV or SQRT selects the div/sqrt unit
+	assign fpu_enable = fpu_en_i & (((((fpu_op_i == C_FPU_ADD_CMD) | (fpu_op_i == C_FPU_SUB_CMD)) | (fpu_op_i == C_FPU_MUL_CMD)) | (fpu_op_i == C_FPU_I2F_CMD)) | (fpu_op_i == C_FPU_F2I_CMD)); // ADD, SUB, MUL, I2F or F2I selects fpu_core
+	assign fma_enable = fpu_en_i & ((((fpu_op_i == C_FPU_FMADD_CMD) | (fpu_op_i == C_FPU_FMSUB_CMD)) | (fpu_op_i == C_FPU_FNMADD_CMD)) | (fpu_op_i == C_FPU_FNMSUB_CMD)); // the four fused opcodes select the FMA wrapper; C_FPU_NOP_CMD and any other unlisted opcode enable no unit
+	wire [31:0] fpu_operand_a;
+	wire [31:0] fpu_operand_b;
+	wire [31:0] fpu_result;
+	wire [C_FFLAG - 1:0] fpu_flags;
+	wire fpu_of;
+	wire fpu_uf;
+	wire fpu_zero; // connected to fpu_core below but not included in fpu_flags
+	wire fpu_ix;
+	wire fpu_iv;
+	wire fpu_inf; // connected to fpu_core below but not included in fpu_flags
+	assign fpu_operand_a = (fpu_enable ? operand_a_i : {32 {1'sb0}}); // operands are forced to zero while fpu_core is not selected
+	assign fpu_operand_b = (fpu_enable ? operand_b_i : {32 {1'sb0}});
+	wire fpu_valid;
+	fpu_core fpu_core( // NOTE(review): no .Stall_SI connection here, unlike the riscv_fpu instantiation of fpu_core later in this file -- confirm against fpu_core's port list
+		.Clk_CI(clk_i),
+		.Rst_RBI(rst_ni),
+		.Enable_SI(fpu_enable),
+		.Operand_a_DI(fpu_operand_a),
+		.Operand_b_DI(fpu_operand_b),
+		.RM_SI(rm_i),
+		.OP_SI(fpu_op_i),
+		.Result_DO(fpu_result),
+		.Valid_SO(fpu_valid),
+		.OF_SO(fpu_of),
+		.UF_SO(fpu_uf),
+		.Zero_SO(fpu_zero),
+		.IX_SO(fpu_ix),
+		.IV_SO(fpu_iv),
+		.Inf_SO(fpu_inf)
+	);
+	assign fpu_flags = {fpu_iv, 1'b0, fpu_of, fpu_uf, fpu_ix}; // MSB first: {IV, constant 0, OF, UF, IX}; bit 3 is hard-wired to 0
+	wire div_start;
+	wire sqrt_start;
+	wire [31:0] divsqrt_operand_a;
+	wire [31:0] divsqrt_operand_b;
+	wire [31:0] divsqrt_result;
+	wire [C_FFLAG - 1:0] divsqrt_flags;
+	wire divsqrt_nv;
+	wire divsqrt_ix;
+	assign sqrt_start = divsqrt_enable & (fpu_op_i == C_FPU_SQRT_CMD); // start strobes split divsqrt_enable by opcode
+	assign div_start = divsqrt_enable & (fpu_op_i == C_FPU_DIV_CMD);
+	assign divsqrt_operand_a = (div_start | sqrt_start ? operand_a_i : {32 {1'sb0}}); // operand A is passed for either operation, zero otherwise
+	assign divsqrt_operand_b = (div_start ? operand_b_i : {32 {1'sb0}}); // operand B is passed only for division
+	wire divsqrt_of;
+	wire divsqrt_uf;
+	wire divsqrt_zero;
+	wire divsqrt_valid;
+	div_sqrt_top_tp fpu_divsqrt_tp(
+		.Clk_CI(clk_i),
+		.Rst_RBI(rst_ni),
+		.Div_start_SI(div_start),
+		.Sqrt_start_SI(sqrt_start),
+		.Operand_a_DI(divsqrt_operand_a),
+		.Operand_b_DI(divsqrt_operand_b),
+		.RM_SI(rm_i[1:0]),
+		.Precision_ctl_SI(prec_i),
+		.Result_DO(divsqrt_result),
+		.Exp_OF_SO(divsqrt_of),
+		.Exp_UF_SO(divsqrt_uf),
+		.Div_zero_SO(divsqrt_zero),
+		.Ready_SO(divsqrt_busy_o), // NOTE(review): an output named "busy" is driven by a port named Ready -- confirm polarity against div_sqrt_top_tp
+		.Done_SO(divsqrt_valid)
+	);
+	assign divsqrt_nv = 1'b0; // tied low: this wrapper never reports an invalid-operation flag for div/sqrt
+	assign divsqrt_ix = 1'b0; // tied low: this wrapper never reports an inexact flag for div/sqrt
+	assign divsqrt_flags = {divsqrt_nv, divsqrt_zero, divsqrt_of, divsqrt_uf, divsqrt_ix}; // same bit positions as fpu_flags, with bit 3 carrying Div_zero_SO
+	wire [31:0] fma_operand_a; // fma_operand_a/b/c are declared but not referenced anywhere else in this module
+	wire [31:0] fma_operand_b;
+	wire [31:0] fma_operand_c;
+	wire [31:0] fma_result;
+	reg [1:0] fma_op;
+	wire fma_valid;
+	wire [C_FFLAG - 1:0] fma_flags;
+	always @(*) begin // decode fpu_op_i into the 2-bit Op_i code of fp_fma_wrapper; non-FMA opcodes fall back to 2'b00
+		fma_op = 2'b00;
+		case (fpu_op_i)
+			C_FPU_FMADD_CMD: fma_op = 2'b00;
+			C_FPU_FMSUB_CMD: fma_op = 2'b01;
+			C_FPU_FNMADD_CMD: fma_op = 2'b11; // note the codes are not in opcode order: FNMADD is 2'b11, FNMSUB is 2'b10 (meaning is defined by fp_fma_wrapper, not visible here)
+			C_FPU_FNMSUB_CMD: fma_op = 2'b10;
+			default: fma_op = 2'b00;
+		endcase
+	end
+	fp_fma_wrapper #(
+		.C_MAC_PIPE_REGS(2),
+		.RND_WIDTH(2),
+		.STAT_WIDTH(5)
+	) fp_fma_wrap_i(
+		.clk_i(clk_i),
+		.rst_ni(rst_ni),
+		.En_i(fma_enable),
+		.OpA_i(operand_a_i), // raw operands: unlike the other two units, the FMA inputs are not zeroed while the unit is not selected
+		.OpB_i(operand_b_i),
+		.OpC_i(operand_c_i),
+		.Op_i(fma_op),
+		.Rnd_i(rm_i[1:0]),
+		.Status_o(fma_flags),
+		.Res_o(fma_result),
+		.Valid_o(fma_valid),
+		.Ready_o(), // left unconnected
+		.Ack_i(1'b1) // acknowledge tied high
+	);
+	assign valid_o = (divsqrt_valid | fpu_valid) | fma_valid;
+	assign result_o = (divsqrt_valid ? divsqrt_result : (fpu_valid ? fpu_result : (fma_valid ? fma_result : {C_OP {1'sb0}}))); // priority when several units are valid: div/sqrt, then fpu_core, then FMA; all-zero otherwise
+	assign flags_o = (divsqrt_valid ? divsqrt_flags : (fpu_valid ? fpu_flags : (fma_valid ? fma_flags : {C_FFLAG {1'sb0}}))); // same priority order as result_o
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_shared.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_shared.v
new file mode 100644
index 0000000..89efef4
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/fpu_shared.v
@@ -0,0 +1,117 @@
+module fpu_shared // Wraps one fpu_core instance behind a marx_apu_if.apu interface port, with an optional input register stage
+#(
+    parameter ADD_REGISTER = 1 // 1: register the interface inputs on Clk_CI; any other value: pass them through combinationally
+    )
+(
+	Clk_CI,
+	Rst_RBI,
+	Interface
+);
+	//parameter ADD_REGISTER = 1; (disabled duplicate: ADD_REGISTER is declared in the parameter port list above)
+	input wire Clk_CI;
+	input wire Rst_RBI;
+	input marx_apu_if.apu Interface; // SystemVerilog interface/modport port; marx_apu_if is defined outside this module
+	reg [C_OP - 1:0] Operand_a_D; // NOTE(review): C_OP, C_CMD, C_RM, C_TAG and C_FLAG are used but not declared in this module -- confirm they are supplied elsewhere (e.g. a package or include)
+	reg [C_OP - 1:0] Operand_b_D;
+	reg [C_CMD - 1:0] Op_S;
+	reg [C_RM - 1:0] RM_S;
+	reg Valid_S;
+	reg [C_TAG - 1:0] Tag_D;
+	generate
+		if (ADD_REGISTER == 1) begin : genblk1 // registered path: interface inputs are sampled into the regs above
+			wire [C_OP - 1:0] Operand_a_DN;
+			wire [C_OP - 1:0] Operand_b_DN;
+			wire [C_RM - 1:0] RM_SN;
+			wire [C_CMD - 1:0] Op_SN;
+			wire Valid_SN;
+			wire [C_TAG - 1:0] Tag_DN;
+			assign Operand_a_DN = Interface.arga_ds_d;
+			assign Operand_b_DN = Interface.argb_ds_d;
+			assign RM_SN = Interface.flags_ds_d; // the rounding mode is taken from the interface's flags_ds_d field
+			assign Op_SN = Interface.op_ds_d;
+			assign Valid_SN = Interface.valid_ds_s;
+			assign Tag_DN = Interface.tag_ds_d;
+			always @(posedge Clk_CI or negedge Rst_RBI) // asynchronous reset, active while Rst_RBI is low
+				if (~Rst_RBI) begin
+					Operand_a_D <= 1'sb0; // signed 1-bit zero: extends to all-zero at the target width
+					Operand_b_D <= 1'sb0;
+					Op_S <= 1'sb0;
+					RM_S <= 1'sb0;
+					Valid_S <= 1'sb0;
+					Tag_D <= 1'sb0;
+				end
+				else begin
+					Operand_a_D <= Operand_a_DN;
+					Operand_b_D <= Operand_b_DN;
+					RM_S <= RM_SN;
+					Op_S <= Op_SN;
+					Valid_S <= Valid_SN;
+					Tag_D <= Tag_DN;
+				end
+		end
+		else begin : genblk1 // pass-through path: each interface signal is copied combinationally via a temporary wire
+			wire [C_OP:1] sv2v_tmp_6B435;
+			assign sv2v_tmp_6B435 = Interface.arga_ds_d;
+			always @(*) Operand_a_D = sv2v_tmp_6B435;
+			wire [C_OP:1] sv2v_tmp_8961F;
+			assign sv2v_tmp_8961F = Interface.argb_ds_d;
+			always @(*) Operand_b_D = sv2v_tmp_8961F;
+			wire [C_RM:1] sv2v_tmp_A4AE8;
+			assign sv2v_tmp_A4AE8 = Interface.flags_ds_d;
+			always @(*) RM_S = sv2v_tmp_A4AE8;
+			wire [C_CMD:1] sv2v_tmp_AEC49;
+			assign sv2v_tmp_AEC49 = Interface.op_ds_d;
+			always @(*) Op_S = sv2v_tmp_AEC49;
+			wire [1:1] sv2v_tmp_078D4;
+			assign sv2v_tmp_078D4 = Interface.valid_ds_s;
+			always @(*) Valid_S = sv2v_tmp_078D4;
+			wire [C_TAG:1] sv2v_tmp_37E7A;
+			assign sv2v_tmp_37E7A = Interface.tag_ds_d;
+			always @(*) Tag_D = sv2v_tmp_37E7A;
+		end
+	endgenerate
+	wire [C_OP - 1:0] Result_D;
+	wire [C_FLAG - 1:0] Flags_S; // declared but not referenced anywhere else in this module
+	wire UF_S;
+	wire OF_S;
+	wire Zero_S;
+	wire IX_S;
+	wire IV_S;
+	wire Inf_S;
+	fpu_core core( // NOTE(review): other fpu_core instantiations in this file connect .Valid_SO or .Stall_SI; neither is connected here -- confirm against fpu_core's port list
+		.Clk_CI(Clk_CI),
+		.Rst_RBI(Rst_RBI),
+		.Enable_SI(Valid_S),
+		.Operand_a_DI(Operand_a_D),
+		.Operand_b_DI(Operand_b_D),
+		.RM_SI(RM_S),
+		.OP_SI(Op_S),
+		.Result_DO(Result_D),
+		.OF_SO(OF_S),
+		.UF_SO(UF_S),
+		.Zero_SO(Zero_S),
+		.IX_SO(IX_S),
+		.IV_SO(IV_S),
+		.Inf_SO(Inf_S)
+	);
+	reg ValidDelayed_SP; // Valid_S and Tag_D delayed by one Clk_CI edge (presumably to line up with fpu_core's result -- TODO confirm its latency)
+	wire ValidDelayed_SN;
+	reg [C_TAG - 1:0] TagDelayed_DP;
+	wire [C_TAG - 1:0] TagDelayed_DN;
+	assign ValidDelayed_SN = Valid_S;
+	assign TagDelayed_DN = Tag_D;
+	always @(posedge Clk_CI or negedge Rst_RBI) // asynchronous reset, active while Rst_RBI is low
+		if (~Rst_RBI) begin
+			ValidDelayed_SP <= 1'sb0;
+			TagDelayed_DP <= 1'sb0;
+		end
+		else begin
+			ValidDelayed_SP <= ValidDelayed_SN;
+			TagDelayed_DP <= TagDelayed_DN;
+		end
+	assign Interface.result_us_d = Result_D; // result is driven straight from fpu_core, without an extra register here
+	assign Interface.flags_us_d = {1'b0, Inf_S, IV_S, IX_S, Zero_S, 2'b00, UF_S, OF_S}; // 9-bit vector, MSB first: {0, Inf, IV, IX, Zero, 00, UF, OF}
+	assign Interface.tag_us_d = TagDelayed_DP; // tag returned one clock after it was presented to the core
+	assign Interface.req_us_s = ValidDelayed_SP; // upstream request is the delayed valid
+	assign Interface.ready_ds_s = 1'b1; // this unit always reports ready downstream
+endmodule
diff --git a/verilog/rtl/ips/fpu/hdl/fpu_v0.1/riscv_fpu.v b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/riscv_fpu.v
new file mode 100644
index 0000000..e846a46
--- /dev/null
+++ b/verilog/rtl/ips/fpu/hdl/fpu_v0.1/riscv_fpu.v
@@ -0,0 +1,116 @@
+module riscv_fpu ( // Wraps one fpu_core instance and adds a small counter that generates result_valid_o / fpu_ready_o
+	clk, // clock for the counter and fpu_core
+	rst_n, // asynchronous reset, active while low (see the counter register below)
+	operand_a_i, // first operand, passed straight to fpu_core
+	operand_b_i, // second operand, passed straight to fpu_core
+	rounding_mode_i, // passed straight to fpu_core's RM_SI
+	operator_i, // passed straight to fpu_core's OP_SI
+	enable_i, // enables fpu_core and lets the counter advance
+	stall_i, // freezes the counter; also forwarded to fpu_core's Stall_SI
+	result_o, // driven directly by fpu_core's Result_DO
+	fpu_ready_o, // low only while enabled with the counter not equal to 1
+	result_valid_o // high while the counter equals 1
+);
+parameter C_RM            = 2; // of the constants below, the module body references only C_OP, C_RM and C_CMD (as port widths)
+parameter C_RM_NEAREST    = 2'h0;
+parameter C_RM_TRUNC      = 2'h1;
+parameter C_RM_PLUSINF    = 2'h2;
+parameter C_RM_MINUSINF   = 2'h3;
+parameter C_PC            = 5;
+parameter C_OP            = 32;
+parameter C_MANT          = 23;
+parameter C_EXP           = 8;
+parameter C_BIAS          = 127;
+parameter C_HALF_BIAS     = 63;
+parameter C_LEADONE_WIDTH = 7;
+parameter C_MANT_PRENORM  = C_MANT+1;
+parameter C_EXP_ZERO      = 8'h00;
+parameter C_EXP_ONE       = 8'h01;
+parameter C_EXP_INF       = 8'hff;
+parameter C_MANT_ZERO     = 23'h0;
+parameter C_MANT_NAN      = 23'h400000;
+
+parameter C_CMD               = 4;
+parameter C_FPU_ADD_CMD       = 4'h0;
+parameter C_FPU_SUB_CMD       = 4'h1;
+parameter C_FPU_MUL_CMD       = 4'h2;
+parameter C_FPU_DIV_CMD       = 4'h3;
+parameter C_FPU_I2F_CMD       = 4'h4;
+parameter C_FPU_F2I_CMD       = 4'h5;
+parameter C_FPU_SQRT_CMD      = 4'h6;
+parameter C_FPU_NOP_CMD       = 4'h7;
+parameter C_FPU_FMADD_CMD     = 4'h8;
+parameter C_FPU_FMSUB_CMD     = 4'h9;
+parameter C_FPU_FNMADD_CMD    = 4'hA;
+parameter C_FPU_FNMSUB_CMD    = 4'hB;
+parameter C_RM_NEAREST_MAX = 3'h4;
+parameter C_EXP_PRENORM  = C_EXP+2;
+parameter C_MANT_ADDIN   = C_MANT+4;
+parameter C_MANT_ADDOUT  = C_MANT+5;
+parameter C_MANT_SHIFTIN = C_MANT+3;
+parameter C_MANT_SHIFTED = C_MANT+4;
+parameter C_MANT_INT     = C_OP-1;
+parameter C_INF          = 32'h7fffffff;
+parameter C_MINF         = 32'h80000000;
+parameter C_EXP_SHIFT    = C_EXP_PRENORM;
+parameter C_SHIFT_BIAS   = 9'd127;
+parameter C_UNKNOWN      = 8'd157;
+parameter C_PADMANT      = 16'b0;
+parameter C_MANT_NoHB_ZERO   = 23'h0;
+parameter C_MANT_PRENORM_IND = 6;
+parameter F_QNAN         =32'h7FC00000;
+parameter C_FFLAG         = 5;
+	input wire clk;
+	input wire rst_n;
+	input wire [C_OP - 1:0] operand_a_i;
+	input wire [C_OP - 1:0] operand_b_i;
+	input wire [C_RM - 1:0] rounding_mode_i;
+	input wire [C_CMD - 1:0] operator_i;
+	input wire enable_i;
+	input wire stall_i;
+	output wire [C_OP - 1:0] result_o;
+	output reg fpu_ready_o;
+	output wire result_valid_o;
+	localparam CYCLES = 2; // declared but not referenced; the compares below use the literal 1
+	wire [C_OP - 1:0] operand_a_q; // the four *_q wires are declared but never driven or read in this module
+	wire [C_OP - 1:0] operand_b_q;
+	wire [C_RM - 1:0] rounding_mode_q;
+	wire [C_CMD - 1:0] operator_q;
+	reg [1:0] valid_count_q; // registered cycle counter
+	reg [1:0] valid_count_n; // next-state value computed combinationally below
+	assign result_valid_o = (valid_count_q == 1 ? 1'b1 : 1'b0);
+	always @(*) begin // next-count and ready logic
+		valid_count_n = valid_count_q; // defaults: hold the count, report ready
+		fpu_ready_o = 1'b1;
+		if (enable_i) begin
+			valid_count_n = valid_count_q + 1; // while enabled: count up and report not-ready...
+			fpu_ready_o = 1'b0;
+			if (valid_count_q == 1) begin // ...except when the count is 1: report ready and wrap back to 0
+				fpu_ready_o = 1'b1;
+				valid_count_n = 2'd0;
+			end
+		end
+	end
+	always @(posedge clk or negedge rst_n)
+		if (~rst_n)
+			valid_count_q <= 1'b0; // 1-bit zero literal is zero-extended to the 2-bit register
+		else if (enable_i && ~stall_i) // the counter only advances when enabled and not stalled
+			valid_count_q <= valid_count_n;
+	fpu_core fpcore( // NOTE(review): .Valid_SO is not connected here, unlike the fpu_private instantiation of fpu_core earlier in this file -- confirm against fpu_core's port list
+		.Clk_CI(clk),
+		.Rst_RBI(rst_n),
+		.Enable_SI(enable_i),
+		.Operand_a_DI(operand_a_i),
+		.Operand_b_DI(operand_b_i),
+		.RM_SI(rounding_mode_i),
+		.OP_SI(operator_i),
+		.Stall_SI(stall_i),
+		.Result_DO(result_o),
+		.OF_SO(), // all status outputs of the core are left unconnected in this wrapper
+		.UF_SO(),
+		.Zero_SO(),
+		.IX_SO(),
+		.IV_SO(),
+		.Inf_SO()
+	);
+endmodule