Skip to content

Commit

Permalink
Add Bitmanipulation support
Browse files Browse the repository at this point in the history
  • Loading branch information
gullahmed1 committed Nov 8, 2023
1 parent e70418e commit 6ccc9d9
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 18 deletions.
142 changes: 140 additions & 2 deletions rtl/cv32e40p_alu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@

module cv32e40p_alu
import cv32e40p_pkg::*;
(
#(
parameter ZBITMANIP = 0
) (
input logic clk,
input logic rst_n,
input logic enable_i,
Expand Down Expand Up @@ -805,7 +807,11 @@ module cv32e40p_alu
logic [31:0] bmask_first, bmask_inv;
logic [31:0] bextins_and;
logic [31:0] bextins_result, bclr_result, bset_result;

// Intermediate signals of the bit-manipulation (ZBITMANIP) datapath
logic [31:0] result_bitmanip; // Result of the bit-manipulation operation selected by operator_i
logic [31:0] clmul_result; // Carry-less multiplication result (before the CLMULR/CLMULH bit reversal)
logic [ 5:0] cpop; // Number of set bits in operand_a_i, driven by cv32e40p_popcnt
logic [ 4:0] ff_one_result; // Position of the first one, driven by cv32e40p_ff_one
logic ff_one_all_zeros; // High when cv32e40p_ff_one reports that its input has no ones

// construct bit mask for insert/extract/bclr/bset
// bmask looks like this 00..0011..1100..00
Expand All @@ -823,6 +829,124 @@ module cv32e40p_alu
assign bclr_result = operand_a_i & bmask_inv;
assign bset_result = operand_a_i | bmask;

// Zbb/Zbc helper datapath: population count, find-first-one and carry-less
// multiplication. The logic is only elaborated when ZBITMANIP is non-zero;
// otherwise its outputs are tied off so that they are never left undriven.
if (ZBITMANIP) begin : gen_zbc_zbb_results

  // Temporary signals
  logic [31:0] ff_one_in;
  logic [31:0] [31:0] clmul_temp0;  // one partial-product row per bit of operand b
  logic [ 7:0] [31:0] clmul_temp1;  // XOR of clmul_temp0 rows 4i .. 4i+3
  logic [ 1:0] [31:0] clmul_temp2;  // XOR of clmul_temp1 rows 4i .. 4i+3
  logic [31:0] operand_b_rev;

  // Input of cv32e40p_ff_one: operand_a_i for CTZ, operand_a_rev for every other operator.
  // NOTE(review): operand_a_rev is declared outside this block; presumably the
  // bit-reversed operand_a_i - confirm.
  assign ff_one_in = (operator_i == ALU_B_CTZ) ? operand_a_i : operand_a_rev;

  // Population count of operand_a_i
  cv32e40p_popcnt popcnt_i (
      .in_i    (operand_a_i),
      .result_o(cpop)
  );

  // Find-first-one on the selected input
  cv32e40p_ff_one ff_one_i (
      .in_i       (ff_one_in),
      .first_one_o(ff_one_result),
      .no_ones_o  (ff_one_all_zeros)
  );

  // Reverse operand_b_i bit-wise using the streaming operator
  assign operand_b_rev = {<<{operand_b_i}};

  // Create 32 rows like a schoolbook multiplication. CLMUL uses the operands as
  // they are; every other operator uses the reversed operands.
  for (genvar i = 0; i < 32; i++) begin : gen_32_rows
    assign clmul_temp0[i] = (operator_i == ALU_B_CLMUL) ?
                            (operand_b_i[i]   ? operand_a_i   << i : '0) :
                            (operand_b_rev[i] ? operand_a_rev << i : '0);
  end

  // XOR 4 rows, 8 times (32 rows -> 8 rows)
  for (genvar i = 0; i < 8; i++) begin : gen_xor_result_8_rows
    assign clmul_temp1[i] = clmul_temp0[i<<2]     ^ clmul_temp0[(i<<2)+1] ^
                            clmul_temp0[(i<<2)+2] ^ clmul_temp0[(i<<2)+3];
  end

  // XOR 4 rows, twice (8 rows -> 2 rows)
  for (genvar i = 0; i < 2; i++) begin : gen_xor_result_2_rows
    assign clmul_temp2[i] = clmul_temp1[i<<2]     ^ clmul_temp1[(i<<2)+1] ^
                            clmul_temp1[(i<<2)+2] ^ clmul_temp1[(i<<2)+3];
  end

  // XOR of the last 2 rows gives the carry-less product
  assign clmul_result = clmul_temp2[0] ^ clmul_temp2[1];

end else begin : gen_no_zbc_zbb_results

  // Bit-manipulation support disabled: drive constant values so the signals
  // are not left floating (avoids X propagation and undriven-net lint errors).
  assign clmul_result     = '0;
  assign cpop             = '0;
  assign ff_one_result    = '0;
  assign ff_one_all_zeros = 1'b0;

end

// Bit-manipulation result mux.
// Computes result_bitmanip for the Zba/Zbb/Zbc/Zbs operator selected by
// operator_i. The result is all zeros when ZBITMANIP is 0 or when operator_i is
// not one of the bit-manipulation operators.
always_comb begin
  if (ZBITMANIP) begin
    unique case (operator_i)

      // Zba: Address generation instructions, (rs1 << 1/2/3) + rs2
      ALU_B_SH1ADD: result_bitmanip = {operand_a_i[30:0], 1'b0} + operand_b_i;
      ALU_B_SH2ADD: result_bitmanip = {operand_a_i[29:0], 2'b0} + operand_b_i;
      ALU_B_SH3ADD: result_bitmanip = {operand_a_i[28:0], 3'b0} + operand_b_i;

      // Zbb: Basic bit-manipulation
      // Logical with negate
      ALU_B_ANDN: result_bitmanip = operand_a_i & operand_b_neg;
      ALU_B_ORN:  result_bitmanip = operand_a_i | operand_b_neg;
      ALU_B_XNOR: result_bitmanip = ~(operand_a_i ^ operand_b_i);

      // Count leading/trailing zero bits. Both share the find-first-one result;
      // an input without any set bit yields 32.
      ALU_B_CLZ,
      ALU_B_CTZ: result_bitmanip = ff_one_all_zeros ? 32'd32 : {27'b0, ff_one_result};

      // Count set bits (zero-extended to 32 bits)
      ALU_B_CPOP: result_bitmanip = {26'b0, cpop};

      // Integer minimum/maximum
      ALU_B_MAX:  result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_b_i : operand_a_i;
      ALU_B_MAXU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_b_i : operand_a_i;
      ALU_B_MIN:  result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_a_i : operand_b_i;
      ALU_B_MINU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_a_i : operand_b_i;

      // Sign and zero extension
      ALU_B_SEXTB: result_bitmanip = {{24{operand_a_i[7]}}, operand_a_i[7:0]};
      ALU_B_SEXTH: result_bitmanip = {{16{operand_a_i[15]}}, operand_a_i[15:0]};
      ALU_B_ZEXTH: result_bitmanip = {16'b0, operand_a_i[15:0]};

      // Bitwise rotation. A shift amount of 0 makes the complementary shift 32,
      // which shifts everything out and leaves operand_a_i unchanged.
      ALU_B_ROL: result_bitmanip = (operand_a_i << operand_b_i[4:0]) | (operand_a_i >> (32 - operand_b_i[4:0]));
      ALU_B_ROR,
      ALU_B_RORI: result_bitmanip = (operand_a_i >> operand_b_i[4:0]) | (operand_a_i << (32 - operand_b_i[4:0]));

      // Bitwise OR-combine, byte granule
      ALU_B_ORCB: result_bitmanip = {{8{|operand_a_i[31:24]}}, {8{|operand_a_i[23:16]}},
                                     {8{|operand_a_i[15:8]}},  {8{|operand_a_i[7:0]}}};

      // Byte-reverse register
      ALU_B_REV8: result_bitmanip = {operand_a_i[7:0], operand_a_i[15:8], operand_a_i[23:16], operand_a_i[31:24]};

      // Zbc: Carry-less multiplication, low / reversed / high part
      ALU_B_CLMUL:  result_bitmanip = clmul_result;
      ALU_B_CLMULR: result_bitmanip = {<<{clmul_result}};
      ALU_B_CLMULH: begin
        // A streaming concatenation may only be the source of an assignment,
        // not an operand of '>>', so reverse first and shift afterwards.
        result_bitmanip = {<<{clmul_result}};
        result_bitmanip = {1'b0, result_bitmanip[31:1]};
      end

      // Zbs: Single-bit instructions, bit index is operand_b_i[4:0]
      ALU_B_BCLR,
      ALU_B_BCLRI: result_bitmanip = operand_a_i & ~(32'd1 << operand_b_i[4:0]);
      ALU_B_BEXT,
      ALU_B_BEXTI: result_bitmanip = (operand_a_i >> operand_b_i[4:0]) & 32'd1;
      ALU_B_BINV,
      ALU_B_BINVI: result_bitmanip = operand_a_i ^ (32'd1 << operand_b_i[4:0]);
      ALU_B_BSET,
      ALU_B_BSETI: result_bitmanip = operand_a_i | (32'd1 << operand_b_i[4:0]);

      default: result_bitmanip = '0;
    endcase
  end else begin
    result_bitmanip = '0;
  end
end

/////////////////////////////////////////////////////////////////////////////////
// ____ _____ _______ _____ ________ ________ _____ _____ ______ //
// | _ \_ _|__ __| | __ \| ____\ \ / / ____| __ \ / ____| ____| //
Expand Down Expand Up @@ -979,6 +1103,20 @@ module cv32e40p_alu

default: ; // default case to suppress unique warning
endcase

// When bit-manipulation support is enabled, result_bitmanip replaces result_o
// for every bit-manipulation operator; all other operators keep the value
// selected above.
if (ZBITMANIP) begin
  unique case (operator_i)
    // Zba
    ALU_B_SH1ADD, ALU_B_SH2ADD, ALU_B_SH3ADD: result_o = result_bitmanip;

    // Zbb
    ALU_B_ANDN, ALU_B_ORN, ALU_B_XNOR,
    ALU_B_CLZ, ALU_B_CTZ, ALU_B_CPOP,
    ALU_B_MAX, ALU_B_MAXU, ALU_B_MIN, ALU_B_MINU,
    ALU_B_SEXTB, ALU_B_SEXTH, ALU_B_ZEXTH,
    ALU_B_ROL, ALU_B_ROR, ALU_B_RORI,
    ALU_B_ORCB, ALU_B_REV8: result_o = result_bitmanip;

    // Zbc
    ALU_B_CLMUL, ALU_B_CLMULH, ALU_B_CLMULR: result_o = result_bitmanip;

    // Zbs
    ALU_B_BCLR, ALU_B_BCLRI, ALU_B_BEXT, ALU_B_BEXTI,
    ALU_B_BINV, ALU_B_BINVI, ALU_B_BSET, ALU_B_BSETI: result_o = result_bitmanip;

    default: ;  // not a bit-manipulation operator: leave result_o untouched
  endcase
end
end

assign ready_o = div_ready;
Expand Down
9 changes: 6 additions & 3 deletions rtl/cv32e40p_core.sv
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ module cv32e40p_core
parameter FPU_ADDMUL_LAT = 0, // Floating-Point ADDition/MULtiplication lane pipeline registers number
parameter FPU_OTHERS_LAT = 0, // Floating-Point COMParison/CONVersion lanes pipeline registers number
parameter ZFINX = 0, // Float-in-General Purpose registers
parameter NUM_MHPMCOUNTERS = 1
parameter NUM_MHPMCOUNTERS = 1,
parameter ZBITMANIP = 1 // To Enable Bitmanip support
) (
// Clock and Reset
input logic clk_i,
Expand Down Expand Up @@ -523,7 +524,8 @@ module cv32e40p_core
.APU_WOP_CPU (APU_WOP_CPU),
.APU_NDSFLAGS_CPU(APU_NDSFLAGS_CPU),
.APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU),
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN)
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN),
.ZBITMANIP (ZBITMANIP)
) id_stage_i (
.clk (clk), // Gated clock
.clk_ungated_i(clk_i), // Ungated clock
Expand Down Expand Up @@ -744,7 +746,8 @@ module cv32e40p_core
.APU_NARGS_CPU (APU_NARGS_CPU),
.APU_WOP_CPU (APU_WOP_CPU),
.APU_NDSFLAGS_CPU(APU_NDSFLAGS_CPU),
.APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU)
.APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU),
.ZBITMANIP (ZBITMANIP)
) ex_stage_i (
// Global signals: Clock and active low asynchronous reset
.clk (clk),
Expand Down
87 changes: 81 additions & 6 deletions rtl/cv32e40p_decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ module cv32e40p_decoder
parameter PULP_SECURE = 0,
parameter USE_PMP = 0,
parameter APU_WOP_CPU = 6,
parameter DEBUG_TRIGGER_EN = 1
parameter DEBUG_TRIGGER_EN = 1,
parameter ZBITMANIP = 0 // To Enable Bitmanip support
)
(
// signals running to/from controller
Expand Down Expand Up @@ -185,6 +186,9 @@ module cv32e40p_decoder
// unittypes for latencies to help us decode for APU
enum logic[1:0] {ADDMUL, DIVSQRT, NONCOMP, CONV} fp_op_group;

// Illegal Instr flags for bitmanip
logic illegal_instr_bm;
logic illegal_instr_non_bm;

/////////////////////////////////////////////
// ____ _ //
Expand Down Expand Up @@ -264,6 +268,8 @@ module cv32e40p_decoder
atop_o = 6'b000000;

illegal_insn_o = 1'b0;
illegal_instr_bm = 1'b0;
illegal_instr_non_bm = 1'b0;
ebrk_insn_o = 1'b0;
ecall_insn_o = 1'b0;
wfi_o = 1'b0;
Expand Down Expand Up @@ -493,8 +499,31 @@ module cv32e40p_decoder
3'b111: alu_operator_o = ALU_AND; // And with Immediate

3'b001: begin
  // funct3 = 001: SLLI when funct7 is zero. Any other funct7 is either a
  // bit-manipulation immediate instruction (ZBITMANIP only) or illegal.
  alu_operator_o = ALU_SLL;  // Shift Left Logical by Immediate

  if (instr_rdata_i[31:25] != 7'b0) begin
    if (ZBITMANIP) begin
      // Bit-Manip ALU Operations
      unique case (instr_rdata_i[31:25])
        // Zbs: single-bit operations with immediate bit index
        7'b001_0100: alu_operator_o = ALU_B_BSETI;
        7'b010_0100: alu_operator_o = ALU_B_BCLRI;
        7'b011_0100: alu_operator_o = ALU_B_BINVI;

        // Zbb: unary operations, selected by the rs2 field
        7'b011_0000: begin
          unique case (instr_rdata_i[24:20])
            5'b00000: alu_operator_o = ALU_B_CLZ;
            5'b00001: alu_operator_o = ALU_B_CTZ;
            5'b00010: alu_operator_o = ALU_B_CPOP;
            5'b00100: alu_operator_o = ALU_B_SEXTB;
            5'b00101: alu_operator_o = ALU_B_SEXTH;
            default:  illegal_insn_o = 1'b1;
          endcase
        end

        default: begin
          illegal_insn_o = 1'b1;
        end
      endcase
    end else begin
      illegal_insn_o = 1'b1;
    end
  end
end

Expand All @@ -503,11 +532,23 @@ module cv32e40p_decoder
alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
else if (instr_rdata_i[31:25] == 7'b010_0000)
alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate

//Bit-Manip ALU Operations
else if (ZBITMANIP) begin
if (instr_rdata_i[31:25] == 7'b011_0000)
alu_operator_o = ALU_B_RORI;
else if (instr_rdata_i[31:20] == 12'b001010000111)
alu_operator_o = ALU_B_ORCB;
else if (instr_rdata_i[31:20] == 12'b011010011000)
alu_operator_o = ALU_B_REV8;
else if (instr_rdata_i[31:25] == 7'b010_0100)
alu_operator_o = ALU_B_BEXTI;
else
illegal_insn_o = 1'b1;
end
else
illegal_insn_o = 1'b1;
end


endcase
end

Expand Down Expand Up @@ -992,9 +1033,43 @@ module cv32e40p_decoder
end

default: begin
illegal_insn_o = 1'b1;
illegal_instr_non_bm = 1'b1;
end
endcase

// Register-register bit-manipulation decode (ZBITMANIP only).
// The selector is {funct7[5:0], funct3}. A match overrides alu_operator_o;
// no match raises illegal_instr_bm.
// NOTE(review): instr_rdata_i[31] is not part of the selector - presumably it
// is constrained by the enclosing decode logic; confirm.
if (ZBITMANIP) begin
  unique case ({instr_rdata_i[30:25], instr_rdata_i[14:12]})
    // Zba: shift-and-add
    {6'b01_0000, 3'b010}: alu_operator_o = ALU_B_SH1ADD;
    {6'b01_0000, 3'b100}: alu_operator_o = ALU_B_SH2ADD;
    {6'b01_0000, 3'b110}: alu_operator_o = ALU_B_SH3ADD;

    // Zbb: logical with negate
    {6'b10_0000, 3'b111}: alu_operator_o = ALU_B_ANDN;
    {6'b10_0000, 3'b110}: alu_operator_o = ALU_B_ORN;
    {6'b10_0000, 3'b100}: alu_operator_o = ALU_B_XNOR;

    // Zbb: integer minimum/maximum
    {6'b00_0101, 3'b110}: alu_operator_o = ALU_B_MAX;
    {6'b00_0101, 3'b111}: alu_operator_o = ALU_B_MAXU;
    {6'b00_0101, 3'b100}: alu_operator_o = ALU_B_MIN;
    {6'b00_0101, 3'b101}: alu_operator_o = ALU_B_MINU;

    // Zbb: rotations
    {6'b11_0000, 3'b001}: alu_operator_o = ALU_B_ROL;
    {6'b11_0000, 3'b101}: alu_operator_o = ALU_B_ROR;

    // Zbb: zext.h. The encoding fixes the rs2 field to zero; the same
    // funct7/funct3 with a non-zero rs2 is a different instruction (pack),
    // which is not implemented here and must not be executed as zext.h.
    {6'b00_0100, 3'b100}: begin
      if (instr_rdata_i[24:20] == 5'b0) begin
        alu_operator_o = ALU_B_ZEXTH;
      end else begin
        illegal_instr_bm = 1'b1;
      end
    end

    // Zbc: carry-less multiplication
    {6'b00_0101, 3'b001}: alu_operator_o = ALU_B_CLMUL;
    {6'b00_0101, 3'b011}: alu_operator_o = ALU_B_CLMULH;
    {6'b00_0101, 3'b010}: alu_operator_o = ALU_B_CLMULR;

    // Zbs: single-bit operations
    {6'b10_0100, 3'b001}: alu_operator_o = ALU_B_BCLR;
    {6'b10_0100, 3'b101}: alu_operator_o = ALU_B_BEXT;
    {6'b11_0100, 3'b001}: alu_operator_o = ALU_B_BINV;
    {6'b01_0100, 3'b001}: alu_operator_o = ALU_B_BSET;

    default: begin
      illegal_instr_bm = 1'b1;
    end
  endcase
end

// Combine the illegal-instruction flags of the base decode and the
// bit-manipulation decode. With ZBITMANIP enabled the instruction is illegal
// only if neither decode recognised it. ZBITMANIP is tested for non-zero, as
// everywhere else, so any non-zero parameter value enables the extension
// (a case on 1'b0/1'b1 would match nothing for other values).
// NOTE(review): this overwrites illegal_insn_o; any illegal flag raised
// directly on illegal_insn_o earlier in this branch would be lost - confirm
// that the preceding case only uses illegal_instr_non_bm.
if (ZBITMANIP) begin
  illegal_insn_o = illegal_instr_non_bm & illegal_instr_bm;
end else begin
  illegal_insn_o = illegal_instr_non_bm;
end
end
end

Expand Down
7 changes: 5 additions & 2 deletions rtl/cv32e40p_ex_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ module cv32e40p_ex_stage
parameter APU_NARGS_CPU = 3,
parameter APU_WOP_CPU = 6,
parameter APU_NDSFLAGS_CPU = 15,
parameter APU_NUSFLAGS_CPU = 5
parameter APU_NUSFLAGS_CPU = 5,
parameter ZBITMANIP = 0
) (
input logic clk,
input logic rst_n,
Expand Down Expand Up @@ -249,7 +250,9 @@ module cv32e40p_ex_stage
// //
////////////////////////////

cv32e40p_alu alu_i (
cv32e40p_alu #(
.ZBITMANIP (ZBITMANIP)
) alu_i (
.clk (clk),
.rst_n (rst_n),
.enable_i (alu_en_i),
Expand Down
6 changes: 4 additions & 2 deletions rtl/cv32e40p_id_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ module cv32e40p_id_stage
parameter APU_WOP_CPU = 6,
parameter APU_NDSFLAGS_CPU = 15,
parameter APU_NUSFLAGS_CPU = 5,
parameter DEBUG_TRIGGER_EN = 1
parameter DEBUG_TRIGGER_EN = 1,
parameter ZBITMANIP = 0 // To Enable Bitmanip support
) (
input logic clk, // Gated clock
input logic clk_ungated_i, // Ungated clock
Expand Down Expand Up @@ -978,7 +979,8 @@ module cv32e40p_id_stage
.PULP_SECURE (PULP_SECURE),
.USE_PMP (USE_PMP),
.APU_WOP_CPU (APU_WOP_CPU),
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN)
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN),
.ZBITMANIP (ZBITMANIP)
) decoder_i (
// controller related signals
.deassert_we_i(deassert_we),
Expand Down
6 changes: 4 additions & 2 deletions rtl/cv32e40p_top.sv
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ module cv32e40p_top #(
parameter FPU_ADDMUL_LAT = 0, // Floating-Point ADDition/MULtiplication computing lane pipeline registers number
parameter FPU_OTHERS_LAT = 0, // Floating-Point COMParison/CONVersion computing lanes pipeline registers number
parameter ZFINX = 0, // Float-in-General Purpose registers
parameter NUM_MHPMCOUNTERS = 1
parameter NUM_MHPMCOUNTERS = 1,
parameter ZBITMANIP = 1 // To Enable Bitmanip support
) (
// Clock and Reset
input logic clk_i,
Expand Down Expand Up @@ -90,7 +91,8 @@ module cv32e40p_top #(
.FPU_ADDMUL_LAT (FPU_ADDMUL_LAT),
.FPU_OTHERS_LAT (FPU_OTHERS_LAT),
.ZFINX (ZFINX),
.NUM_MHPMCOUNTERS(NUM_MHPMCOUNTERS)
.NUM_MHPMCOUNTERS(NUM_MHPMCOUNTERS),
.ZBITMANIP (ZBITMANIP)
) core_i (
.clk_i (clk_i),
.rst_ni(rst_ni),
Expand Down
Loading

0 comments on commit 6ccc9d9

Please sign in to comment.