基于booth2位编码+32压缩器+华莱士树实现的乘法器(Verilog实现)

原理部分

原理部分只做简要说明,想要深入理解的话可自行搜索资料或查看文章末尾的参考资料

booth2位编码

8种booth_code对应的部分积操作
在这里插入图片描述

32压缩器

3-2压缩器实际上就是一个保存进位加法器(csa)。他的功能就是将3个操作数转换为2个操作数(不会溢出,最多1+1+1=11,2位足够了)。
压缩器内的逻辑运算如下
在这里插入图片描述
在这里插入图片描述

module adder (
   input   Add_A,
   input   Add_B,
   input   Add_Cin,
   output  Cout,
   output  Sum
);

assign Cout     = (Add_A & Add_B) | (Add_Cin & (Add_A | Add_B));
assign Sum      = Add_A ^ Add_B ^ Add_Cin;
// assign {Cout, Sum} = Add_A + Add_B + Add_Cin;
endmodule

wallace树

华莱士树实际上就是将多个32压缩器拼接起来。
本文使用的是17个3_2压缩器拼接生成一个1位华莱士树,华莱士树的接线图如下:
在这里插入图片描述

代码实现

代码部分有详细注释,请结合注释理解代码。

Wallace

module wallace (
    input   [31:0]   mul1,
    input   [31:0]   mul2,
    output  [63:0]   result
);

// every 3 bits generate a boothcode, then shift 2 bits
wire        [ 2:0]   booth_code  [15:0];
// mul1 2*mul1 mul1_complement 2*mul1_complement
// mul1 is the multiplicand
// mul2 is the multiplier
wire        [31:0]   mulX;
wire        [32:0]   mulX_2;
wire        [31:0]   mulX_com;
wire        [32:0]   mulX_com_2;
wire        [31:0]   mulY;
// Nsum is the partical product
// Csum is the carry of 3_2_compressor
// Ssum is the sum of 3_2_compressor
wire        [63:0]   Nsum       [15:0];  
wire        [64:0]   Csum       [16:0];  
wire        [63:0]   Ssum       [16:0]; 


assign mulX         = mul1;
assign mulX_2       = mulX<<1;
// assign mulX_2       = {mulX[30:0], 2'b00};
assign mulX_com     = ~mulX;
assign mulX_com_2   = ~ (mulX<<1);
// assign mulX_com_2   = ~ ({mulX[30:0], 2'b00});
assign mulY = mul2;


// get 3bits booth_code, every 2 bits get a boothcode, so the total number is 32/2=16
genvar i;
generate
    assign booth_code[0] = {mulY[1], mulY[0], 1'b0};
    for (i=1; i<=15; i=i+1) begin
        assign booth_code[i] = {mulY[2*i+1], mulY[2*i], mulY[2*i-1]};
    end
endgenerate

// get partial product and carry
// carry = 1, means the mulX(multiplicand) is reversed
// when booth_code == 100/101/110, fill in 1 in the end of Csum
// (because previous step just reverse, didn't add 1, so use Csum to add 1)
generate
    for (i = 0; i < 16; i = i + 1) begin
        assign Nsum[i] = {64{(booth_code[i] == 3'b000)}} & 64'b0 |
                         {64{(booth_code[i] == 3'b001)}} & {{(32-2*i){mulX[31]}},         mulX,       {2*i{1'b0}}} |
                         {64{(booth_code[i] == 3'b010)}} & {{(32-2*i){mulX[31]}},         mulX,       {2*i{1'b0}}} |
                         {64{(booth_code[i] == 3'b011)}} & {{(32-2*i-1){mulX_2[32]}},     mulX_2,     {2*i{1'b0}}} |

                         {64{(booth_code[i] == 3'b100)}} & {{(32-2*i-1){mulX_com_2[32]}}, mulX_com_2, {2*i{1'b1}}} |
                         {64{(booth_code[i] == 3'b101)}} & {{(32-2*i){mulX_com[31]}},     mulX_com,   {2*i{1'b1}}} |
                         {64{(booth_code[i] == 3'b110)}} & {{(32-2*i){mulX_com[31]}},     mulX_com,   {2*i{1'b1}}} |
                         {64{(booth_code[i] == 3'b111)}} & 64'b0;
        
        // use Csum to the implement of +1, when taking complement
        assign Csum[i][0] = (booth_code[i] == 3'b100) |
                            (booth_code[i] == 3'b101) |
                            (booth_code[i] == 3'b110) ;
    end
endgenerate

// initial it as zero
assign Csum[16][0] = 1'b0;

// if use 42compressor, when 1+1+1+1, it will be 100,then the csum don't have enough bits to accommodata the carry!!!
// so, if we use 42compressor, we need to new a new carry2???? to accommodata anothor carry(Cout_tmp and Cout)
// now we use 3_2_compressor
generate 
    for(i=0;i<64;i=i+1)begin
        adder  adder0(.Add_A(Nsum[  0][i]),.Add_B(Nsum[  1][i]),.Add_Cin(Nsum[  2][i]),.Sum(Ssum[ 0][i]),.Cout(Csum[ 0][i+1]));
        adder  adder1(.Add_A(Nsum[  3][i]),.Add_B(Nsum[  4][i]),.Add_Cin(Nsum[  5][i]),.Sum(Ssum[ 1][i]),.Cout(Csum[ 1][i+1]));
        adder  adder2(.Add_A(Nsum[  6][i]),.Add_B(Nsum[  7][i]),.Add_Cin(Nsum[  8][i]),.Sum(Ssum[ 2][i]),.Cout(Csum[ 2][i+1]));
        adder  adder3(.Add_A(Nsum[  9][i]),.Add_B(Nsum[ 10][i]),.Add_Cin(Nsum[ 11][i]),.Sum(Ssum[ 3][i]),.Cout(Csum[ 3][i+1]));
        adder  adder4(.Add_A(Nsum[ 12][i]),.Add_B(Nsum[ 13][i]),.Add_Cin(Nsum[ 14][i]),.Sum(Ssum[ 4][i]),.Cout(Csum[ 4][i+1]));
        adder  adder5(.Add_A(Nsum[ 15][i]),.Add_B(1'b0        ),.Add_Cin(1'b0        ),.Sum(Ssum[ 5][i]),.Cout(Csum[ 5][i+1]));
        adder  adder6(.Add_A(Ssum[  0][i]),.Add_B(Ssum[  1][i]),.Add_Cin(Ssum[  2][i]),.Sum(Ssum[ 6][i]),.Cout(Csum[ 6][i+1]));
        adder  adder7(.Add_A(Ssum[  3][i]),.Add_B(Ssum[  4][i]),.Add_Cin(Ssum[  5][i]),.Sum(Ssum[ 7][i]),.Cout(Csum[ 7][i+1]));
        adder  adder8(.Add_A(Csum[  0][i]),.Add_B(Csum[  1][i]),.Add_Cin(Csum[  2][i]),.Sum(Ssum[ 8][i]),.Cout(Csum[ 8][i+1]));
        adder  adder9(.Add_A(Csum[  3][i]),.Add_B(Csum[  4][i]),.Add_Cin(Csum[  5][i]),.Sum(Ssum[ 9][i]),.Cout(Csum[ 9][i+1]));
        adder adder10(.Add_A(Ssum[  6][i]),.Add_B(Ssum[  7][i]),.Add_Cin(Ssum[  8][i]),.Sum(Ssum[10][i]),.Cout(Csum[10][i+1]));
        adder adder11(.Add_A(Ssum[  9][i]),.Add_B(Csum[  6][i]),.Add_Cin(Csum[  7][i]),.Sum(Ssum[11][i]),.Cout(Csum[11][i+1]));
        adder adder12(.Add_A(Csum[  8][i]),.Add_B(Csum[  9][i]),.Add_Cin(1'b0        ),.Sum(Ssum[12][i]),.Cout(Csum[12][i+1]));
        adder adder13(.Add_A(Ssum[ 10][i]),.Add_B(Ssum[ 11][i]),.Add_Cin(Ssum[ 12][i]),.Sum(Ssum[13][i]),.Cout(Csum[13][i+1]));
        adder adder14(.Add_A(Csum[ 10][i]),.Add_B(Csum[ 11][i]),.Add_Cin(Csum[ 12][i]),.Sum(Ssum[14][i]),.Cout(Csum[14][i+1]));
        adder adder15(.Add_A(Ssum[ 13][i]),.Add_B(Ssum[ 14][i]),.Add_Cin(Csum[ 13][i]),.Sum(Ssum[15][i]),.Cout(Csum[15][i+1]));
        adder adder16(.Add_A(Ssum[ 15][i]),.Add_B(Csum[ 14][i]),.Add_Cin(Csum[ 15][i]),.Sum(Ssum[16][i]),.Cout(Csum[16][i+1]));
    end
endgenerate
// get result
assign result = (Ssum[16] + Csum[16][63:0]);

// judge the result is right or wrong
wire [63:0] cmp;
wire [63:0] tmp;
assign tmp = {{32{mulX[31]}},mulX} * {{32{mulY[31]}},mulY};
assign cmp = (result == tmp);
endmodule

32compressor

module adder (
   input   Add_A,
   input   Add_B,
   input   Add_Cin,
   output  Cout,
   output  Sum
);

assign Cout     = (Add_A & Add_B) | (Add_Cin & (Add_A | Add_B));
assign Sum      = Add_A ^ Add_B ^ Add_Cin;
// assign {Cout, Sum} = Add_A + Add_B + Add_Cin;
endmodule

Testbench

`timescale 1ns / 1ps

module mul_tb();
reg  [31: 0] mul1;
reg  [31: 0] mul2;
wire [63: 0] ans;
wire cmp2;

reg  [31: 0] mul11;
reg  [31: 0] mul22;

wallace mul_wallace(
    .mul1(mul1),
    .mul2(mul2),
    .result(ans)
);

initial begin
    mul1=4'h2;
    mul2=4'h3;
    #1000 $finish;
end

always begin
    #5;
    mul1=$random();
    mul2=$random();
end  

在这里插入图片描述

参考资料

[1]压缩器原理: 讲解基于verilog的4-2压缩器和3-2压缩器的实现方式,实现华莱士树(Wallace Tree
[2]Booth2位编码链接: 定点乘法器----基4booth算法
[3]Wallace树接线: verilog语言设计的32位输入使用Booth两位一乘和华莱士树的定点补码乘法器(附参考仿真文件
[4]CPU设计实战(汪文祥)第五章

Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐