Added generic version of optimized pipelined CPU.
This commit is contained in:
@@ -127,6 +127,12 @@ static inline void tty_graphics_scan(int width, int height, tty_graphics_pixelfu
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Converts a floating point value to a byte.
|
||||
* \param[in] f the floating point value in [0,1]
|
||||
* \return the byte, in [0,255]
|
||||
* \details the input value is clamped to [0,1]
|
||||
*/
|
||||
static inline uint8_t tty_graphics_ftoi(float f) {
|
||||
f = (f < 0.0f) ? 0.0f : f;
|
||||
f = (f > 1.0f) ? 1.0f : f;
|
||||
|
||||
@@ -207,7 +207,7 @@ module Processor (
|
||||
reg [31:0] RAS_1;
|
||||
reg [31:0] RAS_2;
|
||||
reg [31:0] RAS_3;
|
||||
|
||||
|
||||
wire [31:0] D_JumpOrBranchAddr =
|
||||
/* D_isJALR */ FD_instr[3:2] == 2'b01 ? RAS_0 :
|
||||
(FD_PC + (D_isJAL ? D_Jimm : D_Bimm));
|
||||
|
||||
786
FemtoRV/TUTORIALS/FROM_BLINKER_TO_RISCV/pipelineX_generic.v
Normal file
786
FemtoRV/TUTORIALS/FROM_BLINKER_TO_RISCV/pipelineX_generic.v
Normal file
@@ -0,0 +1,786 @@
|
||||
/**
|
||||
* pipelineX_generic.v
|
||||
* Configurable PC prediction
|
||||
*/
|
||||
|
||||
`define CONFIG_PC_PREDICT // enables D -> F path (needed by options above)
|
||||
`define CONFIG_RAS // return address stack
|
||||
`define CONFIG_GSHARE // gshare branch prediction (or BTFNT if not set)
|
||||
//`define CONFIG_REGISTERED_D_PREDICT_BRANCH // registers branch prediction signal
|
||||
// (may gain a bit of fmax, but not
|
||||
// always...)
|
||||
|
||||
`default_nettype none
|
||||
`include "clockworks.v"
|
||||
`include "emitter_uart.v"
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
module Processor (
|
||||
input clk,
|
||||
input resetn,
|
||||
output [31:0] IO_mem_addr, // IO memory address
|
||||
input [31:0] IO_mem_rdata, // data read from IO memory
|
||||
output [31:0] IO_mem_wdata, // data written to IO memory
|
||||
output IO_mem_wr // IO write flag
|
||||
);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/*
|
||||
Reminder for the 10 RISC-V codeops
|
||||
----------------------------------
|
||||
5'b01100 | ALUreg | rd <- rs1 OP rs2
|
||||
5'b00100 | ALUimm | rd <- rs1 OP Iimm
|
||||
5'b11000 | Branch | if(rs1 OP rs2) PC<-PC+Bimm
|
||||
5'b11001 | JALR | rd <- PC+4; PC<-rs1+Iimm
|
||||
5'b11011 | JAL | rd <- PC+4; PC<-PC+Jimm
|
||||
5'b00101 | AUIPC | rd <- PC + Uimm
|
||||
5'b01101 | LUI | rd <- Uimm
|
||||
5'b00000 | Load | rd <- mem[rs1+Iimm]
|
||||
5'b01000 | Store | mem[rs1+Simm] <- rs2
|
||||
5'b11100 | SYSTEM | special
|
||||
*/
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// CSRs (cycle and retired instructions counters)
|
||||
reg [63:0] cycle;
|
||||
reg [63:0] instret;
|
||||
|
||||
// Cycle counter: held at zero while resetn is low, otherwise
// incremented by one on every rising edge of clk.
always @(posedge clk) begin
   cycle <= resetn ? (cycle + 64'd1) : 64'd0;
end
|
||||
|
||||
// Pipeline control
|
||||
wire D_flush;
|
||||
wire E_flush;
|
||||
|
||||
wire F_stall;
|
||||
wire D_stall;
|
||||
|
||||
wire halt; // Halt execution (on ebreak)
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/*** F: Instruction fetch ***/
|
||||
|
||||
reg [31:0] PC;
|
||||
|
||||
reg [31:0] PROGROM[0:16383]; // 16384 4-bytes words
|
||||
// 64 KB of program ROM
|
||||
initial begin
|
||||
$readmemh("PROGROM.hex",PROGROM);
|
||||
end
|
||||
|
||||
`ifdef CONFIG_PC_PREDICT
|
||||
wire [31:0] F_PC =
|
||||
D_predictPC ? D_PCprediction :
|
||||
EM_correctPC ? EM_PCcorrection :
|
||||
PC;
|
||||
`else
|
||||
wire [31:0] F_PC = EM_correctPC ? EM_PCcorrection :
|
||||
PC;
|
||||
`endif
|
||||
|
||||
wire [31:0] F_PCplus4 = F_PC + 4;
|
||||
|
||||
// F stage register update (F -> FD pipeline registers).
//  - Unless F_stall is asserted, latch the instruction word read from
//    PROGROM at word address F_PC[15:2] together with its address F_PC.
//  - FD_nop marks the latched instruction as a bubble when the decode
//    stage is flushed (D_flush) or while reset is active.
//  - PC advances to F_PCplus4 when not stalled; an active reset takes
//    priority and forces PC to 0.
always @(posedge clk) begin

   FD_nop <= D_flush | !resetn;

   if(!F_stall) begin
      FD_PC    <= F_PC;
      FD_instr <= PROGROM[F_PC[15:2]];
   end

   if(!resetn) begin
      PC <= 0;
   end else if(!F_stall) begin
      PC <= F_PCplus4;
   end

end
|
||||
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
reg [31:0] FD_PC;
|
||||
reg [31:0] FD_instr;
|
||||
reg FD_nop; // Needed because I cannot directly write NOP to FD_instr
|
||||
// because FD_instr is plugged to PROGROM's output port.
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
|
||||
/*** D: Instruction decode ***/
|
||||
|
||||
/** These three signals come from the Writeback stage **/
|
||||
wire wbEnable;
|
||||
wire [31:0] wbData;
|
||||
wire [4:0] wbRdId;
|
||||
|
||||
wire [4:0] D_rdId = FD_instr[11:7];
|
||||
wire [4:0] D_rs1Id = FD_instr[19:15];
|
||||
wire [4:0] D_rs2Id = FD_instr[24:20];
|
||||
|
||||
// commented-out codeop recognizers are optimized below
|
||||
// wire D_isJAL = (FD_instr[6:2]==5'b11011);
|
||||
// wire D_isJALR = (FD_instr[6:2]==5'b11001);
|
||||
// wire D_isAUIPC = (FD_instr[6:2]==5'b00101);
|
||||
// wire D_isLUI = (FD_instr[6:2]==5'b01101);
|
||||
// wire D_isBranch = (FD_instr[6:2]==5'b11000);
|
||||
wire D_isALUreg = (FD_instr[6:2]==5'b01100);
|
||||
wire D_isALUimm = (FD_instr[6:2]==5'b00100);
|
||||
wire D_isLoad = (FD_instr[6:2]==5'b00000);
|
||||
wire D_isStore = (FD_instr[6:2]==5'b01000);
|
||||
wire D_isSYSTEM = (FD_instr[6:2]==5'b11100);
|
||||
|
||||
// optimized codeop recognizers
|
||||
wire D_isJAL = FD_instr[3];
|
||||
wire D_isJALR = {FD_instr[6], FD_instr[3], FD_instr[2]} == 3'b101;
|
||||
wire D_isLUI = FD_instr[6:4] == 3'b111;
|
||||
wire D_isAUIPC = FD_instr[6:4] == 3'b101;
|
||||
wire D_isBranch = {FD_instr[6], FD_instr[4], FD_instr[2]} == 3'b100;
|
||||
|
||||
|
||||
wire D_isJALorJALR = (FD_instr[2] & FD_instr[6]);
|
||||
wire D_isLUIorAUIPC = (FD_instr[4] & FD_instr[6]);
|
||||
|
||||
|
||||
wire D_readsRs1 = !(D_isJAL || D_isLUIorAUIPC);
|
||||
|
||||
wire D_readsRs2 = (FD_instr[5] && (FD_instr[3:2] == 2'b00));
|
||||
// <=> D_isALUreg || D_isBranch || D_isStore || D_isSYSTEM
|
||||
|
||||
wire [31:0] D_Uimm = { FD_instr[31],FD_instr[30:12], {12{1'b0}}};
|
||||
|
||||
wire [31:0] D_Bimm = {{20{FD_instr[31]}},
|
||||
FD_instr[7],FD_instr[30:25],FD_instr[11:8],1'b0};
|
||||
|
||||
wire [31:0] D_Jimm = {{12{FD_instr[31]}},
|
||||
FD_instr[19:12],FD_instr[20],FD_instr[30:21],1'b0};
|
||||
|
||||
`ifdef CONFIG_PC_PREDICT
|
||||
`ifdef CONFIG_GSHARE
|
||||
localparam BP_HISTO_BITS=9;
|
||||
localparam BP_ADDR_BITS=12;
|
||||
|
||||
localparam BHT_INDEX_BITS=BP_ADDR_BITS;
|
||||
localparam BHT_SIZE=1<<BHT_INDEX_BITS;
|
||||
|
||||
// global history
|
||||
reg [BP_HISTO_BITS-1:0] branch_history;
|
||||
|
||||
// branch history table (2 bits per entry)
|
||||
reg [1:0] BHT[BHT_SIZE-1:0];
|
||||
|
||||
// gets the index in the branch prediction table
|
||||
// from the PC
|
||||
// Returns the branch history table index for the instruction at
// address PC: bits [BP_ADDR_BITS+1:2] of PC XORed with the global
// branch history, the latter shifted left by
// (BP_ADDR_BITS - BP_HISTO_BITS) positions.
// NOTE(review): branch_history is read from module scope rather than
// passed as an argument; when this function is called from a continuous
// assignment, confirm that the simulator re-evaluates it when only
// branch_history changes.
function [BHT_INDEX_BITS-1:0] BHT_index;
   input [31:0] PC;
   reg [BHT_INDEX_BITS-1:0] pc_bits;
   reg [BHT_INDEX_BITS-1:0] history_bits;
   begin
      /* verilator lint_off WIDTH */
      pc_bits      = PC[BP_ADDR_BITS+1:2];
      history_bits = branch_history << (BP_ADDR_BITS - BP_HISTO_BITS);
      /* verilator lint_on WIDTH */
      BHT_index = pc_bits ^ history_bits;
   end
endfunction
|
||||
|
||||
`ifdef CONFIG_REGISTERED_D_PREDICT_BRANCH
|
||||
// registered version, that "sees" one cycle in advance by
|
||||
// using PC from the "F" stage (loses 0.038 CPIs but gains
|
||||
// maxfreq)
|
||||
reg D_predictBranch;
|
||||
always @(posedge clk) begin
|
||||
D_predictBranch <= BHT[BHT_index(PC)][1];
|
||||
end
|
||||
`else
|
||||
wire D_predictBranch = BHT[BHT_index(FD_PC)][1];
|
||||
`endif
|
||||
|
||||
`else
|
||||
// No GSHARE branch predictor,
|
||||
// use BTFNT (Backwards taken forwards not taken)
|
||||
// I[31]=Bimm sgn (pred bkwd branch taken)
|
||||
wire D_predictBranch = FD_instr[31];
|
||||
`endif
|
||||
|
||||
`ifdef CONFIG_RAS
|
||||
// code below is equivalent (in this context) to:
|
||||
// wire D_predictPC = !FD_nop && (
|
||||
// D_isJAL || D_isJALR || (D_isBranch && D_predictBranch)
|
||||
// );
|
||||
// JAL: 11011
|
||||
// JALR: 11001
|
||||
// Branch: 11000
|
||||
// These three all start with 110, and they are the only ones that do
|
||||
wire D_predictPC = !FD_nop &&
|
||||
(FD_instr[6:4] == 3'b110) && (FD_instr[2] | D_predictBranch);
|
||||
|
||||
// Return address stack
|
||||
|
||||
reg [31:0] RAS_0;
|
||||
reg [31:0] RAS_1;
|
||||
reg [31:0] RAS_2;
|
||||
reg [31:0] RAS_3;
|
||||
|
||||
wire [31:0] D_PCprediction =
|
||||
/* D_isJALR */ FD_instr[3:2] == 2'b01 ? RAS_0 :
|
||||
(FD_PC + (D_isJAL ? D_Jimm : D_Bimm));
|
||||
|
||||
`else // !`ifdef CONFIG_RAS
|
||||
wire D_predictPC = !FD_nop && (D_isJAL || (D_isBranch && D_predictBranch));
|
||||
wire [31:0] D_PCprediction = (FD_PC + (D_isJAL ? D_Jimm : D_Bimm));
|
||||
`endif
|
||||
`endif // `CONFIG_PC_PREDICT
|
||||
|
||||
reg [31:0] RegisterBank [0:31];
|
||||
always @(posedge clk) begin
|
||||
|
||||
DE_rdId <= D_rdId;
|
||||
DE_rs1Id <= D_rs1Id;
|
||||
DE_rs2Id <= D_rs2Id;
|
||||
|
||||
DE_funct3 <= FD_instr[14:12];
|
||||
DE_funct3_is <= 8'b00000001 << FD_instr[14:12];
|
||||
DE_funct7 <= FD_instr[30];
|
||||
DE_csrId <= {FD_instr[27],FD_instr[21]};
|
||||
|
||||
DE_nop <= 1'b0;
|
||||
|
||||
if(!D_stall) begin
|
||||
DE_isALUreg <= D_isALUreg;
|
||||
DE_isALUimm <= D_isALUimm;
|
||||
DE_isBranch <= D_isBranch;
|
||||
DE_isJALR <= D_isJALR;
|
||||
DE_isJAL <= D_isJAL;
|
||||
DE_isAUIPC <= D_isAUIPC;
|
||||
DE_isLUI <= D_isLUI;
|
||||
DE_isLoad <= D_isLoad;
|
||||
DE_isStore <= D_isStore;
|
||||
DE_isCSRRS <= D_isSYSTEM && FD_instr[13];
|
||||
DE_isEBREAK <= D_isSYSTEM && !FD_instr[13];
|
||||
|
||||
// wbEnable = !isBranch & !isStore
|
||||
// Note: EM_wbEnable = DE_wbEnable && (rdId != 0)
|
||||
DE_wbEnable <= (FD_instr[5:2] != 4'b1000);
|
||||
end
|
||||
|
||||
if(E_flush | FD_nop) begin
|
||||
DE_nop <= 1'b1;
|
||||
DE_isALUreg <= 1'b0;
|
||||
DE_isALUimm <= 1'b0;
|
||||
DE_isBranch <= 1'b0;
|
||||
DE_isJALR <= 1'b0;
|
||||
DE_isJAL <= 1'b0;
|
||||
DE_isAUIPC <= 1'b0;
|
||||
DE_isLUI <= 1'b0;
|
||||
DE_isLoad <= 1'b0;
|
||||
DE_isStore <= 1'b0;
|
||||
DE_isCSRRS <= 1'b0;
|
||||
DE_isEBREAK <= 1'b0;
|
||||
DE_wbEnable <= 1'b0;
|
||||
end
|
||||
|
||||
if(wbEnable) begin
|
||||
RegisterBank[wbRdId] <= wbData;
|
||||
end
|
||||
|
||||
DE_IorSimm <= {
|
||||
{21{FD_instr[31]}},
|
||||
D_isStore ? {FD_instr[30:25],FD_instr[11:7]} :
|
||||
FD_instr[30:20]
|
||||
};
|
||||
|
||||
`ifdef CONFIG_PC_PREDICT
|
||||
// Used in case of misprediction:
|
||||
// PC+Bimm if predict not taken, PC+4 if predict taken
|
||||
DE_PCplus4orBimm <= FD_PC + (D_predictBranch ? 4 : D_Bimm);
|
||||
DE_predictBranch <= D_predictBranch;
|
||||
`ifdef CONFIG_GSHARE
|
||||
DE_BHTindex <= BHT_index(FD_PC);
|
||||
`endif
|
||||
`ifdef CONFIG_RAS
|
||||
DE_predictRA <= RAS_0;
|
||||
if(!D_stall && !FD_nop && !D_flush) begin
|
||||
if(D_isJAL && D_rdId==1) begin
|
||||
RAS_3 <= RAS_2;
|
||||
RAS_2 <= RAS_1;
|
||||
RAS_1 <= RAS_0;
|
||||
RAS_0 <= FD_PC + 4;
|
||||
end
|
||||
if(D_isJALR && D_rdId==0 && (D_rs1Id == 1 || D_rs1Id==5)) begin
|
||||
RAS_0 <= RAS_1;
|
||||
RAS_1 <= RAS_2;
|
||||
RAS_2 <= RAS_3;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
`else
|
||||
DE_PCplusBorJimm <= FD_PC + (D_isJAL ? D_Jimm : D_Bimm);
|
||||
`endif
|
||||
|
||||
// Code below is equivalent to:
|
||||
// DE_PCplus4orUimm =
|
||||
// ((isLUI ? 0 : FD_PC)) + ((isJAL | isJALR) ? 4 : Uimm)
|
||||
// (knowing that isLUI | isAUIPC | isJAL | isJALR)
|
||||
DE_PCplus4orUimm <= ({32{FD_instr[6:5]!=2'b01}} & FD_PC) +
|
||||
(D_isJALorJALR ? 4 : D_Uimm);
|
||||
|
||||
DE_isJALorJALRorLUIorAUIPC <= FD_instr[2];
|
||||
end
|
||||
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
reg DE_nop; // Needed by instret in W stage
|
||||
reg [4:0] DE_rdId;
|
||||
reg [4:0] DE_rs1Id;
|
||||
reg [4:0] DE_rs2Id;
|
||||
|
||||
reg [1:0] DE_csrId;
|
||||
reg [2:0] DE_funct3;
|
||||
(* onehot *) reg [7:0] DE_funct3_is;
|
||||
reg [5:5] DE_funct7;
|
||||
|
||||
reg [31:0] DE_IorSimm;
|
||||
|
||||
reg DE_isALUreg;
|
||||
reg DE_isALUimm;
|
||||
reg DE_isBranch;
|
||||
reg DE_isJALR;
|
||||
reg DE_isJAL;
|
||||
reg DE_isAUIPC;
|
||||
reg DE_isLUI;
|
||||
reg DE_isLoad;
|
||||
reg DE_isStore;
|
||||
reg DE_isCSRRS;
|
||||
reg DE_isEBREAK;
|
||||
|
||||
reg DE_wbEnable; // !isBranch && !isStore (rdId != 0 is tested later, when EM_wbEnable is latched)
|
||||
|
||||
reg DE_isJALorJALRorLUIorAUIPC;
|
||||
|
||||
`ifdef CONFIG_PC_PREDICT
|
||||
reg [31:0] DE_PCplus4orBimm;
|
||||
reg DE_predictBranch;
|
||||
`ifdef CONFIG_RAS
|
||||
reg [31:0] DE_predictRA;
|
||||
`endif
|
||||
`ifdef CONFIG_GSHARE
|
||||
reg [BHT_INDEX_BITS-1:0] DE_BHTindex;
|
||||
`endif
|
||||
`else
|
||||
reg [31:0] DE_PCplusBorJimm;
|
||||
`endif
|
||||
|
||||
reg [31:0] DE_PCplus4orUimm;
|
||||
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
/*** E: Execute ***/
|
||||
|
||||
/*********** Register forwarding ************************************/
|
||||
|
||||
wire E_M_fwd_rs1 = EM_wbEnable && (EM_rdId == DE_rs1Id);
|
||||
wire E_W_fwd_rs1 = MW_wbEnable && (MW_rdId == DE_rs1Id);
|
||||
|
||||
wire E_M_fwd_rs2 = EM_wbEnable && (EM_rdId == DE_rs2Id);
|
||||
wire E_W_fwd_rs2 = MW_wbEnable && (MW_rdId == DE_rs2Id);
|
||||
|
||||
wire [31:0] E_rs1 = E_M_fwd_rs1 ? EM_Eresult :
|
||||
E_W_fwd_rs1 ? wbData :
|
||||
RegisterBank[DE_rs1Id] ;
|
||||
|
||||
wire [31:0] E_rs2 = E_M_fwd_rs2 ? EM_Eresult :
|
||||
E_W_fwd_rs2 ? wbData :
|
||||
RegisterBank[DE_rs2Id] ;
|
||||
|
||||
/*********** the ALU *************************************************/
|
||||
|
||||
wire [31:0] E_aluIn1 = E_rs1;
|
||||
wire [31:0] E_aluIn2 = (DE_isALUreg | DE_isBranch) ? E_rs2 : DE_IorSimm;
|
||||
wire [4:0] E_shamt = DE_isALUreg ? E_rs2[4:0] : DE_rs2Id;
|
||||
|
||||
wire E_minus = DE_funct7[5] & DE_isALUreg;
|
||||
wire E_arith_shift = DE_funct7[5];
|
||||
|
||||
// The adder is used by both arithmetic instructions and JALR.
|
||||
wire [31:0] E_aluPlus = E_aluIn1 + E_aluIn2;
|
||||
|
||||
// Use a single 33 bits subtract to do subtraction and all comparisons
|
||||
// (trick borrowed from swapforth/J1)
|
||||
wire [32:0] E_aluMinus = {1'b1, ~E_aluIn2} + {1'b0,E_aluIn1} + 33'b1;
|
||||
wire E_LT =
|
||||
(E_aluIn1[31] ^ E_aluIn2[31]) ? E_aluIn1[31] : E_aluMinus[32];
|
||||
wire E_LTU = E_aluMinus[32];
|
||||
wire E_EQ = (E_aluMinus[31:0] == 0);
|
||||
|
||||
// Flip a 32 bit word. Used by the shifter (a single shifter for
|
||||
// left and right shifts, saves silicon !)
|
||||
// Reverses the bit order of a 32-bit word: result bit i is input
// bit (31 - i).
function [31:0] flip32;
   input [31:0] x;
   integer i;
   begin
      for (i = 0; i < 32; i = i + 1) begin
         flip32[i] = x[31 - i];
      end
   end
endfunction
|
||||
|
||||
wire [31:0] E_shifter_in = (DE_funct3==3'b001) ? flip32(E_aluIn1) : E_aluIn1;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
wire [31:0] E_shifter =
|
||||
$signed({E_arith_shift & E_aluIn1[31], E_shifter_in}) >>> E_aluIn2[4:0];
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
wire [31:0] E_leftshift = flip32(E_shifter);
|
||||
|
||||
wire [31:0] E_aluOut =
|
||||
(DE_funct3_is[0] ? (E_minus ? E_aluMinus[31:0] : E_aluPlus) : 32'b0) |
|
||||
(DE_funct3_is[1] ? E_leftshift : 32'b0) |
|
||||
(DE_funct3_is[2] ? {31'b0, E_LT } : 32'b0) |
|
||||
(DE_funct3_is[3] ? {31'b0, E_LTU} : 32'b0) |
|
||||
(DE_funct3_is[4] ? E_aluIn1 ^ E_aluIn2 : 32'b0) |
|
||||
(DE_funct3_is[5] ? E_shifter : 32'b0) |
|
||||
(DE_funct3_is[6] ? E_aluIn1 | E_aluIn2 : 32'b0) |
|
||||
(DE_funct3_is[7] ? E_aluIn1 & E_aluIn2 : 32'b0) ;
|
||||
|
||||
|
||||
/*********** Branch, JAL, JALR ***********************************/
|
||||
|
||||
wire E_takeBranch =
|
||||
(DE_funct3_is[0] & E_EQ ) | // BEQ
|
||||
(DE_funct3_is[1] & !E_EQ ) | // BNE
|
||||
(DE_funct3_is[4] & E_LT ) | // BLT
|
||||
(DE_funct3_is[5] & !E_LT ) | // BGE
|
||||
(DE_funct3_is[6] & E_LTU) | // BLTU
|
||||
(DE_funct3_is[7] & !E_LTU) ; // BGEU
|
||||
|
||||
wire [31:0] E_JALRaddr = {E_aluPlus[31:1],1'b0};
|
||||
|
||||
`ifdef CONFIG_PC_PREDICT
|
||||
`ifdef CONFIG_RAS
|
||||
wire E_correctPC = (
|
||||
(DE_isJALR && (DE_predictRA != E_JALRaddr) ) ||
|
||||
(DE_isBranch && (E_takeBranch^DE_predictBranch))
|
||||
);
|
||||
`else
|
||||
wire E_correctPC = DE_isJALR ||
|
||||
(DE_isBranch && (E_takeBranch^DE_predictBranch));
|
||||
`endif
|
||||
wire [31:0] E_PCcorrection = DE_isBranch ? DE_PCplus4orBimm : E_JALRaddr;
|
||||
`else
|
||||
wire E_correctPC = (
|
||||
DE_isJAL || DE_isJALR ||
|
||||
(DE_isBranch && E_takeBranch)
|
||||
);
|
||||
wire [31:0] E_PCcorrection =
|
||||
DE_isJALR ? E_JALRaddr : DE_PCplusBorJimm;
|
||||
`endif
|
||||
|
||||
wire [31:0] E_result =
|
||||
DE_isJALorJALRorLUIorAUIPC ? DE_PCplus4orUimm : E_aluOut;
|
||||
|
||||
wire [31:0] E_addr = E_rs1 + DE_IorSimm;
|
||||
|
||||
/**************************************************************/
|
||||
|
||||
`ifdef CONFIG_PC_PREDICT
|
||||
`ifdef CONFIG_GSHARE
|
||||
// 2-bit saturating up/down counter step.
//   prev : current counter value
//   dir  : 1 counts up, 0 counts down
// Returns prev + 1 when dir is 1 (staying at 2'b11 once reached) and
// prev - 1 when dir is 0 (staying at 2'b00 once reached).
// The explicit table below enumerates all eight {dir, prev} cases.
function [1:0] incdec_sat;
   input [1:0] prev;
   input dir;
   incdec_sat =
      {dir, prev} == 3'b000 ? 2'b00 :  // down, already at minimum
      {dir, prev} == 3'b001 ? 2'b00 :
      {dir, prev} == 3'b010 ? 2'b01 :
      {dir, prev} == 3'b011 ? 2'b10 :
      {dir, prev} == 3'b100 ? 2'b01 :
      {dir, prev} == 3'b101 ? 2'b10 :
      {dir, prev} == 3'b110 ? 2'b11 :
                              2'b11 ;  // up, already at maximum
endfunction
|
||||
`endif
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
EM_nop <= DE_nop;
|
||||
EM_rdId <= DE_rdId;
|
||||
EM_rs1Id <= DE_rs1Id;
|
||||
EM_rs2Id <= DE_rs2Id;
|
||||
EM_funct3 <= DE_funct3;
|
||||
EM_csrId_is <= 4'b0001 << DE_csrId;
|
||||
EM_rs2 <= E_rs2;
|
||||
EM_Eresult <= E_result;
|
||||
EM_addr <= E_addr;
|
||||
EM_Mdata <= DATARAM[E_addr[15:2]];
|
||||
EM_isLoad <= DE_isLoad;
|
||||
EM_isStore <= DE_isStore;
|
||||
EM_isCSRRS <= DE_isCSRRS;
|
||||
EM_wbEnable <= DE_wbEnable && (DE_rdId != 0);
|
||||
EM_correctPC <= E_correctPC;
|
||||
EM_PCcorrection <= E_PCcorrection;
|
||||
|
||||
`ifdef CONFIG_PC_PREDICT
|
||||
`ifdef CONFIG_GSHARE
|
||||
if(DE_isBranch) begin
|
||||
branch_history <= {E_takeBranch,branch_history[BP_HISTO_BITS-1:1]};
|
||||
BHT[DE_BHTindex] <= incdec_sat(BHT[DE_BHTindex], E_takeBranch);
|
||||
end
|
||||
`endif
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
assign halt = resetn & DE_isEBREAK;
|
||||
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
reg EM_nop; // Needed by instret in W stage
|
||||
reg [4:0] EM_rdId;
|
||||
reg [4:0] EM_rs1Id;
|
||||
reg [4:0] EM_rs2Id;
|
||||
(* onehot *) reg [3:0] EM_csrId_is;
|
||||
reg [2:0] EM_funct3;
|
||||
reg [31:0] EM_rs2;
|
||||
reg [31:0] EM_Eresult;
|
||||
reg [31:0] EM_addr;
|
||||
reg [31:0] EM_Mdata;
|
||||
reg EM_isStore;
|
||||
reg EM_isLoad;
|
||||
reg EM_isCSRRS;
|
||||
reg EM_wbEnable;
|
||||
reg EM_correctPC;
|
||||
reg [31:0] EM_PCcorrection;
|
||||
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
|
||||
/*** M: Memory ***/
|
||||
|
||||
wire M_isB = (EM_funct3[1:0] == 2'b00);
|
||||
wire M_isH = (EM_funct3[1:0] == 2'b01);
|
||||
|
||||
/*************** STORE **************************/
|
||||
|
||||
wire [31:0] M_STORE_data;
|
||||
assign M_STORE_data[ 7: 0] = EM_rs2[7:0];
|
||||
assign M_STORE_data[15: 8] = EM_addr[0] ? EM_rs2[7:0] : EM_rs2[15: 8] ;
|
||||
assign M_STORE_data[23:16] = EM_addr[1] ? EM_rs2[7:0] : EM_rs2[23:16] ;
|
||||
assign M_STORE_data[31:24] = EM_addr[0] ? EM_rs2[7:0] :
|
||||
EM_addr[1] ? EM_rs2[15:8] : EM_rs2[31:24] ;
|
||||
|
||||
// The memory write mask:
|
||||
// 1111 if writing a word
|
||||
// 0011 or 1100 if writing a halfword
|
||||
// (depending on EM_addr[1])
|
||||
// 0001, 0010, 0100 or 1000 if writing a byte
|
||||
// (depending on EM_addr[1:0])
|
||||
|
||||
wire [3:0] M_STORE_wmask = M_isB ?
|
||||
(EM_addr[1] ?
|
||||
(EM_addr[0] ? 4'b1000 : 4'b0100) :
|
||||
(EM_addr[0] ? 4'b0010 : 4'b0001)
|
||||
) :
|
||||
M_isH ? (EM_addr[1] ? 4'b1100 : 4'b0011) :
|
||||
4'b1111 ;
|
||||
|
||||
|
||||
wire M_isIO = EM_addr[22];
|
||||
wire M_isRAM = !M_isIO;
|
||||
|
||||
assign IO_mem_addr = EM_addr;
|
||||
assign IO_mem_wr = EM_isStore && M_isIO; // && M_STORE_wmask[0];
|
||||
assign IO_mem_wdata = EM_rs2;
|
||||
|
||||
wire [3:0] M_wmask = {4{EM_isStore & M_isRAM}} & M_STORE_wmask;
|
||||
|
||||
reg [31:0] DATARAM [0:16383]; // 16384 4-bytes words
|
||||
// 64 KB of data RAM in total
|
||||
|
||||
wire [13:0] M_word_addr = EM_addr[15:2];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if(M_wmask[0]) DATARAM[M_word_addr][ 7:0 ] <= M_STORE_data[ 7:0 ];
|
||||
if(M_wmask[1]) DATARAM[M_word_addr][15:8 ] <= M_STORE_data[15:8 ];
|
||||
if(M_wmask[2]) DATARAM[M_word_addr][23:16] <= M_STORE_data[23:16];
|
||||
if(M_wmask[3]) DATARAM[M_word_addr][31:24] <= M_STORE_data[31:24];
|
||||
end
|
||||
|
||||
wire M_sext = !EM_funct3[2];
|
||||
|
||||
/*************** LOAD ****************************/
|
||||
|
||||
wire [15:0] M_LOAD_H=EM_addr[1] ? EM_Mdata[31:16]: EM_Mdata[15:0];
|
||||
wire [7:0] M_LOAD_B=EM_addr[0] ? M_LOAD_H[15:8] : M_LOAD_H[7:0];
|
||||
wire M_LOAD_sign=M_sext & (M_isB ? M_LOAD_B[7] : M_LOAD_H[15]);
|
||||
|
||||
wire [31:0] M_Mdata = M_isB ? {{24{M_LOAD_sign}},M_LOAD_B} :
|
||||
M_isH ? {{16{M_LOAD_sign}},M_LOAD_H} :
|
||||
EM_Mdata ;
|
||||
|
||||
wire [31:0] M_CSR_data =
|
||||
(EM_csrId_is[0] ? cycle[31:0] : 32'b0) |
|
||||
(EM_csrId_is[2] ? cycle[63:32] : 32'b0) |
|
||||
(EM_csrId_is[1] ? instret[31:0] : 32'b0) |
|
||||
(EM_csrId_is[3] ? instret[63:32] : 32'b0) ;
|
||||
|
||||
initial begin
|
||||
$readmemh("DATARAM.hex",DATARAM);
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
MW_nop <= EM_nop;
|
||||
MW_rdId <= EM_rdId;
|
||||
|
||||
MW_wbData <=
|
||||
EM_isLoad ? (M_isIO ? IO_mem_rdata : M_Mdata) :
|
||||
EM_isCSRRS ? M_CSR_data :
|
||||
EM_Eresult;
|
||||
|
||||
MW_wbEnable <= EM_wbEnable;
|
||||
|
||||
if(!resetn) begin
|
||||
instret <= 0;
|
||||
end else if(!MW_nop) begin
|
||||
// It's easier to count the retired instructions when
|
||||
// they *exit* the pipeline (but it requires to pass
|
||||
// a _nop flag through the pipeline).
|
||||
instret <= instret + 1;
|
||||
end
|
||||
end
|
||||
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
reg MW_nop; // Needed by instret in W stage
|
||||
reg [4:0] MW_rdId;
|
||||
reg [31:0] MW_wbData;
|
||||
reg MW_wbEnable;
|
||||
/******************************************************************************/
|
||||
/******************************************************************************/
|
||||
|
||||
/*** W: WriteBack ***/
|
||||
|
||||
assign wbData = MW_wbData;
|
||||
assign wbEnable = MW_wbEnable;
|
||||
assign wbRdId = MW_rdId;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// we do not test rdId == 0 because in general, one loads data to
|
||||
// a register, not to zero !
|
||||
wire rs1Hazard = D_readsRs1 && (D_rs1Id == DE_rdId);
|
||||
wire rs2Hazard = D_readsRs2 && (D_rs2Id == DE_rdId);
|
||||
|
||||
// we could generate slightly more bubbles with
|
||||
// simpler test (to be used if critical path is here)
|
||||
// -> keeping this one (seems it has no influence on CPI,
|
||||
// and results in slightly better timings)
|
||||
// wire rs1Hazard = (D_rs1Id == DE_rdId);
|
||||
// wire rs2Hazard = (D_rs2Id == DE_rdId);
|
||||
|
||||
// we are not obliged to compare all bits !
|
||||
// wire rs1Hazard = (D_rs1Id[3:0] == DE_rdId[3:0]);
|
||||
// wire rs2Hazard = (D_rs2Id[3:0] == DE_rdId[3:0]);
|
||||
|
||||
// Add bubble only if next instr uses result of latency-2 instr
|
||||
wire dataHazard = !FD_nop && (DE_isLoad || DE_isCSRRS) &&
|
||||
(rs1Hazard || rs2Hazard);
|
||||
|
||||
// (other option: always add bubble after latency-2 instr
|
||||
// like Samsoniuk's DarkRiscV). Reduces critical path.
|
||||
// wire dataHazard = !FD_nop && (DE_isLoad || DE_isCSRRS);
|
||||
|
||||
assign F_stall = dataHazard | halt;
|
||||
assign D_stall = dataHazard | halt;
|
||||
|
||||
// Here we need to use E_correctPC (the registered version
|
||||
// EM_correctPC is not ready on time).
|
||||
assign D_flush = E_correctPC;
|
||||
assign E_flush = E_correctPC | dataHazard;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
`ifdef BENCH
|
||||
always @(posedge clk) begin
|
||||
if(halt) $finish();
|
||||
end
|
||||
`endif
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
endmodule
|
||||
|
||||
module SOC (
|
||||
input CLK, // system clock
|
||||
input RESET,// reset button
|
||||
output reg [4:0] LEDS, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
wire clk;
|
||||
wire resetn;
|
||||
|
||||
wire [31:0] IO_mem_addr;
|
||||
wire [31:0] IO_mem_rdata;
|
||||
wire [31:0] IO_mem_wdata;
|
||||
wire IO_mem_wr;
|
||||
|
||||
Processor CPU(
|
||||
.clk(clk),
|
||||
.resetn(resetn),
|
||||
.IO_mem_addr(IO_mem_addr),
|
||||
.IO_mem_rdata(IO_mem_rdata),
|
||||
.IO_mem_wdata(IO_mem_wdata),
|
||||
.IO_mem_wr(IO_mem_wr)
|
||||
);
|
||||
|
||||
wire [13:0] IO_wordaddr = IO_mem_addr[15:2];
|
||||
|
||||
// Memory-mapped IO in IO page, 1-hot addressing in word address.
|
||||
localparam IO_LEDS_bit = 0; // W five leds
|
||||
localparam IO_UART_DAT_bit = 1; // W data to send (8 bits)
|
||||
localparam IO_UART_CNTL_bit = 2; // R status. bit 9: busy sending
|
||||
|
||||
always @(posedge clk) begin
|
||||
if(IO_mem_wr & IO_wordaddr[IO_LEDS_bit]) begin
|
||||
LEDS <= IO_mem_wdata[4:0];
|
||||
end
|
||||
end
|
||||
|
||||
wire uart_valid = IO_mem_wr & IO_wordaddr[IO_UART_DAT_bit];
|
||||
wire uart_ready;
|
||||
|
||||
|
||||
corescore_emitter_uart #(
|
||||
.clk_freq_hz(`CPU_FREQ*1000000),
|
||||
.baud_rate(1000000)
|
||||
) UART(
|
||||
.i_clk(clk),
|
||||
.i_rst(!resetn),
|
||||
.i_data(IO_mem_wdata[7:0]),
|
||||
.i_valid(uart_valid),
|
||||
.o_ready(uart_ready),
|
||||
.o_uart_tx(TXD)
|
||||
);
|
||||
|
||||
assign IO_mem_rdata =
|
||||
IO_wordaddr[IO_UART_CNTL_bit] ? { 22'b0, !uart_ready, 9'b0}
|
||||
: 32'b0;
|
||||
|
||||
`ifdef BENCH
|
||||
always @(posedge clk) begin
|
||||
if(uart_valid) begin
|
||||
$write("%c", IO_mem_wdata[7:0] );
|
||||
$fflush(32'h8000_0001);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
// Gearbox and reset circuitry.
|
||||
Clockworks CW(
|
||||
.CLK(CLK),
|
||||
.RESET(RESET),
|
||||
.clk(clk),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
Reference in New Issue
Block a user