testdrive-RV32M, simplified a bit DIV/REM implementation, gained a
little bit: now validates at 90MHz and works at 115 MHz
This commit is contained in:
@@ -1,114 +0,0 @@
|
||||
// Started from: https://www.fpga4fun.com/HDMI.html (c) fpga4fun.com & KNJN LLC 2013
|
||||
// Added comments, adapted to ECP5 / ULX3S, made small changes here and there
|
||||
//
|
||||
// - Introduced some ideas from Lawrie's code here: https://github.com/lawrie/ulx3s_examples/blob/master/hdmi/
|
||||
// See also https://github.com/sylefeb/Silice/tree/master/projects/hdmi_test (also based on Lawrie's code).
|
||||
// - I'm not using Lawrie's "fake differential" but instead I'm using LVCMOS33D mode for the HDMI pins in ulx3S.lpf,
|
||||
// that automatically generates the negative signal from the positive one.
|
||||
// See: https://www.gitmemory.com/issue/YosysHQ/nextpnr/544/751511265
|
||||
// See also LATTICE ECP5 and ECP5-5G sysI/O Usage Guide / Technical note
|
||||
// - In Lawrie's "fake differential", there is the ODRX1F trick that makes it possible to operate at half the
|
||||
// frequency for the bit clock, may be interesting/necessary to use for res higher than 640x480.
|
||||
// - I've seen also some ECP5 primitives: OLVDS (A->Z,ZN) and OBCO (I->OT,OC)
|
||||
// that I tried to use with the standard LVCMOS33 mode, without success.
|
||||
|
||||
module HDMI_test(
|
||||
input pclk,
|
||||
output [3:0] gpdi_dp
|
||||
// Note: gpdi_dn[3:0] is generated automatically by LVCMOS33D mode in ulx3s.lpf
|
||||
);
|
||||
|
||||
HDMI_gen hdmi_gen(
|
||||
.pixclk(pclk),
|
||||
.TMDS_rgb_p(gpdi_dp[2:0]),
|
||||
.TMDS_clock_p(gpdi_dp[3])
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
/*********************************************************************************/
|
||||
module HDMI_gen(
|
||||
input pixclk, // 25MHz
|
||||
output [2:0] TMDS_rgb_p, TMDS_rgb_n, // HDMI pins: RGB
|
||||
output TMDS_clock_p, TMDS_clock_n // HDMI pins: clock
|
||||
);
|
||||
|
||||
/******** Video generation *******************************************************/
|
||||
// This part is just like a VGA generator
|
||||
reg [9:0] CounterX, CounterY;
|
||||
reg hSync, vSync, DrawArea;
|
||||
always @(posedge pixclk) DrawArea <= (CounterX<640) && (CounterY<480);
|
||||
|
||||
always @(posedge pixclk) CounterX <= (CounterX==799) ? 0 : CounterX+1;
|
||||
always @(posedge pixclk) if(CounterX==799) CounterY <= (CounterY==524) ? 0 : CounterY+1;
|
||||
|
||||
always @(posedge pixclk) hSync <= (CounterX>=656) && (CounterX<752);
|
||||
always @(posedge pixclk) vSync <= (CounterY>=490) && (CounterY<492);
|
||||
|
||||
/******** Draw something *********************************************************/
|
||||
// Generate 8-bits red,green,blue signals from X and Y coordinates (the "shader")
|
||||
wire [7:0] W = {8{CounterX[7:0]==CounterY[7:0]}};
|
||||
wire [7:0] A = {8{CounterX[7:5]==3'h2 && CounterY[7:5]==3'h2}};
|
||||
reg [7:0] red, green, blue;
|
||||
|
||||
always @(posedge pixclk) begin
|
||||
red <= ({CounterX[5:0] & {6{CounterY[4:3]==~CounterX[4:3]}}, 2'b00} | W) & ~A;
|
||||
green <= (CounterX[7:0] & {8{CounterY[6]}} | W) & ~A;
|
||||
blue <= CounterY[7:0] | W | A;
|
||||
end
|
||||
|
||||
/******** RGB TMDS encoding ***************************************************/
|
||||
// Generate 10-bits TMDS red,green,blue signals. Blue embeds HSync/VSync in its
|
||||
// control part.
|
||||
wire [9:0] TMDS_red, TMDS_green, TMDS_blue;
|
||||
TMDS_encoder encode_R(.clk(pixclk), .VD(red ), .CD(2'b00) , .VDE(DrawArea), .TMDS(TMDS_red));
|
||||
TMDS_encoder encode_G(.clk(pixclk), .VD(green), .CD(2'b00) , .VDE(DrawArea), .TMDS(TMDS_green));
|
||||
TMDS_encoder encode_B(.clk(pixclk), .VD(blue ), .CD({vSync,hSync}), .VDE(DrawArea), .TMDS(TMDS_blue));
|
||||
|
||||
/******** 250 MHz clock *******************************************************/
|
||||
// This one needs some FPGA-specific specialized blocks (a PLL).
|
||||
wire clk_TMDS; // The 250 MHz clock used by the serializers.
|
||||
HDMI_clock hdmi_clock(.clk(pixclk), .hdmi_clk(clk_TMDS));
|
||||
|
||||
/******** Shifter *************************************************************/
|
||||
// Serialize the three 10-bits TMDS red,green,blue signals.
|
||||
|
||||
// Modulo-10 counter (note: most code I found uses a counter reset to 0 when
|
||||
// it reaches 9. For a small modulo, I prefer to have a circular shift-buffer
|
||||
// with a single 1 that triggers the signal. I think it is more elegant, and
|
||||
// more importantly, it works for a higher fmax (450 MHz here).
|
||||
reg [9:0] TMDS_mod10=1;
|
||||
wire TMDS_shift_load = TMDS_mod10[9];
|
||||
always @(posedge clk_TMDS) TMDS_mod10 <= {TMDS_mod10[8:0],TMDS_mod10[9]};
|
||||
|
||||
// Every 10 clocks, we get a fresh R,G,B triplet from the TMDS encoders,
|
||||
// else we shift.
|
||||
reg [9:0] TMDS_shift_red=0, TMDS_shift_green=0, TMDS_shift_blue=0;
|
||||
always @(posedge clk_TMDS) begin
|
||||
TMDS_shift_red <= TMDS_shift_load ? TMDS_red : TMDS_shift_red [9:1];
|
||||
TMDS_shift_green <= TMDS_shift_load ? TMDS_green : TMDS_shift_green[9:1];
|
||||
TMDS_shift_blue <= TMDS_shift_load ? TMDS_blue : TMDS_shift_blue [9:1];
|
||||
end
|
||||
|
||||
/******** Output to HDMI *****************************************************/
|
||||
|
||||
// There are four differential pairs to generate (red,greeb,blue,clock).
|
||||
// Each differential pair has a positive part, and a negative part (just
|
||||
// negated bit). Here are the positive parts:
|
||||
assign TMDS_rgb_p[2] = TMDS_shift_red[0];
|
||||
assign TMDS_rgb_p[1] = TMDS_shift_green[0];
|
||||
assign TMDS_rgb_p[0] = TMDS_shift_blue[0];
|
||||
assign TMDS_clock_p = pixclk;
|
||||
|
||||
// Note: what's below would not work, _p and _n sides
|
||||
// require exact synchronization that could not be
|
||||
// guaranteed if written like that.
|
||||
// In fact, the negative side is not wired in the HDMI_test
|
||||
// module. I'm generating it at the level of the
|
||||
// output pins using LVCMOS33D pin type in ulx3s.lpf
|
||||
assign TMDS_rgb_n[2] = !TMDS_shift_red[0];
|
||||
assign TMDS_rgb_n[1] = !TMDS_shift_green[0];
|
||||
assign TMDS_rgb_n[0] = !TMDS_shift_blue[0];
|
||||
assign TMDS_clock_n = !pixclk;
|
||||
|
||||
endmodule
|
||||
@@ -3,7 +3,10 @@
|
||||
// preserve maxfreq at each step.
|
||||
// Step 0: Tachyon valid. fmax: 115-120 MHz exp. fmax: 135-140 MHz
|
||||
// Step 1: Barrel shft valid. fmax: 110-115 MHz exp. fmax: 130-135 MHz
|
||||
// Step 2: RV32M valid. fmax: 90 MHz exp. fmax: 115 MHz
|
||||
// Step 2: RV32M valid. fmax: 90 MHz exp. fmax: 115 MHz (electron: 70-80)
|
||||
// DIV is bottleneck (fmax 90) then MUL (105)
|
||||
// Let's try the division from projectF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
// Firmware generation flags for this processor
|
||||
@@ -169,54 +172,47 @@ module FemtoRV32(
|
||||
wire [31:0] alu_mul = funct3Is[0] ? multiply[31: 0] // 0:MUL
|
||||
: multiply[63:32] ; // 1:MULH, 2:MULHSU, 3:MULHU
|
||||
|
||||
wire [31:0] alu_div = instr[13] ? (div_sign ? -dividendN : dividendN)
|
||||
: (div_sign ? -quotientN : quotientN);
|
||||
wire [31:0] alu_div = instr[13] ? (div_sign ? -dividend : dividend)
|
||||
: (div_sign ? -quotient : quotient);
|
||||
|
||||
|
||||
wire aluBusy = |quotient_msk ; // ALU is busy if division is in progress.
|
||||
wire aluBusy = |quotient_msk; // ALU is busy if division is in progress.
|
||||
reg [31:0] aluOut;
|
||||
|
||||
wire funcM = instr[25];
|
||||
wire isDivide = instr[14];
|
||||
|
||||
always @(posedge clk) begin
|
||||
aluOut <= (isALUreg & funcM) ? (isDivide ? alu_div : alu_mul) : alu_base;
|
||||
aluOut <= (isALUreg & funcM) ? (isDivide ? alu_div : alu_mul) : alu_base;
|
||||
end
|
||||
|
||||
|
||||
/***************************************************************************/
|
||||
// Implementation of DIV/REM instructions, highly inspired by PicoRV32
|
||||
|
||||
reg [31:0] dividend;
|
||||
reg [62:0] divisor;
|
||||
reg [31:0] quotient;
|
||||
reg [31:0] quotient_msk;
|
||||
|
||||
wire divstep_do = divisor <= {31'b0, dividend};
|
||||
|
||||
wire [31:0] dividendN = divstep_do ? dividend - divisor[31:0] : dividend;
|
||||
wire [31:0] quotientN = divstep_do ? quotient | quotient_msk : quotient;
|
||||
|
||||
wire div_sign = ~instr[12] & (instr[13] ? aluIn1[31] :
|
||||
(aluIn1[31] != aluIn2[31]) & |aluIn2);
|
||||
|
||||
reg [31:0] dividend;
|
||||
reg [62:0] divisor;
|
||||
reg [31:0] quotient;
|
||||
reg [32:0] quotient_msk;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (isALUreg & funcM & isDivide & aluWr) begin
|
||||
if (aluWr) begin
|
||||
dividend <= ~instr[12] & aluIn1[31] ? -aluIn1 : aluIn1;
|
||||
divisor <= {(~instr[12] & aluIn2[31] ? -aluIn2 : aluIn2), 31'b0};
|
||||
quotient <= 0;
|
||||
quotient_msk <= 1 << 31;
|
||||
quotient_msk[32] <= isALUreg & funcM & isDivide;
|
||||
end else begin
|
||||
dividend <= dividendN;
|
||||
divisor <= divisor >> 1;
|
||||
quotient <= quotientN;
|
||||
quotient_msk <= quotient_msk >> 1;
|
||||
if(divisor <= {31'b0, dividend}) begin
|
||||
quotient <= quotient | quotient_msk[32:1];
|
||||
dividend <= dividend - divisor[31:0];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [31:0] divResult;
|
||||
always @(posedge clk) divResult <= instr[13] ? dividendN : quotientN;
|
||||
|
||||
|
||||
/***************************************************************************/
|
||||
// The predicate for conditional branches.
|
||||
/***************************************************************************/
|
||||
|
||||
Reference in New Issue
Block a user