Branch prediction
- added DHRYSTONE test - cleaning sources, adding hit/miss reports in simulation - pipeline9.v: with return address stack
This commit is contained in:
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry.h (part 1 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
* Siemens AG, AUT E 51
|
||||
* Postfach 3220
|
||||
* 8520 Erlangen
|
||||
* Germany (West)
|
||||
* Phone: [+49]-9131-7-20330
|
||||
* (8-17 Central European Time)
|
||||
* Usenet: ..!mcsun!unido!estevax!weicker
|
||||
*
|
||||
* Original Version (in Ada) published in
|
||||
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
|
||||
* pp. 1013 - 1030, together with the statistics
|
||||
* on which the distribution of statements etc. is based.
|
||||
*
|
||||
* In this C version, the following C library functions are used:
|
||||
* - strcpy, strcmp (inside the measurement loop)
|
||||
* - printf, scanf (outside the measurement loop)
|
||||
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
|
||||
* are used for execution time measurement. For measurements
|
||||
* on other systems, these calls have to be changed.
|
||||
*
|
||||
* Collection of Results:
|
||||
* Reinhold Weicker (address see above) and
|
||||
*
|
||||
* Rick Richardson
|
||||
* PC Research, Inc.
|
||||
* 94 Apple Orchard Drive
|
||||
* Tinton Falls, NJ 07724
|
||||
* Phone: (201) 389-8963 (9-17 EST)
|
||||
* Usenet: ...!uunet!pcrat!rick
|
||||
*
|
||||
* Please send results to Rick Richardson and/or Reinhold Weicker.
|
||||
* Complete information should be given on hardware and software used.
|
||||
* Hardware information includes: Machine type, CPU, type and size
|
||||
* of caches; for microprocessors: clock frequency, memory speed
|
||||
* (number of wait states).
|
||||
* Software information includes: Compiler (and runtime library)
|
||||
* manufacturer and version, compilation switches, OS version.
|
||||
* The Operating System version may give an indication about the
|
||||
* compiler; Dhrystone itself performs no OS calls in the measurement loop.
|
||||
*
|
||||
* The complete output generated by the program should be mailed
|
||||
* such that at least some checks for correctness can be made.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* History: This version C/2.1 has been made for two reasons:
|
||||
*
|
||||
* 1) There is an obvious need for a common C version of
|
||||
* Dhrystone, since C is at present the most popular system
|
||||
* programming language for the class of processors
|
||||
* (microcomputers, minicomputers) where Dhrystone is used most.
|
||||
* There should be, as far as possible, only one C version of
|
||||
* Dhrystone such that results can be compared without
|
||||
* restrictions. In the past, the C versions distributed
|
||||
* by Rick Richardson (Version 1.1) and by Reinhold Weicker
|
||||
* had small (though not significant) differences.
|
||||
*
|
||||
* 2) As far as it is possible without changes to the Dhrystone
|
||||
* statistics, optimizing compilers should be prevented from
|
||||
* removing significant statements.
|
||||
*
|
||||
* This C version has been developed in cooperation with
|
||||
* Rick Richardson (Tinton Falls, NJ), it incorporates many
|
||||
* ideas from the "Version 1.1" distributed previously by
|
||||
* him over the UNIX network Usenet.
|
||||
* I also thank Chaim Benedelac (National Semiconductor),
|
||||
* David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
|
||||
* Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
|
||||
* for their help with comments on earlier versions of the
|
||||
* benchmark.
|
||||
*
|
||||
* Changes: In the initialization part, this version follows mostly
|
||||
* Rick Richardson's version distributed via Usenet, not the
|
||||
* version distributed earlier via floppy disk by Reinhold Weicker.
|
||||
* As a concession to older compilers, names have been made
|
||||
* unique within the first 8 characters.
|
||||
* Inside the measurement loop, this version follows the
|
||||
* version previously distributed by Reinhold Weicker.
|
||||
*
|
||||
* At several places in the benchmark, code has been added,
|
||||
* but within the measurement loop only in branches that
|
||||
* are not executed. The intention is that optimizing compilers
|
||||
* should be prevented from moving code out of the measurement
|
||||
* loop, or from removing code altogether. Since the statements
|
||||
* that are executed within the measurement loop have NOT been
|
||||
* changed, the numbers defining the "Dhrystone distribution"
|
||||
* (distribution of statements, operand types and locality)
|
||||
* still hold. Except for sophisticated optimizing compilers,
|
||||
* execution times for this version should be the same as
|
||||
* for previous versions.
|
||||
*
|
||||
* Since it has proven difficult to subtract the time for the
|
||||
* measurement loop overhead in a correct way, the loop check
|
||||
* has been made a part of the benchmark. This does have
|
||||
* an impact - though a very minor one - on the distribution
|
||||
* statistics which have been updated for this version.
|
||||
*
|
||||
* All changes within the measurement loop are described
|
||||
* and discussed in the companion paper "Rationale for
|
||||
* Dhrystone version 2".
|
||||
*
|
||||
* Because of the self-imposed limitation that the order and
|
||||
* distribution of the executed statements should not be
|
||||
* changed, there are still cases where optimizing compilers
|
||||
* may not generate code for some statements. To a certain
|
||||
* degree, this is unavoidable for small synthetic benchmarks.
|
||||
* Users of the benchmark are advised to check code listings
|
||||
* whether code is generated for all statements of Dhrystone.
|
||||
*
|
||||
* Version 2.1 is identical to version 2.0 distributed via
|
||||
* the UNIX network Usenet in March 1988 except that it corrects
|
||||
* some minor deficiencies that were found by users of version 2.0.
|
||||
* The only change within the measurement loop is that a
|
||||
* non-executed "else" part was added to the "if" statement in
|
||||
* Func_3, and a non-executed "else" part removed from Proc_3.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Defines: The following "Defines" are possible:
|
||||
* -DREG=register (default: Not defined)
|
||||
* As an approximation to what an average C programmer
|
||||
* might do, the "register" storage class is applied
|
||||
* (if enabled by -DREG=register)
|
||||
* - for local variables, if they are used (dynamically)
|
||||
* five or more times
|
||||
* - for parameters if they are used (dynamically)
|
||||
* six or more times
|
||||
* Note that an optimal "register" strategy is
|
||||
* compiler-dependent, and that "register" declarations
|
||||
* do not necessarily lead to faster execution.
|
||||
* -DNOSTRUCTASSIGN (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* assignment of structures.
|
||||
* -DNOENUMS (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* enumeration types.
|
||||
* -DTIMES (default)
|
||||
* -DTIME
|
||||
* The "times" function of UNIX (returning process times)
|
||||
* or the "time" function (returning wallclock time)
|
||||
* is used for measurement.
|
||||
* For single user machines, "time ()" is adequate. For
|
||||
* multi-user machines where you cannot get single-user
|
||||
* access, use the "times ()" function. If you have
|
||||
* neither, use a stopwatch in the dead of night.
|
||||
* "printf"s are provided marking the points "Start Timer"
|
||||
* and "Stop Timer". DO NOT use the UNIX "time(1)"
|
||||
* command, as this will measure the total time to
|
||||
* run this program, which will (erroneously) include
|
||||
* the time to allocate storage (malloc) and to perform
|
||||
* the initialization.
|
||||
* -DHZ=nnn
|
||||
* In Berkeley UNIX, the function "times" returns process
|
||||
* time in 1/HZ seconds, with HZ = 60 for most systems.
|
||||
* CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
|
||||
* A VALUE.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Compilation model and measurement (IMPORTANT):
|
||||
*
|
||||
* This C version of Dhrystone consists of three files:
|
||||
* - dhry.h (this file, containing global definitions and comments)
|
||||
* - dhry_1.c (containing the code corresponding to Ada package Pack_1)
|
||||
* - dhry_2.c (containing the code corresponding to Ada package Pack_2)
|
||||
*
|
||||
* The following "ground rules" apply for measurements:
|
||||
* - Separate compilation
|
||||
* - No procedure merging
|
||||
* - Otherwise, compiler optimizations are allowed but should be indicated
|
||||
* - Default results are those without register declarations
|
||||
* See the companion paper "Rationale for Dhrystone Version 2" for a more
|
||||
* detailed discussion of these ground rules.
|
||||
*
|
||||
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
|
||||
* models ("small", "medium", "large" etc.) should be given if possible,
|
||||
* together with a definition of these models for the compiler system used.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Dhrystone (C version) statistics:
|
||||
*
|
||||
* [Comment from the first distribution, updated for version 2.
|
||||
* Note that because of language differences, the numbers are slightly
|
||||
* different from the Ada version.]
|
||||
*
|
||||
* The following program contains statements of a high level programming
|
||||
* language (here: C) in a distribution considered representative:
|
||||
*
|
||||
* assignments 52 (51.0 %)
|
||||
* control statements 33 (32.4 %)
|
||||
* procedure, function calls 17 (16.7 %)
|
||||
*
|
||||
* 103 statements are dynamically executed. The program is balanced with
|
||||
* respect to the three aspects:
|
||||
*
|
||||
* - statement type
|
||||
* - operand type
|
||||
* - operand locality
|
||||
* operand global, local, parameter, or constant.
|
||||
*
|
||||
* The combination of these three aspects is balanced only approximately.
|
||||
*
|
||||
* 1. Statement Type:
|
||||
* ----------------- number
|
||||
*
|
||||
* V1 = V2 9
|
||||
* (incl. V1 = F(..))
|
||||
* V = Constant 12
|
||||
* Assignment, 7
|
||||
* with array element
|
||||
* Assignment, 6
|
||||
* with record component
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* X = Y +|-|"&&"|"|" Z 5
|
||||
* X = Y +|-|"==" Constant 6
|
||||
* X = X +|- 1 3
|
||||
* X = Y *|/ Z 2
|
||||
* X = Expression, 1
|
||||
* two operators
|
||||
* X = Expression, 1
|
||||
* three operators
|
||||
* --
|
||||
* 18 18
|
||||
*
|
||||
* if .... 14
|
||||
* with "else" 7
|
||||
* without "else" 7
|
||||
* executed 3
|
||||
* not executed 4
|
||||
* for ... 7 | counted every time
|
||||
* while ... 4 | the loop condition
|
||||
* do ... while 1 | is evaluated
|
||||
* switch ... 1
|
||||
* break 1
|
||||
* declaration with 1
|
||||
* initialization
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* P (...) procedure call 11
|
||||
* user procedure 10
|
||||
* library procedure 1
|
||||
* X = F (...)
|
||||
* function call 6
|
||||
* user function 5
|
||||
* library function 1
|
||||
* --
|
||||
* 17 17
|
||||
* ---
|
||||
* 103
|
||||
*
|
||||
* The average number of parameters in procedure or function calls
|
||||
* is 1.82 (not counting the function values as implicit parameters).
|
||||
*
|
||||
*
|
||||
* 2. Operators
|
||||
* ------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Arithmetic 32 50.8
|
||||
*
|
||||
* + 21 33.3
|
||||
* - 7 11.1
|
||||
* * 3 4.8
|
||||
* / (int div) 1 1.6
|
||||
*
|
||||
* Comparison 27 42.8
|
||||
*
|
||||
* == 9 14.3
|
||||
* /= 4 6.3
|
||||
* > 1 1.6
|
||||
* < 3 4.8
|
||||
* >= 1 1.6
|
||||
* <= 9 14.3
|
||||
*
|
||||
* Logic 4 6.3
|
||||
*
|
||||
* && (AND-THEN) 1 1.6
|
||||
* | (OR) 1 1.6
|
||||
* ! (NOT) 2 3.2
|
||||
*
|
||||
* -- -----
|
||||
* 63 100.1
|
||||
*
|
||||
*
|
||||
* 3. Operand Type (counted once per operand reference):
|
||||
* ---------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Integer 175 72.3 %
|
||||
* Character 45 18.6 %
|
||||
* Pointer 12 5.0 %
|
||||
* String30 6 2.5 %
|
||||
* Array 2 0.8 %
|
||||
* Record 2 0.8 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
* When there is an access path leading to the final operand (e.g. a record
|
||||
* component), only the final data type on the access path is counted.
|
||||
*
|
||||
*
|
||||
* 4. Operand Locality:
|
||||
* -------------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* local variable 114 47.1 %
|
||||
* global variable 22 9.1 %
|
||||
* parameter 45 18.6 %
|
||||
* value 23 9.5 %
|
||||
* reference 22 9.1 %
|
||||
* function result 6 2.5 %
|
||||
* constant 55 22.7 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
*
|
||||
* The program does not compute anything meaningful, but it is syntactically
|
||||
* and semantically correct. All variables have a value assigned to them
|
||||
* before they are used as a source operand.
|
||||
*
|
||||
* There has been no explicit effort to account for the effects of a
|
||||
* cache, or to balance the use of long or short displacements for code or
|
||||
* data.
|
||||
*
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Compiler and system dependent definitions: */
|
||||
|
||||
/*
 * Timing source selection: unless the build defines TIME, TIMES is
 * defined here, which makes the times(2) interface the default.
 */
#ifndef TIME
#define TIMES
#endif

#ifdef TIMES
#include <sys/types.h>
#include <sys/times.h>
                /* for "times" */
#endif

/*
 * Scale factor used by the non-RISCV report in dhry_1.c when it turns the
 * measured time into "microseconds for one run".
 * NOTE(review): the published Dhrystone 2.1 header uses 1000000.0 here;
 * the value below is kept exactly as found - confirm it is intentional.
 */
#define Mic_secs_Per_Second 80000000.0
                /* Berkeley UNIX C returns process times in seconds/HZ  */
                /* (HZ itself is defined in dhry_1.c)                   */

/*
 * structassign (d, s): copy structure s into structure d, either by
 * calling memcpy (when the compiler lacks structure assignment and
 * NOSTRUCTASSIGN is defined) or by plain assignment.
 */
#ifdef NOSTRUCTASSIGN
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
#else
#define structassign(d, s) d = s
#endif

/*
 * The header comment documents this switch as -DNOENUMS, while the test
 * below has always been spelled NOENUM.  Accept both spellings so that
 * the documented option is not silently ignored.
 */
#if defined(NOENUMS) && !defined(NOENUM)
#define NOENUM
#endif

#ifdef NOENUM
/* No enumeration types: the identifiers become plain integer constants. */
#define Ident_1 0
#define Ident_2 1
#define Ident_3 2
#define Ident_4 3
#define Ident_5 4
  typedef int Enumeration;
#else
  typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
                Enumeration;
#endif
|
||||
/* for boolean and enumeration types in Ada, Pascal */
|
||||
|
||||
/* General definitions: */
|
||||
|
||||
//#include <stdio.h>
|
||||
/* for strcpy, strcmp */
|
||||
|
||||
#define Null 0
|
||||
/* Value of a Null pointer */
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
typedef int One_Thirty;
|
||||
typedef int One_Fifty;
|
||||
typedef char Capital_Letter;
|
||||
typedef int Boolean;
|
||||
typedef char Str_30 [31];
|
||||
typedef int Arr_1_Dim [50];
|
||||
typedef int Arr_2_Dim [50] [50];
|
||||
|
||||
typedef struct record
|
||||
{
|
||||
struct record *Ptr_Comp;
|
||||
Enumeration Discr;
|
||||
union {
|
||||
struct {
|
||||
Enumeration Enum_Comp;
|
||||
int Int_Comp;
|
||||
char Str_Comp [31];
|
||||
} var_1;
|
||||
struct {
|
||||
Enumeration E_Comp_2;
|
||||
char Str_2_Comp [31];
|
||||
} var_2;
|
||||
struct {
|
||||
char Ch_1_Comp;
|
||||
char Ch_2_Comp;
|
||||
} var_3;
|
||||
} variant;
|
||||
} Rec_Type, *Rec_Pointer;
|
||||
|
||||
|
||||
@@ -0,0 +1,465 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_1.c (part 2 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
#include "dhry.h"
|
||||
|
||||
#ifdef USE_MYSTDLIB
|
||||
extern char *malloc ();
|
||||
#else
|
||||
# include <stdlib.h>
|
||||
# include <string.h>
|
||||
#endif
|
||||
|
||||
/* Global Variables: */
|
||||
|
||||
Rec_Pointer Ptr_Glob,
|
||||
Next_Ptr_Glob;
|
||||
int Int_Glob;
|
||||
Boolean Bool_Glob;
|
||||
char Ch_1_Glob,
|
||||
Ch_2_Glob;
|
||||
int Arr_1_Glob [50];
|
||||
int Arr_2_Glob [50] [50];
|
||||
|
||||
Enumeration Func_1 ();
|
||||
/* forward declaration necessary since Enumeration may not simply be int */
|
||||
|
||||
#ifndef REG
|
||||
Boolean Reg = false;
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#else
|
||||
Boolean Reg = true;
|
||||
#endif
|
||||
|
||||
/* variables for time measurement: */
|
||||
|
||||
#ifdef IGN_TIMES
|
||||
#define HZ 50000000
|
||||
struct tms time_info;
|
||||
extern time_t times ();
|
||||
/* see library function "times" */
|
||||
#define Too_Small_Time 120
|
||||
/* Measurements should last at least about 2 seconds */
|
||||
#endif
|
||||
#ifdef TIME
|
||||
extern long time();
|
||||
#ifdef RISCV
|
||||
extern long insn();
|
||||
#endif
|
||||
/* see library function "time" */
|
||||
#define Too_Small_Time 2
|
||||
/* Measurements should last at least 2 seconds */
|
||||
#endif
|
||||
|
||||
long Begin_Time,
|
||||
End_Time,
|
||||
User_Time;
|
||||
#ifdef RISCV
|
||||
long Begin_Insn,
|
||||
End_Insn,
|
||||
User_Insn;
|
||||
#endif
|
||||
float Microseconds,
|
||||
Dhrystones_Per_Second;
|
||||
/* end of variables for time measurement */
|
||||
|
||||
|
||||
main ()
|
||||
/*****/
|
||||
|
||||
/* main program, corresponds to procedures */
|
||||
/* Main and Proc_0 in the Ada version */
|
||||
{
|
||||
One_Fifty Int_1_Loc;
|
||||
REG One_Fifty Int_2_Loc;
|
||||
One_Fifty Int_3_Loc;
|
||||
REG char Ch_Index;
|
||||
Enumeration Enum_Loc;
|
||||
Str_30 Str_1_Loc;
|
||||
Str_30 Str_2_Loc;
|
||||
REG int Run_Index;
|
||||
REG int Number_Of_Runs;
|
||||
|
||||
Rec_Type R1,R2;
|
||||
|
||||
/* Initializations */
|
||||
|
||||
|
||||
/*
|
||||
* FEMTOSOC/FEMTORV32 modifications ===========================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Since there are only two calls to malloc(), and that malloc()
|
||||
* is not supported yet by femtosoc lib, I replaced them with
|
||||
* pre-allocated structures.
|
||||
*/
|
||||
Next_Ptr_Glob = &R1; // (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
Ptr_Glob = &R2; // (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
|
||||
/*
|
||||
* Initialize IO (redirect to UART or OLED screen depending on
|
||||
* femtosoc.v configuration).
|
||||
*/
|
||||
// femtosoc_tty_init();
|
||||
|
||||
/*
|
||||
* Verify that this core was synthetized with counters.
|
||||
* The generation script extracts configuration
|
||||
* from femtosoc.v and writes values at specific memory addresses.
|
||||
* See stubs.c and LIB/femtorv32.h
|
||||
*/
|
||||
if(!has_counters()) {
|
||||
printf("This femtorv32 core does not have counters (see femtosoc.v)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* End of FEMTOSOC/FEMTORV32 modifications ======================
|
||||
*/
|
||||
|
||||
Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
|
||||
Ptr_Glob->Discr = Ident_1;
|
||||
Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
|
||||
Ptr_Glob->variant.var_1.Int_Comp = 40;
|
||||
strcpy (Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING");
|
||||
strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
|
||||
|
||||
Arr_2_Glob [8][7] = 10;
|
||||
/* Was missing in published program. Without this statement, */
|
||||
/* Arr_2_Glob [8][7] would have an undefined value. */
|
||||
/* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
|
||||
/* overflow may occur for this array element. */
|
||||
|
||||
printf ("\n");
|
||||
printf ("Dhrystone Benchmark, Version 2.1 (Language: C)\n");
|
||||
printf ("\n");
|
||||
if (Reg)
|
||||
{
|
||||
printf ("Program compiled with 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf ("Program compiled without 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
printf ("Please give the number of runs through the benchmark: ");
|
||||
{
|
||||
// int n;
|
||||
// scanf ("%d", &n);
|
||||
Number_Of_Runs = 100;
|
||||
}
|
||||
printf ("\n");
|
||||
|
||||
printf ("Execution starts, %d runs through Dhrystone\n", Number_Of_Runs);
|
||||
|
||||
/***************/
|
||||
/* Start timer */
|
||||
/***************/
|
||||
|
||||
#ifdef IGN_TIMES
|
||||
times (&time_info);
|
||||
Begin_Time = (long) time_info.tms_utime;
|
||||
#endif
|
||||
#ifdef TIME
|
||||
Begin_Time = time ( (long *) 0);
|
||||
#ifdef RISCV
|
||||
Begin_Insn = insn ( (long *) 0);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
|
||||
{
|
||||
Proc_5();
|
||||
Proc_4();
|
||||
/* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
|
||||
Int_1_Loc = 2;
|
||||
Int_2_Loc = 3;
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
|
||||
Enum_Loc = Ident_2;
|
||||
Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
|
||||
/* Bool_Glob == 1 */
|
||||
while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
|
||||
{
|
||||
Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
|
||||
/* Int_3_Loc == 7 */
|
||||
Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
|
||||
/* Int_3_Loc == 7 */
|
||||
Int_1_Loc += 1;
|
||||
} /* while */
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
|
||||
/* Int_Glob == 5 */
|
||||
Proc_1 (Ptr_Glob);
|
||||
for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
|
||||
/* loop body executed twice */
|
||||
{
|
||||
if (Enum_Loc == Func_1 (Ch_Index, 'C'))
|
||||
/* then, not executed */
|
||||
{
|
||||
Proc_6 (Ident_1, &Enum_Loc);
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
|
||||
Int_2_Loc = Run_Index;
|
||||
Int_Glob = Run_Index;
|
||||
}
|
||||
}
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Int_2_Loc = Int_2_Loc * Int_1_Loc;
|
||||
Int_1_Loc = Int_2_Loc / Int_3_Loc;
|
||||
Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
|
||||
/* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
|
||||
Proc_2 (&Int_1_Loc);
|
||||
/* Int_1_Loc == 5 */
|
||||
|
||||
} /* loop "for Run_Index" */
|
||||
|
||||
/**************/
|
||||
/* Stop timer */
|
||||
/**************/
|
||||
|
||||
#ifdef IGN_TIMES
|
||||
times (&time_info);
|
||||
End_Time = (long) time_info.tms_utime;
|
||||
#endif
|
||||
#ifdef TIME
|
||||
End_Time = time ( (long *) 0);
|
||||
#ifdef RISCV
|
||||
End_Insn = insn ( (long *) 0);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
printf ("Execution ends\n");
|
||||
printf ("\n");
|
||||
printf ("Final values of the variables used in the benchmark:\n");
|
||||
printf ("\n");
|
||||
printf ("Int_Glob: %d\n", Int_Glob);
|
||||
printf (" should be: %d\n", 5);
|
||||
printf ("Bool_Glob: %d\n", Bool_Glob);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf ("Ch_1_Glob: %c\n", Ch_1_Glob);
|
||||
printf (" should be: %c\n", 'A');
|
||||
printf ("Ch_2_Glob: %c\n", Ch_2_Glob);
|
||||
printf (" should be: %c\n", 'B');
|
||||
printf ("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]);
|
||||
printf (" should be: %d\n", 7);
|
||||
printf ("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]);
|
||||
printf (" should be: Number_Of_Runs + 10\n");
|
||||
printf ("Ptr_Glob->\n");
|
||||
printf (" Ptr_Comp: %d\n", (int) Ptr_Glob->Ptr_Comp);
|
||||
printf (" should be: (implementation-dependent)\n");
|
||||
printf (" Discr: %d\n", Ptr_Glob->Discr);
|
||||
printf (" should be: %d\n", 0);
|
||||
printf (" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf (" should be: %d\n", 2);
|
||||
printf (" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf (" should be: %d\n", 17);
|
||||
printf (" Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
printf ("Next_Ptr_Glob->\n");
|
||||
printf (" Ptr_Comp: %d\n", (int) Next_Ptr_Glob->Ptr_Comp);
|
||||
printf (" should be: (implementation-dependent), same as above\n");
|
||||
printf (" Discr: %d\n", Next_Ptr_Glob->Discr);
|
||||
printf (" should be: %d\n", 0);
|
||||
printf (" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf (" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf (" should be: %d\n", 18);
|
||||
printf (" Str_Comp: %s\n",
|
||||
Next_Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
printf ("Int_1_Loc: %d\n", Int_1_Loc);
|
||||
printf (" should be: %d\n", 5);
|
||||
printf ("Int_2_Loc: %d\n", Int_2_Loc);
|
||||
printf (" should be: %d\n", 13);
|
||||
printf ("Int_3_Loc: %d\n", Int_3_Loc);
|
||||
printf (" should be: %d\n", 7);
|
||||
printf ("Enum_Loc: %d\n", Enum_Loc);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf ("Str_1_Loc: %s\n", Str_1_Loc);
|
||||
printf (" should be: DHRYSTONE PROGRAM, 1'ST STRING\n");
|
||||
printf ("Str_2_Loc: %s\n", Str_2_Loc);
|
||||
printf (" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
|
||||
printf ("\n");
|
||||
|
||||
User_Time = End_Time - Begin_Time;
|
||||
|
||||
#ifdef RISCV
|
||||
User_Insn = End_Insn - Begin_Insn;
|
||||
|
||||
printf("Number_Of_Runs: %d\n", Number_Of_Runs);
|
||||
printf("User_Time: %d cycles, %d insn\n", User_Time, User_Insn);
|
||||
|
||||
int Cycles_Per_Instruction_x1000 = (1000 * User_Time) / User_Insn;
|
||||
printf("Cycles_Per_Instruction: %d.%d%d%d\n", Cycles_Per_Instruction_x1000 / 1000,
|
||||
(Cycles_Per_Instruction_x1000 / 100) % 10,
|
||||
(Cycles_Per_Instruction_x1000 / 10) % 10,
|
||||
(Cycles_Per_Instruction_x1000 / 1) % 10);
|
||||
|
||||
int Dhrystones_Per_Second_Per_MHz = (Number_Of_Runs * 1000000) / User_Time;
|
||||
printf("Dhrystones_Per_Second_Per_MHz: %d\n", Dhrystones_Per_Second_Per_MHz);
|
||||
|
||||
/*
|
||||
* "Another common representation of the Dhrystone benchmark is the DMIPS (Dhrystone MIPS) obtained
|
||||
* when the Dhrystone score is divided by 1757 (the number of Dhrystones per second obtained on the
|
||||
* VAX 11/780, nominally a 1 MIPS machine)."
|
||||
*/
|
||||
|
||||
int DMIPS_Per_MHz_x1000 = (1000 * Dhrystones_Per_Second_Per_MHz) / 1757;
|
||||
printf("DMIPS_Per_MHz: %d.%d%d%d\n", DMIPS_Per_MHz_x1000 / 1000,
|
||||
(DMIPS_Per_MHz_x1000 / 100) % 10,
|
||||
(DMIPS_Per_MHz_x1000 / 10) % 10,
|
||||
(DMIPS_Per_MHz_x1000 / 1) % 10);
|
||||
#else
|
||||
if (User_Time < Too_Small_Time)
|
||||
{
|
||||
printf ("Measured time too small to obtain meaningful results\n");
|
||||
printf ("Please increase number of runs\n");
|
||||
printf ("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef TIME
|
||||
Microseconds = (float) User_Time * Mic_secs_Per_Second
|
||||
/ (float) Number_Of_Runs;
|
||||
Dhrystones_Per_Second = (float) Number_Of_Runs / (float) User_Time;
|
||||
#else
|
||||
Microseconds = (float) User_Time * Mic_secs_Per_Second
|
||||
/ ((float) HZ * ((float) Number_Of_Runs));
|
||||
Dhrystones_Per_Second = ((float) HZ * (float) Number_Of_Runs)
|
||||
/ (float) User_Time;
|
||||
#endif
|
||||
printf ("Microseconds for one run through Dhrystone: ");
|
||||
printf ("%6.1f \n", Microseconds);
|
||||
printf ("Dhrystones per Second: ");
|
||||
printf ("%6.1f \n", Dhrystones_Per_Second);
|
||||
printf ("\n");
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Proc_1 (Ptr_Val_Par)
|
||||
/******************/
|
||||
|
||||
REG Rec_Pointer Ptr_Val_Par;
|
||||
/* executed once */
|
||||
{
|
||||
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
|
||||
/* == Ptr_Glob_Next */
|
||||
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
|
||||
/* corresponds to "rename" in Ada, "with" in Pascal */
|
||||
|
||||
structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
|
||||
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
|
||||
Next_Record->variant.var_1.Int_Comp
|
||||
= Ptr_Val_Par->variant.var_1.Int_Comp;
|
||||
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
|
||||
Proc_3 (&Next_Record->Ptr_Comp);
|
||||
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
|
||||
== Ptr_Glob->Ptr_Comp */
|
||||
if (Next_Record->Discr == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Next_Record->variant.var_1.Int_Comp = 6;
|
||||
Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
|
||||
&Next_Record->variant.var_1.Enum_Comp);
|
||||
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
|
||||
&Next_Record->variant.var_1.Int_Comp);
|
||||
}
|
||||
else /* not executed */
|
||||
structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
|
||||
} /* Proc_1 */
|
||||
|
||||
|
||||
Proc_2 (Int_Par_Ref)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* *Int_Par_Ref == 1, becomes 4 */
|
||||
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Int_Loc = *Int_Par_Ref + 10;
|
||||
do /* executed once */
|
||||
if (Ch_1_Glob == 'A')
|
||||
/* then, executed */
|
||||
{
|
||||
Int_Loc -= 1;
|
||||
*Int_Par_Ref = Int_Loc - Int_Glob;
|
||||
Enum_Loc = Ident_1;
|
||||
} /* if */
|
||||
while (Enum_Loc != Ident_1); /* true */
|
||||
} /* Proc_2 */
|
||||
|
||||
|
||||
Proc_3 (Ptr_Ref_Par)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* Ptr_Ref_Par becomes Ptr_Glob */
|
||||
|
||||
Rec_Pointer *Ptr_Ref_Par;
|
||||
|
||||
{
|
||||
if (Ptr_Glob != Null)
|
||||
/* then, executed */
|
||||
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
|
||||
} /* Proc_3 */
|
||||
|
||||
|
||||
Proc_4 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Boolean Bool_Loc;
|
||||
|
||||
Bool_Loc = Ch_1_Glob == 'A';
|
||||
Bool_Glob = Bool_Loc | Bool_Glob;
|
||||
Ch_2_Glob = 'B';
|
||||
} /* Proc_4 */
|
||||
|
||||
|
||||
Proc_5 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Ch_1_Glob = 'A';
|
||||
Bool_Glob = false;
|
||||
} /* Proc_5 */
|
||||
|
||||
|
||||
/* Procedure for the assignment of structures, */
|
||||
/* if the C compiler doesn't support this feature */
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
memcpy (d, s, l)
|
||||
register char *d;
|
||||
register char *s;
|
||||
register int l;
|
||||
{
|
||||
while (l--) *d++ = *s++;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_2.c (part 3 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
#include "dhry.h"
|
||||
|
||||
/* When the build does not supply REG, define it as empty so that */
/* "REG int x;" declares an ordinary (non-register) variable.     */
#ifndef REG
#define REG
#endif

/* Globals defined in dhry_1.c and used by the procedures below. */
extern  int     Int_Glob;
extern  char    Ch_1_Glob;
|
||||
|
||||
|
||||
/* Proc_6: computes an Enumeration from Enum_Val_Par and stores it through */
/* Enum_Ref_Par. The target is first set to Enum_Val_Par, then to Ident_4 if */
/* Func_3 returns false, and finally overwritten by the switch below for every */
/* case except Ident_4. */
Proc_6 (Enum_Val_Par, Enum_Ref_Par)
|
||||
/*********************************/
|
||||
/* executed once */
|
||||
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
|
||||
|
||||
Enumeration Enum_Val_Par;
|
||||
Enumeration *Enum_Ref_Par;
|
||||
{
|
||||
*Enum_Ref_Par = Enum_Val_Par;
|
||||
if (! Func_3 (Enum_Val_Par))
|
||||
/* then, not executed */
|
||||
*Enum_Ref_Par = Ident_4;
|
||||
/* no default case: a value outside Ident_1..Ident_5 leaves the target as set above */
switch (Enum_Val_Par)
|
||||
{
|
||||
case Ident_1:
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
break;
|
||||
case Ident_2:
|
||||
/* the result depends on the global Int_Glob */
if (Int_Glob > 100)
|
||||
/* then */
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
else *Enum_Ref_Par = Ident_4;
|
||||
break;
|
||||
case Ident_3: /* executed */
|
||||
*Enum_Ref_Par = Ident_2;
|
||||
break;
|
||||
/* Ident_4 keeps whatever was stored before the switch */
case Ident_4: break;
|
||||
case Ident_5:
|
||||
*Enum_Ref_Par = Ident_3;
|
||||
break;
|
||||
} /* switch */
|
||||
} /* Proc_6 */
|
||||
|
||||
|
||||
/* Proc_7: stores Int_1_Par_Val + 2 + Int_2_Par_Val through Int_Par_Ref. */
Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref)
|
||||
/**********************************************/
|
||||
/* executed three times */
|
||||
/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
|
||||
/* Int_Par_Ref becomes 7 */
|
||||
/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
|
||||
/* Int_Par_Ref becomes 17 */
|
||||
/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
|
||||
/* Int_Par_Ref becomes 18 */
|
||||
One_Fifty Int_1_Par_Val;
|
||||
One_Fifty Int_2_Par_Val;
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 2;
|
||||
*Int_Par_Ref = Int_2_Par_Val + Int_Loc;
|
||||
} /* Proc_7 */
|
||||
|
||||
|
||||
/* Proc_8: with Int_Loc = Int_1_Par_Val + 5, writes elements Int_Loc, */
/* Int_Loc+1 and Int_Loc+30 of Arr_1_Par_Ref, touches rows Int_Loc and */
/* Int_Loc+20 of Arr_2_Par_Ref, and sets the global Int_Glob to 5. */
Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
|
||||
/*********************************************************************/
|
||||
/* executed once */
|
||||
/* Int_1_Par_Val == 3 */
|
||||
/* Int_2_Par_Val == 7 */
|
||||
Arr_1_Dim Arr_1_Par_Ref;
|
||||
Arr_2_Dim Arr_2_Par_Ref;
|
||||
int Int_1_Par_Val;
|
||||
int Int_2_Par_Val;
|
||||
{
|
||||
REG One_Fifty Int_Index;
|
||||
REG One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 5;
|
||||
Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
|
||||
Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
|
||||
Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
|
||||
/* two iterations: columns Int_Loc and Int_Loc+1 of row Int_Loc; */
/* the loop body is the single statement that follows */
for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
|
||||
/* read-modify-write of a cell that this procedure did not assign above */
Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
|
||||
Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
|
||||
Int_Glob = 5;
|
||||
} /* Proc_8 */
|
||||
|
||||
|
||||
/* Func_1: returns Ident_1 when the two characters differ; when they are */
/* equal, stores the character in Ch_1_Glob and returns Ident_2. */
Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
|
||||
/*************************************************/
|
||||
/* executed three times */
|
||||
/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
|
||||
/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
|
||||
/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
|
||||
|
||||
Capital_Letter Ch_1_Par_Val;
|
||||
Capital_Letter Ch_2_Par_Val;
|
||||
{
|
||||
Capital_Letter Ch_1_Loc;
|
||||
Capital_Letter Ch_2_Loc;
|
||||
|
||||
/* both locals end up holding Ch_1_Par_Val */
Ch_1_Loc = Ch_1_Par_Val;
|
||||
Ch_2_Loc = Ch_1_Loc;
|
||||
if (Ch_2_Loc != Ch_2_Par_Val)
|
||||
/* then, executed */
|
||||
return (Ident_1);
|
||||
else /* not executed */
|
||||
{
|
||||
Ch_1_Glob = Ch_1_Loc;
|
||||
return (Ident_2);
|
||||
}
|
||||
} /* Func_1 */
|
||||
|
||||
|
||||
/* Func_2: returns true when Ch_Loc == 'R', or when strcmp reports */
/* Str_1_Par_Ref greater than Str_2_Par_Ref (in which case Int_Glob is set */
/* to Int_Loc + 7); returns false otherwise. */
Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
|
||||
/*************************************************/
|
||||
/* executed once */
|
||||
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
|
||||
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
|
||||
|
||||
Str_30 Str_1_Par_Ref;
|
||||
Str_30 Str_2_Par_Ref;
|
||||
{
|
||||
REG One_Thirty Int_Loc;
|
||||
Capital_Letter Ch_Loc;
|
||||
|
||||
Int_Loc = 2;
|
||||
/* the while has no braces: its body is the single if statement below. */
/* Int_Loc and Ch_Loc are assigned only inside that if, so the loop ends */
/* only once Func_1 returns Ident_1. */
while (Int_Loc <= 2) /* loop body executed once */
|
||||
if (Func_1 (Str_1_Par_Ref[Int_Loc],
|
||||
Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Ch_Loc = 'A';
|
||||
Int_Loc += 1;
|
||||
} /* if, while */
|
||||
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
|
||||
/* then, not executed */
|
||||
Int_Loc = 7;
|
||||
if (Ch_Loc == 'R')
|
||||
/* then, not executed */
|
||||
return (true);
|
||||
else /* executed */
|
||||
{
|
||||
if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
|
||||
/* then, not executed */
|
||||
{
|
||||
Int_Loc += 7;
|
||||
Int_Glob = Int_Loc;
|
||||
return (true);
|
||||
}
|
||||
else /* executed */
|
||||
return (false);
|
||||
} /* if Ch_Loc */
|
||||
} /* Func_2 */
|
||||
|
||||
|
||||
/* Func_3: returns true when Enum_Par_Val equals Ident_3, false otherwise. */
Boolean Func_3 (Enum_Par_Val)
|
||||
/***************************/
|
||||
/* executed once */
|
||||
/* Enum_Par_Val == Ident_3 */
|
||||
Enumeration Enum_Par_Val;
|
||||
{
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
/* the parameter is copied to a local before being tested */
Enum_Loc = Enum_Par_Val;
|
||||
if (Enum_Loc == Ident_3)
|
||||
/* then, executed */
|
||||
return (true);
|
||||
else /* not executed */
|
||||
return (false);
|
||||
} /* Func_3 */
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
#include <perf.h>
|
||||
|
||||
/* time: returns the value of rdcycle(). */
/* NOTE(review): rdcycle()/rdinstret() presumably come from perf.h and read the */
/* RISC-V cycle / retired-instruction counters; confirm. */
/* The empty parameter list leaves the arguments unspecified, so a caller that */
/* passes an argument still compiles (presumably needed by Dhrystone; confirm). */
long time() {
|
||||
return rdcycle();
|
||||
}
|
||||
|
||||
/* insn: returns the value of rdinstret(). */
long insn() {
|
||||
return rdinstret();
|
||||
}
|
||||
|
||||
/* has_counters: always returns 1. */
int has_counters() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * strcpy: copies the NUL-terminated string at src, terminator included,
 * into dest and returns dest. No bounds checking is performed.
 */
char *strcpy(char *dest, const char *src) {
    char *out = dest;
    char ch;

    do {
        ch = *src++;
        *out++ = ch;
    } while (ch != '\0');   /* stop right after the terminator is written */

    return dest;
}
|
||||
|
||||
/*
 * strcmp: compares two NUL-terminated strings byte by byte as unsigned
 * chars. Returns the difference (byte of p1 - byte of p2) at the first
 * position where they differ, or 0 when both strings are equal.
 */
int strcmp (const char *p1, const char *p2) {
    const unsigned char *a = (const unsigned char *) p1;
    const unsigned char *b = (const unsigned char *) p2;

    for (;;) {
        unsigned char ca = *a++;
        unsigned char cb = *b++;
        /* stop at the first mismatch or at the end of p1 */
        if (ca != cb || ca == '\0') {
            return ca - cb;
        }
    }
}
|
||||
@@ -1,6 +1,6 @@
|
||||
include ../../../FIRMWARE/makefile.inc
|
||||
RVASFLAGS=-march=$(ARCH) -mabi=$(ABI)
|
||||
RVCFLAGS=-Os -fno-pic -march=$(ARCH) -mabi=$(ABI) -fno-stack-protector -w -Wl,--no-relax
|
||||
RVCFLAGS=-I. -O2 -fno-pic -march=$(ARCH) -mabi=$(ABI) -fno-stack-protector -w -Wl,--no-relax
|
||||
|
||||
RAM_SIZE=6144
|
||||
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
#define RISCV
|
||||
#define TIME
|
||||
#define USE_MYSTDLIB
|
||||
|
||||
#include "DHRYSTONE/dhry_1.c"
|
||||
#include "DHRYSTONE/dhry_2.c"
|
||||
#include "DHRYSTONE/stubs.c"
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* pipeline6.v
|
||||
* pipeline7.v
|
||||
* Let us see how to morph our multi-cycle CPU into a pipelined CPU !
|
||||
* Step 7: a flavor of branch prediction
|
||||
* static branch prediction
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* pipeline6.v
|
||||
* pipeline8.v
|
||||
* Let us see how to morph our multi-cycle CPU into a pipelined CPU !
|
||||
* Step 8: dynamic branch prediction
|
||||
*/
|
||||
@@ -610,7 +610,7 @@ module Processor (
|
||||
if(halt) begin
|
||||
$display("Simulated processor's report");
|
||||
$display("----------------------------");
|
||||
$display("Pred hits = %3.3f\%%",
|
||||
$display("Branch hits= %3.3f\%%",
|
||||
nbPredictHit*100.0/nbBranch );
|
||||
$display("CPI = %3.3f",(cycle*1.0)/(instret*1.0));
|
||||
$display("Instr. mix = (Branch:%3.3f\%% JAL:%3.3f\%% JALR:%3.3f\%%)",
|
||||
|
||||
801
FemtoRV/TUTORIALS/FROM_BLINKER_TO_RISCV/pipeline9.v
Normal file
801
FemtoRV/TUTORIALS/FROM_BLINKER_TO_RISCV/pipeline9.v
Normal file
@@ -0,0 +1,801 @@
|
||||
/*
|
||||
* pipeline9.v
|
||||
* Let us see how to morph our multi-cycle CPU into a pipelined CPU !
|
||||
* Step 9: return address stack
|
||||
*/
|
||||
|
||||
`default_nettype none
|
||||
`include "clockworks.v"
|
||||
`include "emitter_uart.v"
|
||||
|
||||
//`define VERBOSE
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
module Processor (
|
||||
input clk,
|
||||
input resetn,
|
||||
output [31:0] IO_mem_addr, // IO memory address
|
||||
input [31:0] IO_mem_rdata, // data read from IO memory
|
||||
output [31:0] IO_mem_wdata, // data written to IO memory
|
||||
output IO_mem_wr // IO write flag
|
||||
);
|
||||
|
||||
`include "riscv_disassembly.v"
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/*
|
||||
Reminder for the 10 RISC-V opcodes
|
||||
----------------------------------
|
||||
ALUreg // rd <- rs1 OP rs2
|
||||
ALUimm // rd <- rs1 OP Iimm
|
||||
Branch // if(rs1 OP rs2) PC<-PC+Bimm
|
||||
JALR // rd <- PC+4; PC<-rs1+Iimm
|
||||
JAL // rd <- PC+4; PC<-PC+Jimm
|
||||
AUIPC // rd <- PC + Uimm
|
||||
LUI // rd <- Uimm
|
||||
Load // rd <- mem[rs1+Iimm]
|
||||
Store // mem[rs1+Simm] <- rs2
|
||||
SYSTEM // special
|
||||
*/
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Instruction decoder as functions (we will use them several times) */
|
||||
|
||||
/* The 10 "recognizers" for the 10 opcodes */
|
||||
// Opcode recognizers: each one returns 1 when the 7-bit opcode field
// I[6:0] of instruction word I matches the given instruction class.
function isALUreg;
   input [31:0] I;
   isALUreg = (I[6:0] == 7'h33);   // 0110011
endfunction

function isALUimm;
   input [31:0] I;
   isALUimm = (I[6:0] == 7'h13);   // 0010011
endfunction

function isBranch;
   input [31:0] I;
   isBranch = (I[6:0] == 7'h63);   // 1100011
endfunction

function isJALR;
   input [31:0] I;
   isJALR = (I[6:0] == 7'h67);     // 1100111
endfunction

function isJAL;
   input [31:0] I;
   isJAL = (I[6:0] == 7'h6F);      // 1101111
endfunction

function isAUIPC;
   input [31:0] I;
   isAUIPC = (I[6:0] == 7'h17);    // 0010111
endfunction

function isLUI;
   input [31:0] I;
   isLUI = (I[6:0] == 7'h37);      // 0110111
endfunction

function isLoad;
   input [31:0] I;
   isLoad = (I[6:0] == 7'h03);     // 0000011
endfunction

function isStore;
   input [31:0] I;
   isStore = (I[6:0] == 7'h23);    // 0100011
endfunction

function isSYSTEM;
   input [31:0] I;
   isSYSTEM = (I[6:0] == 7'h73);   // 1110011
endfunction
|
||||
|
||||
/* Register indices */
|
||||
// Field extractors: each returns one fixed bit field of instruction word I.
function [4:0] rs1Id;           // source register 1: I[19:15]
   input [31:0] I;
   rs1Id = I[15 +: 5];
endfunction

function [4:0] rs2Id;           // source register 2: I[24:20]
   input [31:0] I;
   rs2Id = I[20 +: 5];
endfunction

function [4:0] shamt;           // shift amount: same bits as rs2Id
   input [31:0] I;
   shamt = I[20 +: 5];
endfunction

function [4:0] rdId;            // destination register: I[11:7]
   input [31:0] I;
   rdId = I[7 +: 5];
endfunction

function [1:0] csrId;           // 2-bit counter selector built from I[27] and I[21]
   input [31:0] I;
   csrId = {I[27], I[21]};
endfunction

/* funct3 and funct7 */
function [2:0] funct3;          // I[14:12]
   input [31:0] I;
   funct3 = I[12 +: 3];
endfunction

function [6:0] funct7;          // I[31:25]
   input [31:0] I;
   funct7 = I[25 +: 7];
endfunction
|
||||
|
||||
|
||||
/* EBREAK and CSRRS instruction "recognizers" */
|
||||
// isEBREAK: 1 when I has the SYSTEM opcode (1110011) and funct3 == 000.
// Only opcode and funct3 are examined, so every SYSTEM instruction with
// funct3 == 000 matches.
function isEBREAK;
   input [31:0] I;
   isEBREAK = ((I[6:0] == 7'b1110011) && (I[14:12] == 3'b000));
endfunction

// isCSRRS: 1 when I has the SYSTEM opcode (1110011) and funct3 == 010.
function isCSRRS;
   input [31:0] I;
   isCSRRS = ((I[6:0] == 7'b1110011) && (I[14:12] == 3'b010));
endfunction
|
||||
|
||||
/* The 5 immediate formats */
|
||||
// Immediate decoders: each rebuilds the 32-bit immediate of one RISC-V
// instruction format from instruction word I. All but Uimm sign-extend
// from I[31].

// U-type: I[31:12] in the upper 20 bits, low 12 bits cleared.
function [31:0] Uimm;
   input [31:0] I;
   Uimm = {I[31:12], 12'h000};
endfunction

// I-type: 12-bit immediate I[31:20], sign-extended.
function [31:0] Iimm;
   input [31:0] I;
   Iimm = {{20{I[31]}}, I[31:20]};
endfunction

// S-type: 12-bit immediate {I[31:25], I[11:7]}, sign-extended.
function [31:0] Simm;
   input [31:0] I;
   Simm = {{20{I[31]}}, I[31:25], I[11:7]};
endfunction

// B-type: {I[31], I[7], I[30:25], I[11:8], 0}, sign-extended (bit 0 is 0).
function [31:0] Bimm;
   input [31:0] I;
   Bimm = {{19{I[31]}}, I[31], I[7], I[30:25], I[11:8], 1'b0};
endfunction

// J-type: {I[31], I[19:12], I[20], I[30:21], 0}, sign-extended (bit 0 is 0).
function [31:0] Jimm;
   input [31:0] I;
   Jimm = {{11{I[31]}}, I[31], I[19:12], I[20], I[30:21], 1'b0};
endfunction
|
||||
|
||||
// writesRd: 1 for every instruction class except Store and Branch.
function writesRd;
   input [31:0] I;
   writesRd = !(isStore(I) || isBranch(I));
endfunction

// readsRs1: 1 for every instruction class except JAL, AUIPC and LUI.
function readsRs1;
   input [31:0] I;
   readsRs1 = !isJAL(I) && !isAUIPC(I) && !isLUI(I);
endfunction

// readsRs2: 1 for Store, Branch and register-register ALU instructions.
function readsRs2;
   input [31:0] I;
   readsRs2 = isStore(I) || isBranch(I) || isALUreg(I);
endfunction
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
reg [63:0] cycle;
|
||||
reg [63:0] instret;
|
||||
|
||||
// Cycle counter: cleared while resetn is low, otherwise incremented on
// every rising clock edge.
always @(posedge clk) begin
   cycle <= resetn ? (cycle + 64'd1) : 64'd0;
end
|
||||
|
||||
wire D_flush;
|
||||
wire E_flush;
|
||||
|
||||
wire F_stall;
|
||||
wire D_stall;
|
||||
|
||||
wire halt; // Halt execution (on ebreak)
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
localparam NOP = 32'b0000000_00000_00000_000_00000_0110011;
|
||||
|
||||
/*** F: Instruction fetch ***/
|
||||
|
||||
reg [31:0] PC;
|
||||
|
||||
reg [31:0] PROGROM[0:16383]; // 16384 4-bytes words
|
||||
// 64 KB of program ROM
|
||||
initial begin
|
||||
$readmemh("PROGROM.hex",PROGROM);
|
||||
end
|
||||
|
||||
// Note: E's jumpOrBranch signals are registered in EM (1 cycle later),
|
||||
// hence taken into account in F_PC mux (1 cycle before). Doing so
|
||||
// avoids a *huge* critical path (that generates E_JumpOrBranch, that
|
||||
// uses the ALU branch result E_takeBranch, and hence that comprises
|
||||
// register forwarding & ALU)
|
||||
|
||||
wire [31:0] F_PC =
|
||||
D_JumpOrBranchNow ? D_JumpOrBranchAddr :
|
||||
EM_JumpOrBranchNow ? EM_JumpOrBranchAddr :
|
||||
PC;
|
||||
|
||||
// F stage registers.
// When F is not stalled, the instruction word at F_PC and F_PC itself are
// latched into FD_instr / FD_PC and PC advances to F_PC+4. Reset forces PC
// to 0 and takes priority over the increment.
always @(posedge clk) begin

   // Program ROM read: nothing but a plain registered read here.
   // Cannot write NOP to FD_instr, because whenever a BRAM read is
   // involved, do nothing else than sending the result to a reg.
   if(!F_stall) FD_instr <= PROGROM[F_PC[15:2]];

   if(!F_stall) FD_PC <= F_PC;

   if(!resetn) begin
      PC <= 0;
   end else if(!F_stall) begin
      PC <= F_PC + 4;
   end

   // FD_nop marks the FD instruction as a bubble (instead of a NOP write).
   FD_nop <= !resetn | D_flush;
end
|
||||
|
||||
/******************************************************************************/
|
||||
reg [31:0] FD_PC;
|
||||
reg [31:0] FD_instr;
|
||||
reg FD_nop;
|
||||
/******************************************************************************/
|
||||
|
||||
/*** D: Instruction decode ***/
|
||||
|
||||
// Branch prediction
|
||||
|
||||
// 83% success with HISTO_BITS=8, ADDR_BITS=12
|
||||
// *** 80% success with HISTO_BITS=5, ADDR_BITS=10
|
||||
// 78% success with HISTO_BITS=4, ADDR_BITS=8
|
||||
localparam BP_HISTO_BITS=5;
|
||||
localparam BP_ADDR_BITS=10;
|
||||
localparam BP_SIZE=1<<BP_ADDR_BITS;
|
||||
|
||||
reg [BP_HISTO_BITS-1:0] PHT[BP_SIZE-1:0]; // Pattern History Table
|
||||
reg [1:0] BHT[BP_SIZE-1:0]; // Branch History Table
|
||||
|
||||
// PHT_index: index into the Pattern History Table, taken from the
// BP_ADDR_BITS bits of PC starting at bit 2.
function [BP_ADDR_BITS-1:0] PHT_index;
   input [31:0] PC;
   PHT_index = PC[2 +: BP_ADDR_BITS]; // pshare
   //PHT_index = 0;                   // gshare
endfunction
|
||||
|
||||
// BHT_index: index into the Branch History Table for the branch at PC.
// The active choice XORs the PC-derived index with the history read from
// PHT, placed in the most significant bits.
function [BP_ADDR_BITS-1:0] BHT_index;
   input [31:0] PC;
   reg [BP_ADDR_BITS-1:0]  pcBits;   // PC-derived index
   reg [BP_HISTO_BITS-1:0] history;  // history bits stored for that index
   begin
      // Choose indexing for dynamic branch prediction
      // (uncomment one of the following choices)
      // Used if D_predictBranch is set to dynamic (later in this file)

      // 1: simple 2-bits counter without history
      // BHT_index = PHT_index(PC);

      // 2: gselect
      // /* verilator lint_off WIDTH */
      // BHT_index = {PHT_index(PC), PHT[PHT_index(PC)]};
      // /* verilator lint_on WIDTH */

      // 3: pshare/gshare
      pcBits    = PHT_index(PC);
      history   = PHT[pcBits];
      BHT_index = pcBits ^
                  {history, {(BP_ADDR_BITS-BP_HISTO_BITS){1'b0}}};
   end
endfunction
|
||||
|
||||
// Choose branch prediction strategy
|
||||
// (uncomment one of the following choices)
|
||||
//wire D_predictBranch = 1'd0; // 1. predict not taken
|
||||
//wire D_predictBranch = 1'd1; // 2. predict taken
|
||||
//wire D_predictBranch = FD_instr[31]; // 3. BTFNT
|
||||
wire D_predictBranch = BHT[BHT_index(FD_PC)][1]; // 4. dynamic
|
||||
|
||||
|
||||
// Next fetch gets its address from the JAL target, from the Branch target
|
||||
// if the branch is predicted taken, or from the return address stack for JALR.
|
||||
|
||||
wire D_JumpOrBranchNow = !FD_nop && (
|
||||
isJAL(FD_instr) ||
|
||||
(isBranch(FD_instr) && D_predictBranch) ||
|
||||
isJALR(FD_instr)
|
||||
);
|
||||
|
||||
// Return address stack
|
||||
|
||||
reg [31:0] RAS_0;
|
||||
reg [31:0] RAS_1;
|
||||
reg [31:0] RAS_2;
|
||||
reg [31:0] RAS_3;
|
||||
|
||||
wire [31:0] D_JumpOrBranchAddr =
|
||||
isJALR(FD_instr) ? RAS_0 :
|
||||
(FD_PC + (isJAL(FD_instr) ? Jimm(FD_instr) : Bimm(FD_instr)));
|
||||
|
||||
/** These three signals come from the Writeback stage **/
|
||||
wire wbEnable;
|
||||
wire [31:0] wbData;
|
||||
wire [4:0] wbRdId;
|
||||
|
||||
reg [31:0] RegisterBank [0:31];
|
||||
// RAS_isLink: 1 when register index r is a link register, i.e. x1 or x5.
// The RISC-V return-address-stack hints key on these two registers.
function RAS_isLink;
   input [4:0] r;
   RAS_isLink = (r == 5'd1) || (r == 5'd5);
endfunction

// D stage registers, return address stack (RAS) and register file write.
//
// RAS policy, following the RISC-V return-address prediction hints:
//   JAL   rd=link                    : push PC+4  (call)
//   JALR  rd!=link, rs1=link         : pop        (return)
//   JALR  rd=link,  rs1!=link        : push PC+4  (indirect call)
//   JALR  rd=link,  rs1=link, rd!=rs1: pop then push (top replaced)
//   JALR  rd=link,  rs1=link, rd==rs1: push PC+4
//   anything else (e.g. plain "j")   : RAS untouched
// The RAS only feeds a prediction that is checked in E (DE_predictRA), so
// this policy affects the hit rate, not the architectural result.
always @(posedge clk) begin

   if(!D_stall) begin
      DE_PC            <= FD_PC;
      DE_instr         <= (E_flush | FD_nop) ? NOP : FD_instr;
      DE_predictBranch <= D_predictBranch;
      DE_predictRA     <= RAS_0;   // the address D predicted for a JALR
      DE_PHTindex      <= PHT_index(FD_PC);
      DE_BHTindex      <= BHT_index(FD_PC);

      // Do not update the RAS for a bubble, nor for a wrong-path
      // instruction that E is squashing this very cycle (D_flush).
      if(!FD_nop && !D_flush) begin
         if(isJAL(FD_instr)) begin
            if(RAS_isLink(rdId(FD_instr))) begin
               // push
               RAS_3 <= RAS_2;
               RAS_2 <= RAS_1;
               RAS_1 <= RAS_0;
               RAS_0 <= FD_PC + 4;
            end
         end else if(isJALR(FD_instr)) begin
            case({RAS_isLink(rdId(FD_instr)), RAS_isLink(rs1Id(FD_instr))})
              2'b01: begin
                 // pop (RAS_3 keeps its value, there is nothing to shift in)
                 RAS_0 <= RAS_1;
                 RAS_1 <= RAS_2;
                 RAS_2 <= RAS_3;
              end
              2'b10: begin
                 // push
                 RAS_3 <= RAS_2;
                 RAS_2 <= RAS_1;
                 RAS_1 <= RAS_0;
                 RAS_0 <= FD_PC + 4;
              end
              2'b11: begin
                 if(rdId(FD_instr) == rs1Id(FD_instr)) begin
                    // push
                    RAS_3 <= RAS_2;
                    RAS_2 <= RAS_1;
                    RAS_1 <= RAS_0;
                    RAS_0 <= FD_PC + 4;
                 end else begin
                    // pop then push: only the top entry changes
                    RAS_0 <= FD_PC + 4;
                 end
              end
              default: ; // neither register is a link register: no RAS action
            endcase
         end
      end
   end

   // A flush always wins, even when D is stalled.
   if(E_flush) begin
      DE_instr <= NOP;
   end

   // Register file write port, driven by the Writeback stage.
   if(wbEnable) begin
      RegisterBank[wbRdId] <= wbData;
   end
end
|
||||
|
||||
/******************************************************************************/
|
||||
reg [31:0] DE_PC;
|
||||
reg [31:0] DE_instr;
|
||||
wire [31:0] DE_rs1 = RegisterBank[rs1Id(DE_instr)];
|
||||
wire [31:0] DE_rs2 = RegisterBank[rs2Id(DE_instr)];
|
||||
reg DE_predictBranch;
|
||||
reg [31:0] DE_predictRA;
|
||||
reg [BP_ADDR_BITS-1:0] DE_PHTindex;
|
||||
reg [BP_ADDR_BITS-1:0] DE_BHTindex;
|
||||
/******************************************************************************/
|
||||
|
||||
/*** E: Execute ***/
|
||||
|
||||
/*********** Register forwarding ************************************/
|
||||
|
||||
wire E_M_fwd_rs1 = rdId(EM_instr) != 0 && writesRd(EM_instr) &&
|
||||
(rdId(EM_instr) == rs1Id(DE_instr));
|
||||
|
||||
wire E_W_fwd_rs1 = rdId(MW_instr) != 0 && writesRd(MW_instr) &&
|
||||
(rdId(MW_instr) == rs1Id(DE_instr));
|
||||
|
||||
wire E_M_fwd_rs2 = rdId(EM_instr) != 0 && writesRd(EM_instr) &&
|
||||
(rdId(EM_instr) == rs2Id(DE_instr));
|
||||
|
||||
wire E_W_fwd_rs2 = rdId(MW_instr) != 0 && writesRd(MW_instr) &&
|
||||
(rdId(MW_instr) == rs2Id(DE_instr));
|
||||
|
||||
wire [31:0] E_rs1 = E_M_fwd_rs1 ? EM_Eresult :
|
||||
E_W_fwd_rs1 ? wbData :
|
||||
DE_rs1;
|
||||
|
||||
wire [31:0] E_rs2 = E_M_fwd_rs2 ? EM_Eresult :
|
||||
E_W_fwd_rs2 ? wbData :
|
||||
DE_rs2;
|
||||
|
||||
/*********** the ALU *************************************************/
|
||||
|
||||
wire [31:0] E_aluIn1 = E_rs1;
|
||||
|
||||
wire [31:0] E_aluIn2 =
|
||||
(isALUreg(DE_instr) | isBranch(DE_instr)) ? E_rs2 : Iimm(DE_instr);
|
||||
|
||||
wire [4:0] E_shamt = isALUreg(DE_instr) ? E_rs2[4:0] : shamt(DE_instr);
|
||||
|
||||
wire E_minus = DE_instr[30] & isALUreg(DE_instr);
|
||||
wire E_arith_shift = DE_instr[30];
|
||||
|
||||
// The adder is used by both arithmetic instructions and JALR.
|
||||
wire [31:0] E_aluPlus = E_aluIn1 + E_aluIn2;
|
||||
|
||||
// Use a single 33 bits subtract to do subtraction and all comparisons
|
||||
// (trick borrowed from swapforth/J1)
|
||||
wire [32:0] E_aluMinus = {1'b1, ~E_aluIn2} + {1'b0,E_aluIn1} + 33'b1;
|
||||
wire E_LT =
|
||||
(E_aluIn1[31] ^ E_aluIn2[31]) ? E_aluIn1[31] : E_aluMinus[32];
|
||||
wire E_LTU = E_aluMinus[32];
|
||||
wire E_EQ = (E_aluMinus[31:0] == 0);
|
||||
|
||||
// Flip a 32 bit word. Used by the shifter (a single shifter for
|
||||
// left and right shifts, saves silicon!)
|
||||
// flip32: returns x with its 32 bits in reverse order
// (result bit k is x bit 31-k).
function [31:0] flip32;
   input [31:0] x;
   integer k;
   begin
      for(k = 0; k < 32; k = k + 1) begin
         flip32[k] = x[31-k];
      end
   end
endfunction
|
||||
|
||||
wire [31:0] E_shifter_in =
|
||||
(funct3(DE_instr)==3'b001) ? flip32(E_aluIn1) : E_aluIn1;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
wire [31:0] E_shifter =
|
||||
$signed({E_arith_shift & E_aluIn1[31], E_shifter_in}) >>> E_aluIn2[4:0];
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
wire [31:0] E_leftshift = flip32(E_shifter);
|
||||
|
||||
reg [31:0] E_aluOut;
|
||||
always @(*) begin
|
||||
case(funct3(DE_instr))
|
||||
3'b000: E_aluOut = E_minus ? E_aluMinus[31:0] : E_aluPlus;
|
||||
3'b001: E_aluOut = E_leftshift;
|
||||
3'b010: E_aluOut = {31'b0, E_LT};
|
||||
3'b011: E_aluOut = {31'b0, E_LTU};
|
||||
3'b100: E_aluOut = E_aluIn1 ^ E_aluIn2;
|
||||
3'b101: E_aluOut = E_shifter;
|
||||
3'b110: E_aluOut = E_aluIn1 | E_aluIn2;
|
||||
3'b111: E_aluOut = E_aluIn1 & E_aluIn2;
|
||||
endcase
|
||||
end
|
||||
|
||||
/*********** Branch, JAL, JALR ***********************************/
|
||||
|
||||
reg E_takeBranch;
|
||||
always @(*) begin
|
||||
case (funct3(DE_instr))
|
||||
3'b000: E_takeBranch = E_EQ;
|
||||
3'b001: E_takeBranch = !E_EQ;
|
||||
3'b100: E_takeBranch = E_LT;
|
||||
3'b101: E_takeBranch = !E_LT;
|
||||
3'b110: E_takeBranch = E_LTU;
|
||||
3'b111: E_takeBranch = !E_LTU;
|
||||
default: E_takeBranch = 1'b0;
|
||||
endcase
|
||||
end
|
||||
|
||||
// Redirect fetch on a mispredicted branch or a mispredicted JALR target
|
||||
|
||||
`ifdef BENCH
|
||||
integer nbBranch = 0;
|
||||
integer nbBranchHit = 0;
|
||||
integer nbJAL = 0;
|
||||
integer nbJALR = 0;
|
||||
integer nbJALRhit = 0;
|
||||
`endif
|
||||
|
||||
// incdec_sat: 2-bit saturating up/down counter step.
//   prev : current counter value
//   dir  : 1 = count up (saturates at 2'b11), 0 = count down (saturates at 2'b00)
// Returns the next counter value.
function [1:0] incdec_sat;
   input [1:0] prev;
   input       dir;
   begin
      // incdec_sat = dir ? 2'b11 : 2'b00; // simple binary instead of bimodal
      if(dir) begin
         incdec_sat = (prev == 2'b11) ? 2'b11 : (prev + 2'b01);
      end else begin
         incdec_sat = (prev == 2'b00) ? 2'b00 : (prev - 2'b01);
      end
   end
endfunction
|
||||
|
||||
wire [31:0] E_JALRaddr = {E_aluPlus[31:1],1'b0};
|
||||
|
||||
wire E_JumpOrBranch = (
|
||||
(isJALR(DE_instr) && (DE_predictRA != E_JALRaddr)) ||
|
||||
(isBranch(DE_instr) && (E_takeBranch^DE_predictBranch))
|
||||
);
|
||||
|
||||
wire [31:0] E_JumpOrBranchAddr =
|
||||
isBranch(DE_instr) ?
|
||||
(DE_PC + (DE_predictBranch ? 4 : Bimm(DE_instr))) :
|
||||
/* JALR */ E_JALRaddr ;
|
||||
|
||||
wire [31:0] E_result =
|
||||
(isJAL(DE_instr) | isJALR(DE_instr)) ? DE_PC+4 :
|
||||
isLUI(DE_instr) ? Uimm(DE_instr) :
|
||||
isAUIPC(DE_instr) ? DE_PC + Uimm(DE_instr) :
|
||||
E_aluOut ;
|
||||
|
||||
/**************************************************************/
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
||||
//if(isJALR(DE_instr)) begin
|
||||
// $display("JALR predict %0h effective %0h", DE_predictRA, E_JALRaddr);
|
||||
//end
|
||||
|
||||
EM_PC <= DE_PC;
|
||||
EM_instr <= DE_instr;
|
||||
EM_rs2 <= E_rs2;
|
||||
EM_Eresult <= E_result;
|
||||
EM_addr <= isStore(DE_instr) ? E_rs1 + Simm(DE_instr) :
|
||||
E_rs1 + Iimm(DE_instr) ;
|
||||
|
||||
EM_JumpOrBranchNow <= E_JumpOrBranch;
|
||||
EM_JumpOrBranchAddr <= E_JumpOrBranchAddr;
|
||||
|
||||
if(isBranch(DE_instr)) begin
|
||||
PHT[DE_PHTindex] <= { PHT[DE_PHTindex][BP_HISTO_BITS-2:0],
|
||||
E_takeBranch };
|
||||
BHT[DE_BHTindex] <= incdec_sat(BHT[DE_BHTindex], E_takeBranch);
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef BENCH
|
||||
always @(posedge clk) begin
|
||||
if(resetn) begin
|
||||
if(isBranch(DE_instr)) begin
|
||||
nbBranch <= nbBranch + 1;
|
||||
if(E_takeBranch == DE_predictBranch) begin
|
||||
nbBranchHit <= nbBranchHit + 1;
|
||||
end
|
||||
end
|
||||
if(isJAL(DE_instr)) begin
|
||||
nbJAL <= nbJAL + 1;
|
||||
end
|
||||
if(isJALR(DE_instr)) begin
|
||||
nbJALR <= nbJALR + 1;
|
||||
if(DE_predictRA == E_JALRaddr) begin
|
||||
nbJALRhit <= nbJALRhit + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
assign halt = resetn & isEBREAK(DE_instr);
|
||||
|
||||
/******************************************************************************/
|
||||
reg [31:0] EM_PC;
|
||||
reg [31:0] EM_instr;
|
||||
reg [31:0] EM_rs2;
|
||||
reg [31:0] EM_Eresult;
|
||||
reg [31:0] EM_addr;
|
||||
reg EM_JumpOrBranchNow;
|
||||
reg [31:0] EM_JumpOrBranchAddr;
|
||||
/******************************************************************************/
|
||||
|
||||
/*** M: Memory ***/
|
||||
|
||||
wire [2:0] M_funct3 = funct3(EM_instr);
|
||||
wire M_isB = (M_funct3[1:0] == 2'b00);
|
||||
wire M_isH = (M_funct3[1:0] == 2'b01);
|
||||
|
||||
/*************** STORE **************************/
|
||||
|
||||
wire [31:0] M_STORE_data;
|
||||
assign M_STORE_data[ 7: 0] = EM_rs2[7:0];
|
||||
assign M_STORE_data[15: 8] = EM_addr[0] ? EM_rs2[7:0] : EM_rs2[15: 8] ;
|
||||
assign M_STORE_data[23:16] = EM_addr[1] ? EM_rs2[7:0] : EM_rs2[23:16] ;
|
||||
assign M_STORE_data[31:24] = EM_addr[0] ? EM_rs2[7:0] :
|
||||
EM_addr[1] ? EM_rs2[15:8] : EM_rs2[31:24] ;
|
||||
|
||||
// The memory write mask:
|
||||
// 1111 if writing a word
|
||||
// 0011 or 1100 if writing a halfword
|
||||
// (depending on EM_addr[1])
|
||||
// 0001, 0010, 0100 or 1000 if writing a byte
|
||||
// (depending on EM_addr[1:0])
|
||||
|
||||
wire [3:0] M_STORE_wmask = M_isB ?
|
||||
(EM_addr[1] ?
|
||||
(EM_addr[0] ? 4'b1000 : 4'b0100) :
|
||||
(EM_addr[0] ? 4'b0010 : 4'b0001)
|
||||
) :
|
||||
M_isH ? (EM_addr[1] ? 4'b1100 : 4'b0011) :
|
||||
4'b1111 ;
|
||||
|
||||
|
||||
wire M_isIO = EM_addr[22];
|
||||
wire M_isRAM = !M_isIO;
|
||||
|
||||
assign IO_mem_addr = EM_addr;
|
||||
assign IO_mem_wr = isStore(EM_instr) && M_isIO; // && M_STORE_wmask[0];
|
||||
assign IO_mem_wdata = EM_rs2;
|
||||
|
||||
wire [3:0] M_wmask = {4{isStore(EM_instr) & M_isRAM}} & M_STORE_wmask;
|
||||
|
||||
reg [31:0] DATARAM [0:16383]; // 16384 4-bytes words
|
||||
// 64 KB of data RAM in total
|
||||
wire [13:0] M_word_addr = EM_addr[15:2];
|
||||
|
||||
always @(posedge clk) begin
|
||||
MW_Mdata <= DATARAM[M_word_addr];
|
||||
if(M_wmask[0]) DATARAM[M_word_addr][ 7:0 ] <= M_STORE_data[ 7:0 ];
|
||||
if(M_wmask[1]) DATARAM[M_word_addr][15:8 ] <= M_STORE_data[15:8 ];
|
||||
if(M_wmask[2]) DATARAM[M_word_addr][23:16] <= M_STORE_data[23:16];
|
||||
if(M_wmask[3]) DATARAM[M_word_addr][31:24] <= M_STORE_data[31:24];
|
||||
end
|
||||
|
||||
initial begin
|
||||
$readmemh("DATARAM.hex",DATARAM);
|
||||
end
|
||||
|
||||
// M -> W pipeline registers, counter read and retired-instruction counter.
// Every register written here uses a non-blocking assignment, so
// MW_CSRresult changes together with MW_instr and the other MW_* registers.
// That keeps wbData free of clock-edge ordering races in simulation.
always @(posedge clk) begin
   MW_PC       <= EM_PC;
   MW_instr    <= EM_instr;
   MW_Eresult  <= EM_Eresult;
   MW_IOresult <= IO_mem_rdata;
   MW_addr     <= EM_addr;

   // csrId = {instr[27], instr[21]} selects which counter word is read.
   case(csrId(EM_instr))
     2'b00: MW_CSRresult <= cycle[31:0];
     2'b10: MW_CSRresult <= cycle[63:32];
     2'b01: MW_CSRresult <= instret[31:0];
     2'b11: MW_CSRresult <= instret[63:32];
   endcase

   // Every instruction leaving W counts as retired, unless it is a NOP.
   if(!resetn) begin
      instret <= 0;
   end else if(MW_instr != NOP) begin
      instret <= instret + 1;
   end
end
|
||||
|
||||
/******************************************************************************/
|
||||
reg [31:0] MW_PC;
|
||||
reg [31:0] MW_instr;
|
||||
reg [31:0] MW_Eresult;
|
||||
reg [31:0] MW_addr;
|
||||
reg [31:0] MW_Mdata;
|
||||
reg [31:0] MW_IOresult;
|
||||
reg [31:0] MW_CSRresult;
|
||||
/******************************************************************************/
|
||||
|
||||
/*** W: WriteBack ***/
|
||||
|
||||
wire [2:0] W_funct3 = funct3(MW_instr);
|
||||
wire W_isB = (W_funct3[1:0] == 2'b00);
|
||||
wire W_isH = (W_funct3[1:0] == 2'b01);
|
||||
wire W_sext = !W_funct3[2];
|
||||
wire W_isIO = MW_addr[22];
|
||||
|
||||
/*************** LOAD ****************************/
|
||||
|
||||
wire [15:0] W_LOAD_H=MW_addr[1] ? MW_Mdata[31:16]: MW_Mdata[15:0];
|
||||
wire [7:0] W_LOAD_B=MW_addr[0] ? W_LOAD_H[15:8] : W_LOAD_H[7:0];
|
||||
wire W_LOAD_sign=W_sext & (W_isB ? W_LOAD_B[7] : W_LOAD_H[15]);
|
||||
|
||||
wire [31:0] W_Mresult = W_isB ? {{24{W_LOAD_sign}},W_LOAD_B} :
|
||||
W_isH ? {{16{W_LOAD_sign}},W_LOAD_H} :
|
||||
MW_Mdata ;
|
||||
|
||||
assign wbData =
|
||||
isLoad(MW_instr) ? (W_isIO ? MW_IOresult : W_Mresult) :
|
||||
isCSRRS(MW_instr) ? MW_CSRresult :
|
||||
MW_Eresult;
|
||||
|
||||
assign wbEnable = writesRd(MW_instr) && rdId(MW_instr) != 0;
|
||||
assign wbRdId = rdId(MW_instr);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// Not testing that rdId(DE_instr) != 0 because in general one
|
||||
// does not Load to zero ! (idem for CSRRS).
|
||||
wire rs1Hazard = readsRs1(FD_instr) && (rs1Id(FD_instr) == rdId(DE_instr)) ;
|
||||
wire rs2Hazard = readsRs2(FD_instr) && (rs2Id(FD_instr) == rdId(DE_instr)) ;
|
||||
|
||||
wire dataHazard = !FD_nop &&
|
||||
(isLoad(DE_instr)||isCSRRS(DE_instr)) &&
|
||||
(rs1Hazard || rs2Hazard);
|
||||
|
||||
assign F_stall = dataHazard | halt;
|
||||
assign D_stall = dataHazard | halt;
|
||||
|
||||
assign D_flush = E_JumpOrBranch;
|
||||
assign E_flush = E_JumpOrBranch | dataHazard;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
`ifdef BENCH
|
||||
/* verilator lint_off WIDTH */
|
||||
always @(posedge clk) begin
|
||||
if(halt) begin
|
||||
$display("Simulated processor's report");
|
||||
$display("----------------------------");
|
||||
$display("Branch hit = %3.3f\%%",
|
||||
nbBranchHit*100.0/nbBranch );
|
||||
$display("JALR hit = %3.3f\%%",
|
||||
nbJALRhit*100.0/nbJALR );
|
||||
$display("CPI = %3.3f",(cycle*1.0)/(instret*1.0));
|
||||
$display("Instr. mix = (Branch:%3.3f\%% JAL:%3.3f\%% JALR:%3.3f\%%)",
|
||||
nbBranch*100.0/instret,
|
||||
nbJAL*100.0/instret,
|
||||
nbJALR*100.0/instret);
|
||||
$finish();
|
||||
end
|
||||
end
|
||||
/* verilator lint_on WIDTH */
|
||||
`endif
|
||||
|
||||
`ifdef VERBOSE
|
||||
always @(posedge clk) begin
|
||||
if(resetn & !halt) begin
|
||||
$write("D_JoB=%d E_JoB=%d D_flush=%d E_flush=%d\n",
|
||||
D_JumpOrBranchNow, EM_JumpOrBranchNow, D_flush, E_flush
|
||||
);
|
||||
|
||||
$write("[W] PC=%h ", MW_PC);
|
||||
$write(" ");
|
||||
riscv_disasm(MW_instr,MW_PC);
|
||||
if(wbEnable) $write(" x%0d <- 0x%0h",rdId(MW_instr),wbData);
|
||||
$write("\n");
|
||||
|
||||
$write("[M] PC=%h ", EM_PC);
|
||||
$write(" ");
|
||||
riscv_disasm(EM_instr,EM_PC);
|
||||
$write("\n");
|
||||
|
||||
$write("[E] PC=%h ", DE_PC);
|
||||
$write(" ");
|
||||
riscv_disasm(DE_instr,DE_PC);
|
||||
if(DE_instr != NOP) begin
|
||||
$write(" rs1=0x%h rs2=0x%h ",DE_rs1, DE_rs2);
|
||||
if(isBranch(DE_instr)) begin
|
||||
$write(" taken:%0d prediction OK:%0d",
|
||||
E_takeBranch,
|
||||
(E_takeBranch == DE_predictBranch) ? 1 : 0
|
||||
);
|
||||
end
|
||||
end
|
||||
$write("\n");
|
||||
|
||||
$write("[D] PC=%h ", FD_PC);
|
||||
$write("[%s%s] ",
|
||||
dataHazard && rs1Hazard?"*":" ",
|
||||
dataHazard && rs2Hazard?"*":" ");
|
||||
riscv_disasm(FD_nop ? NOP : FD_instr,FD_PC);
|
||||
if(isBranch(FD_instr)) begin
|
||||
$write(" predict taken:%0d",D_predictBranch);
|
||||
end
|
||||
$write("\n");
|
||||
|
||||
$write("[F] PC=%h ", F_PC);
|
||||
if(D_JumpOrBranchNow) $write(" PC <- [D] 0x%0h",D_JumpOrBranchAddr);
|
||||
if(EM_JumpOrBranchNow) $write(" PC <- [E] 0x%0h",EM_JumpOrBranchAddr);
|
||||
$write("\n");
|
||||
|
||||
$display("");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module SOC (
|
||||
input CLK, // system clock
|
||||
input RESET,// reset button
|
||||
output reg [4:0] LEDS, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
wire clk;
|
||||
wire resetn;
|
||||
|
||||
wire [31:0] IO_mem_addr;
|
||||
wire [31:0] IO_mem_rdata;
|
||||
wire [31:0] IO_mem_wdata;
|
||||
wire IO_mem_wr;
|
||||
|
||||
Processor CPU(
|
||||
.clk(clk),
|
||||
.resetn(resetn),
|
||||
.IO_mem_addr(IO_mem_addr),
|
||||
.IO_mem_rdata(IO_mem_rdata),
|
||||
.IO_mem_wdata(IO_mem_wdata),
|
||||
.IO_mem_wr(IO_mem_wr)
|
||||
);
|
||||
|
||||
wire [13:0] IO_wordaddr = IO_mem_addr[15:2];
|
||||
|
||||
// Memory-mapped IO in IO page, 1-hot addressing in word address.
|
||||
localparam IO_LEDS_bit = 0; // W five leds
|
||||
localparam IO_UART_DAT_bit = 1; // W data to send (8 bits)
|
||||
localparam IO_UART_CNTL_bit = 2; // R status. bit 9: busy sending
|
||||
|
||||
always @(posedge clk) begin
|
||||
if(IO_mem_wr & IO_wordaddr[IO_LEDS_bit]) begin
|
||||
LEDS <= IO_mem_wdata[4:0];
|
||||
end
|
||||
end
|
||||
|
||||
wire uart_valid = IO_mem_wr & IO_wordaddr[IO_UART_DAT_bit];
|
||||
wire uart_ready;
|
||||
|
||||
corescore_emitter_uart #(
|
||||
.clk_freq_hz(`CPU_FREQ*1000000),
|
||||
.baud_rate(1000000)
|
||||
) UART(
|
||||
.i_clk(clk),
|
||||
.i_rst(!resetn),
|
||||
.i_data(IO_mem_wdata[7:0]),
|
||||
.i_valid(uart_valid),
|
||||
.o_ready(uart_ready),
|
||||
.o_uart_tx(TXD)
|
||||
);
|
||||
|
||||
assign IO_mem_rdata =
|
||||
IO_wordaddr[IO_UART_CNTL_bit] ? { 22'b0, !uart_ready, 9'b0}
|
||||
: 32'b0;
|
||||
|
||||
`ifdef BENCH
|
||||
always @(posedge clk) begin
|
||||
if(uart_valid) begin
|
||||
`ifdef VERBOSE
|
||||
$display("UART: %c", IO_mem_wdata[7:0]);
|
||||
`else
|
||||
$write("%c", IO_mem_wdata[7:0] );
|
||||
$fflush(32'h8000_0001);
|
||||
`endif
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
// Gearbox and reset circuitry.
|
||||
Clockworks CW(
|
||||
.CLK(CLK),
|
||||
.RESET(RESET),
|
||||
.clk(clk),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
Reference in New Issue
Block a user