From 8d2f7d4e729655adb5fcfe799377912cfb43a032 Mon Sep 17 00:00:00 2001 From: Shamshura Egor Date: Thu, 23 Oct 2025 12:30:27 +0000 Subject: [PATCH 1/6] Add benchmarks --- CMakeLists.txt | 37 ++++- benchmarks/CMakeLists.txt | 22 +++ benchmarks/common/CMakeLists.txt | 2 + benchmarks/common/include/bench/utils.h | 21 +++ benchmarks/common/start.s | 39 +++++ benchmarks/fibrec/CMakeLists.txt | 2 + benchmarks/fibrec/fibrec.c | 31 ++++ benchmarks/median/CMakeLists.txt | 1 + benchmarks/median/median.c | 42 +++++ benchmarks/median/median.h | 11 ++ benchmarks/median/median_gendata.pl | 140 ++++++++++++++++ benchmarks/median/median_main.c | 36 +++++ benchmarks/multiply/CMakeLists.txt | 1 + benchmarks/multiply/multiply.c | 19 +++ benchmarks/multiply/multiply.h | 11 ++ benchmarks/multiply/multiply_gendata.pl | 142 ++++++++++++++++ benchmarks/multiply/multiply_main.c | 39 +++++ benchmarks/nopbench/CMakeLists.txt | 4 + benchmarks/nopbench/nopbench.s.in | 9 ++ benchmarks/qsort/CMakeLists.txt | 1 + benchmarks/qsort/qsort.c | 154 ++++++++++++++++++ benchmarks/qsort/qsort_gendata.pl | 132 +++++++++++++++ benchmarks/rsort/CMakeLists.txt | 1 + benchmarks/rsort/qsort_gendata.pl | 132 +++++++++++++++ benchmarks/rsort/rsort.c | 117 ++++++++++++++ benchmarks/towers/CMakeLists.txt | 1 + benchmarks/towers/towers_main.c | 207 ++++++++++++++++++++++++ benchmarks/vvadd/CMakeLists.txt | 1 + benchmarks/vvadd/vvadd_gendata.pl | 139 ++++++++++++++++ benchmarks/vvadd/vvadd_main.c | 44 +++++ cmake/dependencies.cmake | 6 +- cmake/toolchain/riscv.cmake | 65 ++++++++ src/hart/include/prot/hart.hh | 2 + src/jit/lightning/CMakeLists.txt | 6 +- src/jit/llvm/llvmbasedjit.cc | 16 +- src/jit/mir/CMakeLists.txt | 1 + tools/bench/Benchmark.py | 98 +++++++++++ tools/bench/DoBenchmark.py | 135 ++++++++++++++++ tools/bench/requirements.txt | 4 + tools/sim/sim_app.cpp | 10 +- 40 files changed, 1861 insertions(+), 20 deletions(-) create mode 100644 benchmarks/CMakeLists.txt create mode 100644 
benchmarks/common/CMakeLists.txt create mode 100644 benchmarks/common/include/bench/utils.h create mode 100644 benchmarks/common/start.s create mode 100644 benchmarks/fibrec/CMakeLists.txt create mode 100644 benchmarks/fibrec/fibrec.c create mode 100644 benchmarks/median/CMakeLists.txt create mode 100644 benchmarks/median/median.c create mode 100644 benchmarks/median/median.h create mode 100755 benchmarks/median/median_gendata.pl create mode 100644 benchmarks/median/median_main.c create mode 100644 benchmarks/multiply/CMakeLists.txt create mode 100644 benchmarks/multiply/multiply.c create mode 100644 benchmarks/multiply/multiply.h create mode 100755 benchmarks/multiply/multiply_gendata.pl create mode 100644 benchmarks/multiply/multiply_main.c create mode 100644 benchmarks/nopbench/CMakeLists.txt create mode 100644 benchmarks/nopbench/nopbench.s.in create mode 100644 benchmarks/qsort/CMakeLists.txt create mode 100644 benchmarks/qsort/qsort.c create mode 100755 benchmarks/qsort/qsort_gendata.pl create mode 100644 benchmarks/rsort/CMakeLists.txt create mode 100755 benchmarks/rsort/qsort_gendata.pl create mode 100644 benchmarks/rsort/rsort.c create mode 100644 benchmarks/towers/CMakeLists.txt create mode 100644 benchmarks/towers/towers_main.c create mode 100644 benchmarks/vvadd/CMakeLists.txt create mode 100755 benchmarks/vvadd/vvadd_gendata.pl create mode 100644 benchmarks/vvadd/vvadd_main.c create mode 100644 cmake/toolchain/riscv.cmake create mode 100644 tools/bench/Benchmark.py create mode 100644 tools/bench/DoBenchmark.py create mode 100644 tools/bench/requirements.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 7cd83a8..3bc98a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,21 +1,42 @@ cmake_minimum_required(VERSION 3.22 FATAL_ERROR) -project(prot_jit) +project(prot_jit LANGUAGES C CXX) if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set_property(CACHE CMAKE_INSTALL_PREFIX PROPERTY VALUE "${CMAKE_BINARY_DIR}/install") + set_property(CACHE 
CMAKE_INSTALL_PREFIX + PROPERTY VALUE "${CMAKE_BINARY_DIR}/install") endif() option(PROT_ENABLE_WERROR "Enable -Werror option (CI)" OFF) - +option(PROT_BUILD_BENCHMARKS + "Enable benchmarks build (requires riscv gnu toolchain)" OFF) enable_testing() -include(cmake/CPM.cmake) -# Provide default CXX settings -include(cmake/defaults.cmake) -include(cmake/dependencies.cmake) -include(cmake/utils.cmake) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(CPM) +# Provide default CXX settings +include(defaults) +include(dependencies) +include(utils) add_subdirectory(src) add_subdirectory(tools) + +if(PROT_BUILD_BENCHMARKS) + set(RISCV_TOOLCHAIN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/toolchain/riscv.cmake) + set(DEFAULT_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_INSTALL_PREFIX} + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_TOOLCHAIN_FILE:PATH=${RISCV_TOOLCHAIN}) + if (DEFINED RISCV_TOOLCHAIN_DIR) + list(APPEND DEFAULT_ARGS -DRISCV_TOOLCHAIN_DIR:PATH=${RISCV_TOOLCHAIN_DIR}) + endif() + ExternalProject_Add( + benchmarks + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/benchmarks + CONFIGURE_HANDLED_BY_BUILD True + BUILD_ALWAYS True + CMAKE_ARGS ${DEFAULT_ARGS}) +endif() diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt new file mode 100644 index 0000000..4b8200d --- /dev/null +++ b/benchmarks/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.22 FATAL_ERROR) +project(psim-benchmarks LANGUAGES C ASM) + +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set_property(CACHE CMAKE_INSTALL_PREFIX PROPERTY VALUE "${CMAKE_BINARY_DIR}/install") +endif() + +macro(prot_add_bench tar) + add_executable(${tar} ${ARGN}) + target_link_libraries(${tar} PRIVATE bench_startup) + install(TARGETS ${tar} DESTINATION bench) +endmacro() + +add_subdirectory(common) +add_subdirectory(qsort) +add_subdirectory(towers) +add_subdirectory(multiply) +add_subdirectory(rsort) +add_subdirectory(vvadd) 
+add_subdirectory(median) +add_subdirectory(nopbench) +add_subdirectory(fibrec) diff --git a/benchmarks/common/CMakeLists.txt b/benchmarks/common/CMakeLists.txt new file mode 100644 index 0000000..811c2c5 --- /dev/null +++ b/benchmarks/common/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(bench_startup OBJECT start.s) +target_include_directories(bench_startup PUBLIC include) diff --git a/benchmarks/common/include/bench/utils.h b/benchmarks/common/include/bench/utils.h new file mode 100644 index 0000000..f4a320d --- /dev/null +++ b/benchmarks/common/include/bench/utils.h @@ -0,0 +1,21 @@ +#ifndef UTILS_H_INCLUDED +#define UTILS_H_INCLUDED + + +static int verify(int n, const int *test, const int *verify) { + int i; + // Unrolled for faster verification + for (i = 0; i < n / 2 * 2; i += 2) { + int t0 = test[i], t1 = test[i + 1]; + int v0 = verify[i], v1 = verify[i + 1]; + if (t0 != v0) + return i + 1; + if (t1 != v1) + return i + 2; + } + if (n % 2 != 0 && test[n - 1] != verify[n - 1]) + return n; + return 0; +} + +#endif // UTILS_H_INCLUDED diff --git a/benchmarks/common/start.s b/benchmarks/common/start.s new file mode 100644 index 0000000..a9b0814 --- /dev/null +++ b/benchmarks/common/start.s @@ -0,0 +1,39 @@ +.global _start +.section .text +_start: + li x1, 0 + # li x2, 0 set via sim + li x3, 0 + li x4, 0 + li x5, 0 + li x6, 0 + li x7, 0 + li x8, 0 + li x9, 0 + li x10,0 + li x11,0 + li x12,0 + li x13,0 + li x14,0 + li x15,0 + li x16,0 + li x17,0 + li x18,0 + li x19,0 + li x20,0 + li x21,0 + li x22,0 + li x23,0 + li x24,0 + li x25,0 + li x26,0 + li x27,0 + li x28,0 + li x29,0 + li x30,0 + li x31,0 + + jal main + + li a7, 93 + ecall diff --git a/benchmarks/fibrec/CMakeLists.txt b/benchmarks/fibrec/CMakeLists.txt new file mode 100644 index 0000000..1dcaec1 --- /dev/null +++ b/benchmarks/fibrec/CMakeLists.txt @@ -0,0 +1,2 @@ +prot_add_bench(fibrec fibrec.c) +target_compile_definitions(fibrec PRIVATE N=31) diff --git a/benchmarks/fibrec/fibrec.c 
b/benchmarks/fibrec/fibrec.c new file mode 100644 index 0000000..539d818 --- /dev/null +++ b/benchmarks/fibrec/fibrec.c @@ -0,0 +1,31 @@ +#ifndef N +#error "N was not defined" +#endif + +int verify(unsigned r, unsigned n) { + static const unsigned calculated[] = { + 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, + 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, + 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, + 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, + 267914296, 433494437, 701408733, 1134903170, 1836311903, 2971215073, 512559680, + 3483774753, 3996334433, 3185141890, 2886509027, 1776683621, 368225352, 2144908973, + 2513134325, 363076002, 2876210327, 3239286329, 1820529360, 764848393, 2585377753, + 3350226146, 1640636603, + }; + if (r == calculated[n]) + return 0; + return 1; +} + +unsigned fib(unsigned n) { + if (n <= 1) + return 1; + return fib(n - 1) + fib(n - 2); +} + +int main() { + unsigned r = fib(N); + return verify(r, N); +} + diff --git a/benchmarks/median/CMakeLists.txt b/benchmarks/median/CMakeLists.txt new file mode 100644 index 0000000..74be551 --- /dev/null +++ b/benchmarks/median/CMakeLists.txt @@ -0,0 +1 @@ +prot_add_bench(median median.c median_main.c) diff --git a/benchmarks/median/median.c b/benchmarks/median/median.c new file mode 100644 index 0000000..1999185 --- /dev/null +++ b/benchmarks/median/median.c @@ -0,0 +1,42 @@ +// See LICENSE for license details. 
+ +//************************************************************************** +// Median filter (c version) +//-------------------------------------------------------------------------- + +void median( int n, int input[], int results[] ) +{ + int A, B, C, i; + + // Zero the ends + results[0] = 0; + results[n-1] = 0; + + // Do the filter + for ( i = 1; i < (n-1); i++ ) { + + A = input[i-1]; + B = input[i]; + C = input[i+1]; + + if ( A < B ) { + if ( B < C ) + results[i] = B; + else if ( C < A ) + results[i] = A; + else + results[i] = C; + } + + else { + if ( A < C ) + results[i] = A; + else if ( C < B ) + results[i] = B; + else + results[i] = C; + } + + } + +} diff --git a/benchmarks/median/median.h b/benchmarks/median/median.h new file mode 100644 index 0000000..7f9791c --- /dev/null +++ b/benchmarks/median/median.h @@ -0,0 +1,11 @@ +// See LICENSE for license details. + +//************************************************************************** +// Median filters +//-------------------------------------------------------------------------- + +// Simple C version +void median( int n, int input[], int results[] ); + +// Simple assembly version +void median_asm( int n, int input[], int results[] ); diff --git a/benchmarks/median/median_gendata.pl b/benchmarks/median/median_gendata.pl new file mode 100755 index 0000000..373904e --- /dev/null +++ b/benchmarks/median/median_gendata.pl @@ -0,0 +1,140 @@ +#!/usr/bin/perl -w +#========================================================================== +# median_gendata.pl +# +# Author : Christopher Batten (cbatten@mit.edu) +# Date : May 9, 2005 +# +(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; +# +# Simple script which creates an input data set and the reference data +# for the median benchmark. 
+# +ENDMSG + +use strict "vars"; +use warnings; +no warnings("once"); +use Getopt::Long; + +#-------------------------------------------------------------------------- +# Command line processing +#-------------------------------------------------------------------------- + +our %opts; + +sub usage() +{ + + print "\n"; + print " Usage: median_gendata.pl [options] \n"; + print "\n"; + print " Options:\n"; + print " --help print this message\n"; + print " --size size of input data [750]\n"; + print " --seed random seed [1]\n"; + print "$usageMsg"; + + exit(); +} + +sub processCommandLine() +{ + + $opts{"help"} = 0; + $opts{"size"} = 750; + $opts{"seed"} = 1; + Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); + $opts{"help"} and usage(); + +} + +#-------------------------------------------------------------------------- +# Helper Functions +#-------------------------------------------------------------------------- + +sub printArray +{ + my $arrayName = $_[0]; + my $arrayRef = $_[1]; + + my $numCols = 20; + my $arrayLen = scalar(@{$arrayRef}); + + print "int ".$arrayName."[DATA_SIZE] = \n"; + print "{\n"; + + if ( $arrayLen <= $numCols ) { + print " "; + for ( my $i = 0; $i < $arrayLen; $i++ ) { + print sprintf("%3d",$arrayRef->[$i]); + if ( $i != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + else { + my $numRows = int($arrayLen/$numCols); + for ( my $j = 0; $j < $numRows; $j++ ) { + print " "; + for ( my $i = 0; $i < $numCols; $i++ ) { + my $index = $j*$numCols + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + if ( $arrayLen > ($numRows*$numCols) ) { + print " "; + for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { + my $index = $numCols*$numRows + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + } + + print "};\n\n"; +} + 
+#-------------------------------------------------------------------------- +# Main +#-------------------------------------------------------------------------- + +sub main() +{ + + processCommandLine(); + srand($opts{"seed"}); + + my @values; + for ( my $i = 0; $i < $opts{"size"}; $i++ ) { + push( @values, int(rand(999)) ); # integer samples in 0..998 + } + + my @median; + $median[0] = 0; # first and last outputs are fixed to 0 + $median[$opts{"size"}-1] = 0; + for ( my $i = 1; $i < $opts{"size"}-1; $i++ ) { + my @tempList = ( $values[$i-1], $values[$i], $values[$i+1] ); + my @sorted = sort { $a <=> $b } @tempList; + $median[$i] = $sorted[1]; # middle value of the three-element window + } + + print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; + printArray( "input_data", \@values ); + printArray( "verify_data", \@median ); + +} + +main(); + diff --git a/benchmarks/median/median_main.c b/benchmarks/median/median_main.c new file mode 100644 index 0000000..2f9736f --- /dev/null +++ b/benchmarks/median/median_main.c @@ -0,0 +1,36 @@ +// See LICENSE for license details. + +//************************************************************************** +// Median filter benchmark +//-------------------------------------------------------------------------- +// +// This benchmark performs a 1D three element median filter. The +// input data (and reference data) should be generated using the +// median_gendata.pl perl script and dumped to a file named +// dataset1.h. 
+ +#include "bench/utils.h" +#include "median.h" + +//-------------------------------------------------------------------------- +// Input/Reference Data + +#include "dataset1.h" + +//-------------------------------------------------------------------------- +// Main + +int main() { + int results_data[DATA_SIZE]; + +#if PREALLOCATE + // If needed we preallocate everything in the caches + median(DATA_SIZE, input_data, results_data); +#endif + + // Do the filter + median(DATA_SIZE, input_data, results_data); + + // Check the results + return verify(DATA_SIZE, results_data, verify_data); +} diff --git a/benchmarks/multiply/CMakeLists.txt b/benchmarks/multiply/CMakeLists.txt new file mode 100644 index 0000000..d8b4c49 --- /dev/null +++ b/benchmarks/multiply/CMakeLists.txt @@ -0,0 +1 @@ +prot_add_bench(multiply multiply_main.c multiply.c) diff --git a/benchmarks/multiply/multiply.c b/benchmarks/multiply/multiply.c new file mode 100644 index 0000000..2d01e33 --- /dev/null +++ b/benchmarks/multiply/multiply.c @@ -0,0 +1,19 @@ +// See LICENSE for license details. + +// ************************************************************************* +// multiply function (c version) +// ------------------------------------------------------------------------- + +int multiply( int x, int y ) { + int i; + int result = 0; + + for (i = 0; i < 32; i++) { + if ((x & 0x1) == 1) + result = result + y; + x = x >> 1; + y = y << 1; + } + return result; +} + diff --git a/benchmarks/multiply/multiply.h b/benchmarks/multiply/multiply.h new file mode 100644 index 0000000..b2b1cf7 --- /dev/null +++ b/benchmarks/multiply/multiply.h @@ -0,0 +1,11 @@ +// See LICENSE for license details. 
+ +//************************************************************************** +// Software multiply function +//-------------------------------------------------------------------------- + +// Simple C version +int multiply(int x, int y); + +// Simple assembly version +int multiply_asm(int x, int y); diff --git a/benchmarks/multiply/multiply_gendata.pl b/benchmarks/multiply/multiply_gendata.pl new file mode 100755 index 0000000..b8d8ed5 --- /dev/null +++ b/benchmarks/multiply/multiply_gendata.pl @@ -0,0 +1,142 @@ +#!/usr/bin/perl -w +#========================================================================== +# multiply_gendata.pl +# +# Author : Christopher Batten (cbatten@mit.edu) +# Date : May 9, 2005 +# +(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; +# +# Simple script which creates an input data set and the reference data +# for the multiply benchmark. +# +ENDMSG + +use strict "vars"; +use warnings; +no warnings("once"); +use Getopt::Long; + +#-------------------------------------------------------------------------- +# Command line processing +#-------------------------------------------------------------------------- + +our %opts; + +sub usage() +{ + + print "\n"; + print " Usage: multiply_gendata.pl [options] \n"; + print "\n"; + print " Options:\n"; + print " --help print this message\n"; + print " --size size of input data [750]\n"; + print " --seed random seed [1]\n"; + print "$usageMsg"; + + exit(); +} + +sub processCommandLine() +{ + + $opts{"help"} = 0; + $opts{"size"} = 750; + $opts{"seed"} = 1; + Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); + $opts{"help"} and usage(); + +} + +#-------------------------------------------------------------------------- +# Helper Functions +#-------------------------------------------------------------------------- + +sub printArray +{ + my $arrayName = $_[0]; + my $arrayRef = $_[1]; + + my $numCols = 20; + my $arrayLen = scalar(@{$arrayRef}); + + print "int ".$arrayName."[DATA_SIZE] = 
\n"; + print "{\n"; + + if ( $arrayLen <= $numCols ) { + print " "; + for ( my $i = 0; $i < $arrayLen; $i++ ) { + print sprintf("%3d",$arrayRef->[$i]); + if ( $i != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + else { + my $numRows = int($arrayLen/$numCols); + for ( my $j = 0; $j < $numRows; $j++ ) { + print " "; + for ( my $i = 0; $i < $numCols; $i++ ) { + my $index = $j*$numCols + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + if ( $arrayLen > ($numRows*$numCols) ) { + print " "; + for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { + my $index = $numCols*$numRows + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + } + + print "};\n\n"; +} + +#-------------------------------------------------------------------------- +# Main +#-------------------------------------------------------------------------- + +sub main() +{ + + processCommandLine(); + srand($opts{"seed"}); + + my @values1; + for ( my $i = 0; $i < $opts{"size"}; $i++ ) { + push( @values1, int(rand(999)) ); + } + + my @values2; + for ( my $i = 0; $i < $opts{"size"}; $i++ ) { + push( @values2, int(rand(999)) ); + } + + my @multiply; + for ( my $i = 0; $i < $opts{"size"}; $i++ ) { + $multiply[$i] = $values1[$i] * $values2[$i]; + } + + print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; + printArray( "input_data1", \@values1 ); + printArray( "input_data2", \@values2 ); + printArray( "verify_data", \@multiply ); + +} + +main(); + diff --git a/benchmarks/multiply/multiply_main.c b/benchmarks/multiply/multiply_main.c new file mode 100644 index 0000000..efc6e8a --- /dev/null +++ b/benchmarks/multiply/multiply_main.c @@ -0,0 +1,39 @@ +// See LICENSE for license details. 
+ +// ************************************************************************* +// multiply filter benchmark +// ------------------------------------------------------------------------- +// +// This benchmark tests the software multiply implementation. The +// input data (and reference data) should be generated using the +// multiply_gendata.pl perl script and dumped to a file named +// dataset1.h + +#include "bench/utils.h" +#include "multiply.h" + +//-------------------------------------------------------------------------- +// Input/Reference Data + +#include "dataset1.h" + +//-------------------------------------------------------------------------- +// Main + +int main() { + int i; + int results_data[DATA_SIZE]; + +#if PREALLOCATE // optional extra pass, compiled in only when PREALLOCATE is nonzero + for (i = 0; i < DATA_SIZE; i++) { + results_data[i] = multiply(input_data1[i], input_data2[i]); + } +#endif + + for (i = 0; i < DATA_SIZE; i++) { + results_data[i] = multiply(input_data1[i], input_data2[i]); + } + + // Check the results + return verify(DATA_SIZE, results_data, verify_data); +} diff --git a/benchmarks/nopbench/CMakeLists.txt b/benchmarks/nopbench/CMakeLists.txt new file mode 100644 index 0000000..89de444 --- /dev/null +++ b/benchmarks/nopbench/CMakeLists.txt @@ -0,0 +1,4 @@ +set(PROT_NOP_AMOUNT 1000000) +configure_file(nopbench.s.in nopbench.s @ONLY) + +prot_add_bench(nopbench nopbench.s) diff --git a/benchmarks/nopbench/nopbench.s.in b/benchmarks/nopbench/nopbench.s.in new file mode 100644 index 0000000..f2e965d --- /dev/null +++ b/benchmarks/nopbench/nopbench.s.in @@ -0,0 +1,9 @@ +.global main +.section .text +main: + .rept @PROT_NOP_AMOUNT@ + nop + .endr + li a0, 0 + li a7, 93 + ecall diff --git a/benchmarks/qsort/CMakeLists.txt b/benchmarks/qsort/CMakeLists.txt new file mode 100644 index 0000000..307b298 --- /dev/null +++ b/benchmarks/qsort/CMakeLists.txt @@ -0,0 +1 @@ +prot_add_bench(qsort qsort.c) diff --git a/benchmarks/qsort/qsort.c b/benchmarks/qsort/qsort.c new file mode 100644 index 0000000..35b42ac --- 
/dev/null +++ b/benchmarks/qsort/qsort.c @@ -0,0 +1,154 @@ +// See LICENSE for license details. + +//************************************************************************** +// Quicksort benchmark +//-------------------------------------------------------------------------- +// +// This benchmark uses quicksort to sort an array of integers. The +// implementation is largely adapted from Numerical Recipes for C. The +// input data (and reference data) should be generated using the +// qsort_gendata.pl perl script and dumped to a file named +// dataset1.h. + +// The INSERTION_THRESHOLD is the size of the subarray when the +// algorithm switches to using an insertion sort instead of +// quick sort. + +#include "bench/utils.h" + +typedef unsigned size_t; + +#define INSERTION_THRESHOLD 10 + +// NSTACK is the required auxiliary storage. +// It must be at least 2*lg(DATA_SIZE) + +#define NSTACK 50 + +//-------------------------------------------------------------------------- +// Input/Reference Data + +#define type int +#include "dataset1.h" + +// Swap macro for swapping two values. 
+ +#define SWAP(a, b) \ + do { \ + typeof(a) temp = (a); \ + (a) = (b); \ + (b) = temp; \ + } while (0) +#define SWAP_IF_GREATER(a, b) \ + do { \ + if ((a) > (b)) \ + SWAP(a, b); \ + } while (0) + +//-------------------------------------------------------------------------- +// Quicksort function + +static void insertion_sort(size_t n, type arr[]) { + type *i, *j; + type value; + for (i = arr + 1; i < arr + n; i++) { + value = *i; + j = i; + while (value < *(j - 1)) { + *j = *(j - 1); + if (--j == arr) + break; + } + *j = value; + } +} + +// static void selection_sort(size_t n, type arr[]) { +// for (type *i = arr; i < arr + n - 1; i++) +// for (type *j = i + 1; j < arr + n; j++) +// SWAP_IF_GREATER(*i, *j); +// } + +void sort(size_t n, type arr[]) { + type *ir = arr + n; + type *l = arr + 1; + type *stack[NSTACK]; + type **stackp = stack; + + for (;;) { + // Insertion sort when subarray small enough. + if (ir - l < INSERTION_THRESHOLD) { + insertion_sort(ir - l + 1, l - 1); + + if (stackp == stack) + break; + + // Pop stack and begin a new round of partitioning. + ir = *stackp--; + l = *stackp--; + } else { + // Choose median of left, center, and right elements as + // partitioning element a. Also rearrange so that a[l-1] <= a[l] <= + // a[ir-]. + SWAP(arr[((l - arr) + (ir - arr)) / 2 - 1], l[0]); + SWAP_IF_GREATER(l[-1], ir[-1]); + SWAP_IF_GREATER(l[0], ir[-1]); + SWAP_IF_GREATER(l[-1], l[0]); + + // Initialize pointers for partitioning. + type *i = l + 1; + type *j = ir; + + // Partitioning element. + type a = l[0]; + + for (;;) { // Beginning of innermost loop. + while (*i++ < a) + ; // Scan up to find element > a. + while (*(j-- - 2) > a) + ; // Scan down to find element < a. + if (j < i) + break; // Pointers crossed. Partitioning complete. + SWAP(i[-1], j[-1]); // Exchange elements. + } // End of innermost loop. + + // Insert partitioning element. 
+ l[0] = j[-1]; + j[-1] = a; + stackp += 2; + + // Push pointers to larger subarray on stack, + // process smaller subarray immediately. + + if (ir - i + 1 >= j - l) { + stackp[0] = ir; + stackp[-1] = i; + ir = j - 1; + } else { + stackp[0] = j - 1; + stackp[-1] = l; + l = i; + } + } + } +} + +//-------------------------------------------------------------------------- +// Main + +int main() { +#if PREALLOCATE + // If needed we preallocate everything in the caches + sort(DATA_SIZE, verify_data); + if (verify(DATA_SIZE, input_data, input_data)) + return 1; +#endif + + // Do the sort + // setStats(1); + sort(DATA_SIZE, input_data); + // setStats(0); + + // Check the results + return verify(DATA_SIZE, input_data, verify_data); +} diff --git a/benchmarks/qsort/qsort_gendata.pl b/benchmarks/qsort/qsort_gendata.pl new file mode 100755 index 0000000..92fc8fa --- /dev/null +++ b/benchmarks/qsort/qsort_gendata.pl @@ -0,0 +1,132 @@ +#!/usr/bin/perl -w +#========================================================================== +# qsort_gendata.pl +# +# Author : Christopher Batten (cbatten@mit.edu) +# Date : April 29, 2005 +# +(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; +# +# Simple script which creates an input data set and the reference data +# for the qsort benchmark. 
+# +ENDMSG + +use strict "vars"; +use warnings; +no warnings("once"); +use Getopt::Long; + +#-------------------------------------------------------------------------- +# Command line processing +#-------------------------------------------------------------------------- + +our %opts; + +sub usage() +{ + + print "\n"; + print " Usage: qsort_gendata.pl [options] \n"; + print "\n"; + print " Options:\n"; + print " --help print this message\n"; + print " --size size of input data [250]\n"; + print " --seed random seed [1]\n"; + print "$usageMsg"; + + exit(); +} + +sub processCommandLine() +{ + + $opts{"help"} = 0; + $opts{"size"} = 250; + $opts{"seed"} = 1; + Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); + $opts{"help"} and usage(); + +} + +#-------------------------------------------------------------------------- +# Helper Functions +#-------------------------------------------------------------------------- + +sub printArray +{ + my $arrayName = $_[0]; + my $arrayRef = $_[1]; + + my $numCols = 20; + my $arrayLen = scalar(@{$arrayRef}); + + print "type ".$arrayName."[DATA_SIZE] = \n"; + print "{\n"; + + if ( $arrayLen <= $numCols ) { + print " "; + for ( my $i = 0; $i < $arrayLen; $i++ ) { + print sprintf("%3d",$arrayRef->[$i]); + if ( $i != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + else { + my $numRows = int($arrayLen/$numCols); + for ( my $j = 0; $j < $numRows; $j++ ) { + print " "; + for ( my $i = 0; $i < $numCols; $i++ ) { + my $index = $j*$numCols + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + if ( $arrayLen > ($numRows*$numCols) ) { + print " "; + for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { + my $index = $numCols*$numRows + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + } + + print "};\n\n"; +} + 
+#-------------------------------------------------------------------------- +# Main +#-------------------------------------------------------------------------- + +sub main() +{ + + processCommandLine(); + srand($opts{"seed"}); + + my @values; + for ( my $i = 0; $i < $opts{"size"}; $i++ ) { + push( @values, int(rand((1<<31)-1)) ); + } + my @sorted = sort { $a <=> $b } @values; + + print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; + printArray( "input_data", \@values ); + printArray( "verify_data", \@sorted ); + +} + +main(); + diff --git a/benchmarks/rsort/CMakeLists.txt b/benchmarks/rsort/CMakeLists.txt new file mode 100644 index 0000000..1872dc5 --- /dev/null +++ b/benchmarks/rsort/CMakeLists.txt @@ -0,0 +1 @@ +prot_add_bench(rsort rsort.c) diff --git a/benchmarks/rsort/qsort_gendata.pl b/benchmarks/rsort/qsort_gendata.pl new file mode 100755 index 0000000..92fc8fa --- /dev/null +++ b/benchmarks/rsort/qsort_gendata.pl @@ -0,0 +1,132 @@ +#!/usr/bin/perl -w +#========================================================================== +# qsort_gendata.pl +# +# Author : Christopher Batten (cbatten@mit.edu) +# Date : April 29, 2005 +# +(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; +# +# Simple script which creates an input data set and the reference data +# for the qsort benchmark. 
+# +ENDMSG + +use strict "vars"; +use warnings; +no warnings("once"); +use Getopt::Long; + +#-------------------------------------------------------------------------- +# Command line processing +#-------------------------------------------------------------------------- + +our %opts; + +sub usage() +{ + + print "\n"; + print " Usage: qsort_gendata.pl [options] \n"; + print "\n"; + print " Options:\n"; + print " --help print this message\n"; + print " --size size of input data [250]\n"; + print " --seed random seed [1]\n"; + print "$usageMsg"; + + exit(); +} + +sub processCommandLine() +{ + + $opts{"help"} = 0; + $opts{"size"} = 250; + $opts{"seed"} = 1; + Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); + $opts{"help"} and usage(); + +} + +#-------------------------------------------------------------------------- +# Helper Functions +#-------------------------------------------------------------------------- + +sub printArray +{ + my $arrayName = $_[0]; + my $arrayRef = $_[1]; + + my $numCols = 20; + my $arrayLen = scalar(@{$arrayRef}); + + print "type ".$arrayName."[DATA_SIZE] = \n"; + print "{\n"; + + if ( $arrayLen <= $numCols ) { + print " "; + for ( my $i = 0; $i < $arrayLen; $i++ ) { + print sprintf("%3d",$arrayRef->[$i]); + if ( $i != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + else { + my $numRows = int($arrayLen/$numCols); + for ( my $j = 0; $j < $numRows; $j++ ) { + print " "; + for ( my $i = 0; $i < $numCols; $i++ ) { + my $index = $j*$numCols + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + if ( $arrayLen > ($numRows*$numCols) ) { + print " "; + for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { + my $index = $numCols*$numRows + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + } + + print "};\n\n"; +} + 
+#-------------------------------------------------------------------------- +# Main +#-------------------------------------------------------------------------- + +sub main() +{ + + processCommandLine(); + srand($opts{"seed"}); + + my @values; + for ( my $i = 0; $i < $opts{"size"}; $i++ ) { + push( @values, int(rand((1<<31)-1)) ); + } + my @sorted = sort { $a <=> $b } @values; + + print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; + printArray( "input_data", \@values ); + printArray( "verify_data", \@sorted ); + +} + +main(); + diff --git a/benchmarks/rsort/rsort.c b/benchmarks/rsort/rsort.c new file mode 100644 index 0000000..45f7ea9 --- /dev/null +++ b/benchmarks/rsort/rsort.c @@ -0,0 +1,117 @@ +// See LICENSE for license details. + +//************************************************************************** +// Radix Sort benchmark +//-------------------------------------------------------------------------- +// +// This benchmark uses radix sort to sort an array of integers. The +// implementation is largely adapted from Numerical Recipes for C. 
The +// input data (and reference data) should be generated using the +// qsort_gendata.pl perl script and dumped to a file named +// dataset1.h + +//-------------------------------------------------------------------------- +// Input/Reference Data + +#include "bench/utils.h" + +#define type unsigned int +#include "dataset1.h" + +#define LOG_BASE 8 +#define BASE (1 << LOG_BASE) + +#if 0 +#define fetch_add(ptr, inc) __sync_fetch_and_add(ptr, inc) +#else +#define fetch_add(ptr, inc) ((*(ptr) += (inc)) - (inc)) +#define fetch_add1(ptr, inc) (*(ptr) += (inc)) +#endif + +void memcpy_impl(void *dst, void const *src, unsigned n) { + for (unsigned i = 0; i < n; ++i) + ((char *)(dst))[i] = ((char *)(src))[i]; +} + +void sort(unsigned n, type *arrIn, type *scratchIn) { + unsigned log_exp = 0; + unsigned buckets[BASE]; + unsigned *bucket = buckets; + asm("" : "+r"(bucket)); + type *arr = arrIn, *scratch = scratchIn, *p; + unsigned *b; + + while (log_exp < 8 * sizeof(type)) { + for (b = bucket; b < bucket + BASE; b++) + *b = 0; + + for (p = arr; p < &arr[n - 3]; p += 4) { + type a0 = p[0]; + type a1 = p[1]; + type a2 = p[2]; + type a3 = p[3]; + fetch_add1(&bucket[(a0 >> log_exp) % BASE], 1); + fetch_add1(&bucket[(a1 >> log_exp) % BASE], 1); + fetch_add1(&bucket[(a2 >> log_exp) % BASE], 1); + fetch_add1(&bucket[(a3 >> log_exp) % BASE], 1); + } + for (; p < &arr[n]; p++) + bucket[(*p >> log_exp) % BASE]++; + + unsigned prev = bucket[0]; + prev += fetch_add(&bucket[1], prev); + for (b = &bucket[2]; b < bucket + BASE; b += 2) { + prev += fetch_add(&b[0], prev); + prev += fetch_add(&b[1], prev); + } + + for (p = &arr[n - 1]; p >= &arr[3]; p -= 4) { + type a0 = p[-0]; + type a1 = p[-1]; + type a2 = p[-2]; + type a3 = p[-3]; + unsigned *pb0 = &bucket[(a0 >> log_exp) % BASE]; + unsigned *pb1 = &bucket[(a1 >> log_exp) % BASE]; + unsigned *pb2 = &bucket[(a2 >> log_exp) % BASE]; + unsigned *pb3 = &bucket[(a3 >> log_exp) % BASE]; + type *s0 = scratch + fetch_add(pb0, -1); + type *s1 = 
scratch + fetch_add(pb1, -1); + type *s2 = scratch + fetch_add(pb2, -1); + type *s3 = scratch + fetch_add(pb3, -1); + s0[-1] = a0; + s1[-1] = a1; + s2[-1] = a2; + s3[-1] = a3; + } + for (; p >= &arr[0]; p--) + scratch[--bucket[(*p >> log_exp) % BASE]] = *p; + + type *tmp = arr; + arr = scratch; + scratch = tmp; + + log_exp += LOG_BASE; + } + if (arr != arrIn) + memcpy_impl(scratch, arr, n * sizeof(type)); // sorted data is in arr; copy it back into arrIn +} + +//-------------------------------------------------------------------------- +// Main + +int main() { + static type scratch[DATA_SIZE]; + +#if PREALLOCATE + // If needed we preallocate everything in the caches + sort(DATA_SIZE, verify_data, scratch); + if (verify(DATA_SIZE, input_data, input_data)) + return 1; +#endif + + // Do the sort + sort(DATA_SIZE, input_data, scratch); + + // Check the results + return verify(DATA_SIZE, (const int *)input_data, (const int *)verify_data); +} diff --git a/benchmarks/towers/CMakeLists.txt b/benchmarks/towers/CMakeLists.txt new file mode 100644 index 0000000..96aac4e --- /dev/null +++ b/benchmarks/towers/CMakeLists.txt @@ -0,0 +1 @@ +prot_add_bench(towers towers_main.c) diff --git a/benchmarks/towers/towers_main.c b/benchmarks/towers/towers_main.c new file mode 100644 index 0000000..7cef530 --- /dev/null +++ b/benchmarks/towers/towers_main.c @@ -0,0 +1,207 @@ +// See LICENSE for license details. + +//************************************************************************** +// Towers of Hanoi benchmark +//-------------------------------------------------------------------------- +// +// Towers of Hanoi is a classic puzzle problem. The game consists of +// three pegs and a set of discs. Each disc is a different size, and +// initially all of the discs are on the left most peg with the smallest +// disc on top and the largest disc on the bottom. The goal is to move all +// of the discs onto the right most peg. 
The catch is that you are only +// allowed to move one disc at a time and you can never place a larger +// disc on top of a smaller disc. +// +// This implementation starts with NUM_DISCS discs and uses a recursive +// algorithm to solve the puzzle. + +// This is the number of discs in the puzzle. + +#define NUM_DISCS 20 + +//-------------------------------------------------------------------------- +// List data structure and functions + +struct Node { + int val; + struct Node *next; +}; + +struct List { + int size; + struct Node *head; +}; + +struct List g_nodeFreeList; +struct Node g_nodePool[NUM_DISCS]; + +int list_getSize(struct List *list) { return list->size; } + +void list_init(struct List *list) { + list->size = 0; + list->head = 0; +} + +void list_push(struct List *list, int val) { + struct Node *newNode; + + // Pop the next free node off the free list + newNode = g_nodeFreeList.head; + g_nodeFreeList.head = g_nodeFreeList.head->next; + + // Push the new node onto the given list + newNode->next = list->head; + list->head = newNode; + + // Assign the value + list->head->val = val; + + // Increment size + list->size++; +} + +int list_pop(struct List *list) { + struct Node *freedNode; + int val; + + // Get the value from the head of the given list + val = list->head->val; + + // Pop the head node off the given list + freedNode = list->head; + list->head = list->head->next; + + // Push the freed node onto the free list + freedNode->next = g_nodeFreeList.head; + g_nodeFreeList.head = freedNode; + + // Decrement size + list->size--; + + return val; +} + +void list_clear(struct List *list) { + while (list_getSize(list) > 0) + list_pop(list); +} + +//-------------------------------------------------------------------------- +// Tower data structure and functions + +struct Towers { + int numDiscs; + int numMoves; + struct List pegA; + struct List pegB; + struct List pegC; +}; + +void towers_init(struct Towers *this, int n) { + int i; + + this->numDiscs = n; + 
this->numMoves = 0; + + list_init(&(this->pegA)); + list_init(&(this->pegB)); + list_init(&(this->pegC)); + + for (i = 0; i < n; i++) + list_push(&(this->pegA), n - i); +} + +void towers_clear(struct Towers *this) { + + list_clear(&(this->pegA)); + list_clear(&(this->pegB)); + list_clear(&(this->pegC)); + + towers_init(this, this->numDiscs); +} + +void towers_solve_h(struct Towers *this, int n, struct List *startPeg, + struct List *tempPeg, struct List *destPeg) { + int val; + + if (n == 1) { + val = list_pop(startPeg); + list_push(destPeg, val); + this->numMoves++; + } else { + towers_solve_h(this, n - 1, startPeg, destPeg, tempPeg); + towers_solve_h(this, 1, startPeg, tempPeg, destPeg); + towers_solve_h(this, n - 1, tempPeg, startPeg, destPeg); + } +} + +void towers_solve(struct Towers *this) { + towers_solve_h(this, this->numDiscs, &(this->pegA), &(this->pegB), + &(this->pegC)); +} + +int towers_verify(struct Towers *this) { + struct Node *ptr; + int numDiscs = 0; + + if (list_getSize(&this->pegA) != 0) { + return 2; + } + + if (list_getSize(&this->pegB) != 0) { + return 3; + } + + if (list_getSize(&this->pegC) != this->numDiscs) { + return 4; + } + + for (ptr = this->pegC.head; ptr != 0; ptr = ptr->next) { + numDiscs++; + if (ptr->val != numDiscs) { + return 5; + } + } + + if (this->numMoves != ((1 << this->numDiscs) - 1)) { + return 6; + } + + return 0; +} + +//-------------------------------------------------------------------------- +// Main + +int main() { + struct Towers towers; + int i; + + // Initialize free list + + list_init(&g_nodeFreeList); + g_nodeFreeList.head = &(g_nodePool[0]); + g_nodeFreeList.size = NUM_DISCS; + g_nodePool[NUM_DISCS - 1].next = 0; + g_nodePool[NUM_DISCS - 1].val = 99; + for (i = 0; i < (NUM_DISCS - 1); i++) { + g_nodePool[i].next = &(g_nodePool[i + 1]); + g_nodePool[i].val = i; + } + + towers_init(&towers, NUM_DISCS); + + // If needed we preallocate everything in the caches + +#if PREALLOCATE + towers_solve(&towers); +#endif + 
+ // Solve it + + towers_clear(&towers); + towers_solve(&towers); + + // Check the results + return towers_verify(&towers); +} diff --git a/benchmarks/vvadd/CMakeLists.txt b/benchmarks/vvadd/CMakeLists.txt new file mode 100644 index 0000000..67bc747 --- /dev/null +++ b/benchmarks/vvadd/CMakeLists.txt @@ -0,0 +1 @@ +prot_add_bench(vvadd vvadd_main.c) diff --git a/benchmarks/vvadd/vvadd_gendata.pl b/benchmarks/vvadd/vvadd_gendata.pl new file mode 100755 index 0000000..f23cdf4 --- /dev/null +++ b/benchmarks/vvadd/vvadd_gendata.pl @@ -0,0 +1,139 @@ +#!/usr/bin/perl -w +#========================================================================== +# vvadd_gendata.pl +# +# Author : Christopher Batten (cbatten@mit.edu) +# Date : April 29, 2005 +# +(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; +# +# Simple script which creates an input data set and the reference data +# for the vvadd benchmark. +# +ENDMSG + +use strict "vars"; +use warnings; +no warnings("once"); +use Getopt::Long; + +#-------------------------------------------------------------------------- +# Command line processing +#-------------------------------------------------------------------------- + +our %opts; + +sub usage() +{ + + print "\n"; + print " Usage: vvadd_gendata.pl [options] \n"; + print "\n"; + print " Options:\n"; + print " --help print this message\n"; + print " --size size of input data [1000]\n"; + print " --seed random seed [1]\n"; + print "$usageMsg"; + + exit(); +} + +sub processCommandLine() +{ + + $opts{"help"} = 0; + $opts{"size"} = 1000; + $opts{"seed"} = 1; + Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); + $opts{"help"} and usage(); + +} + +#-------------------------------------------------------------------------- +# Helper Functions +#-------------------------------------------------------------------------- + +sub printArray +{ + my $arrayName = $_[0]; + my $arrayRef = $_[1]; + + my $numCols = 20; + my $arrayLen = scalar(@{$arrayRef}); + + print "int 
".$arrayName."[DATA_SIZE] = \n"; + print "{\n"; + + if ( $arrayLen <= $numCols ) { + print " "; + for ( my $i = 0; $i < $arrayLen; $i++ ) { + print sprintf("%3d",$arrayRef->[$i]); + if ( $i != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + else { + my $numRows = int($arrayLen/$numCols); + for ( my $j = 0; $j < $numRows; $j++ ) { + print " "; + for ( my $i = 0; $i < $numCols; $i++ ) { + my $index = $j*$numCols + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + if ( $arrayLen > ($numRows*$numCols) ) { + print " "; + for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { + my $index = $numCols*$numRows + $i; + print sprintf("%3d",$arrayRef->[$index]); + if ( $index != $arrayLen-1 ) { + print ", "; + } + } + print "\n"; + } + + } + + print "};\n\n"; +} + +#-------------------------------------------------------------------------- +# Main +#-------------------------------------------------------------------------- + +sub main() +{ + + processCommandLine(); + srand($opts{"seed"}); + + my @values1; + my @values2; + my @sum; + for ( my $i = 0; $i < $opts{"size"}; $i++ ) { + my $value1 = int(rand(999)); + my $value2 = int(rand(999)); + push( @values1, $value1 ); + push( @values2, $value2 ); + push( @sum, $value1 + $value2 ); + } + + + print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; + printArray( "input1_data", \@values1 ); + printArray( "input2_data", \@values2 ); + printArray( "verify_data", \@sum ); + +} + +main(); + diff --git a/benchmarks/vvadd/vvadd_main.c b/benchmarks/vvadd/vvadd_main.c new file mode 100644 index 0000000..87dc241 --- /dev/null +++ b/benchmarks/vvadd/vvadd_main.c @@ -0,0 +1,44 @@ +// See LICENSE for license details. 
+ +//************************************************************************** +// Vector-vector add benchmark +//-------------------------------------------------------------------------- +// +// This benchmark adds two vectors and writes the results to a +// third vector. The input data (and reference data) should be +// generated using the vvadd_gendata.pl perl script and dumped +// to a file named dataset1.h. + +#include "bench/utils.h" + +//-------------------------------------------------------------------------- +// Input/Reference Data + +#include "dataset1.h" + +//-------------------------------------------------------------------------- +// vvadd function + +void vvadd(int n, int a[], int b[], int c[]) { + int i; + for (i = 0; i < n; i++) + c[i] = a[i] + b[i]; +} + +//-------------------------------------------------------------------------- +// Main + +int main() { + int results_data[DATA_SIZE]; + +#if PREALLOCATE + // If needed we preallocate everything in the caches + vvadd(DATA_SIZE, input1_data, input2_data, results_data); +#endif + + // Do the vvadd + vvadd(DATA_SIZE, input1_data, input2_data, results_data); + + // Check the results + return verify(DATA_SIZE, results_data, verify_data); +} diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 55dca5e..a7853a1 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -38,7 +38,8 @@ CPMAddPackage( VERSION 7.30 EXCLUDE_FROM_ALL True SYSTEM True - DOWNLOAD_ONLY) + DOWNLOAD_ONLY True +) # llvm find_package(LLVM 18.1.3 CONFIG REQUIRED) @@ -68,4 +69,5 @@ CPMAddPackage( EXCLUDE_FROM_ALL True SYSTEM True OPTIONS "BUILD_TESTING OFF" - DOWNLOAD_ONLY True) + DOWNLOAD_ONLY True +) diff --git a/cmake/toolchain/riscv.cmake b/cmake/toolchain/riscv.cmake new file mode 100644 index 0000000..6a60de9 --- /dev/null +++ b/cmake/toolchain/riscv.cmake @@ -0,0 +1,65 @@ +# RISC-V Cross Compilation Toolchain File Usage: cmake +# -DCMAKE_TOOLCHAIN_FILE=path/to/this/file.cmake .. 
+ +# Base settings +set(CMAKE_SYSTEM_NAME Generic) +set(CMAKE_SYSTEM_PROCESSOR riscv) +set(CMAKE_LINKER_TYPE DEFAULT) + +# Toolchain prefix (modify this according to your toolchain) Common prefixes: +# riscv64-unknown-elf-, riscv64-unknown-linux-gnu-, riscv32-unknown-elf- +set(RISCV_TOOLCHAIN_PREFIX + "riscv32-unknown-elf-" + CACHE STRING "RISC-V toolchain prefix") + +# Target architecture (modify these according to your needs) +set(RISCV_ARCH + "rv32i" + CACHE STRING "RISC-V architecture") +set(RISCV_ABI + "ilp32" + CACHE STRING "RISC-V ABI") + +# Cross-compilation tools +if(DEFINED RISCV_TOOLCHAIN_DIR) + set(CMAKE_C_COMPILER "${RISCV_TOOLCHAIN_DIR}/${RISCV_TOOLCHAIN_PREFIX}gcc") + set(CMAKE_CXX_COMPILER "${RISCV_TOOLCHAIN_DIR}/${RISCV_TOOLCHAIN_PREFIX}g++") + set(CMAKE_ASM_COMPILER "${RISCV_TOOLCHAIN_DIR}/${RISCV_TOOLCHAIN_PREFIX}gcc") + set(CMAKE_AR "${RISCV_TOOLCHAIN_DIR}/${RISCV_TOOLCHAIN_PREFIX}ar") + set(CMAKE_RANLIB "${RISCV_TOOLCHAIN_DIR}/${RISCV_TOOLCHAIN_PREFIX}ranlib") +else() + set(CMAKE_C_COMPILER "${RISCV_TOOLCHAIN_PREFIX}gcc") + set(CMAKE_CXX_COMPILER "${RISCV_TOOLCHAIN_PREFIX}g++") + set(CMAKE_ASM_COMPILER "${RISCV_TOOLCHAIN_PREFIX}gcc") + set(CMAKE_AR "${RISCV_TOOLCHAIN_PREFIX}ar") + set(CMAKE_RANLIB "${RISCV_TOOLCHAIN_PREFIX}ranlib") +endif() + +# Compiler flags Search settings +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +# Optional: Specify sysroot if needed set(CMAKE_SYSROOT +# "/path/to/riscv/sysroot") + +# Optional: Additional flags for specific use cases +set(COMMON_FLAGS + "-march=${RISCV_ARCH} -mabi=${RISCV_ABI} -Wall -Wextra -nostdlib -nodefaultlibs -nostartfiles -static -fno-builtin" +) +set(CMAKE_C_FLAGS + "${COMMON_FLAGS}" + CACHE STRING "C compiler flags") +set(CMAKE_ASM_FLAGS + "${COMMON_FLAGS}" + CACHE STRING "ASM compiler flags") + +set(CMAKE_EXE_LINKER_FLAGS_INIT + "-nostdlib -nodefaultlibs 
-nostartfiles -fno-builtin -static" + CACHE STRING "Linker flags") + +# Test compiler +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.6) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +endif() diff --git a/src/hart/include/prot/hart.hh b/src/hart/include/prot/hart.hh index 929421d..68b4f9c 100644 --- a/src/hart/include/prot/hart.hh +++ b/src/hart/include/prot/hart.hh @@ -29,6 +29,8 @@ public: auto getExitCode() { return m_cpu->getExitCode(); } + auto getIcount() const { return m_cpu->icount; } + private: std::unique_ptr m_mem; std::unique_ptr m_cpu; diff --git a/src/jit/lightning/CMakeLists.txt b/src/jit/lightning/CMakeLists.txt index 8317738..a5183a0 100644 --- a/src/jit/lightning/CMakeLists.txt +++ b/src/jit/lightning/CMakeLists.txt @@ -22,9 +22,9 @@ ExternalProject_Add( # Build & install BUILD_COMMAND ${MAKE_EXECUTABLE} -j INSTALL_COMMAND ${MAKE_EXECUTABLE} install - LOG_CONFIGURE True - LOG_BUILD True - LOG_MERGED_STDOUTERR True + LOG_CONFIGURE False + LOG_BUILD False + LOG_MERGED_STDOUTERR False LOG_OUTPUT_ON_FAILURE True BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS ${LIGHTNING_LIBRARY}) diff --git a/src/jit/llvm/llvmbasedjit.cc b/src/jit/llvm/llvmbasedjit.cc index 15bd470..fd51869 100644 --- a/src/jit/llvm/llvmbasedjit.cc +++ b/src/jit/llvm/llvmbasedjit.cc @@ -98,6 +98,7 @@ void doSyscall(CPUState &state) { state.emulateSysCall(); } class LLVMBasedJIT : public JitEngine { std::unique_ptr m_jit; + std::unordered_map m_cache; using TBFunc = void (*)(CPUState &); @@ -107,9 +108,11 @@ class LLVMBasedJIT : public JitEngine { private: bool doJIT(CPUState &state) override { const auto pc = state.getPC(); - auto found = m_jit->lookup(std::to_string(pc)); - if (found) { - std::invoke(found->toPtr(), state); + llvm::orc::ExecutorAddr found; + auto it = m_cache.find(pc); + if (it != m_cache.end()) { + found = it->second; + std::invoke(found.toPtr(), state); const auto *bbInfo = getBBInfo(pc); state.icount += bbInfo->insns.size(); return true; @@ -128,9 +131,12 @@ class 
LLVMBasedJIT : public JitEngine { return false; } - found = m_jit->lookup(std::to_string(pc)); + bool hasFound; + std::tie(it, hasFound) = + m_cache.insert({pc, *m_jit->lookup(std::to_string(pc))}); + found = it->second; assert(found); - std::invoke(found->toPtr(), state); + std::invoke(found.toPtr(), state); state.icount += bbInfo->insns.size(); return true; } diff --git a/src/jit/mir/CMakeLists.txt b/src/jit/mir/CMakeLists.txt index 5e12a40..bc8a7bc 100644 --- a/src/jit/mir/CMakeLists.txt +++ b/src/jit/mir/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(prot_mir OBJECT ${mir_SOURCE_DIR}/mir.c add_library(prot_mir_static STATIC) target_link_libraries(prot_mir_static PRIVATE prot_mir) +target_compile_features(prot_mir_static PRIVATE c_std_11) add_library(prot_jit_mir STATIC mir.cc) diff --git a/tools/bench/Benchmark.py b/tools/bench/Benchmark.py new file mode 100644 index 0000000..de8781b --- /dev/null +++ b/tools/bench/Benchmark.py @@ -0,0 +1,98 @@ +import subprocess + +sim_exe_path = None + + +class BenchmarkData: + def __init__(self, time_s: float, instruction_count: int, threshold_num: int): + self.time_s = time_s + self.instruction_count = instruction_count + self.threshold_num = threshold_num + + def mips(self) -> float: + return self.instruction_count / (1000000 * self.time_s) + + def time(self) -> float: + return self.time_s + + def icount(self) -> int: + return self.instruction_count + + def threshold(self) -> int: + return self.threshold_num + + +class BenchmarkRequest: + def __init__(self, elf_path: str, threshold_num: int = None, jit_name: str = 'interp'): + self.threshold_num = threshold_num + self.jit_name = jit_name + self.elf_path = elf_path + + +class BenchmarkExitResult: + def __init__(self, exit_code: int, stdout: str, stderr: str): + self.exit_code = exit_code + self.stdout = stdout.rstrip() + self.stderr = stderr.rstrip() + + def benchmark_data(self) -> BenchmarkData: + self.assert_success() + return self.parse_stdout() + + def assert_success(self) 
-> None: + assert self.exit_code == 0, self.stderr + assert self.stderr == '' + + def assert_error(self) -> None: + assert self.exit_code != 0, self.stdout + assert self.stdout == '' + + def parse_stdout(self) -> BenchmarkData: + result = {} + for line in self.stdout.splitlines(): + splitted = line.split(':', 1) + if (len(splitted) == 1): + continue + key = splitted[0].strip() + value = splitted[1].strip().rstrip('s') + result[key] = value + return BenchmarkData( + float(result["time"]), + int(result["icount"]), + int(result["threshold"]) + ) + + +class Benchmark: + def __init__(self, request: BenchmarkRequest): + self.request = request + pass + + def run(self): + command = [str(sim_exe_path)] + if (self.request.threshold_num != None): + # command.extend(['--thsreshold', str(self.request.threshold_num)]) + command.extend(['--jit', str(self.request.jit_name)]) + command.extend([str(self.request.elf_path)]) + self._process = subprocess.Popen( + command, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + ) + + assert self._process.stdin is not None, "Failed to open STDIN" + assert self._process.stdout is not None, "Failed to open STDOUT" + assert self._process.stderr is not None, "Failed to open STDERR" + + def exit(self, timeout=100) -> BenchmarkExitResult: + self._process.stdin.close() + return self.wait_for_exit(timeout) + + def wait_for_exit(self, timeout=10) -> BenchmarkExitResult: + exit_code = self._process.wait(timeout) + stdout = self._process.stdout.read() + stderr = self._process.stderr.read() + return BenchmarkExitResult(exit_code, stdout, stderr) diff --git a/tools/bench/DoBenchmark.py b/tools/bench/DoBenchmark.py new file mode 100644 index 0000000..056bf06 --- /dev/null +++ b/tools/bench/DoBenchmark.py @@ -0,0 +1,135 @@ +import statistics +import seaborn as sns +import matplotlib.pyplot as plt +import pandas as pd +import sys +from itertools import repeat +from tqdm.contrib.concurrent import 
thread_map +import numpy as np +import argparse +from pathlib import Path +import Benchmark + + +class BenchmarkList: + def __init__(self, data: list[Benchmark.BenchmarkData], name: str): + self.data = data + self.name = name + + def name(self): + return self.name + + def data(self): + return self.data + + def mips(self): + return statistics.geometric_mean([x.mips() for x in self.data]) + + +class HistData: + def __init__(self, bench: str, mips: float, backend: str): + self.bench = bench + self.mips = mips + self.backend = backend + + +def build_hist(list_data: list[list[BenchmarkList]], bench_names: list[str]): + num_groups = len(list_data) + num_bars_per_group = len(list_data[0]) if num_groups > 0 else 0 + + data = {"Benchmark": [], "MIPS": [], "Backend": []} + + for j in range(num_bars_per_group): + for i in range(num_groups): + data["Benchmark"].append(bench_names[i]) + data["MIPS"].append(np.mean(list_data[i][j].mips())) + data["Backend"].append(list_data[i][j].name) + + df = pd.DataFrame(data) + print(df) + filtered_df = df[~df["Backend"].isin(["interp", "cached-interp"])]  # JIT backends only + avg_df = ( + filtered_df.groupby("Benchmark") + .agg( + MIPS=("MIPS", "mean"), + ) + .reset_index() + .assign(Backend="averaged-jit") + ) + + final_df = pd.concat([df, avg_df], ignore_index=True) + df = final_df + + sns.set(style="whitegrid") + + plt.figure(figsize=(12, 8)) + bar_plot = sns.barplot( + x="Benchmark", + y="MIPS", + hue="Backend", + data=df, + palette=sns.color_palette(), + ) + plt.ylabel("MIPS") + plt.legend( + title="Backends", + loc="upper right", + fontsize="small", + title_fontsize="medium", + ) + sns.move_legend(bar_plot, "upper left", bbox_to_anchor=(1, 1)) + plt.tight_layout() + plt.savefig("hist.png") + plt.close() + + +def run_benchmark(request: Benchmark.BenchmarkRequest, times: int): + bench = Benchmark.Benchmark(request) + results = [] + for _ in range(times): + bench.run() + exit_result = bench.exit() + exit_result.assert_success() + 
results.append(exit_result.benchmark_data()) + return BenchmarkList(results, request.jit_name) + + +def run_elf(elf, th_num: int, times: int): + requests = [Benchmark.BenchmarkRequest(elf)] + jits = ["cached-interp", "llvm", "xbyak", "asmjit", "lightning", "mir"] + for jit in jits: + requests.append(Benchmark.BenchmarkRequest(elf, th_num, jit)) + data = [] + print("Running: " + benchmark_name(elf)) + + for req in thread_map(run_benchmark, requests, repeat(times)): + data.append(req) + return data, benchmark_name(elf) + + +def run_benchmarks(elf_paths: list[str], threshold_num: int, times: int): + list_data = [] + bench_names = [] + for data, name in map( + run_elf, elf_paths, repeat(threshold_num), repeat(times) + ): + list_data.append(data) + bench_names.append(name) + build_hist(list_data, bench_names) + + +def benchmark_name(elf_path: Path): + return elf_path.name + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Tool for benchmarking") + parser.add_argument("sim_bin", type=Path, help="Path to sim binary") + parser.add_argument("elf", type=Path, nargs="+", help="Path(s) to benchmarks") + parser.add_argument("-t", "--times", type=int, default=2, help="Amount of times to run each bench") + parser.add_argument("--thres", type=int, default=0, help="JIT threshold") + + args = parser.parse_args() + + Benchmark.sim_exe_path = args.sim_bin + run_benchmarks(args.elf, args.thres, args.times) diff --git a/tools/bench/requirements.txt b/tools/bench/requirements.txt new file mode 100644 index 0000000..79032e0 --- /dev/null +++ b/tools/bench/requirements.txt @@ -0,0 +1,4 @@ +matplotlib +tqdm +seaborn +pandas diff --git a/tools/sim/sim_app.cpp b/tools/sim/sim_app.cpp index 02d2b4d..e1cad6b 100644 --- a/tools/sim/sim_app.cpp +++ b/tools/sim/sim_app.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -49,10 +50,15 @@ int main(int argc, const char *argv[]) try { return hart; }(); - hart.dump(std::cout); + auto start = 
std::chrono::high_resolution_clock::now(); hart.run(); + auto end = std::chrono::high_resolution_clock::now(); hart.dump(std::cout); - fmt::println("Finish execution"); + std::chrono::duration duration = end - start; + fmt::println("icount: {}", hart.getIcount()); + fmt::println("time: {}s", duration.count()); + fmt::println("threshold: {}", 0); + fmt::println("mips: {}", hart.getIcount() / (duration.count() * 1000000)); return hart.getExitCode(); } catch (const std::exception &ex) { fmt::println(std::cerr, "Caught an exception of type {}, message: {}", From 9ef558271f395002438e84652179883902533e52 Mon Sep 17 00:00:00 2001 From: Andrey Derzhavin Date: Fri, 31 Oct 2025 16:52:32 +0300 Subject: [PATCH 2/6] Optimize mem --- src/jit/base/base.cc | 44 ++++++++++++----------- src/memory/include/prot/memory.hh | 38 ++++++++++++++++++++ src/memory/plain_memory.cc | 59 +++++++++++++++++++++++++++---- tools/sim/sim_app.cpp | 2 +- 4 files changed, 115 insertions(+), 28 deletions(-) diff --git a/src/jit/base/base.cc b/src/jit/base/base.cc index e72f037..2a81e54 100644 --- a/src/jit/base/base.cc +++ b/src/jit/base/base.cc @@ -10,33 +10,35 @@ extern "C" { namespace prot::engine { void JitEngine::step(CPUState &cpu) { - if (doJIT(cpu)) { - return; - } + while (!cpu.finished) { + if (doJIT(cpu)) { + continue; + } - // colllect bb - auto [bbIt, wasNew] = m_cacheBB.try_emplace(cpu.getPC()); - if (wasNew) { - auto curAddr = bbIt->first; - auto &bb = bbIt->second; + // colllect bb + auto [bbIt, wasNew] = m_cacheBB.try_emplace(cpu.getPC()); + if (wasNew) { + auto curAddr = bbIt->first; + auto &bb = bbIt->second; - while (true) { - auto bytes = cpu.memory->read(curAddr); - auto inst = isa::Instruction::decode(bytes); - if (!inst.has_value()) { - throw std::runtime_error{ - fmt::format("Cannot decode bytes: {:#x}", bytes)}; - } + while (true) { + auto bytes = cpu.memory->read(curAddr); + auto inst = isa::Instruction::decode(bytes); + if (!inst.has_value()) { + throw std::runtime_error{ + 
fmt::format("Cannot decode bytes: {:#x}", bytes)}; + } - bb.insns.push_back(*inst); - if (isa::isTerminator(inst->opcode())) { - break; + bb.insns.push_back(*inst); + if (isa::isTerminator(inst->opcode())) { + break; + } + curAddr += isa::kWordSize; } - curAddr += isa::kWordSize; } - } - interpret(cpu, bbIt->second); + interpret(cpu, bbIt->second); + } } void JitEngine::interpret(CPUState &cpu, BBInfo &info) { for (const auto &insn : info.insns) { diff --git a/src/memory/include/prot/memory.hh b/src/memory/include/prot/memory.hh index 06fe4e5..974b9d1 100644 --- a/src/memory/include/prot/memory.hh +++ b/src/memory/include/prot/memory.hh @@ -28,15 +28,53 @@ struct Memory { template [[nodiscard]] T read(isa::Addr addr) const { + if constexpr (std::same_as) { + return read8(addr); + } + if constexpr (std::same_as) { + return read16(addr); + } + if constexpr (std::same_as) { + return read32(addr); + } + std::array buf; readBlock(addr, buf); return std::bit_cast(buf); } template void write(isa::Addr addr, T val) { + if constexpr (std::same_as) { + return write8(addr, val); + } + if constexpr (std::same_as) { + return write16(addr, val); + } + if constexpr (std::same_as) { + return write32(addr, val); + } const auto &buf = std::bit_cast>(val); writeBlock(buf, addr); } + + virtual std::uint8_t read8(isa::Addr addr) const { + return read(addr); + } + virtual std::uint16_t read16(isa::Addr addr) const { + return read(addr); + } + virtual std::uint32_t read32(isa::Addr addr) const { + return read(addr); + } + virtual void write8(isa::Addr addr, std::uint8_t val) { + write(addr, val); + } + virtual void write16(isa::Addr addr, std::uint16_t val) { + write(addr, val); + } + virtual void write32(isa::Addr addr, std::uint32_t val) { + write(addr, val); + } }; namespace memory { diff --git a/src/memory/plain_memory.cc b/src/memory/plain_memory.cc index 1f75c11..d69f7b6 100644 --- a/src/memory/plain_memory.cc +++ b/src/memory/plain_memory.cc @@ -7,26 +7,72 @@ #include #include 
+extern "C" { +#include +} + namespace prot::memory { namespace { class PlainMemory : public Memory { + struct Unmap { + std::size_t m_size = 0; + + public: + explicit Unmap(std::size_t size) noexcept : m_size(size) {} + + void operator()(void *ptr) const noexcept { ::munmap(ptr, m_size); } + }; + public: explicit PlainMemory(std::size_t size, isa::Addr start) - : m_data(size), m_start(start) { + : m_storage( + [size] { + auto *ptr = + ::mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); + if (ptr == MAP_FAILED) { + throw std::runtime_error{ + fmt::format("Failed to allocate {} bytes for code", size)}; + } + + return static_cast(ptr); + }(), + Unmap{size}), + m_data(m_storage.get(), size), m_start(start) { if (m_data.size() + m_start < m_start) { throw std::invalid_argument{ fmt::format("Size {} or start addr {:#x} is too high", size, start)}; } } + std::uint8_t read8(isa::Addr addr) const override { + return *reinterpret_cast(translateAddr(addr)); + } + std::uint16_t read16(isa::Addr addr) const override { + return *reinterpret_cast(translateAddr(addr)); + } + std::uint32_t read32(isa::Addr addr) const override { + return *reinterpret_cast(translateAddr(addr)); + } + + void write8(isa::Addr addr, std::uint8_t val) override { + *reinterpret_cast(translateAddr(addr)) = val; + } + void write16(isa::Addr addr, std::uint16_t val) override { + *reinterpret_cast(translateAddr(addr)) = val; + } + void write32(isa::Addr addr, std::uint32_t val) override { + *reinterpret_cast(translateAddr(addr)) = val; + } + void writeBlock(std::span src, isa::Addr addr) override { - checkRange(addr, src.size()); - std::ranges::copy(src, translateAddr(addr)); + // checkRange(addr, src.size()); + std::memcpy(translateAddr(addr), src.data(), src.size()); } void readBlock(isa::Addr addr, std::span dest) const override { - checkRange(addr, dest.size()); - std::ranges::copy_n(translateAddr(addr), dest.size(), dest.begin()); + // checkRange(addr, 
dest.size()); + std::memcpy(dest.data(), translateAddr(addr), dest.size()); } private: @@ -57,7 +103,8 @@ class PlainMemory : public Memory { } } - std::vector m_data; + std::unique_ptr m_storage; + std::span m_data; isa::Addr m_start{}; }; } // namespace diff --git a/tools/sim/sim_app.cpp b/tools/sim/sim_app.cpp index e1cad6b..2507361 100644 --- a/tools/sim/sim_app.cpp +++ b/tools/sim/sim_app.cpp @@ -43,7 +43,7 @@ int main(int argc, const char *argv[]) try { !jitBackend.empty() ? prot::engine::JitFactory::createEngine(jitBackend) : std::make_unique(); - prot::Hart hart{prot::memory::makePaged(12), std::move(engine)}; + prot::Hart hart{prot::memory::makePlain(4ULL << 30U), std::move(engine)}; hart.load(loader); hart.setSP(stackTop); From 8a70912671301677d24414c4fa0a08cf67ac14c6 Mon Sep 17 00:00:00 2001 From: Andrey Derzhavin Date: Fri, 31 Oct 2025 20:36:00 +0300 Subject: [PATCH 3/6] Optimize JIT translation --- src/jit/asmjit/asmjit.cc | 27 +---------- src/jit/base/base.cc | 20 ++++++--- src/jit/base/include/prot/jit/base.hh | 43 +++++++++++++++--- src/jit/lightning/lightning.cc | 33 ++------------ src/jit/llvm/llvmbasedjit.cc | 64 +++++++++++---------------- src/jit/mir/mir.cc | 30 +------------ src/jit/xbyak/xbyak.cc | 33 +++----------- 7 files changed, 91 insertions(+), 159 deletions(-) diff --git a/src/jit/asmjit/asmjit.cc b/src/jit/asmjit/asmjit.cc index db3356d..a9739bd 100644 --- a/src/jit/asmjit/asmjit.cc +++ b/src/jit/asmjit/asmjit.cc @@ -119,34 +119,9 @@ class AsmJit : public JitEngine { AsmJit() = default; private: - bool doJIT(CPUState &state) override { - const auto pc = state.getPC(); - auto found = m_cacheTB.find(pc); - if (found != m_cacheTB.end()) { - found->second(state); - return true; - } - - const auto *bbInfo = getBBInfo(pc); - if (bbInfo == nullptr) { - // No such bb yet - return false; - } - - auto holder = translate(*bbInfo); - - auto [it, wasNew] = m_cacheTB.try_emplace(pc, std::move(holder)); - assert(wasNew); - - it->second(state); - - 
return true; - } - - [[nodiscard]] JitFunction translate(const BBInfo &info); + [[nodiscard]] JitFunction translate(const BBInfo &info) override; asmjit::JitRuntime runtime; - std::unordered_map m_cacheTB; }; template void storeHelper(CPUState &state, isa::Addr addr, T val) { diff --git a/src/jit/base/base.cc b/src/jit/base/base.cc index 2a81e54..2d82b20 100644 --- a/src/jit/base/base.cc +++ b/src/jit/base/base.cc @@ -10,14 +10,17 @@ extern "C" { namespace prot::engine { void JitEngine::step(CPUState &cpu) { - while (!cpu.finished) { - if (doJIT(cpu)) { + while (!cpu.finished) [[likely]] { + // colllect bb + const auto pc = cpu.getPC(); + auto found = m_tbCache.lookup(pc); + if (found != nullptr) [[likely]] { + found(cpu); continue; } - // colllect bb - auto [bbIt, wasNew] = m_cacheBB.try_emplace(cpu.getPC()); - if (wasNew) { + auto [bbIt, wasNew] = m_cacheBB.try_emplace(pc); + if (wasNew) [[unlikely]] { auto curAddr = bbIt->first; auto &bb = bbIt->second; @@ -35,6 +38,13 @@ void JitEngine::step(CPUState &cpu) { } curAddr += isa::kWordSize; } + } else if (bbIt->second.num_exec >= kExecThreshold) [[likely]] { + auto code = translate(bbIt->second); + m_tbCache.insert(pc, code); + if (code != nullptr) [[likely]] { + code(cpu); + continue; + } } interpret(cpu, bbIt->second); diff --git a/src/jit/base/include/prot/jit/base.hh b/src/jit/base/include/prot/jit/base.hh index 43e475f..460a6d9 100644 --- a/src/jit/base/include/prot/jit/base.hh +++ b/src/jit/base/include/prot/jit/base.hh @@ -4,17 +4,49 @@ #include "prot/interpreter.hh" #include +#include #include #include namespace prot::engine { +using JitFunction = void (*)(CPUState &); class JitEngine : public Interpreter { - static constexpr std::size_t kExecThreshold = 0; + static constexpr std::size_t kExecThreshold = 10; public: void step(CPUState &cpu) override; protected: + struct TbCache { + static constexpr std::uint64_t kInvalidAddr{0}; + static constexpr std::uint64_t kSizeLog2{22}; + static constexpr 
std::uint64_t kSize{1ULL << kSizeLog2}; + static constexpr std::uint64_t kGpaGranularityLog2{2}; + + struct Entry { + JitFunction func{}; + std::uint32_t gpa{}; + }; + + JitFunction lookup(std::uint32_t gpa) const { + const auto &entry = get(gpa); + return entry.gpa == gpa ? entry.func : nullptr; + } + void insert(std::uint32_t gpa, JitFunction func) { + get(gpa) = Entry{.func = func, .gpa = gpa}; + } + + private: + const Entry &get(std::uint32_t gpa) const { return m_cache[getHash(gpa)]; } + Entry &get(std::uint32_t gpa) { return m_cache[getHash(gpa)]; } + + [[nodiscard]] static constexpr std::uint32_t getHash(std::uint32_t gpa) { + return (gpa >> kGpaGranularityLog2) & (kSize - 1); + } + + std::array m_cache; + }; + // simple bb counting struct BBInfo final { std::vector insns; @@ -29,19 +61,20 @@ private: } private: - virtual bool doJIT(CPUState &state) = 0; + [[nodiscard]] virtual JitFunction translate(const BBInfo &info) = 0; + TbCache m_tbCache; std::unordered_map m_cacheBB; }; class CachedInterpreter final : public JitEngine { - bool doJIT([[maybe_unused]] CPUState &stat) override { return false; } + JitFunction translate(const BBInfo & /* unused */) override { + return nullptr; + } }; // Helper class to store JITed code // Especially helpful for libraries w/out propper mem pool support - -using JitFunction = void (*)(CPUState &); class CodeHolder final { struct Unmap { std::size_t m_size = 0; diff --git a/src/jit/lightning/lightning.cc b/src/jit/lightning/lightning.cc index 23c403c..b2b2ea0 100644 --- a/src/jit/lightning/lightning.cc +++ b/src/jit/lightning/lightning.cc @@ -18,35 +18,12 @@ namespace { struct Lightning : public JitEngine { Lightning() { init_jit("JIT Research"); } - bool doJIT(CPUState &state) override { - const auto pc = state.getPC(); - auto found = m_cacheTB.find(pc); - if (found != m_cacheTB.end()) { - found->second(state); - return true; - } - - const auto *bbInfo = getBBInfo(pc); - if (bbInfo == nullptr) { - // No such bb yet - return 
false; - } - - auto code = translate(*bbInfo); - auto [it, wasNew] = m_cacheTB.emplace(pc, std::move(code)); - assert(wasNew); - - it->second(state); - - return true; - } - - [[nodiscard]] CodeHolder translate(const BBInfo &info); + [[nodiscard]] JitFunction translate(const BBInfo &info) override; ~Lightning() override { finish_jit(); } private: - std::unordered_map m_cacheTB; + std::vector m_holders; }; void storeHelper(CPUState &state, isa::Addr addr, @@ -86,7 +63,7 @@ class JITStateHolder final { std::unique_ptr m_ptr; }; -CodeHolder Lightning::translate(const BBInfo &info) { +JitFunction Lightning::translate(const BBInfo &info) { JITStateHolder holder; jit_state_t *_jit = holder.get(); @@ -331,13 +308,11 @@ CodeHolder Lightning::translate(const BBInfo &info) { jit_stxi_i(offsetof(CPUState, icount), JIT_V0, JIT_R0); jit_epilog(); - auto code = std::move(holder).emit(); - // fmt::println("CODE!!"); // jit_disassemble(); // fmt::println("CODE END"); - return code; + return m_holders.emplace_back(std::move(holder).emit()).as(); } } // namespace diff --git a/src/jit/llvm/llvmbasedjit.cc b/src/jit/llvm/llvmbasedjit.cc index fd51869..2db85ec 100644 --- a/src/jit/llvm/llvmbasedjit.cc +++ b/src/jit/llvm/llvmbasedjit.cc @@ -50,7 +50,7 @@ llvm::Type *getCPUStateType(llvm::LLVMContext &Ctx) { llvm::ArrayType *regsArrayType = llvm::ArrayType::get(wordType, 32); std::vector structMemberTypes = { - regsArrayType, pcType, finishedType, memoryPtrType, icountType}; + regsArrayType, pcType, finishedType, memoryPtrType, icountType, wordType}; llvm::StructType *cpuStateType = llvm::StructType::create( Ctx, structMemberTypes, "CPUState", /*IsPacked=*/false); @@ -98,7 +98,7 @@ void doSyscall(CPUState &state) { state.emulateSysCall(); } class LLVMBasedJIT : public JitEngine { std::unique_ptr m_jit; - std::unordered_map m_cache; + std::size_t m_moduleId{}; using TBFunc = void (*)(CPUState &); @@ -106,43 +106,17 @@ class LLVMBasedJIT : public JitEngine { LLVMBasedJIT(std::unique_ptr 
JIT); private: - bool doJIT(CPUState &state) override { - const auto pc = state.getPC(); - llvm::orc::ExecutorAddr found; - auto it = m_cache.find(pc); - if (it != m_cache.end()) { - found = it->second; - std::invoke(found.toPtr(), state); - const auto *bbInfo = getBBInfo(pc); - state.icount += bbInfo->insns.size(); - return true; - } - - const auto *bbInfo = getBBInfo(pc); - if (bbInfo == nullptr) { - return false; - } - - auto [module, ctx] = translate(pc, *bbInfo); + JitFunction translate(const BBInfo &info) override { + auto &&[module, ctx] = doTranslate(info); llvm::orc::ThreadSafeModule tsm(std::move(module), std::move(ctx)); tsm = optimizeIRModule(std::move(tsm)); auto err = m_jit->addIRModule(std::move(tsm)); - if (err) { - return false; - } - - bool hasFound; - std::tie(it, hasFound) = - m_cache.insert({pc, *m_jit->lookup(std::to_string(pc))}); - found = it->second; - assert(found); - std::invoke(found.toPtr(), state); - state.icount += bbInfo->insns.size(); - return true; + assert(!err); + return *m_jit->lookup(std::to_string(m_moduleId++))->toPtr(); } std::pair, std::unique_ptr> - translate(isa::Word pc, const BBInfo &info); + doTranslate(const BBInfo &info); static llvm::orc::ThreadSafeModule optimizeIRModule(llvm::orc::ThreadSafeModule TSM); }; @@ -180,16 +154,17 @@ llvm::Function *getOrDeclareMemoryFunction(llvm::Module &M, } std::pair, std::unique_ptr> -LLVMBasedJIT::translate(const isa::Word pc, const BBInfo &info) { +LLVMBasedJIT::doTranslate(const BBInfo &info) { + const auto &name = std::to_string(m_moduleId); auto ctxPtr = std::make_unique(); - auto modulePtr = std::make_unique(std::to_string(pc), *ctxPtr); + auto modulePtr = std::make_unique(name, *ctxPtr); llvm::IRBuilder<> builder{*ctxPtr}; auto *fnTy = llvm::FunctionType::get( llvm::Type::getVoidTy(*ctxPtr), {llvm::PointerType::getUnqual(getCPUStateType(*ctxPtr))}, false); - auto *fn = llvm::Function::Create(fnTy, llvm::Function::ExternalLinkage, - std::to_string(pc), *modulePtr); + auto *fn 
= llvm::Function::Create(fnTy, llvm::Function::ExternalLinkage, name, + *modulePtr); llvm::orc::MangleAndInterner mangle(m_jit->getExecutionSession(), m_jit->getDataLayout()); @@ -218,8 +193,21 @@ LLVMBasedJIT::translate(const isa::Word pc, const BBInfo &info) { }}; llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(*ctxPtr, "entry", fn); builder.SetInsertPoint(entryBB); - std::ranges::for_each(info.insns, std::bind_front(buildInstruction, data)); + auto *icountType = + llvm::IntegerType::get(*ctxPtr, sizeofBits()); + auto *addend = llvm::ConstantInt::get(icountType, info.insns.size()); + auto *cpuStructTy = getCPUStateType(data.Builder.getContext()); + + auto *cpuArg = data.CurrentFunction->getArg(0); + + llvm::Value *icPtr = data.Builder.CreateStructGEP(cpuStructTy, cpuArg, 4); + auto *icVal = data.Builder.CreateLoad(icountType, icPtr); + auto *newVal = data.Builder.CreateAdd(icVal, addend); + data.Builder.CreateStore(newVal, icPtr); + + // data.Module.dump(); + // throw 1; data.Builder.CreateRetVoid(); return std::make_pair(std::move(modulePtr), std::move(ctxPtr)); diff --git a/src/jit/mir/mir.cc b/src/jit/mir/mir.cc index 6421e5d..518ecc0 100644 --- a/src/jit/mir/mir.cc +++ b/src/jit/mir/mir.cc @@ -172,32 +172,9 @@ class MIRJit : public JitEngine { } private: - bool doJIT(CPUState &state) override { - const auto pc = state.getPC(); - auto found = m_cacheTB.find(pc); - if (found != m_cacheTB.end()) { - found->second(state); - return true; - } - - const auto *bbInfo = getBBInfo(pc); - if (bbInfo == nullptr) { - return false; - } - - auto func = translate(*bbInfo); - auto [it, wasNew] = m_cacheTB.try_emplace(pc, func); - assert(wasNew); - - it->second(state); - - return true; - } - - [[nodiscard]] JitFunction translate(const BBInfo &info); + [[nodiscard]] JitFunction translate(const BBInfo &info) override; MIR_context_t ctx; - std::unordered_map m_cacheTB; std::unordered_map m_func_proto{}; }; @@ -413,10 +390,7 @@ JitFunction MIRJit::translate(const BBInfo 
&info) { MIR_link(ctx, MIR_set_gen_interface, nullptr); - JitFunction compiled_func = - reinterpret_cast(MIR_gen(ctx, func_item)); - - return compiled_func; + return reinterpret_cast(MIR_gen(ctx, func_item)); } } // namespace diff --git a/src/jit/xbyak/xbyak.cc b/src/jit/xbyak/xbyak.cc index 26d55ea..18520da 100644 --- a/src/jit/xbyak/xbyak.cc +++ b/src/jit/xbyak/xbyak.cc @@ -20,32 +20,9 @@ class XByakJit : public JitEngine, private Xbyak::CodeGenerator { : Xbyak::CodeGenerator{Xbyak::DEFAULT_MAX_CODE_SIZE, Xbyak::AutoGrow} {} private: - bool doJIT(CPUState &state) override { - const auto pc = state.getPC(); - auto found = m_cacheTB.find(pc); - if (found != m_cacheTB.end()) { - found->second(state); - return true; - } - - const auto *bbInfo = getBBInfo(pc); - if (bbInfo == nullptr) { - // No such bb yet - return false; - } - - auto holder = translate(*bbInfo); - - auto [it, wasNew] = m_cacheTB.try_emplace(pc, std::move(holder)); - assert(wasNew); + [[nodiscard]] JitFunction translate(const BBInfo &info) override; - it->second(state); - - return true; - } - - [[nodiscard]] CodeHolder translate(const BBInfo &info); - std::unordered_map m_cacheTB; + std::vector m_holders; }; void storeHelper(CPUState &state, isa::Addr addr, @@ -59,7 +36,7 @@ template T loadHelper(CPUState &state, isa::Addr addr) { void syscallHelper(CPUState &state) { state.emulateSysCall(); } -CodeHolder XByakJit::translate(const BBInfo &info) { +JitFunction XByakJit::translate(const BBInfo &info) { reset(); // XByak specific (CodeGenerator is about a PAGE size!!, so reuse it) Xbyak::util::StackFrame frame{this, 3, 3 | Xbyak::util::UseRCX}; @@ -306,9 +283,9 @@ CodeHolder XByakJit::translate(const BBInfo &info) { frame.close(); ready(); - // Copy data to holder - return CodeHolder{std::as_bytes(std::span{getCode(), getSize()})}; + return m_holders.emplace_back(std::as_bytes(std::span{getCode(), getSize()})) + .as(); } // namespace } // namespace From 982d51bdcaed9ce6e3cbd56a09cc38474d3c5da7 Mon Sep 17 
00:00:00 2001 From: Andrey Derzhavin Date: Fri, 31 Oct 2025 20:50:44 +0300 Subject: [PATCH 4/6] Add lfs --- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1af7c3f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +dataset1.h filter=lfs diff=lfs merge=lfs -text From 4752d0df66639c2907c031371995eb8c92e1d56a Mon Sep 17 00:00:00 2001 From: Andrey Derzhavin Date: Fri, 31 Oct 2025 21:14:37 +0300 Subject: [PATCH 5/6] Use lfs --- benchmarks/median/dataset1.h | 3 +++ benchmarks/multiply/dataset1.h | 3 +++ benchmarks/qsort/dataset1.h | 3 +++ benchmarks/rsort/dataset1.h | 3 +++ benchmarks/vvadd/dataset1.h | 3 +++ 5 files changed, 15 insertions(+) create mode 100644 benchmarks/median/dataset1.h create mode 100644 benchmarks/multiply/dataset1.h create mode 100644 benchmarks/qsort/dataset1.h create mode 100644 benchmarks/rsort/dataset1.h create mode 100644 benchmarks/vvadd/dataset1.h diff --git a/benchmarks/median/dataset1.h b/benchmarks/median/dataset1.h new file mode 100644 index 0000000..a90effb --- /dev/null +++ b/benchmarks/median/dataset1.h @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c48e2de2d791af38684f82f7e646f5e1d2c1c17b83339d5ea616e46741c6707 +size 51500096 diff --git a/benchmarks/multiply/dataset1.h b/benchmarks/multiply/dataset1.h new file mode 100644 index 0000000..87e7fa7 --- /dev/null +++ b/benchmarks/multiply/dataset1.h @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d18dfb4ff15d36c754507eea1abed70b78212a1741615bbfd84ab016d58403 +size 90261269 diff --git a/benchmarks/qsort/dataset1.h b/benchmarks/qsort/dataset1.h new file mode 100644 index 0000000..f2fea5f --- /dev/null +++ b/benchmarks/qsort/dataset1.h @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a038a080b9ea99372f25dfce4317ac8a6630e930fd10360feb3433db00297813 +size 11632993 diff --git a/benchmarks/rsort/dataset1.h 
b/benchmarks/rsort/dataset1.h new file mode 100644 index 0000000..f2fea5f --- /dev/null +++ b/benchmarks/rsort/dataset1.h @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a038a080b9ea99372f25dfce4317ac8a6630e930fd10360feb3433db00297813 +size 11632993 diff --git a/benchmarks/vvadd/dataset1.h b/benchmarks/vvadd/dataset1.h new file mode 100644 index 0000000..5f74dab --- /dev/null +++ b/benchmarks/vvadd/dataset1.h @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b606421d236bd62d343919299c76b03d02c10c0e3af7473a11a585aaa6391571 +size 79744342 From 94c90a5c18ffafdc2751079f27c900f7813b5357 Mon Sep 17 00:00:00 2001 From: Andrey Derzhavin Date: Fri, 31 Oct 2025 21:35:14 +0300 Subject: [PATCH 6/6] Use Gmean --- tools/bench/DoBenchmark.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/bench/DoBenchmark.py b/tools/bench/DoBenchmark.py index 056bf06..478ff5f 100644 --- a/tools/bench/DoBenchmark.py +++ b/tools/bench/DoBenchmark.py @@ -42,19 +42,19 @@ def build_hist(list_data: list[list[BenchmarkList]], bench_names: list[str]): for j in range(num_bars_per_group): for i in range(num_groups): data["Benchmark"].append(bench_names[i]) - data["MIPS"].append(np.mean(list_data[i][j].mips())) + data["MIPS"].append(list_data[i][j].mips()) data["Backend"].append(list_data[i][j].name) df = pd.DataFrame(data) print(df) - filtered_df = df[~df["Backend"].isin(["interp", "cached interp"])] + filtered_df = df[~df["Backend"].isin(["nopbench"])] avg_df = ( - filtered_df.groupby("Benchmark") + filtered_df.groupby("Backend") .agg( - MIPS=("MIPS", "mean"), + MIPS=("MIPS", statistics.geometric_mean), ) .reset_index() - .assign(Backend="averaged-jit") + .assign(Benchmark="geomean") ) final_df = pd.concat([df, avg_df], ignore_index=True) @@ -125,8 +125,16 @@ def benchmark_name(elf_path: Path): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Tool for benchmarking") 
parser.add_argument("sim_bin", type=Path, help="Path to sim binary") - parser.add_argument("elf", type=Path, nargs="+", help="Path(s) to benchmarks") - parser.add_argument("-t", "--times", type=int, default=2, help="Amount of times to run each bench") + parser.add_argument( + "elf", type=Path, nargs="+", help="Path(s) to benchmarks" + ) + parser.add_argument( + "-t", + "--times", + type=int, + default=2, + help="Amount of times to run each bench", + ) parser.add_argument("--thres", type=int, default=0, help="JIT threshold") args = parser.parse_args()