From 06fa3aaf89456299ecd055bbfe4849aada003791 Mon Sep 17 00:00:00 2001 From: Michael Goldstein <51342076+mgoldstein322@users.noreply.github.com> Date: Wed, 4 Feb 2026 11:11:43 -0500 Subject: [PATCH] Carcosa Element (#2592) * initial commit with barebones header on new element * add files necessary for compilation * fix some basic syntax issues * create impl file for baseline injector update Makefile.am to include impl file fix serialization to stop compiler errors and warnings * add enum for basic logic selection * block out empty basic fault functions build parameter reader for fault type add parameter for choosing fault type add parameter for stuckAtFault inputs * split fault logic code off into subclasses -- only stuck-at is currently skeletoned, but I will add the rest soon * add another layer of abstraction so that the fault itself is its own class--need to fix some compiler issues and skeletonize the other faults * add todo note to faultBase.h * fix compile errors NOT arising from serialization * sidestep serialization compile issue * I have defeated the compiler * use correct memEvent and add some base logic -- need to figure out how to determine message direction * prototype stuckAtFault written--currently untested (and very ugly) * something is DEFINITELY broken here * stuckAtFault appears functional -- testing likely required, and there's still no dynamic way to change what fault logic is in use * corruptMemRegion written compiler is giving me errors that the exact same code didn't give in the last fault--will test when this is resolved * COMPILER DEFEATED * randomFlip files added randomDrop prototype complete--untested * fix faultlogic parameters on randomFlip * all basic faults built need to refine other aspects (such as payload printing) * add missed file * remove inlining add debug output to event dropper need to compare event drop logic to scott's portmodule example (just to make sure I'm not breaking anything on accident) * major refactor 
skeletonized--time to make it work * large number of compile errors fixed--many more to fix on monday :) * most compilation issues fixed need to do 2 things: 1. change fault** into vec[fault*] and rewrite affected code 2. figure out better way to pass valid installation directions to injectors * refactor nearly complete must implement serialization for FaultBase class after this is done, I can finally go in and fix the faulty logic in the StuckAtFault and CorruptMemRegion classes * refactor should be complete Still todo: 1. Refine stuckAtFault debug prints to make more sense 2. Refine address selection in stuckAtFault and corruptMemFault to only corrupt the valid range of data * add todos to codebase * fix some ELI weirdness stuckAtFault debug output should be fixed, and it now properly accounts for endianness in the data array FaultBase debug outputs for setMemEventPayload improved need to take a look at corrupt mem next, and run some more tests on stuck at * 95% sure stuckAtFault logic is working as intended * remove todos from stuckAtFault and make endianness member consistent with other members begin fix for corruptMemRegion - No longer crashes simulation - Should be more optimized to only run on smaller regions of code that actually need changes instead of looping through everything every time - Currently crashes when hitting assertion for start index of corrupt message when two regions are in the same message but don't overlap * 95% certain that corruptMemFault is working as intended Todo: - Build test configs for each included fault and get them into the testing system * add test suite for corruptMemFault (overlap fails currently) * remove dummy test * randomFlip and randomDrop tests added rng hooks in FaultInjectorBase expanded to include every usage without later faults or injectors needing their own need to figure out how to delay injection so that it doesn't occur when sst initialization is still occurring * randomDrop ELI fixed * corruptMem index 
computation fixed and corruption slightly optimized * tests added to EXTRA_DIST in makefile * add manual seed argument and identify seed which causes crash in randomFlip * remove trailing white space * move faultInjectorBase into injectors dir update paths to faultInjectorBase * rename doubleFaultInjector to dropFlipFaultInjector --- src/sst/elements/carcosa/Makefile.am | 66 ++ src/sst/elements/carcosa/configure.m4 | 9 + .../carcosa/faultlogic/corruptMemFault.cc | 125 ++++ .../carcosa/faultlogic/corruptMemFault.h | 72 +++ .../elements/carcosa/faultlogic/faultBase.cc | 77 +++ .../elements/carcosa/faultlogic/faultBase.h | 56 ++ .../carcosa/faultlogic/randomDropFault.cc | 31 + .../carcosa/faultlogic/randomDropFault.h | 41 ++ .../carcosa/faultlogic/randomFlipFault.cc | 38 ++ .../carcosa/faultlogic/randomFlipFault.h | 41 ++ .../carcosa/faultlogic/stuckAtFault.cc | 187 ++++++ .../carcosa/faultlogic/stuckAtFault.h | 84 +++ .../injectors/corruptMemFaultInjector.cc | 40 ++ .../injectors/corruptMemFaultInjector.h | 50 ++ .../injectors/dropFlipFaultInjector.cc | 99 +++ .../carcosa/injectors/dropFlipFaultInjector.h | 61 ++ .../carcosa/injectors/faultInjectorBase.cc | 168 +++++ .../carcosa/injectors/faultInjectorBase.h | 178 ++++++ .../injectors/randomDropFaultInjector.cc | 58 ++ .../injectors/randomDropFaultInjector.h | 54 ++ .../injectors/randomFlipFaultInjector.cc | 55 ++ .../injectors/randomFlipFaultInjector.h | 55 ++ .../carcosa/injectors/stuckAtFaultInjector.cc | 21 + .../carcosa/injectors/stuckAtFaultInjector.h | 51 ++ .../carcosa/tests/testCorruptMemBasic.py | 570 +++++++++++++++++ .../carcosa/tests/testCorruptMemDouble.py | 570 +++++++++++++++++ .../tests/testCorruptMemDoubleOverlap.py | 571 +++++++++++++++++ .../elements/carcosa/tests/testRandomDrop.py | 571 +++++++++++++++++ .../elements/carcosa/tests/testRandomFlip.py | 572 ++++++++++++++++++ .../carcosa/tests/testStuckAtBasic.py | 570 +++++++++++++++++ .../carcosa/tests/testStuckAtMultiple.py | 571 
+++++++++++++++++ .../carcosa/tests/testStuckAtOverlap.py | 571 +++++++++++++++++ .../carcosa/tests/testStuckAtSameByte.py | 571 +++++++++++++++++ 33 files changed, 6854 insertions(+) create mode 100644 src/sst/elements/carcosa/Makefile.am create mode 100644 src/sst/elements/carcosa/configure.m4 create mode 100644 src/sst/elements/carcosa/faultlogic/corruptMemFault.cc create mode 100644 src/sst/elements/carcosa/faultlogic/corruptMemFault.h create mode 100644 src/sst/elements/carcosa/faultlogic/faultBase.cc create mode 100644 src/sst/elements/carcosa/faultlogic/faultBase.h create mode 100644 src/sst/elements/carcosa/faultlogic/randomDropFault.cc create mode 100644 src/sst/elements/carcosa/faultlogic/randomDropFault.h create mode 100644 src/sst/elements/carcosa/faultlogic/randomFlipFault.cc create mode 100644 src/sst/elements/carcosa/faultlogic/randomFlipFault.h create mode 100644 src/sst/elements/carcosa/faultlogic/stuckAtFault.cc create mode 100644 src/sst/elements/carcosa/faultlogic/stuckAtFault.h create mode 100644 src/sst/elements/carcosa/injectors/corruptMemFaultInjector.cc create mode 100644 src/sst/elements/carcosa/injectors/corruptMemFaultInjector.h create mode 100644 src/sst/elements/carcosa/injectors/dropFlipFaultInjector.cc create mode 100644 src/sst/elements/carcosa/injectors/dropFlipFaultInjector.h create mode 100644 src/sst/elements/carcosa/injectors/faultInjectorBase.cc create mode 100644 src/sst/elements/carcosa/injectors/faultInjectorBase.h create mode 100644 src/sst/elements/carcosa/injectors/randomDropFaultInjector.cc create mode 100644 src/sst/elements/carcosa/injectors/randomDropFaultInjector.h create mode 100644 src/sst/elements/carcosa/injectors/randomFlipFaultInjector.cc create mode 100644 src/sst/elements/carcosa/injectors/randomFlipFaultInjector.h create mode 100644 src/sst/elements/carcosa/injectors/stuckAtFaultInjector.cc create mode 100644 src/sst/elements/carcosa/injectors/stuckAtFaultInjector.h create mode 100644 
src/sst/elements/carcosa/tests/testCorruptMemBasic.py create mode 100644 src/sst/elements/carcosa/tests/testCorruptMemDouble.py create mode 100644 src/sst/elements/carcosa/tests/testCorruptMemDoubleOverlap.py create mode 100644 src/sst/elements/carcosa/tests/testRandomDrop.py create mode 100644 src/sst/elements/carcosa/tests/testRandomFlip.py create mode 100644 src/sst/elements/carcosa/tests/testStuckAtBasic.py create mode 100644 src/sst/elements/carcosa/tests/testStuckAtMultiple.py create mode 100644 src/sst/elements/carcosa/tests/testStuckAtOverlap.py create mode 100644 src/sst/elements/carcosa/tests/testStuckAtSameByte.py diff --git a/src/sst/elements/carcosa/Makefile.am b/src/sst/elements/carcosa/Makefile.am new file mode 100644 index 0000000000..986cab2899 --- /dev/null +++ b/src/sst/elements/carcosa/Makefile.am @@ -0,0 +1,66 @@ +# -*- Makefile -*- +# +# + +AM_CPPFLAGS += \ + $(MPI_CPPFLAGS) \ + -I$(top_srcdir)/src + +compdir = $(pkglibdir) +comp_LTLIBRARIES = libcarcosa.la +libcarcosa_la_SOURCES = \ + injectors/faultInjectorBase.cc \ + injectors/faultInjectorBase.h \ + injectors/stuckAtFaultInjector.cc \ + injectors/stuckAtFaultInjector.h \ + injectors/corruptMemFaultInjector.cc \ + injectors/corruptMemFaultInjector.h \ + injectors/randomDropFaultInjector.cc \ + injectors/randomDropFaultInjector.h \ + injectors/randomFlipFaultInjector.cc \ + injectors/randomFlipFaultInjector.h \ + injectors/dropFlipFaultInjector.cc \ + injectors/dropFlipFaultInjector.h \ + faultlogic/faultBase.cc \ + faultlogic/faultBase.h \ + faultlogic/stuckAtFault.cc \ + faultlogic/stuckAtFault.h \ + faultlogic/corruptMemFault.cc \ + faultlogic/corruptMemFault.h \ + faultlogic/randomDropFault.cc \ + faultlogic/randomDropFault.h \ + faultlogic/randomFlipFault.cc \ + faultlogic/randomFlipFault.h + +EXTRA_DIST = \ + tests/testCorruptMemBasic.py \ + tests/testCorruptMemDouble.py \ + tests/testCorruptMemDoubleOverlap.py \ + tests/testRandomDrop.py \ + tests/testRandomFlip.py \ + 
tests/testStuckAtBasic.py \ + tests/testStuckAtMultiple.py \ + tests/testStuckAtOverlap.py \ + tests/testStuckAtSameByte.py + +sstdir = $(includedir)/sst/elements/carcosa +nobase_sst_HEADERS = \ + injectors/faultInjectorBase.h \ + injectors/stuckAtFaultInjector.h \ + injectors/corruptMemFaultInjector.h \ + injectors/randomDropFaultInjector.h \ + injectors/randomFlipFaultInjector.h \ + injectors/dropFlipFaultInjector.h \ + faultlogic/faultBase.h \ + faultlogic/stuckAtFault.h \ + faultlogic/corruptMemFault.h \ + faultlogic/randomDropFault.h \ + faultlogic/randomFlipFault.h + +libcarcosa_la_LDFLAGS = -module -avoid-version +libcarcosa_la_LIBADD = + +AM_CPPFLAGS += $(HMC_FLAG) +install-exec-hook: + $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE carcosa=$(abs_srcdir) + $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS carcosa=$(abs_srcdir)/tests diff --git a/src/sst/elements/carcosa/configure.m4 b/src/sst/elements/carcosa/configure.m4 new file mode 100644 index 0000000000..a47dc7ab12 --- /dev/null +++ b/src/sst/elements/carcosa/configure.m4 @@ -0,0 +1,9 @@ +dnl -*- Autoconf -*- +dnl vim:ft=config +dnl + +AC_DEFUN([SST_carcosa_CONFIG], [ + carcosa_happy="yes" + + AS_IF([test "$carcosa_happy" = "yes"], [$1], [$2]) +]) diff --git a/src/sst/elements/carcosa/faultlogic/corruptMemFault.cc b/src/sst/elements/carcosa/faultlogic/corruptMemFault.cc new file mode 100644 index 0000000000..fdabd7deb1 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/corruptMemFault.cc @@ -0,0 +1,125 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+
+#include "sst/elements/carcosa/faultlogic/corruptMemFault.h"
+
+using namespace SST::Carcosa;
+
+/**
+ * Reads the "regions" parameter and stores every entry as a
+ * [start, end] address pair; the end address is treated as inclusive
+ * by the lookups below.
+ * Parameter format: {"regions": ["start_addr0, end_addr0", "start_addr1, end_addr1",...]}
+ * Addresses are parsed as hexadecimal. Aborts through fatal() when an
+ * entry cannot be parsed or when start > end.
+ */
+CorruptMemFault::CorruptMemFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) {
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Fault type: Corrupt Memory Region\n");
+#endif
+    // read in data regions
+    std::vector<std::string> regionVec;
+    params.find_array("regions", regionVec);
+
+    // process entries into region
+    for (std::string& region: regionVec) {
+        std::pair<uint64_t, uint64_t> region_pair = convertString(region);
+
+        // check validity
+        if (region_pair.first > region_pair.second) {
+            getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Invalid corruption region: [0x%zx, 0x%zx].\n",
+                region_pair.first, region_pair.second);
+        }
+
+        corruptionRegions_.push_back(region_pair);
+#ifdef __SST_DEBUG_OUTPUT__
+        getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Inserted corruption region: [0x%zx, 0x%zx]\n",
+            region_pair.first, region_pair.second);
+#endif
+    }
+}
+
+/**
+ * Overwrites, with random bytes, the part of the event payload that
+ * falls inside each region listed in regionsToUse_ (filled by
+ * checkAddrUsage()), then installs the new payload on the event.
+ * @param ev event to corrupt; must be a MemEvent (convertMemEvent aborts otherwise)
+ * @return always true
+ */
+bool CorruptMemFault::faultLogic(Event*& ev) {
+    SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev);
+
+    const Addr base_addr = mem_ev->getBaseAddr();
+    const size_t payload_sz = mem_ev->getPayloadSize();
+    // work on a copy; the event is only updated once through setMemEventPayload
+    dataVec new_payload(mem_ev->getPayload());
+    for (int r: regionsToUse_) {
+        const auto& region = corruptionRegions_[r];
+        const int32_t start = computeStartIndex(base_addr, payload_sz, region.first);
+        if (start < 0) {
+            getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "No valid start index for corruption.\n");
+        }
+        const int32_t end = computeEndIndex(base_addr, payload_sz, region.second);
+        if (end < 0) {
+            getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "No valid end index for corruption.\n");
+        }
+        for (int32_t i = start; i < end; i++) {
+            new_payload[i] = static_cast<uint8_t>(injector_->randUInt32(0,255));
+        }
+    }
+    setMemEventPayload(ev, new_payload);
+    return true;
+}
+
+/**
+ * Parses one "start_addr, end_addr" entry (hexadecimal, comma optional).
+ * @param region textual region description
+ * @return (start, end) as parsed; ordering is checked by the caller
+ */
+std::pair<uint64_t, uint64_t> CorruptMemFault::convertString(std::string& region) {
+    std::stringstream ss(region);
+    // initialised so that a failed extraction never yields indeterminate values
+    uint64_t addr0 = 0, addr1 = 0;
+
+    ss >> std::hex >> addr0;
+    // tolerate blanks between the first address and the comma
+    ss >> std::ws;
+    if (ss.peek() == ','){
+        ss.ignore();
+    }
+    ss >> std::hex >> addr1;
+
+    if (ss.fail()) {
+        getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Unable to parse corruption region \"%s\".\n",
+            region.c_str());
+    }
+
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "Extracted region pair: [0x%zx, 0x%zx]\n",
+        addr0, addr1);
+#endif
+    return std::make_pair(addr0, addr1);
+}
+
+/**
+ * Walks the payload in 8-byte steps and returns the payload offset of
+ * the first step whose address is >= region_start.
+ * @return byte offset into the payload, or -1 when no step qualifies
+ */
+int32_t CorruptMemFault::computeStartIndex(Addr base_addr, size_t payload_sz, Addr region_start) {
+    // number of whole 8-byte words in the payload
+    const int payload_words = payload_sz / 8;
+    Addr addr = base_addr;
+    for (int i = 0; i < payload_words; i++, addr+=8) {
+        if (addr >= region_start) {
+            return addr - base_addr;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Walks the payload backwards in 8-byte steps and returns the offset one
+ * past the last byte of the last step whose address is <= region_end.
+ * @return exclusive end offset into the payload, or -1 when no step qualifies
+ */
+int32_t CorruptMemFault::computeEndIndex(Addr base_addr, size_t payload_sz, Addr region_end) {
+    const int payload_words = payload_sz / 8;
+    if (payload_words <= 0) {
+        return -1;
+    }
+    Addr addr = base_addr + (static_cast<Addr>(payload_words - 1) * 8);
+    // visit exactly payload_words words; one more step would test an
+    // address below base_addr (and wrap when base_addr < 8)
+    for (int i = payload_words; i > 0; i--, addr-=8) {
+        if (addr <= region_end) {
+            return addr - base_addr + 8;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Records which configured regions contain at least one 8-byte step of
+ * this event's payload.
+ * @param ev event to inspect; must be a MemEvent
+ * @return pointer to regionsToUse_, holding indices into corruptionRegions_
+ */
+std::vector<int>* CorruptMemFault::checkAddrUsage(Event*& ev) {
+    SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev);
+    const Addr base_addr = mem_ev->getBaseAddr();
+    const int payload_words = mem_ev->getPayloadSize() / 8;
+
+    // start from an empty list so indices selected for earlier events are
+    // not applied to this one
+    regionsToUse_.clear();
+    for (size_t i = 0; i < corruptionRegions_.size(); i++) {
+        const auto& region = corruptionRegions_[i];
+        // check if message contains ANY address in this region
+        Addr addr = base_addr;
+        for (int j = 0; j < payload_words; addr+=8, j++) {
+            if ((addr >= region.first) && (addr <= region.second)) {
+                regionsToUse_.push_back(static_cast<int>(i));
+                break;
+            }
+        }
+    }
+    return &regionsToUse_;
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/corruptMemFault.h b/src/sst/elements/carcosa/faultlogic/corruptMemFault.h
new file mode 100644
index 0000000000..5574639bad
--- /dev/null
+++
b/src/sst/elements/carcosa/faultlogic/corruptMemFault.h
@@ -0,0 +1,72 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H
+#define SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H
+
+#include "sst/elements/carcosa/faultlogic/faultBase.h"
+#include "sst/core/rng/mersenne.h"
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace SST::Carcosa {
+
+typedef std::vector<uint8_t> dataVec;
+typedef SST::MemHierarchy::Addr Addr;
+
+/**
+ * This fault is intended to be placed on the input/output ports
+ * of memory components such as DRAM or HBM. Events that pass through
+ * it, and whose data addresses fall within the ranges set in this
+ * module's parameters, will have their payloads randomly altered
+ * to simulate corruption in the affected region of memory.
+ */
+class CorruptMemFault : public FaultBase
+{
+public:
+
+    /** Reads the corruption regions from the "regions" parameter. */
+    CorruptMemFault(Params& params, FaultInjectorBase* injector);
+
+    CorruptMemFault() = default;
+    ~CorruptMemFault() {}
+
+    /**
+     * 1. Read in event
+     * 2. Test if event is in specified region
+     * 3. Corrupt event payload if necessary
+     * 4. Replace payload
+     */
+    bool faultLogic(Event*& ev) override;
+
+    /**
+     * Determines which configured regions the event's payload touches.
+     * @return pointer to the list of matching indices into corruptionRegions_
+     */
+    std::vector<int>* checkAddrUsage(Event*& ev);
+protected:
+
+    // configured regions as (start address, end address) pairs
+    std::vector<std::pair<uint64_t, uint64_t>> corruptionRegions_;
+
+    // indices into corruptionRegions_ selected by checkAddrUsage()
+    std::vector<int> regionsToUse_;
+
+    /** Parses one "start_addr, end_addr" parameter entry. */
+    std::pair<uint64_t, uint64_t> convertString(std::string& region);
+
+    /** Payload offset at which corruption for a region begins, or -1. */
+    int32_t computeStartIndex(Addr base_addr, size_t payload_sz, Addr region_start);
+    /** Payload offset one past where corruption for a region stops, or -1. */
+    int32_t computeEndIndex(Addr base_addr, size_t payload_sz, Addr region_end);
+
+    void serialize_order(SST::Core::Serialization::serializer& ser) override {
+        FaultBase::serialize_order(ser);
+        SST_SER(corruptionRegions_);
+        SST_SER(regionsToUse_);
+    }
+    ImplementVirtualSerializable(CorruptMemFault)
+}; // CorruptMemFault
+} // namespace SST::Carcosa
+
+#endif // SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/faultBase.cc b/src/sst/elements/carcosa/faultlogic/faultBase.cc
new file mode 100644
index 0000000000..4a648f6699
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/faultBase.cc
@@ -0,0 +1,77 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/faultlogic/faultBase.h"
+
+using namespace SST::Carcosa;
+
+#ifdef __SST_DEBUG_OUTPUT__
+namespace {
+// Prints the event's payload eight bytes per row, each row prefixed with
+// its address (base address + row offset).
+void debugPrintPayload(SST::Output* dbg, SST::MemHierarchy::MemEvent* mem_ev) {
+    const dataVec& payload = mem_ev->getPayload();
+    for (size_t i = 0; i < payload.size(); i+=8) {
+        dbg->debug(CALL_INFO_LONG, 2, 0, "\n0x%zx: [\t", mem_ev->getBaseAddr() + i);
+        // stop at payload.size() so a final partial row is not read out of bounds
+        for (size_t j = i; j < (i+8) && j < payload.size(); j++) {
+            dbg->debug(CALL_INFO_LONG, 2, 0, "%d\t", payload[j]);
+        }
+        dbg->debug(CALL_INFO_LONG, 2, 0, "]\n");
+    }
+}
+} // anonymous namespace
+#endif
+
+/** Stores the owning injector; params is not read by the base class. */
+FaultBase::FaultBase(Params& params, FaultInjectorBase* injector) : injector_(injector) {
+    //
+}
+
+/** Default fault: leaves the event untouched and returns true. */
+bool FaultBase::faultLogic(Event*& ev) {
+    return true;
+}
+
+/** Output object of the owning injector. */
+SST::Output* FaultBase::getSimulationOutput() {
+    return injector_->getOutput();
+}
+
+/** Debug output object of the owning injector. */
+SST::Output* FaultBase::getSimulationDebug() {
+    return injector_->getDebug();
+}
+
+/**
+ * Down-casts an event to a MemEvent.
+ * Aborts through fatal() when the event is not a MemEvent.
+ */
+SST::MemHierarchy::MemEvent* FaultBase::convertMemEvent(Event*& ev) {
+    SST::MemHierarchy::MemEvent* mem_ev = dynamic_cast<SST::MemHierarchy::MemEvent*>(ev);
+
+    if (mem_ev == nullptr) {
+        getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Attempting to inject mem fault on a non-MemEvent type.\n");
+    }
+
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 3, 0, "Intercepted event %zu/%d\n", mem_ev->getID().first, mem_ev->getID().second);
+#endif
+    return mem_ev;
+}
+
+/** Reference to the payload stored in the (Mem)event. */
+dataVec& FaultBase::getMemEventPayload(Event*& ev) {
+    return convertMemEvent(ev)->getPayload();
+}
+
+/**
+ * Replaces the payload of the (Mem)event with newPayload. In debug
+ * builds the payload is printed before and after the replacement.
+ */
+void FaultBase::setMemEventPayload(Event*& ev, dataVec newPayload) {
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "Payload before replacement:\n");
+    debugPrintPayload(getSimulationDebug(), convertMemEvent(ev));
+#endif
+    convertMemEvent(ev)->setPayload(newPayload);
+
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "Payload after replacement:\n");
+    debugPrintPayload(getSimulationDebug(), convertMemEvent(ev));
+#endif
+}
+
diff --git a/src/sst/elements/carcosa/faultlogic/faultBase.h b/src/sst/elements/carcosa/faultlogic/faultBase.h
new file mode 100644
index 0000000000..08da7ac773
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/faultBase.h
@@ -0,0 +1,56 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_FAULTBASE_H
+#define SST_ELEMENTS_CARCOSA_FAULTBASE_H
+
+#include "sst/elements/carcosa/injectors/faultInjectorBase.h"
+#include "sst/core/serialization/serializable.h"
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace SST::Carcosa {
+
+typedef std::vector<uint8_t> dataVec;
+
+class FaultInjectorBase;
+
+/**
+ * Base class of all fault implementations. Holds a pointer to the
+ * injector that owns the fault and offers helpers for reaching the
+ * injector's output objects and the payload of a MemEvent.
+ */
+class FaultBase : public SST::Core::Serialization::serializable {
+public:
+    FaultBase(Params& params, FaultInjectorBase* injector);
+
+    FaultBase() = default;
+    virtual ~FaultBase() {}
+
+    /** Applies the fault to ev; the base implementation does nothing and returns true. */
+    virtual bool faultLogic(Event*& ev);
+
+    SST::Output* getSimulationOutput();
+
+    SST::Output* getSimulationDebug();
+
+    /** Down-casts ev to a MemEvent; aborts when ev is of another type. */
+    SST::MemHierarchy::MemEvent* convertMemEvent(Event*& ev);
+
+    dataVec& getMemEventPayload(Event*& ev);
+
+    void setMemEventPayload(Event*& ev, dataVec newPayload);
+protected:
+    // injector that owns this fault; not owned by the fault
+    FaultInjectorBase* injector_ = nullptr;
+
+    void serialize_order(SST::Core::Serialization::serializer& ser) override {
+        // Serialize only this class's own member. The previous body began by
+        // calling FaultBase::serialize_order(ser), i.e. itself, which
+        // recursed without bound.
+        SST_SER(injector_);
+    }
+    ImplementVirtualSerializable(FaultBase)
+}; // class FaultBase
+} // namespace SST::Carcosa
+
+#endif
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/randomDropFault.cc
b/src/sst/elements/carcosa/faultlogic/randomDropFault.cc
new file mode 100644
index 0000000000..b939c88200
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/randomDropFault.cc
@@ -0,0 +1,31 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/faultlogic/randomDropFault.h"
+
+using namespace SST::Carcosa;
+
+/** No parameters of its own; everything is handled by FaultBase. */
+RandomDropFault::RandomDropFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) {
+    //
+}
+
+/**
+ * Drops the event: deletes it, clears the caller's pointer and, when the
+ * injector is installed on the receive side, cancels delivery.
+ * @param ev event to drop; must be a MemEvent. Set to nullptr on return.
+ * @return always true
+ */
+bool RandomDropFault::faultLogic(Event*& ev) {
+    SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev);
+
+    delete mem_ev;
+    // ev is passed by reference and now points at freed memory; null it so
+    // the caller cannot touch the deleted event.
+    // NOTE(review): presumably the send-side caller treats a null event as
+    // "dropped" - confirm against FaultInjectorBase.
+    ev = nullptr;
+    if (injector_->getInstallDirection() == installDirection::Receive) {
+        injector_->cancelDelivery();
+    }
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Event dropped.\n");
+#endif
+    return true;
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/randomDropFault.h b/src/sst/elements/carcosa/faultlogic/randomDropFault.h
new file mode 100644
index 0000000000..5dfa679d4b
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/randomDropFault.h
@@ -0,0 +1,41 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_RANDOMDROPFAULT_H
+#define SST_ELEMENTS_CARCOSA_RANDOMDROPFAULT_H
+
+#include "sst/elements/carcosa/faultlogic/faultBase.h"
+#include <cstdint>
+#include <vector>
+
+namespace SST::Carcosa {
+
+typedef std::vector<uint8_t> dataVec;
+
+/**
+ * Fault that drops the event handed to faultLogic().
+ */
+class RandomDropFault : public FaultBase {
+public:
+    RandomDropFault(Params& params, FaultInjectorBase* injector);
+
+    RandomDropFault() = default;
+    ~RandomDropFault() {}
+
+    /** Deletes the event and, on the receive side, cancels its delivery. */
+    bool faultLogic(Event*& ev) override;
+protected:
+    // no state of its own: only the base class is serialized
+    void serialize_order(SST::Core::Serialization::serializer& ser) override {
+        FaultBase::serialize_order(ser);
+    }
+    ImplementVirtualSerializable(RandomDropFault)
+}; // RandomDropFault
+
+} // namespace SST::Carcosa
+
+#endif // SST_ELEMENTS_CARCOSA_RANDOMDROPFAULT_H
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/randomFlipFault.cc b/src/sst/elements/carcosa/faultlogic/randomFlipFault.cc
new file mode 100644
index 0000000000..48da3c7ea7
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/randomFlipFault.cc
@@ -0,0 +1,38 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/faultlogic/randomFlipFault.h"
+
+using namespace SST::Carcosa;
+
+/** No parameters of its own; everything is handled by FaultBase. */
+RandomFlipFault::RandomFlipFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) {
+    //
+}
+
+/**
+ * Flips one randomly chosen bit of the event's payload.
+ * An event without payload bytes is left untouched.
+ * @param ev event to modify; must be a MemEvent
+ * @return always true
+ */
+bool RandomFlipFault::faultLogic(Event*& ev) {
+    // check if this is the proper event type and get payload if it is
+    dataVec payload = getMemEventPayload(ev);
+    if (payload.empty()) {
+        // no byte exists to flip; indexing payload[0] here would be out of bounds
+        return true;
+    }
+    std::pair<uint32_t, uint32_t> lucky_number = pickByteAndBit(payload.size());
+    uint8_t byte = payload[lucky_number.first];
+    uint8_t mask = static_cast<uint8_t>(1) << (lucky_number.second);
+    payload[lucky_number.first] = byte ^ mask;
+    setMemEventPayload(ev, payload);
+    return true;
+}
+
+inline std::pair<uint32_t, uint32_t> RandomFlipFault::pickByteAndBit(size_t payload_sz) {
+    // NOTE(review): the upper bound of randUInt32 is assumed to be inclusive
+    // (corruptMemFault draws a byte value with randUInt32(0,255)) - confirm
+    // against FaultInjectorBase. The bounds below are therefore the last
+    // valid byte index and bit 7, so neither result can leave the payload
+    // or the byte.
+    uint32_t byte = 0;
+    if (payload_sz > 1) {
+        byte = injector_->randUInt32(0, payload_sz - 1);
+    }
+    uint32_t bit = injector_->randUInt32(0, 7);
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Flipping bit %u in byte %u.\n", (uint32_t)bit, (uint32_t)byte);
+#endif
+    return std::make_pair(byte, bit);
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/randomFlipFault.h b/src/sst/elements/carcosa/faultlogic/randomFlipFault.h
new file mode 100644
index 0000000000..0ddd02ee63
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/randomFlipFault.h
@@ -0,0 +1,41 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULT_H
+#define SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULT_H
+
+#include "sst/elements/carcosa/faultlogic/faultBase.h"
+#include <cstdint>
+#include <utility>
+
+namespace SST::Carcosa {
+
+/**
+ * Fault that flips a single, randomly selected bit in the payload of
+ * the event handed to faultLogic().
+ */
+class RandomFlipFault : public FaultBase {
+public:
+    RandomFlipFault(Params& params, FaultInjectorBase* injector);
+
+    RandomFlipFault() = default;
+    ~RandomFlipFault() {}
+
+    bool faultLogic(Event*& ev) override;
+protected:
+    /**
+     * Randomly choose which bit in which byte to flip
+     * @param payload_sz number of bytes in the payload
+     * @return (byte, bit)
+     */
+    inline std::pair<uint32_t, uint32_t> pickByteAndBit(size_t payload_sz);
+
+    // no state of its own: only the base class is serialized
+    void serialize_order(SST::Core::Serialization::serializer& ser) override {
+        FaultBase::serialize_order(ser);
+    }
+    ImplementVirtualSerializable(RandomFlipFault)
+}; // RandomFlipFault
+} // namespace SST::Carcosa
+
+#endif // SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULT_H
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/stuckAtFault.cc b/src/sst/elements/carcosa/faultlogic/stuckAtFault.cc
new file mode 100644
index 0000000000..571d1e45a6
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/stuckAtFault.cc
@@ -0,0 +1,187 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/faultlogic/stuckAtFault.h"
+
+using namespace SST::Carcosa;
+
+/********** StuckAtFault **********/
+
+// Builds the stuck-at-zero and stuck-at-one maps from the "masks"
+// parameter and reads the "endianness" parameter ("little" by default;
+// any other value selects big endian).
+// Aborts through fatal() when a zero mask and a one mask share a bit.
+// NOTE(review): the template arguments of the std::vector declarations
+// below are missing in this copy of the patch; the element type returned
+// by convertString() is declared in stuckAtFault.h.
+StuckAtFault::StuckAtFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector)
+{
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Fault Type: Stuck-At Fault\n");
+#endif
+    // read in masks
+    // parameter format: {masks: ["addr, byte, zeroMask, oneMask"]}
+    std::vector paramVecStr;
+    params.find_array("masks", paramVecStr);
+
+    std::vector paramVec = convertString(paramVecStr);
+    // build maps
+    for (auto param = paramVec.begin(); param != paramVec.end(); param++) {
+        Addr addr = param->addr;
+        int byte = param->byte;
+        uint8_t zeroMask = param->zeroMask;
+        uint8_t oneMask = param->oneMask;
+        // a bit cannot be stuck at zero and at one at the same time
+        // NOTE(review): byte is not range-checked here; computeByte() adds it
+        // to a payload offset, so values outside 0..7 presumably index a
+        // neighbouring word or leave the payload - confirm.
+        if ((int)(zeroMask & oneMask) > 0) {
+            getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Masks contain overlapping values. Addr: 0x%zx, "
+                "byte: %d\n", addr, byte);
+        }
+        // check for vector in each map before creating it
+        if (stuckAtZeroMask_.count(addr) == 1) {
+            stuckAtZeroMask_.at(addr).push_back(make_pair(byte, zeroMask));
+        } else {
+            auto addrVecPair = stuckAtZeroMask_.emplace(make_pair(addr, std::vector>()));
+            if (addrVecPair.second) {
+                addrVecPair.first->second.push_back(make_pair(byte, zeroMask));
+            } else {
+                getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Failed to insert mask.\n");
+            }
+        }
+#ifdef __SST_DEBUG_OUTPUT__
+        getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Finished inserting zero-masks for 0x%zx.\n", addr);
+#endif
+        // same insertion, this time for the stuck-at-one map
+        if (stuckAtOneMask_.count(addr) == 1) {
+            stuckAtOneMask_.at(addr).push_back(make_pair(byte, oneMask));
+        } else {
+            auto addrVecPair = stuckAtOneMask_.emplace(make_pair(addr, std::vector>()));
+            if (addrVecPair.second) {
+                addrVecPair.first->second.push_back(make_pair(byte, oneMask));
+            } else {
+                getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Failed to insert mask.\n");
+            }
+        }
+#ifdef __SST_DEBUG_OUTPUT__
+        getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Finished inserting one-masks for 0x%zx.\n", addr);
+#endif
+    }
+
+    // false = little endian, true = big endian
+    endianness_ = (params.find("endianness", "little") == std::string("little")) ? false : true;
+#ifdef __SST_DEBUG_OUTPUT__
+    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Endianness set to %s.\n", endianness_ ? "big" : "little");
+#endif
+}
+
+// Applies the configured masks to the event's payload: for every 8-byte
+// step of the payload whose address is present in either mask map, bits
+// in the zero mask are cleared and bits in the one mask are set. The
+// payload is only replaced when at least one address matched.
+// Always returns true.
+bool StuckAtFault::faultLogic(SST::Event*& ev) {
+    // Convert to memEvent
+    SST::MemHierarchy::MemEvent* mem_ev = this->convertMemEvent(ev);
+
+    // addr is only used by the debug output below
+    Addr addr = mem_ev->getAddr();
+    std::vector masked_addrs;
+    // NOTE(review): i (and masked_addr further down) are int while the map
+    // keys are built from Addr values; addresses that do not fit in an int
+    // would presumably be truncated here - confirm.
+    for (int i = mem_ev->getBaseAddr(); i < mem_ev->getBaseAddr() + mem_ev->getPayloadSize(); i+=8) {
+        if (stuckAtZeroMask_.count(i) == 1 || stuckAtOneMask_.count(i) == 1) {
+            masked_addrs.push_back(i);
+        }
+    }
+
+    // check for the addr in question in the fault map
+    if (masked_addrs.size() > 0) {
+#ifdef __SST_DEBUG_OUTPUT__
+        getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Masked Addr at cache line 0x%zx found in stuck map.\n", addr);
+#endif
+        // replace data if necessary
+        dataVec payload = this->getMemEventPayload(ev);
+
+        for (int masked_addr: masked_addrs){
+            uint8_t mask = 0b00000000;
+#ifdef __SST_DEBUG_OUTPUT__
+            getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Begin zero mask for address: 0x%zx\n", masked_addr);
+#endif
+            // mask tuple is (byte, mask)
+            if (stuckAtZeroMask_.count(masked_addr) == 1) {
+                for (auto maskPair: stuckAtZeroMask_.at(masked_addr)) {
+                    mask = maskPair.second;
+                    // payload index of the byte this mask applies to
+                    uint32_t final_byte = computeByte(masked_addr, mem_ev->getBaseAddr(), maskPair.first);
+#ifdef __SST_DEBUG_OUTPUT__
+                    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tbyte %d, value: %d, mask: %d, new value: %d\n",
+                        maskPair.first, (int)payload[final_byte], (int) mask,
+                        (int)(payload[final_byte] & (~mask)));
+                    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tPayload index: %d\n", final_byte);
+#endif
+                    // clear every bit that is set in the zero mask
+                    payload[final_byte] &= (~mask);
+                }
+            }
+#ifdef __SST_DEBUG_OUTPUT__
+            getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "End zero mask for address: 0x%zx\n", masked_addr);
+            getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Begin one mask for address: 0x%zx\n", masked_addr);
+#endif
+            if (stuckAtOneMask_.count(masked_addr) == 1) {
+                for (auto maskPair: stuckAtOneMask_.at(masked_addr)) {
+                    mask = maskPair.second;
+                    uint32_t final_byte = computeByte(masked_addr, mem_ev->getBaseAddr(), maskPair.first);
+#ifdef __SST_DEBUG_OUTPUT__
+                    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tbyte %d, value: %d, mask: %d, new value: %d\n",
+                        maskPair.first, (int)payload[final_byte], (int) mask,
+                        (int)(payload[final_byte] | mask));
+                    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tPayload index: %d\n", final_byte);
+#endif
+                    // set every bit that is set in the one mask
+                    payload[final_byte] |= mask;
+                }
+#ifdef __SST_DEBUG_OUTPUT__
+                getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "End one mask for address: 0x%zx\n", masked_addr);
+#endif
+            }
+        }
+
+        // replace payload
+        this->setMemEventPayload(ev, payload);
+    } // if (found)
+    return true;
+}
+
+// Parses every "addr, byte, zeroMask, oneMask" entry into one element of
+// the returned vector. addr is read as hexadecimal, byte as decimal and
+// the two masks as strings of 0/1 characters converted through
+// std::bitset<8>.
+// NOTE(review): operator>> into a std::string reads up to the next
+// whitespace, so a mask that is directly followed by a comma keeps that
+// comma, and std::bitset's string constructor throws
+// std::invalid_argument for characters other than 0 and 1 - confirm the
+// separator actually used in the parameter strings.
+std::vector StuckAtFault::convertString(std::vector& paramVecString) {
+    std::vector paramVec;
+
+    for (auto param = paramVecString.begin(); param != paramVecString.end(); param++) {
+        // disassemble string
+        std::stringstream stream;
+        Addr addr; int byte; std::string zeroMaskStr, oneMaskStr; uint8_t zeroMask, oneMask;
+        stream.str(*param);
+        stream >> std::hex >> addr;
+        if (stream.peek() == ',') {
+            stream.ignore();
+        }
+        stream >> std::dec >> byte;
+        if (stream.peek() == ',') {
+            stream.ignore();
+        }
+        stream >> zeroMaskStr;
+        zeroMask = static_cast(std::bitset<8>(zeroMaskStr).to_ulong());
+        if (stream.peek() == ',') {
+            stream.ignore();
+        }
+        stream >> oneMaskStr;
+        oneMask = static_cast(std::bitset<8>(oneMaskStr).to_ulong());
+        if (stream.peek() == ',') {
+            stream.ignore();
+        }
+#ifdef __SST_DEBUG_OUTPUT__
+        getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Masks for addr 0x%zx, byte %d: %d %d\n", addr, byte, (int)zeroMask, (int)oneMask);
+#endif
+        // insert maskParam
+        paramVec.push_back({addr, byte, zeroMask, oneMask});
+    }
+
+    return paramVec;
+}
+
+// Translates (address, byte-within-word) into an index into the payload.
+// base_byte is the offset of the 8-byte word from the start of the
+// payload; for big endian the byte order inside the word is reversed.
+uint32_t StuckAtFault::computeByte(Addr addr, Addr base_addr, uint32_t byte) {
+    uint32_t base_byte = addr - base_addr;
+    // vanadis riscv is little endian, so bytes are in reverse order
+    // Big endian: Addr->(B7|B6|B5|B4|B3|B2|B1|B0); Little endian: Addr->(B0|B1|B2|B3|B4|B5|B6|B7)
+    // endianness bool -> true = big; false = little
+    if (endianness_) {
+        return (base_byte + 7) - byte;
+    } else {
+        return base_byte + byte;
+    }
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/faultlogic/stuckAtFault.h b/src/sst/elements/carcosa/faultlogic/stuckAtFault.h
new file mode 100644
index 0000000000..9ec4cdb59a
--- /dev/null
+++ b/src/sst/elements/carcosa/faultlogic/stuckAtFault.h
@@ -0,0 +1,84 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_STUCKATFAULT_H
+#define SST_ELEMENTS_CARCOSA_STUCKATFAULT_H
+
+#include "sst/elements/carcosa/faultlogic/faultBase.h"
+#include
+#include
+#include
+#include
+#include
+
+namespace SST::Carcosa {
+
+typedef std::vector dataVec;
+typedef SST::MemHierarchy::Addr Addr;
+
+/**
+ * This fault is used to simulate a stuck bit fault.
+ * To ensure correct operation, make sure that the port module
+ * using this fault is attached at every point where the data
+ * for this bit could be read.
For example, a stuck bit in the L2 + * cache would need a port module with this fault installed on all + * input OR all output ports to the L2; if the simulator has forwarding enabled, + * but the actual system being simulated does not do the forwarding from memory + * directly into the L1 or the core (bypassing L2 ops in simulation), it may be + * advisable to also place these port modules on the ports used to forward these events. + */ +class StuckAtFault : public FaultBase +{ +public: + + StuckAtFault(Params& params, FaultInjectorBase* injector); + + StuckAtFault() = default; + ~StuckAtFault() {} + + /** + * Read event payload and perform the following: + * - If stuckAtMap.at(addr) exists, compare all listed bits with payload value + * - If payload value does not match mapped value, add bit to flip mask + * - Once all stored bit values have been compared, use flip mask to modify address data + */ + bool faultLogic(Event*& ev) override; +protected: + + // map of addr->{byte, mask} for saving stuck bit values + std::map>> stuckAtZeroMask_; + // add stuckAtOneMask + std::map>> stuckAtOneMask_; + // false = little; true = big + bool endianness_ = false; + + typedef struct maskParam { + Addr addr; + int byte; + uint8_t zeroMask; + uint8_t oneMask; + } maskParam_t; + + std::vector convertString(std::vector& paramVecStr); + uint32_t computeByte(Addr addr, Addr base_addr, uint32_t byte); + + void serialize_order(SST::Core::Serialization::serializer& ser) override { + FaultBase::serialize_order(ser); + SST_SER(stuckAtZeroMask_); + SST_SER(stuckAtOneMask_); + SST_SER(endianness_); + } + ImplementVirtualSerializable(StuckAtFault) +}; + +} // namespace SST::Carcosa + +#endif // SST_ELEMENTS_CARCOSA_STUCKATFAULT_H \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.cc b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.cc new file mode 100644 index 0000000000..1d744e7abe --- /dev/null +++ 
b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.cc
@@ -0,0 +1,40 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/injectors/corruptMemFaultInjector.h"
+#include "sst/elements/carcosa/faultlogic/corruptMemFault.h"
+
+using namespace SST::Carcosa;
+
+/**
+ * Installs a single CorruptMemFault and allows the module on both the
+ * send and the receive side of a port.
+ */
+CorruptMemFaultInjector::CorruptMemFaultInjector(Params& params) : FaultInjectorBase(params) {
+    // create fault
+    fault.push_back(new CorruptMemFault(params, this));
+    setValidInstallation(params, SEND_RECEIVE_VALID);
+}
+
+/**
+ * Runs the corruption fault only for events that touch a configured region.
+ *
+ * The fault is asked which regions the event uses; if that list is not
+ * empty the fault logic is executed and the list is cleared afterwards.
+ */
+void CorruptMemFaultInjector::executeFaults(Event*& ev) {
+    // Validate the fault object before it is used; the cast also yields null
+    // if slot 0 does not hold a CorruptMemFault.
+    CorruptMemFault* corruptFault = fault.empty() ? nullptr : dynamic_cast<CorruptMemFault*>(fault[0]);
+    if (!corruptFault) {
+        out_->fatal(CALL_INFO_LONG, -1, "No valid fault to execute.\n");
+    }
+
+    // is this addr in a corrupt region?
+    auto* regionsToUse = corruptFault->checkAddrUsage(ev);
+    if (regionsToUse == nullptr || regionsToUse->size() == 0) {
+        return;
+    }
+
+    // returned vec is not empty: execute
+#ifdef __SST_DEBUG_OUTPUT__
+    dbg_->debug(CALL_INFO_LONG, 2, 0, "Corruption region detected.\n");
+#endif
+    if (!corruptFault->faultLogic(ev)) {
+        out_->fatal(CALL_INFO_LONG, -1, "Fault somehow returned unsuccessful... How?\n");
+    }
+    // reset vec
+    regionsToUse->clear();
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.h b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.h
new file mode 100644
index 0000000000..acdcad1e45
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.h
@@ -0,0 +1,50 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULTINJECTOR_H
+#define SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULTINJECTOR_H
+
+#include "sst/elements/carcosa/injectors/faultInjectorBase.h"
+
+namespace SST::Carcosa {
+
+/**
+ * PortModule that corrupts the data of events touching configured
+ * memory regions (see the "regions" parameter).
+ */
+class CorruptMemFaultInjector : public FaultInjectorBase {
+public:
+    SST_ELI_REGISTER_PORTMODULE(
+        CorruptMemFaultInjector,
+        "carcosa",
+        "CorruptMemFaultInjector",
+        SST_ELI_ELEMENT_VERSION(0, 1, 0),
+        "PortModule class used to simulate a whole memory region being corrupted"
+    )
+
+    SST_ELI_DOCUMENT_PARAMS(
+        {"regions", "Formatted as an array of strings: [\"start_addr0, end_addr0\", \"start_addr1, end_addr1\",...,\"start_addrN, end_addrN\"]. Addresses expected in hexadecimal."}
+    )
+
+    CorruptMemFaultInjector(Params& params);
+
+    CorruptMemFaultInjector() = default;
+    ~CorruptMemFaultInjector() {}
+protected:
+    void executeFaults(Event*& ev) override;
+
+    void serialize_order(SST::Core::Serialization::serializer& ser) override
+    {
+        // Go through FaultInjectorBase (which itself calls PortModule) so the
+        // fault list, RNG and install settings are part of the checkpoint.
+        FaultInjectorBase::serialize_order(ser);
+        // serialize parameters like `SST_SER()`
+    }
+    ImplementVirtualSerializable(SST::Carcosa::CorruptMemFaultInjector)
+}; // class CorruptMemFaultInjector
+
+} // namespace SST::Carcosa
+
+#endif
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.cc b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.cc
new file mode 100644
index 0000000000..1dcbddec05
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.cc
@@ -0,0 +1,99 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package.
For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/injectors/dropFlipFaultInjector.h"
+#include "sst/elements/carcosa/faultlogic/randomFlipFault.h"
+#include "sst/elements/carcosa/faultlogic/randomDropFault.h"
+
+using namespace SST::Carcosa;
+
+/**
+ * Creates the drop fault (slot 0) and the flip fault (slot 1) and reads
+ * their independent trigger probabilities.
+ */
+DropFlipFaultInjector::DropFlipFaultInjector(Params& params) : FaultInjectorBase(params) {
+    // create fault
+    fault.resize(2);
+    fault[0] = new RandomDropFault(params, this);
+    fault[1] = new RandomFlipFault(params, this);
+
+    // read probability params
+    drop_probability_ = params.find<double>("drop_probability", 0.0);
+#ifdef __SST_DEBUG_OUTPUT__
+    if (drop_probability_ > 0.0){
+        dbg_->debug(CALL_INFO_LONG, 1, 0, "Drop probability set to %lf.\n", drop_probability_);
+    }
+#endif
+
+    flip_probability_ = params.find<double>("flip_probability", 0.0);
+#ifdef __SST_DEBUG_OUTPUT__
+    if (flip_probability_ > 0.0){
+        dbg_->debug(CALL_INFO_LONG, 1, 0, "Flip probability set to %lf.\n", flip_probability_);
+    }
+#endif
+
+    setValidInstallation(params, SEND_RECEIVE_VALID);
+}
+
+/**
+ * Rolls once for the drop and once for the flip and records both results
+ * in triggered_injection_.
+ *
+ * @return true if at least one of the two faults was triggered
+ */
+bool DropFlipFaultInjector::doInjection() {
+    // Both draws are always taken, in this order, so the RNG stream does not
+    // depend on the outcome of the first roll.
+    const double drop_draw = this->randFloat(0.0, 1.0);
+    // The explicit "> 0.0" keeps a probability of 0.0 (the default) from ever
+    // firing, even when the draw itself is exactly 0.0.
+    this->triggered_injection_[0] = (this->drop_probability_ > 0.0) && (drop_draw <= this->drop_probability_);
+#ifdef __SST_DEBUG_OUTPUT__
+    dbg_->debug(CALL_INFO_LONG, 1, 0, this->triggered_injection_[0] ? "Drop triggered.\n" : "Drop skipped.\n");
+#endif
+
+    const double flip_draw = this->randFloat(0.0, 1.0);
+    this->triggered_injection_[1] = (this->flip_probability_ > 0.0) && (flip_draw <= this->flip_probability_);
+#ifdef __SST_DEBUG_OUTPUT__
+    dbg_->debug(CALL_INFO_LONG, 1, 0, this->triggered_injection_[1] ? "Flip triggered.\n" : "Flip skipped.\n");
+#endif
+
+    return this->triggered_injection_[0] || this->triggered_injection_[1];
+}
+
+/**
+ * Overridden execution function to cause faults to be chosen at random
+ * 
from the vector once a fault has been triggered
+ */
+void DropFlipFaultInjector::executeFaults(Event*& ev) {
+    // A triggered drop takes precedence: the event is handed to the drop fault
+    // and the flip is not attempted on it.
+    if (this->triggered_injection_[0]) {
+        // do drop
+        if (fault.size() < 1 || !fault[0]) {
+            out_->fatal(CALL_INFO_LONG, -1, "No valid drop fault object.\n");
+        }
+        if (!fault[0]->faultLogic(ev)) {
+            out_->fatal(CALL_INFO_LONG, -1, "Fault execution failed.\n");
+        }
+#ifdef __SST_DEBUG_OUTPUT__
+        dbg_->debug(CALL_INFO_LONG, 1, 0, "Drop triggered.\n");
+#endif
+        return;
+    }
+    if (this->triggered_injection_[1]) {
+        // do flip
+        if (fault.size() < 2 || !fault[1]) {
+            out_->fatal(CALL_INFO_LONG, -1, "No valid flip fault object.\n");
+        }
+        if (!fault[1]->faultLogic(ev)) {
+            out_->fatal(CALL_INFO_LONG, -1, "Fault execution failed.\n");
+        }
+#ifdef __SST_DEBUG_OUTPUT__
+        dbg_->debug(CALL_INFO_LONG, 1, 0, "Flip triggered.\n");
+#endif
+    }
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.h b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.h
new file mode 100644
index 0000000000..a0b165f62c
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.h
@@ -0,0 +1,61 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_DROPFLIPFAULTINJECTOR_H
+#define SST_ELEMENTS_CARCOSA_DROPFLIPFAULTINJECTOR_H
+
+#include "sst/elements/carcosa/injectors/faultInjectorBase.h"
+#include <array>
+
+namespace SST::Carcosa {
+
+/**
+ * PortModule combining a random event drop and a random bit flip, each
+ * with its own probability. When both trigger on the same event only the
+ * drop is executed.
+ */
+class DropFlipFaultInjector : public FaultInjectorBase {
+public:
+    SST_ELI_REGISTER_PORTMODULE(
+        DropFlipFaultInjector,
+        "carcosa",
+        "DropFlipFaultInjector",
+        SST_ELI_ELEMENT_VERSION(0, 1, 0),
+        "PortModule class used to simulate a data transfer lost at random and a random bit flip in transit"
+    )
+
+    SST_ELI_DOCUMENT_PARAMS(
+        {"drop_probability", "The probability that a drop will be injected. Default = 0.0"},
+        {"flip_probability", "The probability that a flip will be injected. Default = 0.0"}
+    )
+
+    DropFlipFaultInjector(Params& params);
+
+    DropFlipFaultInjector() = default;
+    ~DropFlipFaultInjector() {}
+protected:
+    // Defaults keep a default-constructed object (checkpoint restart) well defined.
+    double drop_probability_ = 0.0;
+    double flip_probability_ = 0.0;
+    // Result of the last doInjection() roll. First is drop, second is flip.
+    std::array<bool, 2> triggered_injection_ = {{false, false}};
+
+    bool doInjection() override;
+    void executeFaults(Event*& ev) override;
+
+    void serialize_order(SST::Core::Serialization::serializer& ser) override
+    {
+        // Go through FaultInjectorBase (which itself calls PortModule) so the
+        // fault list, RNG and install settings are part of the checkpoint.
+        FaultInjectorBase::serialize_order(ser);
+        // serialize parameters like `SST_SER()`
+        SST_SER(drop_probability_);
+        SST_SER(flip_probability_);
+        SST_SER(triggered_injection_);
+    }
+    ImplementVirtualSerializable(SST::Carcosa::DropFlipFaultInjector)
+}; // class DropFlipFaultInjector
+
+} // namespace SST::Carcosa
+
+#endif
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/faultInjectorBase.cc b/src/sst/elements/carcosa/injectors/faultInjectorBase.cc
new file mode 100644
index 0000000000..6e760de39f
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/faultInjectorBase.cc
@@ -0,0 +1,168 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/injectors/faultInjectorBase.h"
+#include "sst/core/params.h"
+
+#include <cinttypes>
+
+using namespace SST::Carcosa;
+
+/********** FaultInjectorBase **********/
+
+/**
+ * Sets up the regular and debug outputs and seeds the RNG.
+ *
+ * A "seed" of 0 (the default) leaves the generator with its default seed.
+ */
+FaultInjectorBase::FaultInjectorBase(SST::Params& params) : PortModule()
+{
+    out_ = new Output();
+    out_->init("", params.find<uint32_t>("verbose", 1), 0, Output::STDOUT);
+
+    dbg_ = new Output();
+    dbg_->init("", params.find<uint32_t>("debug_level", 1), 0, (Output::output_location_t)params.find<int>("debug", 0));
+
+#ifdef __SST_DEBUG_OUTPUT__
+    dbg_->debug(CALL_INFO_LONG, 1, 0, "Initializing FaultInjector:\n");
+#endif
+    seed_ = params.find<uint64_t>("seed", 0);
+    if (seed_ != 0) {
+        base_rng_.seed(seed_);
+#ifdef __SST_DEBUG_OUTPUT__
+        dbg_->debug(CALL_INFO_LONG, 1, 0, "\tRNG Seed: %" PRIu64 "\n", seed_);
+#endif
+    }
+}
+
+/**
+ * Default behavior is to delete all fault objects in the order they were
+ * added to the vector
+ */
+FaultInjectorBase::~FaultInjectorBase() {
+    for (FaultBase* f : fault) {
+        delete f; // deleting a null slot is a no-op
+    }
+}
+
+/**
+ * Send-side hook: rolls doInjection() and, if it triggers, runs the faults
+ * on the outgoing event.
+ */
+void
+FaultInjectorBase::eventSent(uintptr_t key, Event*& ev)
+{
+    if (!valid_installs_set) {
+        out_->fatal(CALL_INFO_LONG, -1, "Valid installation directions not set -- did you forget to call setValidInstallation() in your constructor?\n");
+    }
+    if (doInjection()){
+#ifdef __SST_DEBUG_OUTPUT__
+        dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection triggered.\n");
+#endif
+        this->executeFaults(ev);
+    }
+#ifdef __SST_DEBUG_OUTPUT__
+    else {
+        dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection skipped.\n");
+    }
+#endif
+}
+
+/**
+ * Receive-side hook: same as eventSent(), and additionally exposes the
+ * caller's `cancel` flag through cancel_ so that a fault can cancel
+ * delivery via cancelDelivery().
+ */
+void
+FaultInjectorBase::interceptHandler(uintptr_t key, Event*& ev, bool& cancel)
+{
+    if (!valid_installs_set) {
+        out_->fatal(CALL_INFO_LONG, -1, "Valid installation directions not set -- did you forget to call setValidInstallation() in your constructor?\n");
+    }
+    // do not cancel delivery by default
+    cancel = false;
+    // NOTE(review): cancel_ points at the caller's flag and is only meaningful
+    // until this function returns.
+    cancel_ = &cancel;
+
+    if (doInjection()){
+#ifdef __SST_DEBUG_OUTPUT__
+        dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection triggered.\n");
+#endif
+        this->executeFaults(ev);
+    }
+#ifdef __SST_DEBUG_OUTPUT__
+    else {
+        dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection skipped.\n");
+    }
+#endif
+}
+
+// The integer helpers return start + (draw % (end - start)); an empty or
+// inverted range returns start instead of dividing by zero.
+
+uint32_t FaultInjectorBase::randUInt32(uint32_t start, uint32_t end) {
+    if (end <= start) {
+        return start;
+    }
+    return start + (base_rng_.generateNextUInt32() % (end - start));
+}
+
+int32_t FaultInjectorBase::randInt32(int32_t start, int32_t end) {
+    if (end <= start) {
+        return start;
+    }
+    // Work in unsigned arithmetic: with a negative draw the signed '%' is
+    // negative and would push the result below start.
+    const uint32_t range  = static_cast<uint32_t>(end) - static_cast<uint32_t>(start);
+    const uint32_t offset = static_cast<uint32_t>(base_rng_.generateNextInt32()) % range;
+    return static_cast<int32_t>(static_cast<uint32_t>(start) + offset);
+}
+
+uint64_t FaultInjectorBase::randUInt64(uint64_t start, uint64_t end) {
+    if (end <= start) {
+        return start;
+    }
+    return start + (base_rng_.generateNextUInt64() % (end - start));
+}
+
+int64_t FaultInjectorBase::randInt64(int64_t start, int64_t end) {
+    if (end <= start) {
+        return start;
+    }
+    // see randInt32()
+    const uint64_t range  = static_cast<uint64_t>(end) - static_cast<uint64_t>(start);
+    const uint64_t offset = static_cast<uint64_t>(base_rng_.generateNextInt64()) % range;
+    return static_cast<int64_t>(static_cast<uint64_t>(start) + offset);
+}
+
+/** Returns start + u * (end - start), with u taken from the RNG's nextUniform(). */
+double FaultInjectorBase::randFloat(double start, double end) {
+    double range = end - start;
+    return start + (base_rng_.nextUniform() * range);
+}
+
+/** Default trigger: inject on every event. */
+bool FaultInjectorBase::doInjection() {
+    return true;
+}
+
+/**
+ * Map the "install_direction" string to an installDirection.
+ *
+ * Aborts if the direction is recognised but not allowed for this injector;
+ * returns installDirection::Invalid for any other string.
+ */
+installDirection FaultInjectorBase::setInstallDirection(std::string param) {
+    if ( param == "Receive" ) {
+        if (valid_installation_[0]) {
+            return installDirection::Receive;
+        } else {
+            // fatal() takes (CALL_INFO_LONG, exit_code, format, ...)
+            out_->fatal(CALL_INFO_LONG, -1, "This PortModule Fault Injector cannot intercept Receive events.\n");
+        }
+    } else if ( param == "Send" ) {
+        if (valid_installation_[1]) {
+            return installDirection::Send;
+        } else {
+            out_->fatal(CALL_INFO_LONG, -1, "This PortModule Fault Injector cannot intercept Send events.\n");
+        }
+    }
+    return installDirection::Invalid;
+}
+
+/**
+ * Record which directions the derived injector supports and resolve the
+ * "install_direction" parameter (default "Receive") against them.
+ */
+void FaultInjectorBase::setValidInstallation(Params& params, std::array<bool, 2> valid_install) {
+    valid_installation_ = valid_install;
+    std::string install_dir = params.find<std::string>("install_direction", "Receive");
+    install_direction_ = setInstallDirection(install_dir);
+
+    if (install_direction_ == installDirection::Invalid) {
+        // only reached for a string that is neither 'Send' nor 'Receive'
+        out_->fatal(CALL_INFO_LONG, -1, "Invalid install_direction '%s'. Valid values are 'Send' and 'Receive'.\n",
+                    install_dir.c_str());
+    }
+
+#ifdef __SST_DEBUG_OUTPUT__
+    dbg_->debug(CALL_INFO_LONG, 1, 0, "\tInstall Direction: %s\n", install_dir.c_str());
+#endif
+    valid_installs_set = true;
+}
+
+/**
+ * Default behavior is to execute faults in the order they were
+ * added to the vector
+ */
+void FaultInjectorBase::executeFaults(Event*& ev) {
+    bool success = false;
+    for (FaultBase* f : fault) {
+        if (f) {
+            // accumulate, so an earlier success is not overwritten by a later
+            // fault; faultLogic() is evaluated first and therefore always runs
+            success = f->faultLogic(ev) || success;
+        }
+    }
+    if (!success) {
+        out_->fatal(CALL_INFO_LONG, -1, "No valid fault object, or no fault successfully executed.\n");
+    }
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/faultInjectorBase.h b/src/sst/elements/carcosa/injectors/faultInjectorBase.h
new file mode 100644
index 0000000000..18fa2dbd22
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/faultInjectorBase.h
@@ -0,0 +1,178 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_FAULTINJECTORBASE_H
+#define SST_ELEMENTS_CARCOSA_FAULTINJECTORBASE_H
+
+#include "sst/core/portModule.h"
+#include "sst/core/event.h"
+#include "sst/core/output.h"
+#include "sst/elements/memHierarchy/memEvent.h"
+#include "sst/elements/carcosa/faultlogic/faultBase.h"
+#include "sst/core/rng/mersenne.h"
+#include <array>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace SST::Carcosa {
+
+typedef std::vector<uint8_t> dataVec;
+class FaultBase;
+
+// {receive allowed, send allowed}
+#define SEND_RECEIVE_VALID {{true, true}}
+#define RECEIVE_VALID {{true, false}}
+#define SEND_VALID {{false, true}}
+
+/********** FaultInjectorBase **********/
+
+enum installDirection {
+    Send = 0,
+    Receive,
+    Invalid
+};
+
+/**
+ * Base class containing required functions and basic data for
+ * creating fault injection on component ports.
+ *
+ * Injectors are used to execute the logic that tests for
+ * whether or not an injection should occur. Upon triggering
+ * an injection, a fault object which inherits from the
+ * FaultBase class is used to execute the fault logic
+ * on the triggering message.
+ */
+class FaultInjectorBase : public SST::PortModule
+{
+public:
+
+    SST_ELI_REGISTER_PORTMODULE(
+        FaultInjectorBase,
+        "carcosa",
+        "FaultInjectorBase",
+        SST_ELI_ELEMENT_VERSION(0, 1, 0),
+        "Base PortModule class used to connect fault injection logic to components"
+    )
+
+    SST_ELI_DOCUMENT_PARAMS(
+        {"install_direction", "Flag which direction the injector should read from on a port. Valid options are \'Send\' and \'Receive\'. Default is \'Receive\'."},
+        {"seed", "Optional integer seed to give to the random number generator. Default = 0 (0 seed will be assumed to mean NO seed)."},
+        {"debug", "Integer determining if debug should be active. 0 disables, 1 sends output to STDOUT, 2 to STDERR. Default = 0"},
+        {"debug_level", "Integer determining verbosity of debug output. 1 enables basic text output, 2 enables significant activity output."}
+    )
+
+    FaultInjectorBase(Params& params);
+
+    FaultInjectorBase() = default;
+    ~FaultInjectorBase();
+
+    void eventSent(uintptr_t key, Event*& ev) override;
+    void interceptHandler(uintptr_t key, Event*& ev, bool& cancel) override;
+
+    // The module is installed on exactly one side of the port: the send side
+    // when install_direction_ is Send, the receive side otherwise.
+    bool installOnReceive() override
+    {
+        return install_direction_ != Send;
+    }
+    bool installOnSend() override
+    {
+        return install_direction_ == Send;
+    }
+
+    /**
+     * Ask for delivery of the event currently being intercepted to be cancelled.
+     * cancel_ is set by interceptHandler(); it stays null when the module
+     * only sees eventSent(), in which case this call does nothing.
+     */
+    void cancelDelivery() {
+        if (cancel_) {
+            *cancel_ = true;
+        }
+    }
+
+    installDirection setInstallDirection(std::string param);
+
+    installDirection getInstallDirection() {
+        return install_direction_;
+    }
+
+    enum memEventType {
+        DataRequest = 0,
+        Response,
+        Writeback,
+        RoutedByAddr,
+        Invalid
+    };
+
+    SST::Output* getOutput() {
+        return out_;
+    }
+
+    SST::Output* getDebug() {
+        return dbg_;
+    }
+
+    uint32_t randUInt32(uint32_t start, uint32_t end);
+    int32_t randInt32(int32_t start, int32_t end);
+    uint64_t randUInt64(uint64_t start, uint64_t end);
+    int64_t randInt64(int64_t start, int64_t end);
+
+    double randFloat(double start, double end);
+
+protected:
+    // Pointers default to null so a default-constructed object never holds
+    // indeterminate values.
+    SST::Output* out_ = nullptr;
+    SST::Output* dbg_ = nullptr;
+    // fault objects owned by this injector; deleted in the destructor
+    std::vector<FaultBase*> fault;
+    // caller's cancel flag of the interceptHandler() call in progress
+    bool* cancel_ = nullptr;
+    installDirection install_direction_ = installDirection::Receive;
+    SST::RNG::MersenneRNG base_rng_;
+    uint64_t seed_ = 0;
+private:
+    // {receive allowed, send allowed}
+    std::array<bool, 2> valid_installation_ = {{false, false}};
+    bool valid_installs_set = false;
+protected:
+
+    virtual bool doInjection();
+    virtual void executeFaults(Event*& ev);
+
+    /**
+     * This function MUST be called by the derived class constructor
+     * @arg params pass the same params object to this function
+     * @arg valid_install pass either SEND_VALID, RECEIVE_VALID,
+     * or SEND_RECEIVE_VALID
+     */
+    void setValidInstallation(Params& params, std::array<bool, 2> valid_install);
+
+    void serialize_order(SST::Core::Serialization::serializer& ser) override
+    {
+        SST::PortModule::serialize_order(ser);
+        // serialize parameters like `SST_SER()`
+        SST_SER(out_);
+        SST_SER(dbg_);
+        SST_SER(fault);
+        // cancel_ is deliberately not serialized: it refers to a flag owned by
+        // the caller of interceptHandler() and is meaningless in a checkpoint.
+        SST_SER(install_direction_);
+        SST_SER(base_rng_);
+        SST_SER(seed_);
+        SST_SER(valid_installation_);
+        SST_SER(valid_installs_set);
+    }
+    ImplementVirtualSerializable(SST::Carcosa::FaultInjectorBase)
+};
+
+} // namespace SST::Carcosa
+
+#endif // SST_ELEMENTS_CARCOSA_FAULTINJECTORBASE_H
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/randomDropFaultInjector.cc b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.cc
new file mode 100644
index 0000000000..33f9bcc735
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.cc
@@ -0,0 +1,58 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/injectors/randomDropFaultInjector.h"
+#include "sst/elements/carcosa/faultlogic/randomDropFault.h"
+
+using namespace SST::Carcosa;
+
+/**
+ * Installs a single RandomDropFault; the module is only valid on the
+ * receive side of a port.
+ */
+RandomDropFaultInjector::RandomDropFaultInjector(Params& params) : FaultInjectorBase(params) {
+    // read injection probability
+    injection_probability_ = params.find<double>("injection_probability", 0.0);
+    // create fault
+    fault.push_back(new RandomDropFault(params, this));
+    setValidInstallation(params, RECEIVE_VALID);
+}
+
+/**
+ * Rolls once against injection_probability_.
+ *
+ * @return true if the event should be dropped
+ */
+bool RandomDropFaultInjector::doInjection() {
+    const double draw = this->randFloat(0.0, 1.0);
+    // The explicit "> 0.0" keeps a probability of 0.0 (the default) from ever
+    // firing, even when the draw itself is exactly 0.0.
+    const bool triggered = (this->injection_probability_ > 0.0) && (draw <= this->injection_probability_);
+#ifdef __SST_DEBUG_OUTPUT__
+    dbg_->debug(CALL_INFO_LONG, 1, 0, triggered ? "Injection triggered.\n" : "Injection skipped.\n");
+#endif
+    return triggered;
+}
+
+/**
+ * Custom execution is required to ensure delivery is canceled
+ *
+ * In the base interceptHandler, a reference to a boolean called
+ * 'cancel' is accepted as an argument. That function assigns the
+ * injector's pointer (called 'cancel_') to that reference's address,
+ * and that reference must be updated here after the event is destroyed
+ * if the installation direction of this PortModule was set to 'Receive'.
+ */
+void RandomDropFaultInjector::executeFaults(Event*& ev) {
+    if (fault.empty() || !fault[0]) {
+        out_->fatal(CALL_INFO_LONG, -1, "No valid fault object.\n");
+    }
+    // The base class only calls executeFaults() after doInjection() returned
+    // true; rolling again here would square the configured probability.
+    if (!fault[0]->faultLogic(ev)) {
+        out_->fatal(CALL_INFO_LONG, -1, "Fault execution failed.\n");
+    }
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/randomDropFaultInjector.h b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.h
new file mode 100644
index 0000000000..e1b3b901e2
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.h
@@ -0,0 +1,54 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_RANDOMDROPFAULTINJECTOR_H
+#define SST_ELEMENTS_CARCOSA_RANDOMDROPFAULTINJECTOR_H
+
+#include "sst/elements/carcosa/injectors/faultInjectorBase.h"
+
+namespace SST::Carcosa {
+
+/**
+ * PortModule that hands events to a RandomDropFault with a configurable
+ * probability.
+ */
+class RandomDropFaultInjector : public FaultInjectorBase {
+public:
+    SST_ELI_REGISTER_PORTMODULE(
+        RandomDropFaultInjector,
+        "carcosa",
+        "RandomDropFaultInjector",
+        SST_ELI_ELEMENT_VERSION(0, 2, 0),
+        "PortModule class used to simulate a data transfer lost at random"
+    )
+
+    SST_ELI_DOCUMENT_PARAMS(
+        {"injection_probability", "Probability for injection to randomly occur. Default = 0.0"}
+    )
+
+    RandomDropFaultInjector(Params& params);
+
+    RandomDropFaultInjector() = default;
+    ~RandomDropFaultInjector() {}
+protected:
+
+    // Default keeps a default-constructed object (checkpoint restart) well defined.
+    double injection_probability_ = 0.0;
+    bool doInjection() override;
+    void executeFaults(Event*& ev) override;
+
+    void serialize_order(SST::Core::Serialization::serializer& ser) override
+    {
+        // Go through FaultInjectorBase (which itself calls PortModule) so the
+        // fault list, RNG and install settings are part of the checkpoint.
+        FaultInjectorBase::serialize_order(ser);
+        SST_SER(injection_probability_);
+        // serialize parameters like `SST_SER()`
+    }
+    ImplementVirtualSerializable(SST::Carcosa::RandomDropFaultInjector)
+}; // class RandomDropFaultInjector
+
+} // namespace SST::Carcosa
+
+#endif
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.cc b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.cc
new file mode 100644
index 0000000000..999045c316
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.cc
@@ -0,0 +1,55 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#include "sst/elements/carcosa/injectors/randomFlipFaultInjector.h"
+#include "sst/elements/carcosa/faultlogic/randomFlipFault.h"
+
+using namespace SST::Carcosa;
+
+/**
+ * Installs a single RandomFlipFault; the module may sit on either the
+ * send or the receive side of a port.
+ */
+RandomFlipFaultInjector::RandomFlipFaultInjector(Params& params) : FaultInjectorBase(params) {
+    // read injection probability
+    this->injection_probability_ = params.find<double>("injection_probability", 0.0);
+#ifdef __SST_DEBUG_OUTPUT__
+    if (injection_probability_ > 0.0){
+        dbg_->debug(CALL_INFO_LONG, 1, 0, "Injection probability set to %lf.\n", injection_probability_);
+    }
+#endif
+
+    // create fault
+    fault.push_back(new RandomFlipFault(params, this));
+    setValidInstallation(params, SEND_RECEIVE_VALID);
+}
+
+/**
+ * Rolls once against injection_probability_.
+ *
+ * @return true if a flip should be injected into the event
+ */
+bool RandomFlipFaultInjector::doInjection() {
+    const double draw = this->randFloat(0.0, 1.0);
+    // The explicit "> 0.0" keeps a probability of 0.0 (the default) from ever
+    // firing, even when the draw itself is exactly 0.0.
+    const bool triggered = (this->injection_probability_ > 0.0) && (draw <= this->injection_probability_);
+#ifdef __SST_DEBUG_OUTPUT__
+    dbg_->debug(CALL_INFO_LONG, 1, 0, triggered ? "Injection triggered.\n" : "Injection skipped.\n");
+#endif
+    return triggered;
+}
+
+/**
+ * Runs the flip fault on the event and aborts if it reports failure.
+ */
+void RandomFlipFaultInjector::executeFaults(Event*& ev) {
+    if (fault.empty() || !fault[0]) {
+        out_->fatal(CALL_INFO_LONG, -1, "No valid fault object.\n");
+    }
+    // The base class only calls executeFaults() after doInjection() returned
+    // true; rolling again here would square the configured probability.
+    if (!fault[0]->faultLogic(ev)) {
+        out_->fatal(CALL_INFO_LONG, -1, "Fault execution failed.\n");
+    }
+}
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.h b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.h
new file mode 100644
index 0000000000..cf79131bef
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.h
@@ -0,0 +1,55 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Copyright (c) 2009-2025, NTESS
+// All rights reserved.
+//
+// This file is part of the SST software package. For license
+// information, see the LICENSE file in the top level directory of the
+// distribution.
+
+#ifndef SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULTINJECTOR_H
+#define SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULTINJECTOR_H
+
+#include "sst/elements/carcosa/injectors/faultInjectorBase.h"
+
+namespace SST::Carcosa {
+
+/**
+ * PortModule that hands events to a RandomFlipFault with a configurable
+ * probability.
+ */
+class RandomFlipFaultInjector : public FaultInjectorBase {
+public:
+    SST_ELI_REGISTER_PORTMODULE(
+        RandomFlipFaultInjector,
+        "carcosa",
+        "RandomFlipFaultInjector",
+        SST_ELI_ELEMENT_VERSION(0, 1, 0),
+        "PortModule class used to simulate a random bit flip when transferring data"
+    )
+
+    SST_ELI_DOCUMENT_PARAMS(
+        {"injection_probability", "Probability for fault injection to trigger. Default = 0.0"}
+    )
+
+    RandomFlipFaultInjector(Params& params);
+
+    RandomFlipFaultInjector() = default;
+    ~RandomFlipFaultInjector() {}
+protected:
+    // Default keeps a default-constructed object (checkpoint restart) well defined.
+    double injection_probability_ = 0.0;
+
+
+    bool doInjection() override;
+    void executeFaults(Event*& ev) override;
+
+    void serialize_order(SST::Core::Serialization::serializer& ser) override
+    {
+        // Go through FaultInjectorBase (which itself calls PortModule) so the
+        // fault list, RNG and install settings are part of the checkpoint.
+        FaultInjectorBase::serialize_order(ser);
+        // serialize parameters like `SST_SER()`
+        SST_SER(injection_probability_);
+    }
+    ImplementVirtualSerializable(SST::Carcosa::RandomFlipFaultInjector)
+}; // class RandomFlipFaultInjector
+
+} // namespace SST::Carcosa
+
+#endif
\ No newline at end of file
diff --git a/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.cc b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.cc
new file mode 100644
index 0000000000..f8968a63c4
--- /dev/null
+++ b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.cc
@@ -0,0 +1,21 @@
+// Copyright 2009-2025 NTESS. Under the terms
+// of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#include "sst/elements/carcosa/injectors/stuckAtFaultInjector.h" +#include "sst/elements/carcosa/faultlogic/stuckAtFault.h" + +using namespace SST::Carcosa; + +StuckAtFaultInjector::StuckAtFaultInjector(Params& params) : FaultInjectorBase(params) { + // create fault + fault.push_back(new StuckAtFault(params, this)); + setValidInstallation(params, SEND_RECEIVE_VALID); +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.h b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.h new file mode 100644 index 0000000000..87b1157c3d --- /dev/null +++ b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.h @@ -0,0 +1,51 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_STUCKATFAULTINJECTOR_H +#define SST_ELEMENTS_CARCOSA_STUCKATFAULTINJECTOR_H + +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" +#include + +namespace SST::Carcosa { + +class StuckAtFaultInjector : public FaultInjectorBase { +public: + SST_ELI_REGISTER_PORTMODULE( + StuckAtFaultInjector, + "carcosa", + "StuckAtFaultInjector", + SST_ELI_ELEMENT_VERSION(0, 1, 0), + "PortModule class used to simulate a stuck bit within a given component" + ) + + SST_ELI_DOCUMENT_PARAMS( + {"masks", "String array formatted as [\"addr0, byte0, zeroMask0, oneMask0\",...,\"addrN, byteN, zeroMaskN, oneMaskN\"]." 
\ + "Addresses are expected to be in hexadecimal, and masks are 8 bit strings."}, + {"endianness", "Byte ordering in memory. Given as a string containing \'little\' or \'big\'. Default: little"} + ) + + StuckAtFaultInjector(Params& params); + + StuckAtFaultInjector() = default; + ~StuckAtFaultInjector() {} +protected: + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + SST::PortModule::serialize_order(ser); + // serialize parameters like `SST_SER()` + } + ImplementVirtualSerializable(SST::Carcosa::StuckAtFaultInjector) +}; // class StuckAtFaultInjector + +} // namespace SST::Carcosa + +#endif \ No newline at end of file diff --git a/src/sst/elements/carcosa/tests/testCorruptMemBasic.py b/src/sst/elements/carcosa/tests/testCorruptMemBasic.py new file mode 100644 index 0000000000..54b8785484 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testCorruptMemBasic.py @@ -0,0 +1,570 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = 
"simpleTLB" +mmuType = "simpleMMU" + +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU")
+
+if (verbosity > 0):
+    print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type)
+    print("Auto-clock syscalls: " + str(os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no")))  # read inline: this script never assigns auto_clock_sys
+# vanadis_cpu_type = "vanadisdbg.VanadisCPU"
+
+app_args = os.getenv("VANADIS_EXE_ARGS", "")
+
+app_params = {}
+if app_args != "":
+    app_args_list = app_args.split(" ")
+    # We have a plus 1 because the executable name is arg0
+    app_args_count = len( app_args_list ) + 1
+
+    app_params["argc"] = app_args_count
+
+    if (verbosity > 0):
+        print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.")
+    arg_start = 1
+    for next_arg in app_args_list:
+        if (verbosity > 0):
+            print("arg" + str(arg_start) + " = " + next_arg)
+        app_params["arg" + str(arg_start)] = next_arg
+        arg_start = arg_start + 1
+else:
+    app_params["argc"] = 1
+    if (verbosity > 0):
+        print("No application arguments found, continuing with argc=1")
+
+vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder"
+vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler"
+
+
+protocol="MESI"
+
+# OS related params
+osParams = {
+    "processDebugLevel" : 0,
+    "dbgLevel" : os_verbosity,
+    "dbgMask" : 8,
+    "cores" : numCpus,
+    "hardwareThreadCount" : numThreads,
+    "page_size" : 4096,
+    "physMemSize" : physMemSize,
+    "useMMU" : True,
+    "checkpointDir" : checkpointDir,
+    "checkpoint" : checkpoint
+}
+
+processList = (
+    ( 1, {
+        "env_count" : 1,
+        "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads),
+        "exe" : full_exe_name,
+        "arg0" : exe_name,
+    } ),
+)
+
+processList[0][1].update(app_params)  # merge argc/argN into the single process entry
+
+osl1cacheParams = {
+    "access_latency_cycles" : "2",
+    "cache_frequency" : cpu_clock,
+    "replacement_policy" : "lru",
+    "coherence_protocol" : protocol,
+    "associativity" : "8",
+    "cache_line_size" : "64",
+    "cache_size" : "32 KB",
+    "L1" : "1",
+    "debug" : mh_debug,
+    "debug_level" : mh_debug_level,
+}
+
+mmuParams = {
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decocer.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L2 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 buss + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +memctrl.addPortModule("highlink", "carcosa.CorruptMemFaultInjector", { + "install_direction": "Receive", + "regions": ["4D88, 4DA0"], + "debug" : 1, + "debug_level": 2 +}) + +# node memory 
controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + 
link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testCorruptMemDouble.py b/src/sst/elements/carcosa/tests/testCorruptMemDouble.py new file mode 100644 index 0000000000..d38f90d7ba --- /dev/null +++ b/src/sst/elements/carcosa/tests/testCorruptMemDouble.py @@ -0,0 +1,570 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST core 
options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU")
+
+if (verbosity > 0):
+    print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type)
+    print("Auto-clock syscalls: " + str(os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no")))  # read inline: this script never assigns auto_clock_sys
+# vanadis_cpu_type = "vanadisdbg.VanadisCPU"
+
+app_args = os.getenv("VANADIS_EXE_ARGS", "")
+
+app_params = {}
+if app_args != "":
+    app_args_list = app_args.split(" ")
+    # We have a plus 1 because the executable name is arg0
+    app_args_count = len( app_args_list ) + 1
+
+    app_params["argc"] = app_args_count
+
+    if (verbosity > 0):
+        print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.")
+    arg_start = 1
+    for next_arg in app_args_list:
+        if (verbosity > 0):
+            print("arg" + str(arg_start) + " = " + next_arg)
+        app_params["arg" + str(arg_start)] = next_arg
+        arg_start = arg_start + 1
+else:
+    app_params["argc"] = 1
+    if (verbosity > 0):
+        print("No application arguments found, continuing with argc=1")
+
+vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder"
+vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler"
+
+
+protocol="MESI"
+
+# OS related params
+osParams = {
+    "processDebugLevel" : 0,
+    "dbgLevel" : os_verbosity,
+    "dbgMask" : 8,
+    "cores" : numCpus,
+    "hardwareThreadCount" : numThreads,
+    "page_size" : 4096,
+    "physMemSize" : physMemSize,
+    "useMMU" : True,
+    "checkpointDir" : checkpointDir,
+    "checkpoint" : checkpoint
+}
+
+processList = (
+    ( 1, {
+        "env_count" : 1,
+        "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads),
+        "exe" : full_exe_name,
+        "arg0" : exe_name,
+    } ),
+)
+
+processList[0][1].update(app_params)  # merge argc/argN into the single process entry
+
+osl1cacheParams = {
+    "access_latency_cycles" : "2",
+    "cache_frequency" : cpu_clock,
+    "replacement_policy" : "lru",
+    "coherence_protocol" : protocol,
+    "associativity" : "8",
+    "cache_line_size" : "64",
+    "cache_size" : "32 KB",
+    "L1" : "1",
+    "debug" : mh_debug,
+    "debug_level" : mh_debug_level,
+}
+
+mmuParams = {
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decocer.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L2 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 buss + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +memctrl.addPortModule("highlink", "carcosa.CorruptMemFaultInjector", { + "install_direction": "Receive", + "regions": ["4D88, 4D90", "4DA0, 4DA8"], + "debug" : 1, + "debug_level": 2 +}) + +# node 
memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + 
link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testCorruptMemDoubleOverlap.py b/src/sst/elements/carcosa/tests/testCorruptMemDoubleOverlap.py new file mode 100644 index 0000000000..c3693a7b21 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testCorruptMemDoubleOverlap.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + 
+# Define SST core options
+# NOTE(review): a "stop-at" of "0 ns" presumably means "no forced stop time" - confirm against the SST core docs.
+sst.setProgramOption("timebase", "1ps")
+sst.setProgramOption("stop-at", "0 ns")
+
+# Tell SST what statistics handling we want
+sst.setStatisticLoadLevel(4)
+sst.setStatisticOutput("sst.statOutputConsole")
+
+# The executable is hard-coded for this test; the trailing comment is the former
+# VANADIS_EXE-based lookup, which is currently disabled.
+full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe )
+# Last path component of the executable; used below as the process's arg0.
+exe_name= full_exe_name.split("/")[-1]
+
+# Tunables read from the environment. Values taken from the environment are
+# strings, while the fallbacks below are ints unless wrapped in int().
+verbosity = int(os.getenv("VANADIS_VERBOSE", 0))
+os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity)
+pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "")
+lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16)
+lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8)
+
+rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64)
+retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4)
+issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4)
+decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4)
+
+integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2))
+integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2))
+fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8))
+fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2))
+branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2))
+
+cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz")
+
+# Core and hardware-thread counts; both are used in arithmetic later, hence int().
+numCpus = int(os.getenv("VANADIS_NUM_CORES", 1))
+numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1))
+
+# Element-library prefix; the component name is appended by the next statement.
+vanadis_cpu_type = "vanadis."
+# Complete the CPU component type; VANADIS_CPU_ELEMENT_NAME overrides the default.
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU")
+
+if (verbosity > 0):
+    print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type)
+    # The former "Auto-clock syscalls" print was removed: it referenced
+    # auto_clock_sys, which this script never defines, so every verbose run
+    # died with NameError before any component was created.
+#    vanadis_cpu_type = "vanadisdbg.VanadisCPU"
+
+# Optional, space-separated arguments for the simulated executable.
+app_args = os.getenv("VANADIS_EXE_ARGS", "")
+
+# Builds argc/argN entries that are merged into the first process's parameters below.
+app_params = {}
+if app_args != "":
+    app_args_list = app_args.split(" ")
+    # We have a plus 1 because the executable name is arg0
+    app_args_count = len( app_args_list ) + 1
+
+    app_params["argc"] = app_args_count
+
+    if (verbosity > 0):
+        print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.")
+    # arg0 is set separately in processList, so user arguments start at arg1.
+    arg_start = 1
+    for next_arg in app_args_list:
+        if (verbosity > 0):
+            print("arg" + str(arg_start) + " = " + next_arg)
+        app_params["arg" + str(arg_start)] = next_arg
+        arg_start = arg_start + 1
+else:
+    app_params["argc"] = 1
+    if (verbosity > 0):
+        print("No application arguments found, continuing with argc=1")
+
+# Decoder / OS-handler subcomponent names are derived from the selected ISA.
+vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder"
+vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler"
+
+
+protocol="MESI"
+
+# OS related params
+osParams = {
+    "processDebugLevel" : 0,
+    "dbgLevel" : os_verbosity,
+    "dbgMask" : 8,
+    "cores" : numCpus,
+    "hardwareThreadCount" : numThreads,
+    "page_size" : 4096,
+    "physMemSize" : physMemSize,
+    "useMMU" : True,
+    "checkpointDir" : checkpointDir,
+    "checkpoint" : checkpoint
+}
+
+# Each entry is (count, params); the node-OS setup later repeats the params
+# `count` times under successive "processN." prefixes.
+processList = (
+    ( 1, {
+        "env_count" : 1,
+        "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads),
+        "exe" : full_exe_name,
+        "arg0" : exe_name,
+    } ),
+)
+
+# Merge argc/argN into the first (and only) process description.
+processList[0][1].update(app_params)
+
+osl1cacheParams = {
+    "access_latency_cycles" : "2",
+    "cache_frequency" : cpu_clock,
+    "replacement_policy" : "lru",
+    "coherence_protocol" : protocol,
+    "associativity" : "8",
+    "cache_line_size" : "64",
+    "cache_size" : "32 KB",
+    "L1" : "1",
+    "debug" : mh_debug,
+    "debug_level" : mh_debug_level,
+}
+
+mmuParams = { +
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decocer.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L2 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 buss + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +#TODO: fix bug here :) +memctrl.addPortModule("highlink", "carcosa.CorruptMemFaultInjector", { + "install_direction": "Receive", + "regions": ["4D88, 4DA0", "4D90, 4DA8"], + "debug" : 1, + 
"debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix 
+ ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testRandomDrop.py b/src/sst/elements/carcosa/tests/testRandomDrop.py new file mode 100644 index 0000000000..716086b8de --- /dev/null +++ b/src/sst/elements/carcosa/tests/testRandomDrop.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST 
core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+# Complete the CPU component type; VANADIS_CPU_ELEMENT_NAME overrides the default.
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU")
+
+if (verbosity > 0):
+    print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type)
+    # The former "Auto-clock syscalls" print was removed: it referenced
+    # auto_clock_sys, which this script never defines, so every verbose run
+    # died with NameError before any component was created.
+#    vanadis_cpu_type = "vanadisdbg.VanadisCPU"
+
+# Optional, space-separated arguments for the simulated executable.
+app_args = os.getenv("VANADIS_EXE_ARGS", "")
+
+# Builds argc/argN entries that are merged into the first process's parameters below.
+app_params = {}
+if app_args != "":
+    app_args_list = app_args.split(" ")
+    # We have a plus 1 because the executable name is arg0
+    app_args_count = len( app_args_list ) + 1
+
+    app_params["argc"] = app_args_count
+
+    if (verbosity > 0):
+        print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.")
+    # arg0 is set separately in processList, so user arguments start at arg1.
+    arg_start = 1
+    for next_arg in app_args_list:
+        if (verbosity > 0):
+            print("arg" + str(arg_start) + " = " + next_arg)
+        app_params["arg" + str(arg_start)] = next_arg
+        arg_start = arg_start + 1
+else:
+    app_params["argc"] = 1
+    if (verbosity > 0):
+        print("No application arguments found, continuing with argc=1")
+
+# Decoder / OS-handler subcomponent names are derived from the selected ISA.
+vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder"
+vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler"
+
+
+protocol="MESI"
+
+# OS related params
+osParams = {
+    "processDebugLevel" : 0,
+    "dbgLevel" : os_verbosity,
+    "dbgMask" : 8,
+    "cores" : numCpus,
+    "hardwareThreadCount" : numThreads,
+    "page_size" : 4096,
+    "physMemSize" : physMemSize,
+    "useMMU" : True,
+    "checkpointDir" : checkpointDir,
+    "checkpoint" : checkpoint
+}
+
+# Each entry is (count, params); the node-OS setup later repeats the params
+# `count` times under successive "processN." prefixes.
+processList = (
+    ( 1, {
+        "env_count" : 1,
+        "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads),
+        "exe" : full_exe_name,
+        "arg0" : exe_name,
+    } ),
+)
+
+# Merge argc/argN into the first (and only) process description.
+processList[0][1].update(app_params)
+
+osl1cacheParams = {
+    "access_latency_cycles" : "2",
+    "cache_frequency" : cpu_clock,
+    "replacement_policy" : "lru",
+    "coherence_protocol" : protocol,
+    "associativity" : "8",
+    "cache_line_size" : "64",
+    "cache_size" : "32 KB",
+    "L1" : "1",
+    "debug" : mh_debug,
+    "debug_level" : mh_debug_level,
+}
+
+mmuParams = { +
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decocer.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L2 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 buss + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.RandomDropFaultInjector", { + "install_direction": "Receive", + "injection_probability": 0.01, + "debug" : 1, + 
"debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix 
+ ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testRandomFlip.py b/src/sst/elements/carcosa/tests/testRandomFlip.py new file mode 100644 index 0000000000..cd251ef2c8 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testRandomFlip.py @@ -0,0 +1,572 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST 
core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no"))) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { +
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu."
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.RandomFlipFaultInjector", { + "install_direction": "Receive", + "injection_probability": 0.001, + #"seed": 156, + "debug" : 
1, + "debug_level": 1 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = 
sst.Link(prefix + ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtBasic.py b/src/sst/elements/carcosa/tests/testStuckAtBasic.py new file mode 100644 index 0000000000..29ece08d5f --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtBasic.py @@ -0,0 +1,570 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = 
"simpleMMU" + +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no"))) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { +
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", { + "install_direction": "Receive", + "masks": ["4D88, 3, 11110000, 00001111"], + "debug" : 1, + "debug_level": 2 +}) + +# node 
memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + 
link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtMultiple.py b/src/sst/elements/carcosa/tests/testStuckAtMultiple.py new file mode 100644 index 0000000000..467a22c30f --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtMultiple.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST core 
options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(auto_clock_sys)) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", { + "intall_direction": "Receive", + "masks": ["4D88, 3, 11110000, 00001111", "4D90, 3, 11110000, 
00001111"], + "debug" : 1, + "debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + 
link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtOverlap.py b/src/sst/elements/carcosa/tests/testStuckAtOverlap.py new file mode 100644 index 0000000000..945e8880b5 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtOverlap.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = 
"simpleTLB" +mmuType = "simpleMMU" + +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(auto_clock_sys)) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", { + "install_direction": "Receive", + "masks": ["4D88, 3, 11111111, 11111111"], + "debug" : 1, + 
"debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix 
+ ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtSameByte.py b/src/sst/elements/carcosa/tests/testStuckAtSameByte.py new file mode 100644 index 0000000000..11b7b86987 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtSameByte.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" 
+ +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) +auto_clock_sys = os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no") # must exist: the verbose banner below prints it when VANADIS_VERBOSE > 0 +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(auto_clock_sys)) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", { + "install_direction": "Receive", + "masks": ["4D88, 3, 11110000, 00001111", "4D88, 5, 11110000, 
00001111"], + "debug" : 1, + "debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + 
link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) +