diff --git a/.gitignore b/.gitignore
index 6551a7a6..a2dc6bf9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@ scripts/compile.tcl
regression_tests/
pulp-runtime/
fault_injection_sim/
+venv/
+.venv/
diff --git a/Bender.lock b/Bender.lock
index c3a920be..7dc17323 100644
--- a/Bender.lock
+++ b/Bender.lock
@@ -7,8 +7,8 @@ packages:
dependencies:
- common_cells
axi:
- revision: 853ede23b2a9837951b74dbdc6d18c3eef5bac7d
- version: 0.39.5
+ revision: 78831b6feba265d5ee2683bbf42b4150f8a35c43
+ version: 0.39.8
source:
Git: https://github.com/pulp-platform/axi.git
dependencies:
@@ -16,16 +16,16 @@ packages:
- common_verification
- tech_cells_generic
axi2mem:
- revision: b0e963433b2f6a61262b1448031e74eaec57c203
- version: null
+ revision: be0c696709acaee579787ba2432d26ad27640594
+ version: 1.0.2
source:
Git: https://github.com/pulp-platform/axi2mem.git
dependencies:
- axi_slice
- common_cells
axi2per:
- revision: 4932bd2b88a1c7b5f0bf95411fc512905ed32439
- version: null
+ revision: e8ca052a745e184ca960933b2fe416b725e9ca81
+ version: 1.0.2
source:
Git: https://github.com/pulp-platform/axi2per.git
dependencies:
@@ -37,47 +37,53 @@ packages:
Git: https://github.com/pulp-platform/axi_slice.git
dependencies:
- common_cells
+ axi_stream:
+ revision: 54891ff40455ca94a37641b9da4604647878cc07
+ version: 0.1.1
+ source:
+ Git: https://github.com/pulp-platform/axi_stream.git
+ dependencies:
+ - common_cells
cluster_icache:
- revision: dd0e8f3497903a9ca99fc9f349d5a4f688ceb3ae
- version: null
+ revision: 64e21ae455bbdde850c4df13bef86ea55ac42537
+ version: 0.2.0
source:
Git: https://github.com/pulp-platform/cluster_icache.git
dependencies:
- axi
- common_cells
- - register_interface
- scm
- tech_cells_generic
cluster_interconnect:
- revision: 1284def6c0b7f7e9355eb093d00883ad9dead1b7
- version: null
+ revision: 2967d8d17be0a6139229ca8d3d4956e182aec3de
+ version: 1.3.0
source:
Git: https://github.com/pulp-platform/cluster_interconnect.git
dependencies:
- common_cells
cluster_peripherals:
- revision: 0b8e8ab9e6be3a5030a18256bb7e75cf6b6f6cac
- version: null
+ revision: e464eb9ddcc39e5a50009819601c4f213b1d4ba3
+ version: 2.2.0
source:
Git: https://github.com/pulp-platform/cluster_peripherals.git
dependencies:
- hci
common_cells:
- revision: c27bce39ebb2e6bae52f60960814a2afca7bd4cb
- version: 1.37.0
+ revision: 9afda9abb565971649c2aa0985639c096f351171
+ version: 1.38.0
source:
Git: https://github.com/pulp-platform/common_cells.git
dependencies:
- common_verification
- tech_cells_generic
common_verification:
- revision: 9c07fa860593b2caabd9b5681740c25fac04b878
- version: 0.2.3
+ revision: fb1885f48ea46164a10568aeff51884389f67ae3
+ version: 0.2.5
source:
Git: https://github.com/pulp-platform/common_verification.git
dependencies: []
cv32e40p:
- revision: e863f576699815b38cc9d80dbdede8ed5efd5991
+ revision: 1a93f340e9dadb9f7c8c471f27a40932c8b1c62e
version: null
source:
Git: https://github.com/pulp-platform/cv32e40p.git
@@ -92,6 +98,14 @@ packages:
Git: https://github.com/pulp-platform/event_unit_flex.git
dependencies:
- common_cells
+ flex-v:
+ revision: e9355c2f6ec4c105abdff39489e5d1be93bc4374
+ version: null
+ source:
+ Git: https://github.com/pulp-platform/flex-v.git
+ dependencies:
+ - fpnew
+ - tech_cells_generic
fpnew:
revision: a8e0cba6dd50f357ece73c2c955d96efc3c6c315
version: null
@@ -108,7 +122,7 @@ packages:
dependencies:
- common_cells
hci:
- revision: aed9005c761827c6cbff2ea9a15f9cc37acd1169
+ revision: 0faa4f3cdfc87ead4a10e5c307c00aa6c4504d0d
version: null
source:
Git: https://github.com/pulp-platform/hci.git
@@ -120,8 +134,8 @@ packages:
- redundancy_cells
- register_interface
hier-icache:
- revision: 2886cb2a46cea3e2bd2d979b505d88fadfbe150c
- version: null
+ revision: 7243834d2407ca23cff583d57641c84b982bd9bc
+ version: 1.3.0
source:
Git: https://github.com/pulp-platform/hier-icache.git
dependencies:
@@ -139,8 +153,8 @@ packages:
dependencies:
- tech_cells_generic
hwpe-stream:
- revision: 65c99a4a2f37a79acee800ab0151f67dfb1edef1
- version: 1.8.0
+ revision: 3bc9694705b72a5b9bddc7fcde5091b9e45ba0c8
+ version: null
source:
Git: https://github.com/pulp-platform/hwpe-stream.git
dependencies:
@@ -159,14 +173,16 @@ packages:
Git: https://github.com/pulp-platform/icache-intc.git
dependencies: []
idma:
- revision: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1
- version: null
+ revision: ff5d56fffb3767814db88d6bf8f381974ea33aa5
+ version: 0.6.4
source:
- Git: https://github.com/pulp-platform/iDMA.git
+ Git: https://github.com/pulp-platform/idma.git
dependencies:
- axi
+ - axi_stream
- common_cells
- common_verification
+ - obi
- register_interface
l2_tcdm_hybrid_interco:
revision: fa55e72859dcfb117a2788a77352193bef94ff2b
@@ -175,14 +191,14 @@ packages:
Git: https://github.com/pulp-platform/L2_tcdm_hybrid_interco.git
dependencies: []
mchan:
- revision: 7f064f205a3e0203e959b14773c4afecf56681ab
- version: null
+ revision: 3f2ae92f78e2ddbd0e079cbb4f81fcc248171c12
+ version: 1.2.4
source:
Git: https://github.com/pulp-platform/mchan.git
dependencies:
- common_cells
neureka:
- revision: 94528df2bc6d5eedc0439bd403c2ad005f0a7519
+ revision: 5ff2b6bc0a04de07eb2549a599655fb6d7f99c58
version: null
source:
Git: https://github.com/pulp-platform/neureka.git
@@ -190,18 +206,25 @@ packages:
- hci
- hwpe-ctrl
- hwpe-stream
- - register_interface
- zeroriscy
+ obi:
+ revision: c2141a653c755461ff44f61d12aeb5d99fc8e760
+ version: 0.1.3
+ source:
+ Git: https://github.com/pulp-platform/obi.git
+ dependencies:
+ - common_cells
+ - common_verification
per2axi:
- revision: 95bf23119b47fc171d9ed3734c431f71cffd9350
- version: null
+ revision: 18cf4f2ad51b73de0448843ce0def54ab5fb274b
+ version: 1.0.5
source:
Git: https://github.com/pulp-platform/per2axi.git
dependencies:
- axi_slice
redmule:
revision: 9223ccc932e21d0667e9c2d30831db41eec9299e
- version:
+ version: null
source:
Git: https://github.com/pulp-platform/redmule.git
dependencies:
@@ -214,7 +237,7 @@ packages:
- register_interface
- tech_cells_generic
redundancy_cells:
- revision: 49e714b97a19a7aaddf064ae2757c8f02d1f62dc
+ revision: 9e31f7c6c24877eaf58279903e7a162b16c9a721
version: null
source:
Git: https://github.com/pulp-platform/redundancy_cells.git
@@ -224,8 +247,8 @@ packages:
- register_interface
- tech_cells_generic
register_interface:
- revision: 5daa85d164cf6b54ad061ea1e4c6f3624556e467
- version: 0.4.5
+ revision: 8e8c209ea559d3b54f45cf30fcce95ce70ff5e49
+ version: 0.4.6
source:
Git: https://github.com/pulp-platform/register_interface.git
dependencies:
@@ -233,23 +256,15 @@ packages:
- axi
- common_cells
- common_verification
- flex-v:
- revision: c760db14dbd6cc3ec3b8ae8274df2eac7225bcac
- version: null
- source:
- Git: https://github.com/pulp-platform/flex-v.git
- dependencies:
- - fpnew
- - tech_cells_generic
scm:
- revision: 74426dee36f28ae1c02f7635cf844a0156145320
- version: null
+ revision: 472f99affe44ff7b282b519c047a3cfeb35b16c6
+ version: 1.2.0
source:
Git: https://github.com/pulp-platform/scm.git
dependencies:
- tech_cells_generic
softex:
- revision: 31e7534a3da3244f2f5c08bab430a560beee6ff2
+ revision: 11dd29e85d40e29fea0481b471f1c0cc967df1a4
version: null
source:
Git: https://github.com/belanoa/softex.git
diff --git a/Bender.yml b/Bender.yml
index db26c097..a30679b6 100644
--- a/Bender.yml
+++ b/Bender.yml
@@ -18,7 +18,7 @@ dependencies:
cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", rev: v1.3.0 }
event_unit_flex: { git: "https://github.com/pulp-platform/event_unit_flex.git", rev: astral-v1.0 }
mchan: { git: "https://github.com/pulp-platform/mchan.git", rev: v1.2.4 }
- idma: { git: "https://github.com/pulp-platform/iDMA.git", rev: v0.6.3 }
+ idma: { git: "https://github.com/pulp-platform/idma.git", rev: v0.6.4 }
hier-icache: { git: "https://github.com/pulp-platform/hier-icache.git", rev: v1.3.0 }
cluster_icache: { git: "https://github.com/pulp-platform/cluster_icache.git", rev: v0.2.0 }
cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", rev: v2.2.0 }
@@ -30,12 +30,13 @@ dependencies:
cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 }
ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" }
scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 }
- hci: { git: "https://github.com/pulp-platform/hci.git", rev: astral-v1.0 }
- register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 }
- redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: astral-v1.0 }
+ hci: { git: "https://github.com/pulp-platform/hci.git", rev: 0faa4f3cdfc87ead4a10e5c307c00aa6c4504d0d } # branch: feat/multicyclesupport
+ register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.5 }
+ redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 9e31f7c6c24877eaf58279903e7a162b16c9a721 } # branch: astral-v0
redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 }
- neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: astral-v1.0 }
+ neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: 5ff2b6bc0a04de07eb2549a599655fb6d7f99c58 } # branch: feat/multicyclesupport
softex: { git: "https://github.com/belanoa/softex.git" , rev: astral-v1.0 }
+ obi: { git: "https://github.com/pulp-platform/obi.git", rev: v0.1.3 }
export_include_dirs:
- include
@@ -95,4 +96,3 @@ sources:
files:
- nonfree/gf12/sourcecode/tc_sram.sv
- nonfree/gf12/sourcecode/tc_clk.sv
-
diff --git a/Makefile b/Makefile
index a910fa42..05facc53 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ QUESTA ?=
endif
BENDER ?= bender
+PYTHON ?= python3
VSIM ?= $(QUESTA) vsim
VOPT ?= $(QUESTA) vopt
@@ -26,6 +27,12 @@ REGRESSIONS := $(ROOT_DIR)/regression_tests
VLOG_ARGS += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 -timescale \"1 ns / 1 ps\" \"+incdir+$(shell pwd)/include\"
+# Optional override of the testbench's wide DMA port enable; when set, it is forwarded to vlog as a define
+export TB_ENABLE_WIDE_PORT
+ifneq ($(strip $(TB_ENABLE_WIDE_PORT)),)
+VLOG_ARGS += +define+TB_ENABLE_WIDE_PORT=$(TB_ENABLE_WIDE_PORT)
+endif
+
define generate_vsim
echo 'set ROOT [file normalize [file dirname [info script]]/$3]' > $1
$(BENDER) script vsim --vlog-arg="$(VLOG_ARGS)" $2 | grep -v "set ROOT" >> $1
@@ -37,10 +44,12 @@ endef
######################
NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git
-NONFREE_COMMIT ?= beb98ce
+#NONFREE_COMMIT ?= 6f5b4b5aa85b6f3ac4bbe03439dd250ab4810d80 # branch: dkeller/chimera-v2
+NONFREE_BRANCH ?= dkeller/chimera-v2
nonfree-init:
- git clone $(NONFREE_REMOTE) nonfree
+ rm -rf nonfree;
+ git clone --single-branch --branch $(NONFREE_BRANCH) $(NONFREE_REMOTE) nonfree
cd nonfree && git checkout $(NONFREE_COMMIT)
################
@@ -62,6 +71,8 @@ Bender.lock:
$(BENDER) checkout
touch Bender.lock
+update:
+ $(BENDER) update
######
# SW #
@@ -75,28 +86,129 @@ sw-clean:
## Clone pulp-runtime as SW stack
PULP_RUNTIME_REMOTE ?= https://github.com/pulp-platform/pulp-runtime.git
-PULP_RUNTIME_COMMIT ?= 1e3bccf # branch: lg/upstream
+PULP_RUNTIME_REF ?= dkeller/chimera-v2
+PULP_RUNTIME_DIR ?= $(ROOT_DIR)/pulp-runtime
+# Lock (optional)
+PULP_RUNTIME_LOCK_FILE ?= pulp-runtime.lock
+PULP_RUNTIME_COMMIT := $(shell test -f $(PULP_RUNTIME_LOCK_FILE) && cat $(PULP_RUNTIME_LOCK_FILE) || echo)
pulp-runtime:
- git clone $(PULP_RUNTIME_REMOTE) $@
- cd $@ && git checkout $(PULP_RUNTIME_COMMIT)
+ @if [ -d "$(PULP_RUNTIME_DIR)/.git" ]; then \
+ echo "[pulp-runtime] Using existing repo at $(PULP_RUNTIME_DIR)"; \
+ git -C $(PULP_RUNTIME_DIR) fetch --tags --all; \
+ else \
+ echo "[pulp-runtime] Cloning $(PULP_RUNTIME_REMOTE)"; \
+ git clone $(PULP_RUNTIME_REMOTE) $(PULP_RUNTIME_DIR); \
+ fi
+ @if [ -n "$(PULP_RUNTIME_COMMIT)" ]; then \
+ echo "[pulp-runtime] Checking out locked commit $(PULP_RUNTIME_COMMIT)"; \
+ git -C $(PULP_RUNTIME_DIR) checkout --detach $(PULP_RUNTIME_COMMIT); \
+ else \
+ echo "[pulp-runtime] Checking out ref $(PULP_RUNTIME_REF)"; \
+ git -C $(PULP_RUNTIME_DIR) checkout $(PULP_RUNTIME_REF) || true; \
+ git -C $(PULP_RUNTIME_DIR) pull --ff-only || true; \
+ fi
+
+lock-runtime:
+ @git -C $(PULP_RUNTIME_DIR) rev-parse HEAD > $(PULP_RUNTIME_LOCK_FILE) && \
+ echo "Locked pulp-runtime to $$(cat $(PULP_RUNTIME_LOCK_FILE))"
+
+unlock-runtime:
+ @rm -f $(PULP_RUNTIME_LOCK_FILE) && echo "Unlocked pulp-runtime (will track $(PULP_RUNTIME_REF))"
## Clone fault injection scripts
FAULT_SIM_REMOTE ?= https://github.com/pulp-platform/InjectaFault.git
-FAULT_SIM_COMMIT ?= 84ddcff # branch: rt/rename-var
+FAULT_SIM_BRANCH ?= rt/rename-var
+FAULT_SIM_DIR ?= $(ROOT_DIR)/fault_injection_sim
+# Lock (optional)
+FAULT_SIM_LOCK_FILE ?= fault_injection_sim.lock
+FAULT_SIM_COMMIT := $(shell test -f $(FAULT_SIM_LOCK_FILE) && cat $(FAULT_SIM_LOCK_FILE) || echo)
fault_injection_sim:
- git clone $(FAULT_SIM_REMOTE) $@
- cd $@ && git checkout $(FAULT_SIM_COMMIT)
+ @if [ -d "$(FAULT_SIM_DIR)/.git" ]; then \
+ echo "[fault_injection_sim] Using existing repo at $(FAULT_SIM_DIR)"; \
+ git -C $(FAULT_SIM_DIR) fetch --tags --all; \
+ else \
+ echo "[fault_injection_sim] Cloning $(FAULT_SIM_REMOTE)"; \
+ git clone $(FAULT_SIM_REMOTE) $(FAULT_SIM_DIR); \
+ fi
+ @if [ -n "$(FAULT_SIM_COMMIT)" ]; then \
+ echo "[fault_injection_sim] Checking out locked commit $(FAULT_SIM_COMMIT)"; \
+ git -C $(FAULT_SIM_DIR) checkout --detach $(FAULT_SIM_COMMIT); \
+ else \
+ echo "[fault_injection_sim] Checking out ref $(FAULT_SIM_BRANCH)"; \
+ git -C $(FAULT_SIM_DIR) checkout $(FAULT_SIM_BRANCH) || true; \
+ git -C $(FAULT_SIM_DIR) pull --ff-only || true; \
+ fi
+
+lock-fault-sim:
+ @git -C $(FAULT_SIM_DIR) rev-parse HEAD > $(FAULT_SIM_LOCK_FILE) && \
+ echo "Locked fault_injection_sim to $$(cat $(FAULT_SIM_LOCK_FILE))"
+
+unlock-fault-sim:
+ @rm -f $(FAULT_SIM_LOCK_FILE) && echo "Unlocked fault_injection_sim (will track $(FAULT_SIM_BRANCH))"
## Clone regression tests
REGRESSION_TESTS_REMOTE ?= https://github.com/pulp-platform/regression_tests.git
-REGRESSION_TESTS_COMMIT ?= dd7ef99 # branch: lg/upstream
+REGRESSION_TESTS_BRANCH ?= dkeller/chimera-v2
+REGRESSION_TESTS_DIR ?= $(ROOT_DIR)/regression_tests
+# Lock (optional)
+REGRESSION_TESTS_LOCK_FILE ?= regression_tests.lock
+REGRESSION_TESTS_COMMIT := $(shell test -f $(REGRESSION_TESTS_LOCK_FILE) && cat $(REGRESSION_TESTS_LOCK_FILE) || echo)
regression_tests:
- git clone $(REGRESSION_TESTS_REMOTE) $@
- cd $@ && git checkout $(REGRESSION_TESTS_COMMIT)
- cd $@ && git submodule update --init --recursive
+ @if [ -d "$(REGRESSION_TESTS_DIR)/.git" ]; then \
+ echo "[regression_tests] Using existing repo at $(REGRESSION_TESTS_DIR)"; \
+ git -C $(REGRESSION_TESTS_DIR) fetch --tags --all; \
+ else \
+ echo "[regression_tests] Cloning $(REGRESSION_TESTS_REMOTE)"; \
+ git clone $(REGRESSION_TESTS_REMOTE) $(REGRESSION_TESTS_DIR); \
+ fi
+ @if [ -n "$(REGRESSION_TESTS_COMMIT)" ]; then \
+ echo "[regression_tests] Checking out locked commit $(REGRESSION_TESTS_COMMIT)"; \
+ git -C $(REGRESSION_TESTS_DIR) checkout --detach $(REGRESSION_TESTS_COMMIT); \
+ else \
+ echo "[regression_tests] Checking out ref $(REGRESSION_TESTS_BRANCH)"; \
+ git -C $(REGRESSION_TESTS_DIR) checkout $(REGRESSION_TESTS_BRANCH) || true; \
+ git -C $(REGRESSION_TESTS_DIR) pull --ff-only || true; \
+ fi
+ @echo "[regression_tests] Initializing submodules (e.g., pulp-nnx)"
+ @git -C $(REGRESSION_TESTS_DIR) submodule update --init --recursive
+
+lock-regression-tests:
+ @git -C $(REGRESSION_TESTS_DIR) rev-parse HEAD > $(REGRESSION_TESTS_LOCK_FILE) && \
+ echo "Locked regression_tests to $$(cat $(REGRESSION_TESTS_LOCK_FILE))"
+
+unlock-regression-tests:
+ @rm -f $(REGRESSION_TESTS_LOCK_FILE) && echo "Unlocked regression_tests (will track $(REGRESSION_TESTS_BRANCH))"
+
+#########################
+# Hardware dependencies #
+#########################
+
+# Set dependency paths only if dependencies have already been cloned
+# This avoids running `bender checkout` at every make command
+ifeq ($(shell test -d $(ROOT_DIR)/.bender || echo 1),)
+IDMA_ROOT := $(shell $(BENDER) path idma)
+endif
+
+# Fall back to safe defaults if dependencies are not cloned yet
+IDMA_ROOT ?= .
+
+# Build iDMA's generated hardware in a fresh venv; Python version and package requirements come from the iDMA repository
+gen_idma_hw: $(IDMA_ROOT)/.idma_generated
+$(IDMA_ROOT)/.idma_generated:
+ @$(PYTHON) --version >/dev/null 2>&1 || { echo "ERROR: Python not found. Python 3.8 or higher is required."; exit 1; } && \
+ $(PYTHON) -c "import sys; assert sys.version_info >= (3, 8)" || { echo "ERROR: Python version must be 3.8 or higher"; exit 1; } && \
+ rm -rf venv && $(PYTHON) -m venv venv && \
+ . venv/bin/activate && \
+ pip install --upgrade pip && \
+ pip install -r $(IDMA_ROOT)/requirements.txt && \
+ make -C $(IDMA_ROOT) idma_hw_all && \
+ touch $@
+
+clean_idma_hw:
+	$(MAKE) -C $(IDMA_ROOT) idma_clean_all && rm -f $(IDMA_ROOT)/.idma_generated
########################
# Build and simulation #
@@ -104,7 +216,7 @@ regression_tests:
.PHONY: sim-clean compile build run
-sim-clean:
+sim-clean: clean_idma_hw
rm -rf scripts/compile.tcl
rm -rf work
@@ -121,7 +233,7 @@ scripts/synth-compile.tcl: | Bender.lock
$(library):
$(QUESTA) vlib $(library)
-compile: $(library)
+compile: $(IDMA_ROOT)/.idma_generated $(library)
@test -f Bender.lock || { echo "ERROR: Bender.lock file does not exist. Did you run make checkout in bender mode?"; exit 1; }
@test -f scripts/compile.tcl || { echo "ERROR: scripts/compile.tcl file does not exist. Did you run make scripts in bender mode?"; exit 1; }
$(VSIM) -c -do 'quit -code [source scripts/compile.tcl]'
@@ -133,6 +245,13 @@ run:
$(VSIM) +permissive -suppress 3053 -suppress 8885 -lib $(library) +MAX_CYCLES=$(max_cycles) +UVM_TESTNAME=$(test_case) +APP=$(elf-bin) +notimingchecks +nospecify -t 1ps \
${top_level}_optimized +permissive-off ++$(elf-bin) ++$(target-options) ++$(cl-bin) | tee sim.log
+.PHONY: clean
+
+clean:
+ rm -rf scripts/synth-compile.tcl
+ rm -rf scripts/compile.tcl
+ rm -rf transcript
+ rm -rf *.log
####################
# Regression tests #
####################
diff --git a/README.md b/README.md
index be0e6011..8e77cfb6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# PULP Cluster
-
+
The `pulp_cluster` repository contains the structure of the cluster subsystem
used in PULP chips. For more details on the internal architecture, see the
diff --git a/bender-common.mk b/bender-common.mk
index ca6c866d..4297d044 100644
--- a/bender-common.mk
+++ b/bender-common.mk
@@ -11,7 +11,16 @@ common_defs += -D CLUSTER_ALIAS
common_defs += -D USE_PULP_PARAMETERS
common_targs += -t rtl
-common_targs += -t mchan
+# DMA configuration
+DMA_TYPE ?= idma
+
+ifeq ($(DMA_TYPE),mchan)
+ common_targs += -t mchan
+ common_defs += -D TARGET_MCHAN
+else
+ # Default to iDMA
+endif
+
common_targs += -t cluster_standalone
common_targs += -t scm_use_fpga_scm
common_targs += -t cv32e40p_use_ff_regfile
\ No newline at end of file
diff --git a/doc/PULP_CLUSTER_updateOct2024.drawio b/doc/PULP_CLUSTER_updateOct2024.drawio
new file mode 100644
index 00000000..c3a4afb4
--- /dev/null
+++ b/doc/PULP_CLUSTER_updateOct2024.drawio
@@ -0,0 +1,1643 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/PULP_CLUSTER_updateOct2024.png b/doc/PULP_CLUSTER_updateOct2024.png
new file mode 100644
index 00000000..de562a75
Binary files /dev/null and b/doc/PULP_CLUSTER_updateOct2024.png differ
diff --git a/include/cluster_bus_defines.sv b/include/cluster_bus_defines.sv
index 487e2629..928ae829 100644
--- a/include/cluster_bus_defines.sv
+++ b/include/cluster_bus_defines.sv
@@ -13,7 +13,7 @@
*
*/
-`define NB_SLAVE 4
+`define NB_SLAVE 3
`define NB_MASTER 3
`define NB_REGION 1
diff --git a/include/pulp_soc_defines.sv b/include/pulp_soc_defines.sv
index 965fef4a..eda1b588 100644
--- a/include/pulp_soc_defines.sv
+++ b/include/pulp_soc_defines.sv
@@ -43,11 +43,19 @@
`define CLUST_FP_DIVSQRT 1
`define CLUST_SHARED_FP 2
`define CLUST_SHARED_FP_DIVSQRT 2
-`endif
+`endif
//PARAMETRES
`define NB_CLUSTERS 1
`define NB_CORES 8
-`define NB_DMAS 4
+`define NB_EXT 4
`define NB_MPERIPHS 1
`define NB_SPERIPHS 12
+
+`ifdef TARGET_MCHAN
+ `define NB_DMAS 4
+ `define DMA_USE_HWPE_PORT 0
+`else
+ `define NB_DMAS 2
+ `define DMA_USE_HWPE_PORT 1
+`endif
diff --git a/packages/pulp_cluster_package.sv b/packages/pulp_cluster_package.sv
index 2f8dae70..41563a12 100644
--- a/packages/pulp_cluster_package.sv
+++ b/packages/pulp_cluster_package.sv
@@ -41,23 +41,27 @@ package pulp_cluster_package;
localparam int unsigned MAX_NUM_HWPES = 8;
- typedef struct {
+ typedef struct packed {
hwpe_type_e [MAX_NUM_HWPES-1:0] HwpeList;
byte_t NumHwpes;
} hwpe_subsystem_cfg_t;
// PULP cluster configuration
- typedef struct {
+ typedef struct packed {
// Type of core in the cluster
core_type_e CoreType;
// Number of cores in the cluster
byte_t NumCores;
// Number of DMA TCDM plugs
+ // If using MCHAN, must be 4. If using iDMA, can vary in multiples of 2 or 3
byte_t DmaNumPlugs;
// Number of DMA outstanding transactions
byte_t DmaNumOutstandingBursts;
// DMA burst length in bits
word_t DmaBurstLength;
+ // If the DMA should use a HWPE port, set to 1
+ // This makes sense only when using a DMA_TCDM_DATA_WIDTH close to the HWPE port width
+ bit DmaUseHwpePort;
// Number of masters in crossbar peripherals
byte_t NumMstPeriphs;
// Number of slaves in crossbar peripherals
@@ -144,12 +148,16 @@ package pulp_cluster_package;
byte_t AxiIdInWidth;
// AXI ID width of crossbar manager ports
byte_t AxiIdOutWidth;
+ // AXI ID width of the wide cluster-to-external port
+ byte_t AxiIdOutWideWidth;
// AXI address width
byte_t AxiAddrWidth;
- // AXI data width from external to cluster
+ // AXI data width from external to cluster (narrow)
byte_t AxiDataInWidth;
- // AXI data width from cluster to external
+ // AXI data width from cluster to external (narrow)
byte_t AxiDataOutWidth;
+ // AXI data width from cluster to external (wide)
+ word_t AxiDataOutWideWidth;
// AXI user width
byte_t AxiUserWidth;
// AXI maximum subordinate transaction per ID
@@ -170,6 +178,8 @@ package pulp_cluster_package;
doub_t ClusterExternalOffs;
// Address remap for virtualization
bit EnableRemapAddress;
+ // Enable wide AXI master port for high-bandwidth DMA transfers
+ bit EnableWidePort;
// Enable Snitch ICache
bit SnitchICache;
} pulp_cluster_cfg_t;
@@ -196,7 +206,7 @@ package pulp_cluster_package;
localparam int unsigned SPER_ERROR_ID = 12; // -> unmapped, directed to error
// The following parameters refer to the cluster AXI crossbar
- localparam byte_t NumAxiSubordinatePorts = 4;
+ localparam byte_t NumAxiSubordinatePorts = 3;
localparam byte_t NumAxiManagerPorts = 3;
localparam byte_t AxiSubordinateIdwidth = 4;
localparam byte_t AxiManagerIdwidth = AxiSubordinateIdwidth + $clog2(NumAxiSubordinatePorts);
@@ -209,6 +219,7 @@ package pulp_cluster_package;
DmaNumPlugs: NumDmas,
DmaNumOutstandingBursts: 8,
DmaBurstLength: 256,
+ DmaUseHwpePort: `DMA_USE_HWPE_PORT,
NumMstPeriphs: NB_MPERIPHS,
NumSlvPeriphs: NB_SPERIPHS,
ClusterAlias: 1,
@@ -251,10 +262,12 @@ package pulp_cluster_package;
NumAxiIn: NumAxiSubordinatePorts,
NumAxiOut: NumAxiManagerPorts,
AxiIdInWidth: AxiSubordinateIdwidth,
- AxiIdOutWidth:AxiManagerIdwidth,
+ AxiIdOutWidth: AxiManagerIdwidth,
+ AxiIdOutWideWidth: 1,
AxiAddrWidth: 48,
AxiDataInWidth: 64,
AxiDataOutWidth: 64,
+ AxiDataOutWideWidth: 128,
AxiUserWidth: 10,
AxiMaxInTrans: 64,
AxiMaxOutTrans: 64,
@@ -265,6 +278,7 @@ package pulp_cluster_package;
ClusterPeriphOffs: 'h00200000,
ClusterExternalOffs: 'h00400000,
EnableRemapAddress: 0,
+ EnableWidePort: 1,
SnitchICache: 0,
default: '0
};
diff --git a/rtl/cluster_bus_wrap.sv b/rtl/cluster_bus_wrap.sv
index 775e5622..dbf867d3 100644
--- a/rtl/cluster_bus_wrap.sv
+++ b/rtl/cluster_bus_wrap.sv
@@ -31,7 +31,6 @@ module cluster_bus_wrap
parameter int unsigned AXI_ID_IN_WIDTH = 4 ,
parameter int unsigned AXI_ID_OUT_WIDTH = 6 ,
parameter int unsigned AXI_USER_WIDTH = 6 ,
- parameter int unsigned DMA_NB_OUTSND_BURSTS = 8 ,
parameter int unsigned TCDM_SIZE = 0,
parameter logic [AXI_ADDR_WIDTH-1:0] BaseAddr = 'h10000000,
parameter logic [AXI_ADDR_WIDTH-1:0] ClusterPeripheralsOffs = 'h00200000,
@@ -59,8 +58,6 @@ module cluster_bus_wrap
output slave_resp_t data_slave_resp_o,
input slave_req_t instr_slave_req_i,
output slave_resp_t instr_slave_resp_o,
- input slave_req_t dma_slave_req_i,
- output slave_resp_t dma_slave_resp_o,
input slave_req_t ext_slave_req_i,
output slave_resp_t ext_slave_resp_o,
//INITIATOR
@@ -92,10 +89,8 @@ module cluster_bus_wrap
`AXI_ASSIGN_RESP_STRUCT(data_slave_resp_o, axi_slave_resps[0])
`AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[1], instr_slave_req_i)
`AXI_ASSIGN_RESP_STRUCT(instr_slave_resp_o, axi_slave_resps[1])
- `AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[2], dma_slave_req_i)
- `AXI_ASSIGN_RESP_STRUCT(dma_slave_resp_o, axi_slave_resps[2])
- `AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[3], ext_slave_req_i)
- `AXI_ASSIGN_RESP_STRUCT(ext_slave_resp_o, axi_slave_resps[3])
+ `AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[2], ext_slave_req_i)
+ `AXI_ASSIGN_RESP_STRUCT(ext_slave_resp_o, axi_slave_resps[2])
master_req_t [NB_MASTER-1:0] axi_master_reqs;
master_resp_t [NB_MASTER-1:0] axi_master_resps;
@@ -142,15 +137,14 @@ module cluster_bus_wrap
end_addr: cluster_base_addr
};
- localparam int unsigned MAX_TXNS_PER_SLV_PORT = (DMA_NB_OUTSND_BURSTS > NB_CORES) ?
- DMA_NB_OUTSND_BURSTS : NB_CORES;
+ localparam int unsigned MAX_TXNS_PER_SLV_PORT = NB_CORES;
localparam xbar_cfg_t AXI_XBAR_CFG = '{
NoSlvPorts: NB_SLAVE,
NoMstPorts: NB_MASTER,
MaxMstTrans: MAX_TXNS_PER_SLV_PORT, //The TCDM ports do not support
//outstanding transactiions anyways
- MaxSlvTrans: DMA_NB_OUTSND_BURSTS + NB_CORES, //Allow up to 4 in-flight transactions
+ MaxSlvTrans: NB_CORES, //Allow up to NB_CORES in-flight transactions
//per slave port
FallThrough: 1'b0, //Use the reccomended default config
LatencyMode: axi_pkg::NO_LATENCY, // CUT_ALL_AX | axi_pkg::DemuxW,
diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv
index 51b5dd15..364bb9ae 100644
--- a/rtl/cluster_interconnect_wrap.sv
+++ b/rtl/cluster_interconnect_wrap.sv
@@ -15,14 +15,17 @@
*/
`include "hci_helpers.svh"
+`include "pulp_soc_defines.sv"
import hci_package::*;
module cluster_interconnect_wrap
#(
parameter int unsigned NB_CORES = 8,
- parameter int unsigned HWPE_PRESENT = 1,
- parameter int unsigned NB_HWPE_PORTS = 9,
+ parameter int unsigned NB_HWPE = 1,
+ // how many times wider a HWPE port is than DATA_WIDTH
+ parameter int unsigned HWPE_WIDTH_FAC = 4,
+ parameter bit DMA_USE_HWPE_PORT = 1'b0,
parameter int unsigned NB_DMAS = 4,
parameter int unsigned NB_MPERIPHS = 1,
parameter int unsigned NB_TCDM_BANKS = 16,
@@ -31,6 +34,7 @@ module cluster_interconnect_wrap
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned ADDR_WIDTH = 32,
parameter int unsigned BE_WIDTH = DATA_WIDTH/8,
+ parameter int unsigned TCDM_ID_WIDTH = NB_DMAS + NB_CORES + 4 + HWPE_WIDTH_FAC,
parameter logic [ADDR_WIDTH-1:0] ClusterBaseAddr = 'h10000000,
parameter logic [ADDR_WIDTH-1:0] ClusterPeripheralsOffs = 'h00200000,
parameter logic [ADDR_WIDTH-1:0] ClusterExternalOffs = 'h00400000,
@@ -48,6 +52,7 @@ module cluster_interconnect_wrap
parameter int unsigned USE_ECC_INTERCONNECT = 0,
parameter hci_package::hci_size_parameter_t HCI_CORE_SIZE = '0,
parameter hci_package::hci_size_parameter_t HCI_HWPE_SIZE = '0,
+ parameter hci_package::hci_size_parameter_t HCI_DMA_SIZE = '0,
parameter hci_package::hci_size_parameter_t HCI_MEM_SIZE = '0
)
(
@@ -56,9 +61,9 @@ module cluster_interconnect_wrap
input logic [5:0] cluster_id_i,
XBAR_PERIPH_BUS.Slave hci_ecc_periph_slave,
hci_core_intf.target core_tcdm_slave [0 : NB_CORES-1 ],
- hci_core_intf.target hwpe_tcdm_slave [0 : 0 ],
+ hci_core_intf.target hwpe_tcdm_slave [0 : NB_HWPE-1 ],
XBAR_PERIPH_BUS.Slave core_periph_slave [NB_CORES-1 : 0 ],
- hci_core_intf.target ext_slave [0 : 3 ],
+ hci_core_intf.target ext_slave [0 : `NB_EXT-1 ],
hci_core_intf.target dma_slave [0 : NB_DMAS-1 ],
XBAR_TCDM_BUS.Slave mperiph_slave [NB_MPERIPHS-1 : 0 ],
hci_core_intf.initiator tcdm_sram_master [0 : NB_TCDM_BANKS-1],
@@ -67,7 +72,10 @@ module cluster_interconnect_wrap
input logic [1:0] TCDM_arb_policy_i
);
- localparam TCDM_ID_WIDTH = NB_CORES+NB_DMAS+4+NB_HWPE_PORTS;
+ // if DMA uses HWPE ports, ID width must be increased correspondingly
+ localparam N_HCI_DMA_PORTS = DMA_USE_HWPE_PORT ? 0 : NB_DMAS;
+ localparam N_HCI_HWPE_PORTS = DMA_USE_HWPE_PORT ? NB_HWPE + NB_DMAS : NB_HWPE;
+ localparam int unsigned HCI_FILTER_WRITE_R_VALID[0:NB_HWPE-1] = '{default: 1};
//-********************************************************
@@ -75,16 +83,70 @@ module cluster_interconnect_wrap
//-********************************************************
// Wraps the Logarithmic Interconnect + a HWPE Interconnect
generate
- if( USE_HETEROGENEOUS_INTERCONNECT || !HWPE_PRESENT ) begin : hci_gen
+ if( USE_HETEROGENEOUS_INTERCONNECT || (NB_HWPE == 0) ) begin : hci_gen
+ // HWPEs interface
+ hci_core_intf #(
+ .DW(HCI_HWPE_SIZE.DW),
+ .AW(HCI_HWPE_SIZE.AW),
+ .BW(HCI_HWPE_SIZE.BW),
+ .UW(HCI_HWPE_SIZE.UW),
+ .IW(HCI_HWPE_SIZE.IW),
+ .EW(HCI_HWPE_SIZE.EW),
+ .EHW(HCI_HWPE_SIZE.EHW),
+ .FD(HCI_HWPE_SIZE.FD)
+ )
+ s_hwpe_intc [0:N_HCI_HWPE_PORTS-1] (
+ .clk(clk_i)
+ );
+ // DMA interface (only if !DMA_USE_HWPE_PORT)
+ hci_core_intf #(
+ .DW(HCI_CORE_SIZE.DW),
+ .AW(HCI_CORE_SIZE.AW),
+ .BW(HCI_HWPE_SIZE.BW),
+ .UW(HCI_HWPE_SIZE.UW),
+ .IW(HCI_HWPE_SIZE.IW),
+ .EW(HCI_HWPE_SIZE.EW),
+ .EHW(HCI_CORE_SIZE.EHW),
+ .FD(HCI_DMA_SIZE.FD)
+ )
+ s_dma_intc [0:N_HCI_DMA_PORTS-1] (
+ .clk(clk_i)
+ );
+ // assign HWPE interfaces to s_hwpe_intc[:]
+ for (genvar i=0; i1)?$clog2(NB_HWPES):1)-1:0] hwpe_sel_o,
output hci_package::hci_interconnect_ctrl_t hci_ctrl_o,
// Control ports
diff --git a/rtl/hwpe_subsystem.sv b/rtl/hwpe_subsystem.sv
index 269cfeab..87cc3dab 100644
--- a/rtl/hwpe_subsystem.sv
+++ b/rtl/hwpe_subsystem.sv
@@ -23,6 +23,8 @@ module hwpe_subsystem
parameter int unsigned N_CORES = 8,
parameter int unsigned N_MASTER_PORT = 9,
parameter int unsigned ID_WIDTH = 8,
+ //parameter int unsigned N_HWPES = 8,
+ //parameter int unsigned HWPE_SEL_BITS = (N_HWPES > 1) ? $clog2(N_HWPES) : 1,
parameter hci_package::hci_size_parameter_t HCI_HWPE_SIZE = '0
)
(
@@ -30,6 +32,7 @@ module hwpe_subsystem
input logic rst_n,
input logic test_mode,
input logic hwpe_en_i,
+ //input logic [HWPE_SEL_BITS-1:0] hwpe_sel_i,
input logic [$clog2(MAX_NUM_HWPES)-1:0] hwpe_sel_i,
hci_core_intf.initiator hwpe_xbar_master,
@@ -44,6 +47,7 @@ module hwpe_subsystem
localparam int unsigned EW = HCI_HWPE_SIZE.EW;
localparam int unsigned EHW = HCI_HWPE_SIZE.EHW;
+ // TODO: remove this once we have a proper way to get the number of HWPEs
localparam int unsigned N_HWPES = HWPE_CFG.NumHwpes;
localparam int unsigned HWPE_SEL_BITS = (N_HWPES > 1) ? $clog2(N_HWPES) : 1;
diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv
index ab4954b0..7c38fab1 100644
--- a/rtl/idma_wrap.sv
+++ b/rtl/idma_wrap.sv
@@ -6,64 +6,74 @@
* dmac_wrap.sv
* Thomas Benz
* Michael Rogenmoser
+ * Georg Rutishauser
*/
// DMA Core wrapper
`include "axi/assign.svh"
`include "axi/typedef.svh"
+`include "obi/typedef.svh"
`include "idma/typedef.svh"
`include "register_interface/typedef.svh"
+`define MY_MAX(a, b) (a > b ? a : b)
+
module dmac_wrap #(
- parameter int unsigned NB_CORES = 4,
- parameter int unsigned AXI_ADDR_WIDTH = 32,
- parameter int unsigned AXI_DATA_WIDTH = 64,
- parameter int unsigned AXI_USER_WIDTH = 6,
- parameter int unsigned AXI_ID_WIDTH = 4,
- parameter int unsigned PE_ID_WIDTH = 1,
- parameter int unsigned NB_PE_PORTS = 1,
- parameter int unsigned DATA_WIDTH = 32,
- parameter int unsigned ADDR_WIDTH = 32,
- parameter int unsigned BE_WIDTH = DATA_WIDTH/8,
- parameter int unsigned NUM_STREAMS = 1, // Only 1 for now
- parameter int unsigned TCDM_SIZE = 0,
- parameter int unsigned TwoDMidend = 1, // Leave this on for now
- parameter int unsigned NB_OUTSND_BURSTS = 8,
- parameter int unsigned GLOBAL_QUEUE_DEPTH = 16,
- parameter int unsigned BACKEND_QUEUE_DEPTH = 16,
- parameter logic [AXI_ADDR_WIDTH-1:0] ClusterBaseAddr = 'h10000000,
- parameter type axi_req_t = logic,
- parameter type axi_resp_t = logic
-) (
- input logic clk_i,
- input logic rst_ni,
- input logic test_mode_i,
- XBAR_PERIPH_BUS.Slave pe_ctrl_slave[NB_PE_PORTS-1:0],
- hci_core_intf.target ctrl_slave[0:NB_CORES-1],
- hci_core_intf.initiator tcdm_master[0:3],
- output axi_req_t ext_master_req_o,
- input axi_resp_t ext_master_resp_i,
- output logic [NB_CORES-1:0] term_event_o,
- output logic [NB_CORES-1:0] term_irq_o,
- output logic [NB_PE_PORTS-1:0] term_event_pe_o,
- output logic [NB_PE_PORTS-1:0] term_irq_pe_o,
- output logic busy_o
-);
-
- localparam int unsigned NumRegs = NB_CORES+NB_PE_PORTS;
- localparam int unsigned MstIdxWidth = AXI_ID_WIDTH;
- localparam int unsigned SlvIdxWidth = AXI_ID_WIDTH - $clog2(NUM_STREAMS);
+ parameter int unsigned NB_CORES = 4,
+ parameter int unsigned AXI_ADDR_WIDTH = 32,
+ parameter int unsigned AXI_DATA_WIDTH = 64,
+ parameter int unsigned AXI_USER_WIDTH = 6,
+ parameter int unsigned AXI_ID_WIDTH = 4,
+ parameter int unsigned PE_ID_WIDTH = 1,
+ parameter int unsigned NB_PE_PORTS = 1,
+ parameter int unsigned DATA_WIDTH = 32,
+ parameter int unsigned ADDR_WIDTH = 32,
+ parameter int unsigned BE_WIDTH = DATA_WIDTH / 8,
+ parameter type axi_req_t = logic,
+ parameter type axi_resp_t = logic,
+ // bidirectional streams: range 1 to 8
+ parameter int unsigned NUM_BIDIR_STREAMS = 1,
+ parameter int unsigned NB_OUTSND_BURSTS = 8,
+ // queue depth per stream
+ parameter int unsigned GLOBAL_QUEUE_DEPTH = 2,
+ // mux read ports between tcdm-tcdm and tcdm-axi?
+ parameter bit MUX_READ = 1'b0,
+ parameter bit TCDM_MEM2BANKS = 1'b0,
+ // when using mem2banks (implies AXI_DATA_WIDTH==64):
+ // 4 ports per stream if read ports muxed, otherwise 6
+ // when not using mem2banks:
+ // 2 ports per stream if read ports muxed, otherwise 3
+ localparam int unsigned NB_TCDM_PORTS_PER_STRM = (2 + (!MUX_READ)) * (1 + TCDM_MEM2BANKS)
+) ( // verilog_format: off // verible does not manage to align this :(
+ input logic clk_i,
+ input logic rst_ni,
+ input logic test_mode_i,
+ XBAR_PERIPH_BUS.Slave pe_ctrl_slave[NB_PE_PORTS-1:0],
+ hci_core_intf.target ctrl_slave[0:NB_CORES-1],
+ hci_core_intf.initiator tcdm_master[NB_TCDM_PORTS_PER_STRM*NUM_BIDIR_STREAMS-1:0],
+ output axi_req_t [NUM_BIDIR_STREAMS-1:0] ext_master_req_o,
+ input axi_resp_t [NUM_BIDIR_STREAMS-1:0] ext_master_resp_i,
+ output logic [NB_CORES-1:0] term_event_o,
+ output logic [NB_CORES-1:0] term_irq_o,
+ output logic [NB_PE_PORTS-1:0] term_event_pe_o,
+ output logic [NB_PE_PORTS-1:0] term_irq_pe_o,
+ output logic busy_o
+); // verilog_format: on
+
+ localparam int unsigned NumRegs = NB_CORES + NB_PE_PORTS;
+ localparam int unsigned NumStreams = 32'd2 * NUM_BIDIR_STREAMS;
+ localparam int unsigned StreamWidth = cf_math_pkg::idx_width(NumStreams);
// CORE --> MCHAN CTRL INTERFACE BUS SIGNALS
- logic [NumRegs-1:0][DATA_WIDTH-1:0] config_wdata;
- logic [NumRegs-1:0][ADDR_WIDTH-1:0] config_add;
+ logic [NumRegs-1:0][ DATA_WIDTH-1:0] config_wdata;
+ logic [NumRegs-1:0][ ADDR_WIDTH-1:0] config_add;
logic [NumRegs-1:0] config_req;
logic [NumRegs-1:0] config_wen;
- logic [NumRegs-1:0][BE_WIDTH-1:0] config_be;
+ logic [NumRegs-1:0][ BE_WIDTH-1:0] config_be;
logic [NumRegs-1:0][PE_ID_WIDTH-1:0] config_id;
logic [NumRegs-1:0] config_gnt;
- logic [NumRegs-1:0][DATA_WIDTH-1:0] config_r_rdata;
+ logic [NumRegs-1:0][ DATA_WIDTH-1:0] config_r_rdata;
logic [NumRegs-1:0] config_r_valid;
logic [NumRegs-1:0] config_r_opc;
logic [NumRegs-1:0][PE_ID_WIDTH-1:0] config_r_id;
@@ -79,16 +89,16 @@ module dmac_wrap #(
assign ctrl_slave[i].gnt = config_gnt[i];
assign ctrl_slave[i].r_opc = config_r_opc[i];
assign ctrl_slave[i].r_valid = config_r_valid[i];
- assign ctrl_slave[i].r_data = config_r_rdata[i];
+ assign ctrl_slave[i].r_data = config_r_rdata[i];
end
for (genvar i = 0; i < NB_PE_PORTS; i++) begin : gen_pe_ctrl_registers
- assign config_add[NB_CORES+i] = pe_ctrl_slave[i].add;
- assign config_req[NB_CORES+i] = pe_ctrl_slave[i].req;
- assign config_wdata[NB_CORES+i] = pe_ctrl_slave[i].wdata;
- assign config_wen[NB_CORES+i] = pe_ctrl_slave[i].wen;
- assign config_be[NB_CORES+i] = pe_ctrl_slave[i].be;
- assign config_id[NB_CORES+i] = pe_ctrl_slave[i].id;
+ assign config_add[NB_CORES+i] = pe_ctrl_slave[i].add;
+ assign config_req[NB_CORES+i] = pe_ctrl_slave[i].req;
+ assign config_wdata[NB_CORES+i] = pe_ctrl_slave[i].wdata;
+ assign config_wen[NB_CORES+i] = pe_ctrl_slave[i].wen;
+ assign config_be[NB_CORES+i] = pe_ctrl_slave[i].be;
+ assign config_id[NB_CORES+i] = pe_ctrl_slave[i].id;
assign pe_ctrl_slave[i].gnt = config_gnt[NB_CORES+i];
assign pe_ctrl_slave[i].r_opc = config_r_opc[NB_CORES+i];
assign pe_ctrl_slave[i].r_valid = config_r_valid[NB_CORES+i];
@@ -96,74 +106,137 @@ module dmac_wrap #(
assign pe_ctrl_slave[i].r_id = config_r_id[NB_CORES+i];
end
- // AXI4+ATOP types
- typedef logic [AXI_ADDR_WIDTH-1:0] addr_t;
- typedef logic [ADDR_WIDTH-1:0] mem_addr_t;
- typedef logic [AXI_DATA_WIDTH-1:0] data_t;
- typedef logic [SlvIdxWidth-1:0] slv_id_t;
- typedef logic [MstIdxWidth-1:0] mst_id_t;
+ // Type definitions
+ typedef logic [AXI_ADDR_WIDTH-1:0] addr_t;
+ typedef logic [ADDR_WIDTH-1:0] mem_addr_t;
+ typedef logic [AXI_DATA_WIDTH-1:0] data_t;
+ typedef logic [AXI_ID_WIDTH-1:0] id_t;
typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t;
typedef logic [AXI_USER_WIDTH-1:0] user_t;
- // AXI4+ATOP channels typedefs
- `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t)
- `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t)
- `AXI_TYPEDEF_AW_CHAN_T(mem_aw_chan_t, mem_addr_t, mst_id_t, user_t)
- `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
- `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t)
- `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t)
- `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, slv_id_t, user_t)
- `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t)
- `AXI_TYPEDEF_AR_CHAN_T(mem_ar_chan_t, mem_addr_t, mst_id_t, user_t)
- `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t)
- `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t)
- `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t)
- `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t)
- `AXI_TYPEDEF_REQ_T(mem_req_t, mem_aw_chan_t, w_chan_t, mem_ar_chan_t)
- `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t)
- `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t)
+
+ // AXI4+ATOP channel typedefs
+ //`AXI_TYPEDEF_ALL(axi_int, addr_t, id_t, data_t, strb_t, user_t)
+ `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, addr_t, id_t, user_t)
+ `AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, data_t, strb_t, user_t)
+ `AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, id_t, user_t)
+ `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, addr_t, id_t, user_t)
+ `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, data_t, id_t, user_t)
+ // Memory Init typedefs
+ /// init read request
+ typedef struct packed {
+ logic [AXI_ADDR_WIDTH-1:0] cfg;
+ logic [AXI_DATA_WIDTH-1:0] term;
+ logic [AXI_DATA_WIDTH/8-1:0] strb;
+ logic [AXI_ID_WIDTH-1:0] id;
+ } init_req_chan_t;
+
+ typedef struct packed {
+ init_req_chan_t req_chan;
+ logic req_valid;
+ logic rsp_ready;
+ } init_req_t;
+
+ typedef struct packed {logic [AXI_DATA_WIDTH-1:0] init;} init_rsp_chan_t;
+
+ typedef struct packed {
+ init_rsp_chan_t rsp_chan;
+ logic rsp_valid;
+ logic req_ready;
+ } init_rsp_t;
+
+ // OBI typedefs
+ `OBI_TYPEDEF_MINIMAL_A_OPTIONAL(a_optional_t)
+ `OBI_TYPEDEF_MINIMAL_R_OPTIONAL(r_optional_t)
+ `OBI_TYPEDEF_A_CHAN_T(obi_a_chan_t, AXI_ADDR_WIDTH, AXI_DATA_WIDTH, 0, a_optional_t)
+ `OBI_TYPEDEF_R_CHAN_T(obi_r_chan_t, AXI_DATA_WIDTH, 0, r_optional_t)
+ `OBI_TYPEDEF_REQ_T(obi_req_t, obi_a_chan_t)
+ `OBI_TYPEDEF_RSP_T(obi_rsp_t, obi_r_chan_t)
+
+
+ obi_req_t [NUM_BIDIR_STREAMS-1:0]
+ obi_read_req_from_dma,
+ obi_read_req_from_rrc,
+ obi_reorg_req_from_dma,
+ obi_reorg_req_from_rrc,
+ obi_write_req_from_dma,
+ obi_write_req_from_rrc,
+ obi_read_req_muxed;
+ obi_rsp_t [NUM_BIDIR_STREAMS-1:0]
+ obi_read_rsp_to_dma,
+ obi_read_rsp_to_rrc,
+ obi_reorg_rsp_to_dma,
+ obi_reorg_rsp_to_rrc,
+ obi_write_rsp_to_dma,
+ obi_write_rsp_to_rrc,
+ obi_read_rsp_to_mux;
+
+
// BUS definitions
- mst_req_t tcdm_req, soc_req;
- mem_req_t tcdm_mem_req;
- mst_resp_t soc_rsp;
- mst_resp_t tcdm_rsp;
- slv_req_t [NUM_STREAMS-1:0] dma_req;
- slv_resp_t [NUM_STREAMS-1:0] dma_rsp;
- // interface to structs
- //`AXI_ASSIGN_FROM_REQ(ext_master_req_o, soc_req)
- //`AXI_ASSIGN_TO_RESP(soc_rsp, ext_master_resp_i)
-
- `AXI_ASSIGN_REQ_STRUCT(ext_master_req_o, soc_req)
- `AXI_ASSIGN_RESP_STRUCT(soc_rsp, ext_master_resp_i)
+ axi_req_t [NUM_BIDIR_STREAMS-1:0] soc_req;
+ axi_resp_t [NUM_BIDIR_STREAMS-1:0] soc_rsp;
+ axi_req_t [ NumStreams-1:0] dma_req;
+ axi_resp_t [ NumStreams-1:0] dma_rsp;
+
+ // interface to structs
+ for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_connect_interface
+ assign ext_master_req_o[s] = soc_req[s];
+ assign soc_rsp[s] = ext_master_resp_i[s];
+ end
+
+ // connect RW axi buses
+ for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_rw_axi_connection
+ axi_rw_join #(
+ .axi_req_t (axi_req_t),
+ .axi_resp_t(axi_resp_t)
+ ) i_init_axi_rw_join (
+ .clk_i,
+ .rst_ni,
+ .slv_read_req_i (dma_req[2*s+1]),
+ .slv_read_resp_o (dma_rsp[2*s+1]),
+ .slv_write_req_i (dma_req[2*s]),
+ .slv_write_resp_o(dma_rsp[2*s]),
+ .mst_req_o (soc_req[s]),
+ .mst_resp_i (soc_rsp[s])
+ );
+ end
// Register BUS definitions
- `REG_BUS_TYPEDEF_ALL(dma_regs, logic[9:0], logic[31:0], logic[3:0])
+ localparam int unsigned RegAddrWidth = 32'd10;
+ `REG_BUS_TYPEDEF_ALL(dma_regs, logic[RegAddrWidth-1:0], logic[DATA_WIDTH-1:0],
+ logic[BE_WIDTH-1:0])
dma_regs_req_t [NumRegs-1:0] dma_regs_req;
dma_regs_rsp_t [NumRegs-1:0] dma_regs_rsp;
// iDMA struct definitions
- localparam int unsigned TFLenWidth = AXI_ADDR_WIDTH;
- localparam int unsigned NumDim = 2; // Support 2D midend for 2D transfers
- localparam int unsigned RepWidth = 32;
- localparam int unsigned StrideWidth = 32;
- typedef logic [TFLenWidth-1:0] tf_len_t;
- typedef logic [RepWidth-1:0] reps_t;
+ localparam int unsigned TFLenWidth = AXI_ADDR_WIDTH;
+ localparam int unsigned NumDim = 32'd3; // number of transfer dimensions passed to the ND midend (3D)
+ localparam int unsigned RepWidth = 32'd32;
+ localparam int unsigned StrideWidth = 32'd32;
+ typedef logic [TFLenWidth-1:0] tf_len_t;
+ typedef logic [RepWidth-1:0] reps_t;
typedef logic [StrideWidth-1:0] strides_t;
// iDMA request / response types
- `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, slv_id_t, addr_t, tf_len_t)
+ `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, id_t, addr_t, tf_len_t)
`IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t)
// iDMA ND request
`IDMA_TYPEDEF_FULL_ND_REQ_T(idma_nd_req_t, idma_req_t, reps_t, strides_t)
- idma_nd_req_t twod_req, twod_req_queue;
- idma_req_t burst_req;
- idma_rsp_t idma_rsp;
+ logic [StreamWidth-1:0] stream_idx;
- logic fe_valid, twod_queue_valid, be_valid, be_rsp_valid;
- logic fe_ready, twod_queue_ready, be_ready, be_rsp_ready;
- logic trans_complete, midend_busy;
- idma_pkg::idma_busy_t idma_busy;
+ idma_nd_req_t twod_req;
+ idma_nd_req_t [NumStreams-1:0] twod_req_queue;
+ idma_req_t [NumStreams-1:0] idma_req;
+ idma_rsp_t [NumStreams-1:0] idma_rsp;
+
+ logic one_fe_valid;
+ logic [NumStreams-1:0] fe_valid, twod_queue_valid, be_valid, be_rsp_valid;
+ logic [NumStreams-1:0] fe_ready, twod_queue_ready, be_ready, be_rsp_ready;
+ logic [NumStreams-1:0] trans_complete, midend_busy;
+ idma_pkg::idma_busy_t [NumStreams-1:0] idma_busy;
+
+ logic [NumStreams-1:0][31:0] done_id, next_id;
// ------------------------------------------------------
// FRONTEND
@@ -171,291 +244,774 @@ module dmac_wrap #(
for (genvar i = 0; i < NumRegs; i++) begin : gen_core_regs
periph_to_reg #(
- .AW ( 10 ),
- .DW ( 32 ),
- .BW ( 8 ),
- .IW ( PE_ID_WIDTH ),
- .req_t ( dma_regs_req_t ),
- .rsp_t ( dma_regs_rsp_t )
+ .AW (RegAddrWidth),
+ .DW (DATA_WIDTH),
+ .BW (DATA_WIDTH/BE_WIDTH), // bits per byte lane (8 when BE_WIDTH = DATA_WIDTH/8), independent of DATA_WIDTH
+ .IW (PE_ID_WIDTH),
+ .req_t(dma_regs_req_t),
+ .rsp_t(dma_regs_rsp_t)
) i_pe_translate (
.clk_i,
.rst_ni,
- .req_i ( config_req [i] ),
- .add_i ( config_add [i][9:0] ),
- .wen_i ( config_wen [i] ),
- .wdata_i ( config_wdata [i] ),
- .be_i ( config_be [i] ),
- .id_i ( config_id [i] ),
- .gnt_o ( config_gnt [i] ),
- .r_rdata_o ( config_r_rdata [i] ),
- .r_opc_o ( config_r_opc [i] ),
- .r_id_o ( config_r_id [i] ),
- .r_valid_o ( config_r_valid [i] ),
- .reg_req_o ( dma_regs_req [i] ),
- .reg_rsp_i ( dma_regs_rsp [i] )
+ .req_i (config_req[i]),
+ .add_i (config_add[i][RegAddrWidth-1:0]), // only the low RegAddrWidth address bits reach the register bus
+ .wen_i (config_wen[i]),
+ .wdata_i (config_wdata[i]),
+ .be_i (config_be[i]),
+ .id_i (config_id[i]),
+ .gnt_o (config_gnt[i]),
+ .r_rdata_o(config_r_rdata[i]),
+ .r_opc_o (config_r_opc[i]),
+ .r_id_o (config_r_id[i]),
+ .r_valid_o(config_r_valid[i]),
+ .reg_req_o(dma_regs_req[i]),
+ .reg_rsp_i(dma_regs_rsp[i])
);
end
- idma_reg32_2d_frontend #(
- .NumRegs ( NumRegs ),
- .IdCounterWidth ( 28 ),
- .dma_regs_req_t ( dma_regs_req_t ),
- .dma_regs_rsp_t ( dma_regs_rsp_t ),
- .burst_req_t ( idma_nd_req_t )
- ) i_idma_reg32_2d_frontend (
+ idma_reg32_3d #(
+ .NumRegs (NumRegs),
+ .NumStreams (NumStreams),
+ .IdCounterWidth(32'd32),
+ .reg_req_t (dma_regs_req_t),
+ .reg_rsp_t (dma_regs_rsp_t),
+ .dma_req_t (idma_nd_req_t)
+ ) i_idma_reg32_3d (
.clk_i,
.rst_ni,
- .dma_ctrl_req_i ( dma_regs_req ),
- .dma_ctrl_rsp_o ( dma_regs_rsp ),
- .burst_req_o ( twod_req ),
- .valid_o ( fe_valid ),
- .ready_i ( fe_ready ),
- .backend_idle_i ( ~busy_o ),
- .trans_complete_i ( trans_complete )
+ .dma_ctrl_req_i(dma_regs_req),
+ .dma_ctrl_rsp_o(dma_regs_rsp),
+ .dma_req_o (twod_req),
+ .req_valid_o (one_fe_valid),
+ .req_ready_i (fe_ready[stream_idx]),
+ .next_id_i (next_id[stream_idx]),
+ .stream_idx_o (stream_idx),
+ .done_id_i (done_id),
+ .busy_i (idma_busy),
+ .midend_busy_i (midend_busy)
);
+ always_comb begin : proc_connect_valids
+ fe_valid = '0;
+ fe_valid[stream_idx] = one_fe_valid;
+ end
+
// interrupts and events (currently broadcast tx_cplt event only)
assign term_event_pe_o = |trans_complete ? '1 : '0;
assign term_irq_pe_o = '0;
assign term_event_o = |trans_complete ? '1 : '0;
assign term_irq_o = '0;
- assign busy_o = midend_busy | |idma_busy;
+ assign busy_o = |midend_busy | |idma_busy;
- // ------------------------------------------------------
- // MIDEND
- // ------------------------------------------------------
+ for (genvar s = 0; s < NumStreams; s++) begin : gen_streams
- // global (2D) request FIFO
- stream_fifo #(
- .DEPTH ( GLOBAL_QUEUE_DEPTH ),
- .T (idma_nd_req_t )
- ) i_2D_request_fifo (
- .clk_i,
- .rst_ni,
- .flush_i ( 1'b0 ),
- .testmode_i ( test_mode_i ),
- .usage_o (/*NOT CONNECTED*/),
-
- .data_i ( twod_req ),
- .valid_i ( fe_valid ),
- .ready_o ( fe_ready ),
-
- .data_o ( twod_req_queue ),
- .valid_o ( twod_queue_valid ),
- .ready_i ( twod_queue_ready )
- );
-
- localparam logic [1:0][31:0] RepWidths = '{default: 32'd32};
+ // ------------------------------------------------------
+ // ID counters
+ // ------------------------------------------------------
+ idma_transfer_id_gen #(
+ .IdWidth(32'd32)
+ ) i_idma_transfer_id_gen (
+ .clk_i,
+ .rst_ni,
+ .issue_i (fe_valid[s] & fe_ready[s]),
+ .retire_i (trans_complete[s]),
+ .next_o (next_id[s]),
+ .completed_o(done_id[s])
+ );
- idma_nd_midend #(
- .NumDim ( NumDim ),
- .addr_t ( addr_t ),
- .idma_req_t ( idma_req_t ),
- .idma_rsp_t ( idma_rsp_t ),
- .idma_nd_req_t( idma_nd_req_t ),
- .RepWidths ( RepWidths )
- ) i_idma_2D_midend (
- .clk_i,
- .rst_ni,
- .nd_req_i ( twod_req_queue ),
- .nd_req_valid_i ( twod_queue_valid ),
- .nd_req_ready_o ( twod_queue_ready ),
+ // ------------------------------------------------------
+ // MIDEND
+ // ------------------------------------------------------
+ // per-stream ND (3D) request FIFO
+ stream_fifo #(
+ .DEPTH(GLOBAL_QUEUE_DEPTH),
+ .T (idma_nd_req_t)
+ ) i_3D_request_fifo (
+ .clk_i,
+ .rst_ni,
+ .flush_i (1'b0),
+ .testmode_i(test_mode_i),
+ .usage_o ( /*NOT CONNECTED*/),
+ .data_i (twod_req),
+ .valid_i (fe_valid[s]),
+ .ready_o (fe_ready[s]),
+ .data_o (twod_req_queue[s]),
+ .valid_o (twod_queue_valid[s]),
+ .ready_i (twod_queue_ready[s])
+ );
- .nd_rsp_o (/*NOT CONNECTED*/ ),
- .nd_rsp_valid_o ( trans_complete ),
- .nd_rsp_ready_i ( 1'b1 ), // Always ready to accept completed transfers
+ localparam logic [1:0][31:0] RepWidths = '{default: 32'd32}; // NOTE(review): 2 entries while NumDim is 3 - confirm idma_nd_midend expects NumDim-1 repetition widths
- .burst_req_o ( burst_req ),
- .burst_req_valid_o( be_valid ),
- .burst_req_ready_i( be_ready ),
+ idma_nd_midend #(
+ .NumDim (NumDim),
+ .addr_t (addr_t),
+ .idma_req_t (idma_req_t),
+ .idma_rsp_t (idma_rsp_t),
+ .idma_nd_req_t(idma_nd_req_t),
+ .RepWidths (RepWidths)
+ ) i_idma_3D_midend (
+ .clk_i,
+ .rst_ni,
+ .nd_req_i (twod_req_queue[s]),
+ .nd_req_valid_i (twod_queue_valid[s]),
+ .nd_req_ready_o (twod_queue_ready[s]),
+ .nd_rsp_o ( /*NOT CONNECTED*/),
+ .nd_rsp_valid_o (trans_complete[s]),
+ .nd_rsp_ready_i (1'b1), // Always ready to accept completed transfers
+ .burst_req_o (idma_req[s]),
+ .burst_req_valid_o(be_valid[s]),
+ .burst_req_ready_i(be_ready[s]),
+ .burst_rsp_i (idma_rsp[s]),
+ .burst_rsp_valid_i(be_rsp_valid[s]),
+ .burst_rsp_ready_o(be_rsp_ready[s]),
+ .busy_o (midend_busy[s])
+ );
- .burst_rsp_i ( idma_rsp ),
- .burst_rsp_valid_i( be_rsp_valid ),
- .burst_rsp_ready_o( be_rsp_ready ),
+ // ------------------------------------------------------
+ // BACKEND
+ // ------------------------------------------------------
+
+ // even channels: copy out data
+ if (s[0] == 1'b0) begin : gen_cpy_out
+
+ // Meta Channel Widths
+ localparam int unsigned axi_aw_chan_width = axi_pkg::aw_width(
+ AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH
+ );
+ localparam int unsigned init_req_chan_width = $bits(init_req_chan_t);
+ localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t);
+
+
+ typedef struct packed {
+ init_req_chan_t req_chan;
+ logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-init_req_chan_width:0] padding;
+ } init_read_req_chan_padded_t;
+
+ typedef struct packed {
+ obi_a_chan_t a_chan;
+ logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-obi_a_chan_width:0] padding;
+ } obi_read_a_chan_padded_t;
+
+ typedef union packed {
+ init_read_req_chan_padded_t init;
+ obi_read_a_chan_padded_t obi;
+ } read_meta_channel_t;
+
+ typedef struct packed {
+ axi_aw_chan_t aw_chan;
+ logic [`MY_MAX(axi_aw_chan_width, init_req_chan_width)-axi_aw_chan_width:0] padding;
+ } axi_write_aw_chan_padded_t;
+
+ typedef struct packed {
+ init_req_chan_t req_chan;
+ logic [`MY_MAX(axi_aw_chan_width, init_req_chan_width)-init_req_chan_width:0] padding;
+ } init_write_req_chan_padded_t;
+
+ typedef union packed {
+ axi_write_aw_chan_padded_t axi;
+ init_write_req_chan_padded_t init;
+ } write_meta_channel_t;
+
+ // local buses
+ init_req_t init_read_req, init_write_req;
+ init_rsp_t init_read_rsp, init_write_rsp;
+
+ idma_backend_r_obi_rw_init_w_axi #(
+ .DataWidth (AXI_DATA_WIDTH),
+ .AddrWidth (AXI_ADDR_WIDTH),
+ .UserWidth (AXI_USER_WIDTH),
+ .AxiIdWidth (AXI_ID_WIDTH),
+ .NumAxInFlight (NB_OUTSND_BURSTS),
+ .BufferDepth (32'd3),
+ .TFLenWidth (TFLenWidth),
+ .MemSysDepth (32'd0),
+ .CombinedShifter (1'b0),
+ .RAWCouplingAvail (1'b0),
+ .MaskInvalidData (1'b0),
+ .HardwareLegalizer (1'b1),
+ .RejectZeroTransfers (1'b1),
+ .idma_req_t (idma_req_t),
+ .idma_rsp_t (idma_rsp_t),
+ .idma_eh_req_t (idma_pkg::idma_eh_req_t),
+ .idma_busy_t (idma_pkg::idma_busy_t),
+ .axi_req_t (axi_req_t),
+ .axi_rsp_t (axi_resp_t),
+ .init_req_t (init_req_t),
+ .init_rsp_t (init_rsp_t),
+ .obi_req_t (obi_req_t),
+ .obi_rsp_t (obi_rsp_t),
+ .read_meta_channel_t (read_meta_channel_t),
+ .write_meta_channel_t(write_meta_channel_t)
+ ) i_idma_backend_r_obi_rw_init_w_axi (
+ .clk_i,
+ .rst_ni,
+ .testmode_i (test_mode_i),
+ .idma_req_i (idma_req[s]),
+ .req_valid_i (be_valid[s]),
+ .req_ready_o (be_ready[s]),
+ .idma_rsp_o (idma_rsp[s]),
+ .rsp_valid_o (be_rsp_valid[s]),
+ .rsp_ready_i (be_rsp_ready[s]),
+ .idma_eh_req_i (1'b0),
+ .eh_req_valid_i (1'b0),
+ .eh_req_ready_o ( /* NOT CONNECTED */),
+ .init_read_req_o (init_read_req),
+ .init_read_rsp_i (init_read_rsp),
+ .obi_read_req_o (obi_read_req_from_dma[s/2]),
+ .obi_read_rsp_i (obi_read_rsp_to_dma[s/2]),
+ .axi_write_req_o (dma_req[s]),
+ .axi_write_rsp_i (dma_rsp[s]),
+ .init_write_req_o(init_write_req),
+ .init_write_rsp_i(init_write_rsp),
+ .busy_o (idma_busy[s])
+ );
+
+ // use a spill register to only give responses when a request was
+ // (or is) asserted
+ spill_register #(
+ .T(logic [-1:0])
+ ) i_init_read_rsp_reflect (
+ .clk_i,
+ .rst_ni,
+ .valid_i(init_read_req.req_valid),
+ .ready_o(init_read_rsp.req_ready),
+ .data_i('0), // not used
+ .valid_o(init_read_rsp.rsp_valid),
+ .ready_i(init_read_req.rsp_ready),
+ .data_o()
+ );
+
+ //implement zero memory using init protocol
+ assign init_read_rsp.rsp_chan.init = '0;
+ // implement /dev/null
+ spill_register #(
+ .T(logic [-1:0])
+ ) i_init_write_rsp_reflect (
+ .clk_i,
+ .rst_ni,
+ .valid_i(init_write_req.req_valid),
+ .ready_o(init_write_rsp.req_ready),
+ .data_i('0), // not used
+ .valid_o(init_write_rsp.rsp_valid),
+ .ready_i(init_write_req.rsp_ready),
+ .data_o()
+ );
+
+ assign init_write_rsp.rsp_chan.init = '0;
+
+ // odd channels: copy in data
+ end else begin : gen_cpy_in
+
+ // Meta Channel Widths
+ localparam int unsigned axi_ar_chan_width = axi_pkg::ar_width(
+ AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH
+ );
+ localparam int unsigned init_req_chan_width = $bits(init_req_chan_t);
+ localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t);
+
+ function int unsigned max_width(input int unsigned a, b);
+ return (a > b) ? a : b;
+ endfunction
+
+ typedef struct packed {
+ axi_ar_chan_t ar_chan;
+ logic [
+ `MY_MAX(
+ axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width)
+ )
+ -axi_ar_chan_width:0] padding;
+ } axi_read_ar_chan_padded_t;
+
+ typedef struct packed {
+ init_req_chan_t req_chan;
+ logic [
+ `MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width))
+ -init_req_chan_width:0] padding;
+ } init_read_req_chan_padded_t;
+
+ typedef struct packed {
+ obi_a_chan_t a_chan;
+ logic [
+ `MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width))
+ -obi_a_chan_width:0] padding;
+ } obi_read_a_chan_padded_t;
+
+ typedef union packed {
+ axi_read_ar_chan_padded_t axi;
+ init_read_req_chan_padded_t init;
+ obi_read_a_chan_padded_t obi;
+ } read_meta_channel_t;
+
+ typedef struct packed {
+ init_req_chan_t req_chan;
+ logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-init_req_chan_width:0] padding;
+ } init_write_req_chan_padded_t;
+
+ typedef struct packed {
+ obi_a_chan_t a_chan;
+ logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-obi_a_chan_width:0] padding;
+ } obi_write_a_chan_padded_t;
+
+ typedef union packed {
+ init_write_req_chan_padded_t init;
+ obi_write_a_chan_padded_t obi;
+ } write_meta_channel_t;
+
+ // local buses
+ init_req_t init_read_req, init_write_req;
+ init_rsp_t init_read_rsp, init_write_rsp;
+
+ idma_backend_r_axi_rw_init_rw_obi #(
+ .DataWidth (AXI_DATA_WIDTH),
+ .AddrWidth (AXI_ADDR_WIDTH),
+ .UserWidth (AXI_USER_WIDTH),
+ .AxiIdWidth (AXI_ID_WIDTH),
+ .NumAxInFlight (NB_OUTSND_BURSTS),
+ .BufferDepth (32'd3),
+ .TFLenWidth (TFLenWidth),
+ .MemSysDepth (32'd0),
+ .CombinedShifter (1'b0),
+ .RAWCouplingAvail (1'b0),
+ .MaskInvalidData (1'b0),
+ .HardwareLegalizer (1'b1),
+ .RejectZeroTransfers (1'b1),
+ .idma_req_t (idma_req_t),
+ .idma_rsp_t (idma_rsp_t),
+ .idma_eh_req_t (idma_pkg::idma_eh_req_t),
+ .idma_busy_t (idma_pkg::idma_busy_t),
+ .axi_req_t (axi_req_t),
+ .axi_rsp_t (axi_resp_t),
+ .init_req_t (init_req_t),
+ .init_rsp_t (init_rsp_t),
+ .obi_req_t (obi_req_t),
+ .obi_rsp_t (obi_rsp_t),
+ .read_meta_channel_t (read_meta_channel_t),
+ .write_meta_channel_t(write_meta_channel_t)
+ ) i_idma_backend_r_axi_rw_init_rw_obi (
+ .clk_i,
+ .rst_ni,
+ .testmode_i (test_mode_i),
+ .idma_req_i (idma_req[s]),
+ .req_valid_i (be_valid[s]),
+ .req_ready_o (be_ready[s]),
+ .idma_rsp_o (idma_rsp[s]),
+ .rsp_valid_o (be_rsp_valid[s]),
+ .rsp_ready_i (be_rsp_ready[s]),
+ .idma_eh_req_i (1'b0),
+ .eh_req_valid_i (1'b0),
+ .eh_req_ready_o ( /* NOT CONNECTED */),
+ .axi_read_req_o (dma_req[s]),
+ .axi_read_rsp_i (dma_rsp[s]),
+ .init_read_req_o (init_read_req),
+ .init_read_rsp_i (init_read_rsp),
+ .obi_read_req_o (obi_reorg_req_from_dma[s/2]),
+ .obi_read_rsp_i (obi_reorg_rsp_to_dma[s/2]),
+ .init_write_req_o(init_write_req),
+ .init_write_rsp_i(init_write_rsp),
+ .obi_write_req_o (obi_write_req_from_dma[s/2]),
+ .obi_write_rsp_i (obi_write_rsp_to_dma[s/2]),
+ .busy_o (idma_busy[s])
+ );
+
+ // use a spill register to only give responses when a request was
+ // (or is) asserted
+ spill_register #(
+ .T(logic [-1:0])
+ ) i_init_read_rsp_reflect (
+ .clk_i,
+ .rst_ni,
+ .valid_i(init_read_req.req_valid),
+ .ready_o(init_read_rsp.req_ready),
+ .data_i('0), // not used
+ .valid_o(init_read_rsp.rsp_valid),
+ .ready_i(init_read_req.rsp_ready),
+ .data_o()
+ );
+ //implement zero memory using init protocol
+ assign init_read_rsp.rsp_chan.init = '0;
+ // implement /dev/null
+ spill_register #(
+ .T(logic [-1:0])
+ ) i_init_write_rsp_reflect (
+ .clk_i,
+ .rst_ni,
+ .valid_i(init_write_req.req_valid),
+ .ready_o(init_write_rsp.req_ready),
+ .data_i('0), // not used
+ .valid_o(init_write_rsp.rsp_valid),
+ .ready_i(init_write_req.rsp_ready),
+ .data_o()
+ );
+ assign init_write_rsp.rsp_chan.init = '0;
+ end : gen_cpy_in
+ end : gen_streams
- .busy_o ( midend_busy )
- );
// ------------------------------------------------------
- // BACKEND
+ // MUX read OBI connections if specified
// ------------------------------------------------------
+ for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin
+ if (MUX_READ) begin
+ localparam obi_pkg::obi_cfg_t sbr_obi_cfg = '{
+ UseRReady: 1'b1,
+ CombGnt: 1'b0,
+ AddrWidth: AXI_ADDR_WIDTH,
+ DataWidth: AXI_DATA_WIDTH,
+ IdWidth: 0,
+ Integrity: 1'b0,
+ BeFull: 1'b1,
+ OptionalCfg: obi_pkg::ObiMinimalOptionalConfig
+ };
+
+ // merge the reorg and read OBI requests of stream s onto a single muxed read port
+
+ obi_mux #(
+ .SbrPortObiCfg (sbr_obi_cfg),
+ .MgrPortObiCfg (sbr_obi_cfg),
+ .sbr_port_obi_req_t(obi_req_t),
+ .sbr_port_a_chan_t (obi_a_chan_t),
+ .sbr_port_obi_rsp_t(obi_rsp_t),
+ .sbr_port_r_chan_t (obi_r_chan_t),
+ .mgr_port_obi_req_t(obi_req_t),
+ .mgr_port_obi_rsp_t(obi_rsp_t),
+ .NumSbrPorts (2),
+ .NumMaxTrans (2),
+ .UseIdForRouting (1'b0)
+ ) obi_read_mux_i (
+ .clk_i,
+ .rst_ni,
+ .testmode_i (test_mode_i),
+ .sbr_ports_req_i({obi_reorg_req_from_dma[s], obi_read_req_from_dma[s]}),
+ .sbr_ports_rsp_o({obi_reorg_rsp_to_dma[s], obi_read_rsp_to_dma[s]}),
+ .mgr_port_req_o (obi_read_req_muxed[s]),
+ .mgr_port_rsp_i (obi_read_rsp_to_mux[s])
+ );
+ assign obi_reorg_req_from_rrc[s] = '0; // tie off this stream's reorg port only: one driver per element
+ assign obi_reorg_rsp_to_rrc[s] = '0; // indexed by s so each loop iteration drives a distinct element
+ end else begin // if (MUX_READ)
+ // pass through the read req/rsp of stream s from/to dma (indexed: one driver per element)
+ assign obi_read_req_muxed[s] = obi_read_req_from_dma[s];
+ assign obi_read_rsp_to_dma[s] = obi_read_rsp_to_mux[s];
+
+ obi_rready_converter #(
+ .obi_a_chan_t(obi_a_chan_t),
+ .obi_r_chan_t(obi_r_chan_t),
+ .Depth(1)
+ ) obi_rready_converter_reorg_i (
+ .clk_i,
+ .rst_ni,
+ .test_mode_i,
+ .sbr_a_chan_i(obi_reorg_req_from_dma[s].a),
+ .req_i(obi_reorg_req_from_dma[s].req),
+ .gnt_o(obi_reorg_rsp_to_dma[s].gnt),
+ .rready_i(obi_reorg_req_from_dma[s].rready),
+ .sbr_r_chan_o(obi_reorg_rsp_to_dma[s].r),
+ .rvalid_o(obi_reorg_rsp_to_dma[s].rvalid),
+ .mgr_a_chan_o(obi_reorg_req_from_rrc[s].a),
+ .req_o(obi_reorg_req_from_rrc[s].req),
+ .mgr_r_chan_i(obi_reorg_rsp_to_rrc[s].r),
+ .gnt_i(obi_reorg_rsp_to_rrc[s].gnt),
+ .rvalid_i(obi_reorg_rsp_to_rrc[s].rvalid)
+ );
+ // We are always ready for responses, because we don't
+ // send more requests than we can absorb in the fifo
+ assign obi_reorg_req_from_rrc[s].rready = 1'b1;
+ end // else: !if(MUX_READ)
+
+ obi_rready_converter #(
+ .obi_a_chan_t(obi_a_chan_t),
+ .obi_r_chan_t(obi_r_chan_t),
+ .Depth(1)
+ ) obi_rready_converter_read_i (
+ .clk_i,
+ .rst_ni,
+ .test_mode_i,
+ .sbr_a_chan_i(obi_read_req_muxed[s].a),
+ .req_i(obi_read_req_muxed[s].req),
+ .gnt_o(obi_read_rsp_to_mux[s].gnt),
+ .rready_i(obi_read_req_muxed[s].rready),
+ .sbr_r_chan_o(obi_read_rsp_to_mux[s].r),
+ .rvalid_o(obi_read_rsp_to_mux[s].rvalid),
+ .mgr_a_chan_o(obi_read_req_from_rrc[s].a),
+ .req_o(obi_read_req_from_rrc[s].req),
+ .mgr_r_chan_i(obi_read_rsp_to_rrc[s].r),
+ .gnt_i(obi_read_rsp_to_rrc[s].gnt),
+ .rvalid_i(obi_read_rsp_to_rrc[s].rvalid)
+ );
+ // We are always ready for responses, because we don't
+ // send more requests than we can absorb in the fifo
+ assign obi_read_req_from_rrc[s].rready = 1'b1;
- idma_backend #(
- .DataWidth ( AXI_DATA_WIDTH ),
- .AddrWidth ( AXI_ADDR_WIDTH ),
- .UserWidth ( AXI_USER_WIDTH ),
- .AxiIdWidth ( AXI_ID_WIDTH ),
- .NumAxInFlight ( NB_OUTSND_BURSTS ),
- .BufferDepth ( 3 ),
- .TFLenWidth ( TFLenWidth ),
- .RAWCouplingAvail ( 1'b1 ),
- .MemSysDepth ( 32'd0 ),
- .MaskInvalidData ( 1'b1 ),
- .HardwareLegalizer ( 1'b1 ),
- .RejectZeroTransfers ( 1'b1 ),
- .ErrorCap ( idma_pkg::NO_ERROR_HANDLING ),
- .idma_req_t ( idma_req_t ),
- .idma_rsp_t ( idma_rsp_t ),
- .idma_eh_req_t ( idma_pkg::idma_eh_req_t ),
- .idma_busy_t ( idma_pkg::idma_busy_t ),
- .protocol_req_t ( slv_req_t ),
- .protocol_rsp_t ( slv_resp_t ),
- .aw_chan_t ( slv_aw_chan_t ),
- .ar_chan_t ( slv_ar_chan_t )
- ) i_idma_backend (
- .clk_i,
- .rst_ni,
- .testmode_i ( test_mode_i ),
-
- .idma_req_i ( burst_req ),
- .req_valid_i ( be_valid ),
- .req_ready_o ( be_ready ),
-
- .idma_rsp_o ( idma_rsp ),
- .rsp_valid_o ( be_rsp_valid ),
- .rsp_ready_i ( be_rsp_ready ),
- .idma_eh_req_i ( '0 ), // No error handling
- .eh_req_valid_i ( 1'b1 ),
- .eh_req_ready_o (/*NOT CONNECTED*/),
+ obi_rready_converter #(
+ .obi_a_chan_t(obi_a_chan_t),
+ .obi_r_chan_t(obi_r_chan_t),
+ .Depth(1)
+ ) obi_rready_converter_wr_i (
+ .clk_i,
+ .rst_ni,
+ .test_mode_i,
+ .sbr_a_chan_i(obi_write_req_from_dma[s].a),
+ .req_i(obi_write_req_from_dma[s].req),
+ .gnt_o(obi_write_rsp_to_dma[s].gnt),
+ .rready_i(obi_write_req_from_dma[s].rready),
+ .sbr_r_chan_o(obi_write_rsp_to_dma[s].r),
+ .rvalid_o(obi_write_rsp_to_dma[s].rvalid),
+ .mgr_a_chan_o(obi_write_req_from_rrc[s].a),
+ .req_o(obi_write_req_from_rrc[s].req),
+ .mgr_r_chan_i(obi_write_rsp_to_rrc[s].r),
+ .gnt_i(obi_write_rsp_to_rrc[s].gnt),
+ .rvalid_i(obi_write_rsp_to_rrc[s].rvalid)
+ );
+ // Same as above
+ assign obi_write_req_from_rrc[s].rready = 1'b1;
+ end
- .protocol_req_o ( dma_req ),
- .protocol_rsp_i ( dma_rsp ),
- .busy_o ( idma_busy )
- );
// ------------------------------------------------------
- // AXI connection to EXT/TCDM
+ // TCDM connections
// ------------------------------------------------------
-
- // xbar
- localparam int unsigned NumRules = 3;
- typedef struct packed {
- int unsigned idx;
- logic [AXI_ADDR_WIDTH-1:0] start_addr;
- logic [AXI_ADDR_WIDTH-1:0] end_addr;
- } xbar_rule_t;
- xbar_rule_t [NumRules-1:0] addr_map;
- logic [AXI_ADDR_WIDTH-1:0] cluster_base_addr;
- assign cluster_base_addr = ClusterBaseAddr; /* + (cluster_id_i << 22);*/
- assign addr_map = '{
- '{ // SoC low
- start_addr: '0,
- end_addr: cluster_base_addr,
- idx: 0
- },
- '{ // TCDM
- start_addr: cluster_base_addr,
- end_addr: cluster_base_addr + TCDM_SIZE,
- idx: 1
- },
- '{ // SoC high
- start_addr: cluster_base_addr + TCDM_SIZE,
- end_addr: '1,
- idx: 0
- }
- };
- localparam int unsigned NumMstPorts = 2;
- localparam int unsigned NumSlvPorts = NUM_STREAMS;
-
- /* verilator lint_off WIDTHCONCAT */
- localparam axi_pkg::xbar_cfg_t XbarCfg = '{
- NoSlvPorts: NumSlvPorts,
- NoMstPorts: NumMstPorts,
- MaxMstTrans: NB_OUTSND_BURSTS,
- MaxSlvTrans: NB_OUTSND_BURSTS,
- FallThrough: 1'b0,
- LatencyMode: axi_pkg::CUT_ALL_PORTS,
- PipelineStages: 0,
- AxiIdWidthSlvPorts: SlvIdxWidth,
- AxiIdUsedSlvPorts: SlvIdxWidth,
- UniqueIds: 1'b0,
- AxiAddrWidth: AXI_ADDR_WIDTH,
- AxiDataWidth: AXI_DATA_WIDTH,
- NoAddrRules: NumRules
- };
- /* verilator lint_on WIDTHCONCAT */
-
- axi_xbar #(
- .Cfg ( XbarCfg ),
- .slv_aw_chan_t( slv_aw_chan_t ),
- .mst_aw_chan_t( mst_aw_chan_t ),
- .w_chan_t ( w_chan_t ),
- .slv_b_chan_t ( slv_b_chan_t ),
- .mst_b_chan_t ( mst_b_chan_t ),
- .slv_ar_chan_t( slv_ar_chan_t ),
- .mst_ar_chan_t( mst_ar_chan_t ),
- .slv_r_chan_t ( slv_r_chan_t ),
- .mst_r_chan_t ( mst_r_chan_t ),
- .slv_req_t ( slv_req_t ),
- .slv_resp_t ( slv_resp_t ),
- .mst_req_t ( mst_req_t ),
- .mst_resp_t ( mst_resp_t ),
- .rule_t ( xbar_rule_t )
- ) i_dma_axi_xbar (
- .clk_i ( clk_i ),
- .rst_ni ( rst_ni ),
- .test_i ( test_mode_i ),
- .slv_ports_req_i ( dma_req ),
- .slv_ports_resp_o ( dma_rsp ),
- .mst_ports_req_o ( { tcdm_req, soc_req } ),
- .mst_ports_resp_i ( { tcdm_rsp, soc_rsp } ),
- .addr_map_i ( addr_map ),
- .en_default_mst_port_i ( '0 ),
- .default_mst_port_i ( '0 )
- );
-
- localparam int unsigned TcdmFifoDepth = 1;
- `AXI_ASSIGN_REQ_STRUCT(tcdm_mem_req, tcdm_req)
-
- axi_to_mem_split #(
- .axi_req_t ( mem_req_t ),
- .axi_resp_t ( mst_resp_t ),
- .AddrWidth ( ADDR_WIDTH ),
- .AxiDataWidth ( AXI_DATA_WIDTH ),
- .IdWidth ( MstIdxWidth ),
- .MemDataWidth ( DATA_WIDTH ),
- .BufDepth ( TcdmFifoDepth ),
- .HideStrb ( 1'b1 )
- ) i_axi_to_mem (
- .clk_i,
- .rst_ni,
- .busy_o (),
- .axi_req_i ( tcdm_mem_req ),
- .axi_resp_o ( tcdm_rsp ),
- .mem_req_o ( { tcdm_master[0].req, tcdm_master[1].req,
- tcdm_master[2].req, tcdm_master[3].req } ),
- .mem_gnt_i ( { tcdm_master[0].gnt, tcdm_master[1].gnt,
- tcdm_master[2].gnt, tcdm_master[3].gnt } ),
- .mem_addr_o ( { tcdm_master[0].add, tcdm_master[1].add,
- tcdm_master[2].add, tcdm_master[3].add } ),
- .mem_wdata_o ( { tcdm_master[0].data, tcdm_master[1].data,
- tcdm_master[2].data, tcdm_master[3].data } ),
- .mem_strb_o ( { tcdm_master[0].be, tcdm_master[1].be,
- tcdm_master[2].be, tcdm_master[3].be } ),
- .mem_atop_o ( ),
- .mem_we_o ( { tcdm_master_we_0, tcdm_master_we_1,
- tcdm_master_we_2, tcdm_master_we_3 } ),
- .mem_rvalid_i ( { tcdm_master[0].r_valid, tcdm_master[1].r_valid,
- tcdm_master[2].r_valid, tcdm_master[3].r_valid } ),
- .mem_rdata_i ( { tcdm_master[0].r_data, tcdm_master[1].r_data,
- tcdm_master[2].r_data, tcdm_master[3].r_data } )
- );
-
- // flip we polarity
- assign tcdm_master[0].wen = !tcdm_master_we_0;
- assign tcdm_master[1].wen = !tcdm_master_we_1;
- assign tcdm_master[2].wen = !tcdm_master_we_2;
- assign tcdm_master[3].wen = !tcdm_master_we_3;
-
- for (genvar ii=0; ii<4; ii++) begin : gen_tie_unused_tcdm_master
- assign tcdm_master[ii].user = '0;
- assign tcdm_master[ii].ecc = '0;
- assign tcdm_master[ii].id = '0;
- assign tcdm_master[ii].ereq = '0;
- assign tcdm_master[ii].r_eready = '1;
- end
-
-endmodule : dmac_wrap
+ for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin
+ if (TCDM_MEM2BANKS) begin : tcdm_mem2banks
+ // Currently, mem2banks only implemented for AXI_DATA_WIDTH==64
+ // TODO: parametrize so it works for arbitrary data widths
+ initial begin : mem2banks_check_axi_width
+ if (AXI_DATA_WIDTH != 64) begin
+ $error("idma_wrap: AXI_DATA_WIDTH must be 64 when TCDM_MEM2BANKS is 1!");
+ end
+ end
+
+ logic tcdm_master_we_0;
+ logic tcdm_master_we_1;
+ logic tcdm_master_we_2;
+ logic tcdm_master_we_3;
+ logic tcdm_master_we_4;
+ logic tcdm_master_we_5;
+
+ mem_to_banks #(
+ .AddrWidth(AXI_ADDR_WIDTH),
+ .DataWidth(AXI_DATA_WIDTH),
+ .NumBanks (32'd2),
+ .HideStrb (1'b1),
+ .MaxTrans (32'd1),
+ .FifoDepth(32'd1)
+ ) i_mem_to_banks_write (
+ .clk_i,
+ .rst_ni,
+ .req_i(obi_write_req_from_rrc[s].req),
+ .gnt_o(obi_write_rsp_to_rrc[s].gnt),
+ .addr_i(obi_write_req_from_rrc[s].a.addr),
+ .wdata_i(obi_write_req_from_rrc[s].a.wdata),
+ .strb_i(obi_write_req_from_rrc[s].a.be),
+ .atop_i('0),
+ .we_i(obi_write_req_from_rrc[s].a.we),
+ .rvalid_o(obi_write_rsp_to_rrc[s].rvalid),
+ .rdata_o(obi_write_rsp_to_rrc[s].r.rdata),
+ .bank_req_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req
+ }),
+ .bank_gnt_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt
+ }),
+ .bank_addr_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add
+ }),
+ .bank_wdata_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data
+ }),
+ .bank_strb_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be
+ }),
+ .bank_atop_o( /* NOT CONNECTED */),
+ .bank_we_o({tcdm_master_we_1, tcdm_master_we_0}),
+ .bank_rvalid_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid,
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid
+ }),
+ .bank_rdata_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data
+ })
+ );
+
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].user = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].wen = !tcdm_master_we_0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !tcdm_master_we_1;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].r_ready = 1'b1;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_ready = 1'b1;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].id = '0; // TODO change?
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].id = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].ecc = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].ecc = '0;
+
+ mem_to_banks #(
+ .AddrWidth(AXI_ADDR_WIDTH),
+ .DataWidth(AXI_DATA_WIDTH),
+ .NumBanks (32'd2),
+ .HideStrb (1'b1),
+ .MaxTrans (32'd1),
+ .FifoDepth(32'd1)
+ ) i_mem_to_banks_read (
+ .clk_i,
+ .rst_ni,
+ .req_i(obi_read_req_from_rrc[s].req),
+ .gnt_o(obi_read_rsp_to_rrc[s].gnt),
+ .addr_i(obi_read_req_from_rrc[s].a.addr),
+ .wdata_i(obi_read_req_from_rrc[s].a.wdata),
+ .strb_i(obi_read_req_from_rrc[s].a.be),
+ .atop_i('0),
+ .we_i(obi_read_req_from_rrc[s].a.we),
+ .rvalid_o(obi_read_rsp_to_rrc[s].rvalid),
+ .rdata_o(obi_read_rsp_to_rrc[s].r.rdata),
+ .bank_req_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req
+ }),
+ .bank_gnt_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt
+ }),
+ .bank_addr_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add
+ }),
+ .bank_wdata_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data
+ }),
+ .bank_strb_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be
+ }),
+ .bank_atop_o( /* NOT CONNECTED */),
+ .bank_we_o({tcdm_master_we_3, tcdm_master_we_2}),
+ .bank_rvalid_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_valid,
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid
+ }),
+ .bank_rdata_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_data,
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data
+ })
+ );
+
+
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].boffs = '0;
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].lrdy = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0;
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].boffs = '0;
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].lrdy = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].user = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !tcdm_master_we_2;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].wen = !tcdm_master_we_3;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_ready = 1'b1;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_ready = 1'b1;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].id = '0; // TODO change?
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].id = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].ecc = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].ecc = '0;
+
+
+ if (!MUX_READ) begin // if we don't mux the read, we have 6*NUM_BIDIR_STREAMS interfaces and the reorg
+ // interface goes straight to TCDM masters 5 and 4.
+ mem_to_banks #(
+ .AddrWidth(AXI_ADDR_WIDTH),
+ .DataWidth(AXI_DATA_WIDTH),
+ .NumBanks (32'd2),
+ .HideStrb (1'b1),
+ .MaxTrans (32'd1),
+ .FifoDepth(32'd1)
+ ) i_mem_to_banks_reorg (
+ .clk_i,
+ .rst_ni,
+ .req_i(obi_reorg_req_from_rrc[s].req),
+ .gnt_o(obi_reorg_rsp_to_rrc[s].gnt),
+ .addr_i(obi_reorg_req_from_rrc[s].a.addr),
+ .wdata_i(obi_reorg_req_from_rrc[s].a.wdata),
+ .strb_i(obi_reorg_req_from_rrc[s].a.be),
+ .atop_i('0),
+ .we_i(obi_reorg_req_from_rrc[s].a.we),
+ .rvalid_o(obi_reorg_rsp_to_rrc[s].rvalid),
+ .rdata_o(obi_reorg_rsp_to_rrc[s].r.rdata),
+ .bank_req_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].req
+ }),
+ .bank_gnt_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].gnt
+ }),
+ .bank_addr_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].add
+ }),
+ .bank_wdata_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].data
+ }),
+ .bank_strb_o({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].be
+ }),
+ .bank_atop_o( /* NOT CONNECTED */),
+ .bank_we_o({tcdm_master_we_5, tcdm_master_we_4}),
+ .bank_rvalid_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_valid,
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_valid
+ }),
+ .bank_rdata_i({
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_data,
+ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_data
+ })
+ );
+
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].boffs = '0; // NOTE(review): kept disabled like ports +2/+3; boffs presumably no longer exists on the interface - confirm
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].lrdy = '0; // NOTE(review): kept disabled like ports +2/+3; r_ready below is driven instead
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].user = '0;
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].boffs = '0;
+ //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].lrdy = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].user = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].wen = !tcdm_master_we_4; // mem_to_banks reports write-enable, the TCDM port takes the inverted value
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].wen = !tcdm_master_we_5;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_ready = 1'b1; // responses are always accepted, same as ports +0..+3
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_ready = 1'b1;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].id = '0; // TODO change?
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].id = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].ecc = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].ecc = '0;
+ end
+ end else begin : passthrough_obi_to_tcdm
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].user = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req = obi_write_req_from_rrc[s].req;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].wen = !obi_write_req_from_rrc[s].a.we;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add = obi_write_req_from_rrc[s].a.addr;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data = obi_write_req_from_rrc[s].a.wdata;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be = obi_write_req_from_rrc[s].a.be;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_ready = obi_write_req_from_rrc[s].rready;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].id = '0; // TODO change?
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].ecc = '0;
+ assign obi_write_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt;
+ assign obi_write_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid;
+ assign obi_write_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req = obi_read_req_from_rrc[s].req;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !obi_read_req_from_rrc[s].a.we;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add = obi_read_req_from_rrc[s].a.addr;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data = obi_read_req_from_rrc[s].a.wdata;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be = obi_read_req_from_rrc[s].a.be;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_ready = obi_read_req_from_rrc[s].rready;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].id = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].ecc = '0;
+ assign obi_read_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt;
+ assign obi_read_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid;
+ assign obi_read_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data;
+ if (!MUX_READ) begin : passthrough_obi_read // without read muxing the reorg stream is wired straight to TCDM port +2
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req = obi_reorg_req_from_rrc[s].req;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !obi_reorg_req_from_rrc[s].a.we; // TCDM port takes the inverted OBI we
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add = obi_reorg_req_from_rrc[s].a.addr;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data = obi_reorg_req_from_rrc[s].a.wdata;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be = obi_reorg_req_from_rrc[s].a.be;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_ready = obi_reorg_req_from_rrc[s].rready; // take rready from the reorg request like every other field of this port (was obi_read_req_from_rrc)
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].id = '0;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].ecc = '0;
+ assign obi_reorg_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt;
+ assign obi_reorg_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid;
+ assign obi_reorg_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data;
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0;
+ end
+ end
+ end
+endmodule
+`undef MY_MAX
\ No newline at end of file
diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv
index 9db018cd..4d5ca36e 100644
--- a/rtl/pulp_cluster.sv
+++ b/rtl/pulp_cluster.sv
@@ -22,6 +22,8 @@
`include "cluster_bus_defines.sv"
`include "pulp_interfaces.sv"
`include "register_interface/typedef.svh"
+`include "pulp_soc_defines.sv"
+
module pulp_cluster
import pulp_cluster_package::*;
@@ -29,13 +31,14 @@ module pulp_cluster
import rapid_recovery_pkg::*;
import fpnew_pkg::*;
#(
- parameter pulp_cluster_package::pulp_cluster_cfg_t Cfg = pulp_cluster_package::PulpClusterDefaultCfg,
+ parameter pulp_cluster_package::pulp_cluster_cfg_t Cfg =
+ pulp_cluster_package::PulpClusterDefaultCfg,
localparam int unsigned TcdmBankSize = Cfg.TcdmSize/Cfg.TcdmNumBank,
localparam int unsigned TcdmNumRows = TcdmBankSize/4,
localparam int unsigned MaxUniqId = 1,
localparam int unsigned AxiIdInWidth = pulp_cluster_package::AxiSubordinateIdwidth,
localparam int unsigned AxiIdOutWidth = pulp_cluster_package::AxiManagerIdwidth,
- // CDC AXI parameters (external to cluster)
+ // CDC AXI parameters (external to cluster, narrow)
localparam int unsigned AwInWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth,
Cfg.AxiIdInWidth,
Cfg.AxiUserWidth),
@@ -54,7 +57,7 @@ module pulp_cluster
localparam int unsigned AsyncInBDataWidth = (2**Cfg.AxiCdcLogDepth)*BInWidth,
localparam int unsigned AsyncInArDatawidth = (2**Cfg.AxiCdcLogDepth)*ArInWidth,
localparam int unsigned AsyncInRDataWidth = (2**Cfg.AxiCdcLogDepth)*RInWidth,
- // CDC AXI parameters (cluster to external)
+ // CDC AXI parameters (cluster to external, narrow)
localparam int unsigned AwOutWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth,
Cfg.AxiIdOutWidth,
Cfg.AxiUserWidth),
@@ -73,6 +76,25 @@ module pulp_cluster
localparam int unsigned AsyncOutBDataWidth = (2**Cfg.AxiCdcLogDepth)*BOutWidth,
localparam int unsigned AsyncOutArDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWidth,
localparam int unsigned AsyncOutRDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWidth,
+ // CDC AXI parameters (cluster to external, wide)
+ localparam int unsigned AwOutWideWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth,
+ Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth),
+ localparam int unsigned WOutWideWidth = axi_pkg::w_width(Cfg.AxiDataOutWideWidth,
+ Cfg.AxiUserWidth),
+ localparam int unsigned BOutWideWidth = axi_pkg::b_width(Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth),
+ localparam int unsigned ArOutWideWidth = axi_pkg::ar_width(Cfg.AxiAddrWidth,
+ Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth),
+ localparam int unsigned ROutWideWidth = axi_pkg::r_width(Cfg.AxiDataOutWideWidth,
+ Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth),
+ localparam int unsigned AsyncOutAwWideDataWidth = (2**Cfg.AxiCdcLogDepth)*AwOutWideWidth,
+ localparam int unsigned AsyncOutWWideDataWidth = (2**Cfg.AxiCdcLogDepth)*WOutWideWidth,
+ localparam int unsigned AsyncOutBWideDataWidth = (2**Cfg.AxiCdcLogDepth)*BOutWideWidth,
+ localparam int unsigned AsyncOutArWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWideWidth,
+ localparam int unsigned AsyncOutRWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWideWidth,
// Internal bus parameters
// TCDM data bus width (never changes)
localparam int unsigned DataWidth = 32,
@@ -105,7 +127,9 @@ module pulp_cluster
// TCDM banks data width extended with parity for ECCs
localparam int unsigned ProtectedTcdmWidth = DataWidth + ParityWidth,
// Number of parity bits for ECC-extended HCI HWPE branch
- localparam int unsigned HWPEParityWidth = ($clog2(DataWidth)+2)*Cfg.HwpeNumPorts + ($clog2(AddrWidth+(Cfg.HwpeNumPorts*DataWidth)/8+1)+2)
+ localparam int unsigned HWPEParityWidth =
+ ($clog2(DataWidth) + 2) * Cfg.HwpeNumPorts +
+ ($clog2(AddrWidth + (Cfg.HwpeNumPorts * DataWidth) / 8 + 1) + 2)
)(
input logic clk_i,
input logic rst_ni,
@@ -130,6 +154,7 @@ module pulp_cluster
input logic axi_isolate_i,
output logic axi_isolated_o,
+ output logic axi_isolated_wide_o,
input logic dma_pe_evt_ack_i,
output logic dma_pe_evt_valid_o,
@@ -149,7 +174,7 @@ module pulp_cluster
input logic [AsyncEventDataWidth-1:0] async_cluster_events_data_i,
- // AXI4 SLAVE
+ // AXI4 SLAVE Narrow
//***************************************
// WRITE ADDRESS CHANNEL
input logic [Cfg.AxiCdcLogDepth:0] async_data_slave_aw_wptr_i,
@@ -175,7 +200,7 @@ module pulp_cluster
output logic [Cfg.AxiCdcLogDepth:0] async_data_slave_b_wptr_o,
output logic [AsyncInBDataWidth-1:0] async_data_slave_b_data_o,
input logic [Cfg.AxiCdcLogDepth:0] async_data_slave_b_rptr_i,
- // AXI4 MASTER
+ // AXI4 MASTER Narrow
//***************************************
// WRITE ADDRESS CHANNEL
output logic [Cfg.AxiCdcLogDepth:0] async_data_master_aw_wptr_o,
@@ -200,12 +225,39 @@ module pulp_cluster
// WRITE RESPONSE CHANNEL
input logic [Cfg.AxiCdcLogDepth:0] async_data_master_b_wptr_i,
input logic [AsyncOutBDataWidth-1:0] async_data_master_b_data_i,
- output logic [Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o
+ output logic [Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o,
+ // AXI4 MASTER Wide
+ //**************************************
+ // WRITE ADDRESS CHANNEL
+ output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_aw_wptr_o,
+ output logic [AsyncOutAwWideDataWidth-1:0] async_wide_master_aw_data_o,
+ input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_aw_rptr_i,
+
+ // READ ADDRESS CHANNEL
+ output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_ar_wptr_o,
+ output logic [AsyncOutArWideDataWidth-1:0] async_wide_master_ar_data_o,
+ input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_ar_rptr_i,
+
+ // WRITE DATA CHANNEL
+ output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_w_wptr_o,
+ output logic [AsyncOutWWideDataWidth-1:0] async_wide_master_w_data_o,
+ input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_w_rptr_i,
+
+ // READ DATA CHANNEL
+ input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_r_wptr_i,
+ input logic [AsyncOutRWideDataWidth-1:0] async_wide_master_r_data_i,
+ output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_r_rptr_o,
+
+ // WRITE RESPONSE CHANNEL
+ input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_b_wptr_i,
+ input logic [AsyncOutBWideDataWidth-1:0] async_wide_master_b_data_i,
+ output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_b_rptr_o
);
//Ensure that the input AXI ID width is big enough to accomodate the accomodate the IDs of internal wiring
if (Cfg.AxiIdInWidth < 1 + $clog2(Cfg.iCacheNumBanks))
- $info("AXI input ID width must be larger than 1+$clog2(Cfg.iCacheNumBanks) which is %d but was %d", 1 + $clog2(Cfg.iCacheNumBanks), Cfg.AxiIdInWidth);
+ $info("AXI input ID width must be larger than 1+$clog2(Cfg.iCacheNumBanks) which is %d but was %d"
+ , 1 + $clog2(Cfg.iCacheNumBanks), Cfg.AxiIdInWidth);
localparam int unsigned NB_L1_CUTS = 16;
localparam int unsigned RW_MARGIN_WIDTH = 4;
@@ -230,6 +282,8 @@ logic [Cfg.NumCores-1:0] dbg_core_running;
logic [Cfg.NumCores-1:0] s_dbg_irq;
logic s_hwpe_en;
logic [$clog2(MAX_NUM_HWPES)-1:0] s_hwpe_sel;
+// localparam int unsigned HWPE_SEL_BITS = (Cfg.HwpeCfg.NumHwpes > 1) ? $clog2(Cfg.HwpeCfg.NumHwpes) : 1;
+// logic [HWPE_SEL_BITS-1:0] s_hwpe_sel;
logic fetch_en_synch;
logic en_sa_boot_synch;
@@ -291,10 +345,37 @@ logic s_dma_cl_irq;
logic s_dma_fc_event;
logic s_dma_fc_irq;
+// Determine if wide AXI port should be enabled based on DMA type and configuration
+// - MCHAN: Always disable wide port (uses narrow port only)
+// - iDMA: Use Cfg.EnableWidePort parameter
+// `ifdef TARGET_MCHAN
+// localparam bit EnableWidePort = 1'b0; // MCHAN never needs wide ports
+// `else
+// localparam bit EnableWidePort = Cfg.EnableWidePort; // User-configurable for iDMA
+// `endif
+
+// Wide AXI infrastructure: Conditional implementation based on EnableWidePort
+// - MCHAN: Always uses narrow transfers (EnableWidePort = 0)
+// - iDMA with EnableWidePort=1: Uses wide transfers (256-bit AXI)
+// - iDMA with EnableWidePort=0: Uses narrow transfers (64-bit AXI)
+// - Wide infrastructure present for interface compatibility
+// - Narrow DMA master merged with cluster bus master when wide disabled
+
logic [Cfg.NumCores-1:0] hmr_barrier_matched;
logic [Cfg.NumCores-1:0] hmr_dmr_sw_resynch_req, hmr_tmr_sw_resynch_req;
logic [Cfg.NumCores-1:0] hmr_dmr_sw_synch_req, hmr_tmr_sw_synch_req;
+// Number of log interconnect ports each DMA plug adds to the TCDM ID width
+// (used in TCDM_ID_WIDTH): 1 when the DMA has its own ports, 0 otherwise.
+// With Cfg.DmaUseHwpePort the DMA ports are currently muxed together with the
+// HWPE ports to a single port, thus they don't contribute to the ID width.
+// TODO Arpan correct this if needed
+localparam DMA_IW_CONTRIB_FAC = Cfg.DmaUseHwpePort ? 0 : 1;
+// data width of the TCDM master ports coming from the DMA.
+// if using MCHAN, must be 32
+localparam int unsigned DMA_HCI_DATA_WIDTH = Cfg.DmaUseHwpePort ? Cfg.AxiDataOutWideWidth : DataWidth;
+
+
localparam hci_package::hci_size_parameter_t HciCoreSizeParam = '{
DW: DataWidth,
AW: AddrWidth,
@@ -302,7 +383,8 @@ localparam hci_package::hci_size_parameter_t HciCoreSizeParam = '{
UW: DEFAULT_UW,
IW: DEFAULT_IW,
EW: DEFAULT_EW,
- EHW: DEFAULT_EHW
+ EHW: DEFAULT_EHW,
+ FD: 0
};
localparam hci_package::hci_size_parameter_t HciHwpeSizeParam = '{
DW: Cfg.HwpeNumPorts * DataWidth,
@@ -311,14 +393,27 @@ localparam hci_package::hci_size_parameter_t HciHwpeSizeParam = '{
UW: DEFAULT_UW,
IW: DEFAULT_IW,
EW: (Cfg.ECCInterco) ? HWPEParityWidth : DEFAULT_EW,
- EHW: DEFAULT_EHW
+ EHW: DEFAULT_EHW,
+ FD: 2
};
+localparam hci_package::hci_size_parameter_t HciDmaSizeParam = '{
+ DW: DMA_HCI_DATA_WIDTH,
+ AW: AddrWidth,
+ BW: DEFAULT_BW,
+ UW: DEFAULT_UW,
+ IW: DEFAULT_IW,
+ EW: DEFAULT_EW,
+ EHW: DEFAULT_EHW,
+ FD: 0
+};
+
/* logarithmic and peripheral interconnect interfaces */
// ext -> log interconnect
hci_core_intf #(
.DW ( HciCoreSizeParam.DW ),
- .AW ( HciCoreSizeParam.AW )
-) s_hci_ext[0:Cfg.DmaNumPlugs-1] (
+ .AW ( HciCoreSizeParam.AW ),
+ .FD ( HciCoreSizeParam.FD )
+) s_hci_ext[0:`NB_EXT-1] (
.clk ( clk_i )
);
@@ -328,14 +423,14 @@ XBAR_PERIPH_BUS s_xbar_speriph_bus[Cfg.NumSlvPeriphs-1:0]();
// periph interconnect -> HWPE subsystem
XBAR_PERIPH_BUS s_hwpe_cfg_bus();
-// DMA -> log interconnect
+// DMA -> (optionally) size converter
hci_core_intf #(
- .DW ( HciCoreSizeParam.DW ),
- .AW ( HciCoreSizeParam.AW )
+ .DW ( HciDmaSizeParam.DW ),
+ .AW ( HciDmaSizeParam.AW ),
+ .FD ( HciDmaSizeParam.FD )
) s_hci_dma[0:Cfg.DmaNumPlugs-1] (
.clk ( clk_i )
);
-XBAR_TCDM_BUS s_dma_plugin_xbar_bus[Cfg.DmaNumPlugs-1:0]();
// ext -> xbar periphs FIXME
XBAR_TCDM_BUS s_mperiph_xbar_bus[Cfg.NumMstPeriphs-1:0]();
@@ -348,13 +443,15 @@ hci_core_intf #(
.DW ( HciHwpeSizeParam.DW ),
.AW ( HciHwpeSizeParam.AW ),
.EW ( HciHwpeSizeParam.EW ),
- .EHW ( HciHwpeSizeParam.EHW )
+ .EHW ( HciHwpeSizeParam.EHW ),
+ .FD ( HciHwpeSizeParam.FD )
) s_hci_hwpe [0:0] (
.clk ( clk_i )
);
hci_core_intf #(
.DW ( HciCoreSizeParam.DW ),
- .AW ( HciCoreSizeParam.AW )
+ .AW ( HciCoreSizeParam.AW ),
+ .FD ( HciCoreSizeParam.FD )
) s_hci_core [0:Cfg.NumCores-1] (
.clk ( clk_i )
);
@@ -382,7 +479,8 @@ XBAR_TCDM_BUS s_debug_bus[Cfg.NumCores-1:0]();
// FIXME: iDMA
hci_core_intf #(
.DW ( HciCoreSizeParam.DW ),
- .AW ( HciCoreSizeParam.AW )
+ .AW ( HciCoreSizeParam.AW ),
+ .FD ( HciCoreSizeParam.FD )
) s_core_dmactrl_bus [0:Cfg.NumCores-1] (
.clk ( clk_i )
);
@@ -416,7 +514,11 @@ snitch_icache_pkg::icache_l0_events_t [Cfg.NumCores-1:0] s_icache_l0_events;
snitch_icache_pkg::icache_l1_events_t s_icache_l1_events;
//----------------------------------------------------------------------//
-localparam TCDM_ID_WIDTH = Cfg.NumCores + Cfg.DmaNumPlugs + 4 + Cfg.HwpeNumPorts;
+// DMA ports do not need ID extension if mapped to HWPE ports as they are
+// currently muxed
+// TODO Arpan fix if needed
+localparam TCDM_ID_WIDTH = Cfg.NumCores + Cfg.DmaNumPlugs*DMA_IW_CONTRIB_FAC + `NB_EXT + Cfg.HwpeNumPorts;
+
localparam hci_package::hci_size_parameter_t HciMemSizeParam = '{
DW: DataWidth,
AW: AddrMemWidth+2, // AddrMemWidth is word-wise, +2 for byte-wise
@@ -424,7 +526,8 @@ localparam hci_package::hci_size_parameter_t HciMemSizeParam = '{
UW: DEFAULT_UW,
IW: TCDM_ID_WIDTH,
EW: (Cfg.ECCInterco) ? ParityWidth+MetaParityWidth : DEFAULT_EW,
- EHW: DEFAULT_EHW
+ EHW: DEFAULT_EHW,
+ FD: 0
};
// log interconnect -> TCDM memory banks (SRAM)
@@ -449,6 +552,7 @@ hci_core_intf #(
// ***********************************************************************************************+
// ***********************************************************************************************+
+
//***************************************************
/* synchronous AXI interfaces internal to the cluster */
//***************************************************
@@ -473,6 +577,16 @@ hci_core_intf #(
`AXI_TYPEDEF_REQ_T(c2s_out_int_req_t,c2s_out_int_aw_chan_t,c2s_out_int_w_chan_t,c2s_out_int_ar_chan_t)
`AXI_TYPEDEF_RESP_T(c2s_out_int_resp_t,c2s_out_int_b_chan_t,c2s_out_int_r_chan_t)
+ // CLUSTER TO SOC Wide
+ `AXI_TYPEDEF_AW_CHAN_T(c2s_wide_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_W_CHAN_T(c2s_wide_w_chan_t,logic[Cfg.AxiDataOutWideWidth-1:0],logic[Cfg.AxiDataOutWideWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_B_CHAN_T(c2s_wide_b_chan_t,logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_AR_CHAN_T(c2s_wide_ar_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_R_CHAN_T(c2s_wide_r_chan_t,logic[Cfg.AxiDataOutWideWidth-1:0],logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
+
+ `AXI_TYPEDEF_REQ_T(c2s_wide_req_t, c2s_wide_aw_chan_t, c2s_wide_w_chan_t, c2s_wide_ar_chan_t)
+ `AXI_TYPEDEF_RESP_T(c2s_wide_resp_t, c2s_wide_b_chan_t, c2s_wide_r_chan_t)
+
typedef s2c_in_int_aw_chan_t c2s_in_int_aw_chan_t;
typedef c2s_out_int_w_chan_t c2s_in_int_w_chan_t;
typedef s2c_in_int_b_chan_t c2s_in_int_b_chan_t;
@@ -497,15 +611,17 @@ hci_core_intf #(
c2s_in_int_req_t s_core_instr_bus_req;
c2s_in_int_resp_t s_core_instr_bus_resp;
+ // DMA master signals - always declared, conditionally connected
+ c2s_wide_req_t s_dma_master_req; // Wide DMA master (256-bit)
+ c2s_wide_resp_t s_dma_master_resp;
+ c2s_out_int_req_t s_dma_narrow_master_req; // Narrow DMA master (64-bit)
+ c2s_out_int_resp_t s_dma_narrow_master_resp;
+
// core per2axi -> ext
c2s_in_int_req_t s_core_ext_bus_req;
c2s_in_int_resp_t s_core_ext_bus_resp;
- // DMA -> ext
- c2s_in_int_req_t s_dma_ext_bus_req;
- c2s_in_int_resp_t s_dma_ext_bus_resp;
-
// ext -> axi2mem
c2s_out_int_req_t s_ext_tcdm_bus_req;
c2s_out_int_resp_t s_ext_tcdm_bus_resp;
@@ -533,7 +649,6 @@ cluster_bus_wrap #(
.NB_MASTER ( Cfg.NumAxiOut ),
.NB_SLAVE ( Cfg.NumAxiIn ),
.NB_CORES ( Cfg.NumCores ),
- .DMA_NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ),
.TCDM_SIZE ( Cfg.TcdmSize ),
.AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ),
.AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ),
@@ -565,8 +680,6 @@ cluster_bus_wrap #(
.data_slave_resp_o ( s_core_ext_bus_resp ),
.instr_slave_req_i ( s_core_instr_bus_req ),
.instr_slave_resp_o ( s_core_instr_bus_resp ),
- .dma_slave_req_i ( s_dma_ext_bus_req ),
- .dma_slave_resp_o ( s_dma_ext_bus_resp ),
.ext_slave_req_i ( s_data_slave_64_req ),
.ext_slave_resp_o ( s_data_slave_64_resp ),
.tcdm_master_req_o ( s_ext_tcdm_bus_req ),
@@ -578,7 +691,7 @@ cluster_bus_wrap #(
);
axi2mem_wrap #(
- .NB_DMAS ( Cfg.DmaNumPlugs ),
+ .NB_DMAS ( `NB_EXT ),
.AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ),
.AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ),
.AXI_USER_WIDTH ( Cfg.AxiUserWidth ),
@@ -657,8 +770,9 @@ per2axi_wrap #(
cluster_interconnect_wrap #(
.NB_CORES ( Cfg.NumCores ),
- .HWPE_PRESENT ( Cfg.HwpePresent ),
- .NB_HWPE_PORTS ( Cfg.HwpeNumPorts ),
+ .NB_HWPE ( Cfg.HwpePresent ),
+ .HWPE_WIDTH_FAC ( Cfg.HwpeNumPorts ),
+ .DMA_USE_HWPE_PORT ( Cfg.DmaUseHwpePort ),
.NB_DMAS ( Cfg.DmaNumPlugs ),
.NB_MPERIPHS ( Cfg.NumMstPeriphs ),
.NB_TCDM_BANKS ( Cfg.TcdmNumBank ),
@@ -680,12 +794,12 @@ cluster_interconnect_wrap #(
.USE_ECC_INTERCONNECT ( Cfg.EnableECC && Cfg.ECCInterco ),
.HCI_CORE_SIZE ( HciCoreSizeParam ),
.HCI_HWPE_SIZE ( HciHwpeSizeParam ),
+ .HCI_DMA_SIZE ( HciDmaSizeParam ),
.HCI_MEM_SIZE ( HciMemSizeParam )
-
) cluster_interconnect_wrap_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
- .cluster_id_i ( '0 ),
+ .cluster_id_i ( cluster_id_i ),
.hci_ecc_periph_slave ( s_periph_hwpe_hci_ecc_bus ),
@@ -707,46 +821,93 @@ cluster_interconnect_wrap #(
//***************************************************
//*********************DMAC WRAP*********************
//***************************************************
-dmac_wrap #(
- .NB_CORES ( Cfg.NumCores ),
- .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ),
- .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ),
- .AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ),
- .AXI_ID_WIDTH ( AxiIdInWidth ),
- .AXI_USER_WIDTH ( Cfg.AxiUserWidth ),
- .PE_ID_WIDTH ( Cfg.NumCores + 1 ),
- .DATA_WIDTH ( DataWidth ),
- .ADDR_WIDTH ( AddrWidth ),
- .BE_WIDTH ( BeWidth ),
- .axi_req_t ( c2s_in_int_req_t ),
- .axi_resp_t ( c2s_in_int_resp_t ),
-`ifdef TARGET_MCHAN
- .NB_CTRLS ( Cfg.NumCores + 2 ),
- .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ),
- .TCDM_ADD_WIDTH ( TcdmAddrWidth )
-`else
- .NB_PE_PORTS ( 2 ),
- .NUM_STREAMS ( 4 ),
- .TCDM_SIZE ( Cfg.TcdmSize ),
- .ClusterBaseAddr ( Cfg.ClusterBaseAddr )
-`endif
-) dmac_wrap_i (
- .clk_i ( clk_i ),
- .rst_ni ( rst_ni ),
- .test_mode_i ( test_mode_i ),
- .pe_ctrl_slave ( s_periph_dma_bus[1:0] ),
- .ctrl_slave ( s_core_dmactrl_bus ),
- .tcdm_master ( s_hci_dma ),
-
- .ext_master_req_o ( s_dma_ext_bus_req ),
- .ext_master_resp_i ( s_dma_ext_bus_resp ),
-
- .term_event_o ( s_dma_event ),
- .term_irq_o ( s_dma_irq ),
- .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ),
- .term_irq_pe_o ( {s_dma_fc_irq, s_dma_cl_irq} ),
- .busy_o ( s_dmac_busy )
-);
+if (Cfg.EnableWidePort) begin : gen_wide_port_idma
+ dmac_wrap #(
+ .NB_CORES ( Cfg.NumCores ),
+ .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ),
+ .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ),
+ .AXI_DATA_WIDTH ( Cfg.AxiDataOutWideWidth ),
+ .AXI_ID_WIDTH ( Cfg.AxiIdOutWideWidth ),
+ .AXI_USER_WIDTH ( Cfg.AxiUserWidth ),
+ .PE_ID_WIDTH ( Cfg.NumCores + 1 ),
+ .DATA_WIDTH ( DataWidth ),
+ .ADDR_WIDTH ( AddrWidth ),
+ .BE_WIDTH ( BeWidth ),
+ .axi_req_t ( c2s_wide_req_t ),
+ .axi_resp_t ( c2s_wide_resp_t ),
+ `ifdef TARGET_MCHAN
+ .NB_CTRLS ( Cfg.NumCores + 2 ),
+ .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ),
+ .TCDM_ADD_WIDTH ( TcdmAddrWidth )
+ `else
+ .NB_PE_PORTS ( 2 ),
+ .NUM_BIDIR_STREAMS ( 1 ),
+ .GLOBAL_QUEUE_DEPTH ( 2 ),
+ .MUX_READ ( 1'b1 ),
+ .TCDM_MEM2BANKS ( !Cfg.DmaUseHwpePort )
+ `endif
+ ) dmac_wrap_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .test_mode_i ( test_mode_i ),
+ .pe_ctrl_slave ( s_periph_dma_bus[1:0] ),
+ .ctrl_slave ( s_core_dmactrl_bus ),
+ .tcdm_master ( s_hci_dma ),
+ `ifdef TARGET_MCHAN
+ .ext_master_req_o ( /* MCHAN uses narrow port - not connected to wide */ ),
+ .ext_master_resp_i ( '0 ),
+ `else
+ .ext_master_req_o ( {s_dma_master_req} ),
+ .ext_master_resp_i ( {s_dma_master_resp} ),
+ `endif
+ .term_event_o ( s_dma_event ),
+ .term_irq_o ( s_dma_irq ),
+ .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ),
+ .term_irq_pe_o ( {s_dma_fc_irq, s_dma_cl_irq} ),
+ .busy_o ( s_dmac_busy )
+ );
+end else begin : gen_narrow_port_idma
+ dmac_wrap #(
+ .NB_CORES ( Cfg.NumCores ),
+ .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ),
+ .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ),
+ .AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ),
+ .AXI_ID_WIDTH ( AxiIdOutWidth ),
+ .AXI_USER_WIDTH ( Cfg.AxiUserWidth ),
+ .PE_ID_WIDTH ( Cfg.NumCores + 1 ),
+ .DATA_WIDTH ( DataWidth ),
+ .ADDR_WIDTH ( AddrWidth ),
+ .BE_WIDTH ( BeWidth ),
+ .axi_req_t ( c2s_out_int_req_t ),
+ .axi_resp_t ( c2s_out_int_resp_t ),
+ `ifdef TARGET_MCHAN
+ .NB_CTRLS ( Cfg.NumCores + 2 ),
+ .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ),
+ .TCDM_ADD_WIDTH ( TcdmAddrWidth )
+ `else
+ .NB_PE_PORTS ( 2 ),
+ .NUM_BIDIR_STREAMS ( 1 ),
+ .GLOBAL_QUEUE_DEPTH ( 2 ),
+ .MUX_READ ( 1'b1 ),
+ .TCDM_MEM2BANKS ( !Cfg.DmaUseHwpePort )
+ `endif
+ ) dmac_wrap_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .test_mode_i ( test_mode_i ),
+ .pe_ctrl_slave ( s_periph_dma_bus[1:0] ),
+ .ctrl_slave ( s_core_dmactrl_bus ),
+ .tcdm_master ( s_hci_dma ),
+ .ext_master_req_o ( {s_dma_narrow_master_req} ),
+ .ext_master_resp_i ( {s_dma_narrow_master_resp} ),
+ .term_event_o ( s_dma_event ),
+ .term_irq_o ( s_dma_irq ),
+ .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ),
+ .term_irq_pe_o ( {s_dma_fc_irq, s_dma_cl_irq} ),
+ .busy_o ( s_dmac_busy )
+ );
+end
+
//***************************************************
//**************CLUSTER PERIPHERALS******************
@@ -754,6 +915,7 @@ dmac_wrap #(
cluster_peripherals #(
.NB_CORES ( Cfg.NumCores ),
.NB_HWPES ( MAX_NUM_HWPES ),
+ //.NB_HWPES ( Cfg.HwpeCfg.NumHwpes ),
.NB_MPERIPHS ( Cfg.NumMstPeriphs ),
.NB_CACHE_BANKS ( Cfg.iCacheNumBanks),
.NB_SPERIPHS ( Cfg.NumSlvPeriphs ),
@@ -1027,7 +1189,7 @@ generate
.test_en_i ( test_mode_i ),
.clk_en_i ( clk_core_en[i] ),
.base_addr_i ( base_addr_i ),
- .cluster_id_i ( '0 ),
+ .cluster_id_i ( cluster_id_i ),
.ext_perf_o ( ext_perf[i] ),
.core_data_req_i ( demux_data_req[i] ),
.core_data_rsp_o ( demux_data_rsp[i] ),
@@ -1050,6 +1212,7 @@ end
logic [Cfg.NumCores/3-1:0] hmr_tmr_sw_resynch_req_short;
logic [Cfg.NumCores/2-1:0] hmr_dmr_sw_resynch_req_short;
+
always_comb begin
hmr_tmr_sw_resynch_req = '0;
hmr_dmr_sw_resynch_req = '0;
@@ -1135,13 +1298,13 @@ generate
assign setback = '0;
for (genvar i = 0; i < Cfg.NumCores; i++) begin
- assign hmr2core[i].clock_en = sys2hmr[i].clock_en;
- assign hmr2core[i].boot_addr = sys2hmr[i].boot_addr;
- assign hmr2core[i].core_id = sys2hmr[i].core_id;
- assign hmr2core[i].cluster_id = sys2hmr[i].cluster_id;
- assign hmr2core[i].instr_gnt = sys2hmr[i].instr_gnt;
- assign hmr2core[i].instr_rvalid = sys2hmr[i].instr_rvalid;
- assign hmr2core[i].instr_rdata = sys2hmr[i].instr_rdata;
+ assign hmr2core[i].clock_en = sys2hmr[i].clock_en;
+ assign hmr2core[i].boot_addr = sys2hmr[i].boot_addr;
+ assign hmr2core[i].core_id = sys2hmr[i].core_id;
+ assign hmr2core[i].cluster_id = sys2hmr[i].cluster_id;
+ assign hmr2core[i].instr_gnt = sys2hmr[i].instr_gnt;
+ assign hmr2core[i].instr_rvalid = sys2hmr[i].instr_rvalid;
+ assign hmr2core[i].instr_rdata = sys2hmr[i].instr_rdata;
assign hmr2core[i].data_gnt = sys2hmr[i].data_gnt;
assign hmr2core[i].data_rvalid = sys2hmr[i].data_rvalid;
assign hmr2core[i].data_rdata = sys2hmr[i].data_rdata;
@@ -1199,6 +1362,8 @@ generate
.HWPE_CFG ( Cfg.HwpeCfg ),
.N_CORES ( Cfg.NumCores ),
.N_MASTER_PORT ( Cfg.HwpeNumPorts ),
+ //.N_HWPES ( Cfg.HwpeCfg.NumHwpes ),
+ //.HWPE_SEL_BITS ( HWPE_SEL_BITS ),
.ID_WIDTH ( Cfg.NumCores + Cfg.NumMstPeriphs ),
.HCI_HWPE_SIZE ( HciHwpeSizeParam )
) hwpe_subsystem_i (
@@ -1503,6 +1668,7 @@ tcdm_banks_wrap #(
//********************************************************
//**************** AXI REGISTER SLICES *******************
//********************************************************
+
// CLUSTER TO SOC
`AXI_TYPEDEF_AW_CHAN_T(c2s_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdOutWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
`AXI_TYPEDEF_W_CHAN_T(c2s_w_chan_t,logic[Cfg.AxiDataOutWidth-1:0],logic[Cfg.AxiDataOutWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0])
@@ -1513,9 +1679,6 @@ tcdm_banks_wrap #(
`AXI_TYPEDEF_REQ_T(c2s_req_t,c2s_aw_chan_t,c2s_w_chan_t,c2s_ar_chan_t)
`AXI_TYPEDEF_RESP_T(c2s_resp_t,c2s_b_chan_t,c2s_r_chan_t)
-c2s_req_t src_req, isolate_src_req ;
-c2s_resp_t src_resp, isolate_src_resp;
-
sync #(
.STAGES ( Cfg.SyncStages ),
.ResetValue ( 1'b1 )
@@ -1566,6 +1729,10 @@ sync #(
.serial_o ( mbox_irq_synch )
);
+// Cluster to SoC (narrow)
+c2s_req_t src_req, isolate_src_req;
+c2s_resp_t src_resp, isolate_src_resp;
+
`AXI_TYPEDEF_AW_CHAN_T(c2s_remap_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[AxiIdOutWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
`AXI_TYPEDEF_W_CHAN_T(c2s_remap_w_chan_t,logic[Cfg.AxiDataOutWidth-1:0],logic[Cfg.AxiDataOutWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0])
`AXI_TYPEDEF_B_CHAN_T(c2s_remap_b_chan_t,logic[AxiIdOutWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
@@ -1578,8 +1745,79 @@ sync #(
c2s_remap_req_t src_remap_req;
c2s_remap_resp_t src_remap_resp;
-`AXI_ASSIGN_REQ_STRUCT(src_remap_req,s_data_master_req)
-`AXI_ASSIGN_RESP_STRUCT(s_data_master_resp,src_remap_resp)
+// Wide port enabled: only the cluster bus master drives the narrow port; otherwise the DMA narrow master is muxed in with it
+if (Cfg.EnableWidePort) begin : gen_cluster_bus_narrow_master
+ `AXI_ASSIGN_REQ_STRUCT(src_remap_req, s_data_master_req)
+ `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp, src_remap_resp)
+end else begin : gen_dma_narrow_master
+ // Merge cluster bus master and DMA narrow master via AXI multiplexer
+ localparam int SlvIdWidth = AxiIdOutWidth;
+ localparam int MstIdWidth = AxiIdOutWidth + 1;
+
+ // Channel types for the mux output (ID is one bit wider than on the mux inputs)
+ `AXI_TYPEDEF_AW_CHAN_T(c2s_mux_aw_chan_t, logic[Cfg.AxiAddrWidth-1:0], logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_W_CHAN_T(c2s_mux_w_chan_t, logic[Cfg.AxiDataOutWidth-1:0], logic[Cfg.AxiDataOutWidth/8-1:0], logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_B_CHAN_T(c2s_mux_b_chan_t, logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_AR_CHAN_T(c2s_mux_ar_chan_t, logic[Cfg.AxiAddrWidth-1:0], logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0])
+ `AXI_TYPEDEF_R_CHAN_T(c2s_mux_r_chan_t, logic[Cfg.AxiDataOutWidth-1:0], logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0])
+
+ `AXI_TYPEDEF_REQ_T(c2s_mux_req_t, c2s_mux_aw_chan_t, c2s_mux_w_chan_t, c2s_mux_ar_chan_t)
+ `AXI_TYPEDEF_RESP_T(c2s_mux_resp_t, c2s_mux_b_chan_t, c2s_mux_r_chan_t)
+
+ // Arrays for the two slave ports
+ c2s_remap_req_t [1:0] narrow_master_reqs;
+ c2s_remap_resp_t [1:0] narrow_master_resps;
+ c2s_mux_req_t mux_req;
+ c2s_mux_resp_t mux_resp;
+
+ // Bind cluster-bus and DMA inputs
+ `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[0], s_data_master_req)
+ `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[1], s_dma_narrow_master_req)
+
+ // 2-to-1 AXI multiplexer (prepending ID bit)
+ axi_mux #(
+ .SlvAxiIDWidth ( AxiIdOutWidth ),
+ .slv_aw_chan_t ( c2s_remap_aw_chan_t ), .mst_aw_chan_t ( c2s_mux_aw_chan_t ),
+ .w_chan_t ( c2s_remap_w_chan_t ),
+ .slv_b_chan_t ( c2s_remap_b_chan_t ), .mst_b_chan_t ( c2s_mux_b_chan_t ),
+ .slv_ar_chan_t ( c2s_remap_ar_chan_t ), .mst_ar_chan_t ( c2s_mux_ar_chan_t ),
+ .slv_r_chan_t ( c2s_remap_r_chan_t ), .mst_r_chan_t ( c2s_mux_r_chan_t ),
+ .slv_req_t ( c2s_remap_req_t ), .slv_resp_t ( c2s_remap_resp_t ),
+ .mst_req_t ( c2s_mux_req_t ), .mst_resp_t ( c2s_mux_resp_t ),
+ .NoSlvPorts ( 2 ), .FallThrough(1'b1)
+ ) i_idma_narrow_mux (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .test_i ( test_mode_i ),
+ // Inputs: cluster-bus first, then DMA narrow
+ .slv_reqs_i ( narrow_master_reqs ),
+ .slv_resps_o ( narrow_master_resps ),
+ // Output of mux feeds ID shrink stage
+ .mst_req_o ( mux_req ),
+ .mst_resp_i ( mux_resp )
+ );
+
+ axi_id_remap #(
+ .AxiSlvPortIdWidth ( MstIdWidth ), // ID width = AxiIdOutWidth + 1
+ .AxiSlvPortMaxUniqIds ( 4 ),
+ .AxiMaxTxnsPerId ( Cfg.AxiMaxOutTrans ),
+ .AxiMstPortIdWidth ( AxiIdOutWidth ),
+ .slv_req_t ( c2s_mux_req_t ),
+ .slv_resp_t ( c2s_mux_resp_t ),
+ .mst_req_t ( c2s_remap_req_t ),
+ .mst_resp_t ( c2s_remap_resp_t )
+ ) i_idma_narrow_id_shrink (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .slv_req_i ( mux_req ),
+ .slv_resp_o ( mux_resp ),
+ .mst_req_o ( src_remap_req ),
+ .mst_resp_i ( src_remap_resp )
+ );
+ // Drive external responses from narrow_master_resps
+ `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp, narrow_master_resps[0])
+ `AXI_ASSIGN_RESP_STRUCT(s_dma_narrow_master_resp, narrow_master_resps[1])
+end
if (Cfg.AxiIdOutWidth != AxiIdOutWidth) begin : gen_c2s_idwremap
axi_id_remap #(
@@ -1626,37 +1864,103 @@ axi_isolate #(
);
axi_cdc_src #(
- .aw_chan_t ( c2s_aw_chan_t ),
- .w_chan_t ( c2s_w_chan_t ),
- .b_chan_t ( c2s_b_chan_t ),
- .r_chan_t ( c2s_r_chan_t ),
- .ar_chan_t ( c2s_ar_chan_t ),
- .axi_req_t ( c2s_req_t ),
- .axi_resp_t ( c2s_resp_t ),
- .LogDepth ( Cfg.AxiCdcLogDepth ),
- .SyncStages ( Cfg.AxiCdcSyncStages )
+ .aw_chan_t ( c2s_aw_chan_t ),
+ .w_chan_t ( c2s_w_chan_t ),
+ .b_chan_t ( c2s_b_chan_t ),
+ .r_chan_t ( c2s_r_chan_t ),
+ .ar_chan_t ( c2s_ar_chan_t ),
+ .axi_req_t ( c2s_req_t ),
+ .axi_resp_t ( c2s_resp_t ),
+ .LogDepth ( Cfg.AxiCdcLogDepth ),
+ .SyncStages ( Cfg.AxiCdcSyncStages )
) axi_master_cdc_i (
- .src_rst_ni ( pwr_on_rst_ni ),
- .src_clk_i ( clk_i ),
- .src_req_i ( src_req ),
- .src_resp_o ( src_resp ),
- .async_data_master_aw_wptr_o ( async_data_master_aw_wptr_o ),
- .async_data_master_aw_rptr_i ( async_data_master_aw_rptr_i ),
- .async_data_master_aw_data_o ( async_data_master_aw_data_o ),
- .async_data_master_w_wptr_o ( async_data_master_w_wptr_o ),
- .async_data_master_w_rptr_i ( async_data_master_w_rptr_i ),
- .async_data_master_w_data_o ( async_data_master_w_data_o ),
- .async_data_master_ar_wptr_o ( async_data_master_ar_wptr_o ),
- .async_data_master_ar_rptr_i ( async_data_master_ar_rptr_i ),
- .async_data_master_ar_data_o ( async_data_master_ar_data_o ),
- .async_data_master_b_wptr_i ( async_data_master_b_wptr_i ),
- .async_data_master_b_rptr_o ( async_data_master_b_rptr_o ),
- .async_data_master_b_data_i ( async_data_master_b_data_i ),
- .async_data_master_r_wptr_i ( async_data_master_r_wptr_i ),
- .async_data_master_r_rptr_o ( async_data_master_r_rptr_o ),
- .async_data_master_r_data_i ( async_data_master_r_data_i )
+ .src_rst_ni ( pwr_on_rst_ni ),
+ .src_clk_i ( clk_i ),
+ .src_req_i ( src_req ),
+ .src_resp_o ( src_resp ),
+ .async_data_master_aw_wptr_o ( async_data_master_aw_wptr_o ),
+ .async_data_master_aw_rptr_i ( async_data_master_aw_rptr_i ),
+ .async_data_master_aw_data_o ( async_data_master_aw_data_o ),
+ .async_data_master_w_wptr_o ( async_data_master_w_wptr_o ),
+ .async_data_master_w_rptr_i ( async_data_master_w_rptr_i ),
+ .async_data_master_w_data_o ( async_data_master_w_data_o ),
+ .async_data_master_ar_wptr_o ( async_data_master_ar_wptr_o ),
+ .async_data_master_ar_rptr_i ( async_data_master_ar_rptr_i ),
+ .async_data_master_ar_data_o ( async_data_master_ar_data_o ),
+ .async_data_master_b_wptr_i ( async_data_master_b_wptr_i ),
+ .async_data_master_b_rptr_o ( async_data_master_b_rptr_o ),
+ .async_data_master_b_data_i ( async_data_master_b_data_i ),
+ .async_data_master_r_wptr_i ( async_data_master_r_wptr_i ),
+ .async_data_master_r_rptr_o ( async_data_master_r_rptr_o ),
+ .async_data_master_r_data_i ( async_data_master_r_data_i )
);
+// Cluster to SoC (wide)
+c2s_wide_req_t src_wide_req, isolate_src_wide_req;
+c2s_wide_resp_t src_wide_resp, isolate_src_wide_resp;
+
+// Route DMA master request/response based on EnableWidePort
+assign isolate_src_wide_req = Cfg.EnableWidePort ? s_dma_master_req : s_dma_narrow_master_req;
+assign s_dma_master_resp = Cfg.EnableWidePort ? isolate_src_wide_resp : s_dma_narrow_master_resp;
+
+// Instantiate wide port isolation and CDC only when enabled
+generate
+ if (Cfg.EnableWidePort) begin : gen_wide_port
+ axi_isolate #(
+ .NumPending ( 8 ),
+ .TerminateTransaction ( 1 ),
+ .AtopSupport ( 1 ),
+ .AxiAddrWidth ( Cfg.AxiAddrWidth ),
+ .AxiDataWidth ( Cfg.AxiDataOutWideWidth ),
+ .AxiIdWidth ( Cfg.AxiIdOutWideWidth ),
+ .AxiUserWidth ( Cfg.AxiUserWidth ),
+ .axi_req_t ( c2s_wide_req_t ),
+ .axi_resp_t ( c2s_wide_resp_t )
+ ) i_axi_wide_master_isolate (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .slv_req_i ( isolate_src_wide_req ),
+ .slv_resp_o ( isolate_src_wide_resp ),
+ .mst_req_o ( src_wide_req ),
+ .mst_resp_i ( src_wide_resp ),
+ .isolate_i ( axi_isolate_synch ),
+ .isolated_o ( axi_isolated_wide_o )
+ );
+
+ axi_cdc_src #(
+ .aw_chan_t ( c2s_wide_aw_chan_t ),
+ .w_chan_t ( c2s_wide_w_chan_t ),
+ .b_chan_t ( c2s_wide_b_chan_t ),
+ .r_chan_t ( c2s_wide_r_chan_t ),
+ .ar_chan_t ( c2s_wide_ar_chan_t ),
+ .axi_req_t ( c2s_wide_req_t ),
+ .axi_resp_t ( c2s_wide_resp_t ),
+ .LogDepth ( Cfg.AxiCdcLogDepth ),
+ .SyncStages ( Cfg.AxiCdcSyncStages )
+ ) axi_wide_master_cdc_i (
+ .src_rst_ni ( pwr_on_rst_ni ),
+ .src_clk_i ( clk_i ),
+ .src_req_i ( src_wide_req ),
+ .src_resp_o ( src_wide_resp ),
+ .async_data_master_aw_wptr_o ( async_wide_master_aw_wptr_o ),
+ .async_data_master_aw_rptr_i ( async_wide_master_aw_rptr_i ),
+ .async_data_master_aw_data_o ( async_wide_master_aw_data_o ),
+ .async_data_master_w_wptr_o ( async_wide_master_w_wptr_o ),
+ .async_data_master_w_rptr_i ( async_wide_master_w_rptr_i ),
+ .async_data_master_w_data_o ( async_wide_master_w_data_o ),
+ .async_data_master_ar_wptr_o ( async_wide_master_ar_wptr_o ),
+ .async_data_master_ar_rptr_i ( async_wide_master_ar_rptr_i ),
+ .async_data_master_ar_data_o ( async_wide_master_ar_data_o ),
+ .async_data_master_b_wptr_i ( async_wide_master_b_wptr_i ),
+ .async_data_master_b_rptr_o ( async_wide_master_b_rptr_o ),
+ .async_data_master_b_data_i ( async_wide_master_b_data_i ),
+ .async_data_master_r_wptr_i ( async_wide_master_r_wptr_i ),
+ .async_data_master_r_rptr_o ( async_wide_master_r_rptr_o ),
+ .async_data_master_r_data_i ( async_wide_master_r_data_i )
+ );
+ end
+endgenerate
+
// SOC TO CLUSTER
`AXI_TYPEDEF_AW_CHAN_T(s2c_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdInWidth-1:0],logic[Cfg.AxiUserWidth-1:0])
`AXI_TYPEDEF_W_CHAN_T(s2c_w_chan_t,logic[Cfg.AxiDataInWidth-1:0],logic[Cfg.AxiDataInWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0])
@@ -1667,6 +1971,7 @@ axi_cdc_src #(
`AXI_TYPEDEF_REQ_T(s2c_req_t,s2c_aw_chan_t,s2c_w_chan_t,s2c_ar_chan_t)
`AXI_TYPEDEF_RESP_T(s2c_resp_t,s2c_b_chan_t,s2c_r_chan_t)
+// SoC to Cluster (narrow)
s2c_req_t dst_req;
s2c_resp_t dst_resp;
@@ -1808,4 +2113,31 @@ edge_propagator_tx ep_dma_pe_irq_i (
.valid_o ( dma_pe_irq_valid_o )
);
+// pragma translate_off
+`ifndef VERILATOR
+initial begin : p_assert
+ `ifdef TARGET_MCHAN
+ assert(DMA_HCI_DATA_WIDTH == 32)
+ else $fatal(1, "When using MCHAN, DMA_HCI_DATA_WIDTH must be 32!");
+ assert(Cfg.DmaNumPlugs == 4)
+ else $fatal(1, "When using MCHAN, Cfg.DmaNumPlugs must be 4!");
+ assert(!Cfg.DmaUseHwpePort)
+ else $fatal(1, "When using MCHAN, Cfg.DmaUseHwpePort must be 0!");
+ assert(!Cfg.EnableWidePort)
+ else $fatal(1, "When using MCHAN, wide port should be disabled!");
+ `else
+ if (!Cfg.DmaUseHwpePort) begin
+ // The DMA can have wide access to TCDM only when sharing the master port to HCI with the HWPE
+ assert(DMA_HCI_DATA_WIDTH == DataWidth)
+ else $fatal(1, "When Cfg.DmaUseHwpePort is 0, DMA_HCI_DATA_WIDTH must be equal to DataWidth!");
+ end
+ // Note: the iDMA AXI data width and AXI path are selected by Cfg.EnableWidePort
+ // EnableWidePort=0: iDMA uses Cfg.AxiDataOutWidth transfers, multiplexed with the cluster bus onto the narrow AXI master port
+ // EnableWidePort=1: iDMA uses Cfg.AxiDataOutWideWidth transfers via the dedicated wide AXI master port
+ `endif
+end
+`endif
+// pragma translate_on
+
+
endmodule
diff --git a/rtl/pulp_cluster_wrap.sv b/rtl/pulp_cluster_wrap.sv
index 25d1e700..dffe70d1 100644
--- a/rtl/pulp_cluster_wrap.sv
+++ b/rtl/pulp_cluster_wrap.sv
@@ -19,7 +19,7 @@ package pulp_cluster_wrap_package;
localparam int unsigned AxiIdInWidth = pulp_cluster_package::AxiSubordinateIdwidth;
localparam int unsigned AxiIdOutWidth = pulp_cluster_package::AxiManagerIdwidth;
- // CDC AXI parameters (external to cluster)
+ // CDC AXI parameters (external to cluster, narrow)
localparam int unsigned AwInWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth,
Cfg.AxiIdInWidth,
Cfg.AxiUserWidth);
@@ -38,7 +38,7 @@ package pulp_cluster_wrap_package;
localparam int unsigned AsyncInBDataWidth = (2**Cfg.AxiCdcLogDepth)*BInWidth;
localparam int unsigned AsyncInArDatawidth = (2**Cfg.AxiCdcLogDepth)*ArInWidth;
localparam int unsigned AsyncInRDataWidth = (2**Cfg.AxiCdcLogDepth)*RInWidth;
- // CDC AXI parameters (cluster to external)
+ // CDC AXI parameters (cluster to external, narrow)
localparam int unsigned AwOutWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth,
Cfg.AxiIdOutWidth,
Cfg.AxiUserWidth);
@@ -58,6 +58,25 @@ package pulp_cluster_wrap_package;
localparam int unsigned AsyncOutArDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWidth;
localparam int unsigned AsyncOutRDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWidth;
localparam int unsigned AsyncEventDataWidth = (2**Cfg.AxiCdcLogDepth)*EventWidth;
+ // CDC AXI parameters (cluster to external, wide)
+ localparam int unsigned AwOutWideWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth,
+ Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth);
+ localparam int unsigned WOutWideWidth = axi_pkg::w_width(Cfg.AxiDataOutWideWidth,
+ Cfg.AxiUserWidth);
+ localparam int unsigned BOutWideWidth = axi_pkg::b_width(Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth);
+ localparam int unsigned ArOutWideWidth = axi_pkg::ar_width(Cfg.AxiAddrWidth,
+ Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth);
+ localparam int unsigned ROutWideWidth = axi_pkg::r_width(Cfg.AxiDataOutWideWidth,
+ Cfg.AxiIdOutWideWidth,
+ Cfg.AxiUserWidth);
+ localparam int unsigned AsyncOutAwWideDataWidth = (2**Cfg.AxiCdcLogDepth)*AwOutWideWidth;
+ localparam int unsigned AsyncOutWWideDataWidth = (2**Cfg.AxiCdcLogDepth)*WOutWideWidth;
+ localparam int unsigned AsyncOutBWideDataWidth = (2**Cfg.AxiCdcLogDepth)*BOutWideWidth;
+ localparam int unsigned AsyncOutArWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWideWidth;
+ localparam int unsigned AsyncOutRWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWideWidth;
endpackage
module pulp_cluster_wrap (
@@ -75,6 +94,7 @@ module pulp_cluster_wrap (
output logic busy_o,
input logic axi_isolate_i,
output logic axi_isolated_o,
+ output logic axi_isolated_wide_o,
input logic dma_pe_evt_ack_i,
output logic dma_pe_evt_valid_o,
input logic dma_pe_irq_ack_i,
@@ -86,7 +106,7 @@ module pulp_cluster_wrap (
input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_cluster_events_wptr_i,
output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_cluster_events_rptr_o,
input logic [pulp_cluster_wrap_package::AsyncEventDataWidth-1:0] async_cluster_events_data_i,
- // AXI4 SLAVE
+ // AXI4 SLAVE Narrow
//***************************************
// WRITE ADDRESS CHANNEL
input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_slave_aw_wptr_i,
@@ -108,7 +128,7 @@ module pulp_cluster_wrap (
output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_slave_b_wptr_o,
output logic [pulp_cluster_wrap_package::AsyncInBDataWidth-1:0] async_data_slave_b_data_o,
input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_slave_b_rptr_i,
- // AXI4 MASTER
+ // AXI4 MASTER Narrow
//***************************************
// WRITE ADDRESS CHANNEL
output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_aw_wptr_o,
@@ -129,7 +149,33 @@ module pulp_cluster_wrap (
// WRITE RESPONSE CHANNEL
input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_b_wptr_i,
input logic [pulp_cluster_wrap_package::AsyncOutBDataWidth-1:0] async_data_master_b_data_i,
- output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o
+ output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o,
+ // AXI4 MASTER Wide
+ //**************************************
+ // WRITE ADDRESS CHANNEL
+ output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_aw_wptr_o,
+ output logic [pulp_cluster_wrap_package::AsyncOutAwWideDataWidth-1:0] async_wide_master_aw_data_o,
+ input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_aw_rptr_i,
+
+ // READ ADDRESS CHANNEL
+ output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_ar_wptr_o,
+ output logic [pulp_cluster_wrap_package::AsyncOutArWideDataWidth-1:0] async_wide_master_ar_data_o,
+ input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_ar_rptr_i,
+
+ // WRITE DATA CHANNEL
+ output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_w_wptr_o,
+ output logic [pulp_cluster_wrap_package::AsyncOutWWideDataWidth-1:0] async_wide_master_w_data_o,
+ input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_w_rptr_i,
+
+ // READ DATA CHANNEL
+ input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_r_wptr_i,
+ input logic [pulp_cluster_wrap_package::AsyncOutRWideDataWidth-1:0] async_wide_master_r_data_i,
+ output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_r_rptr_o,
+
+ // WRITE RESPONSE CHANNEL
+ input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_b_wptr_i,
+ input logic [pulp_cluster_wrap_package::AsyncOutBWideDataWidth-1:0] async_wide_master_b_data_i,
+ output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_b_rptr_o
);
pulp_cluster #( .Cfg(pulp_cluster_wrap_package::Cfg) ) pulp_cluster_i (
@@ -139,6 +185,7 @@ module pulp_cluster_wrap (
.ref_clk_i,
.axi_isolate_i ( '0 ),
.axi_isolated_o,
+ .axi_isolated_wide_o,
.pmu_mem_pwdn_i ( 1'b0 ),
.base_addr_i,
.dma_pe_evt_ack_i ( '1 ),
@@ -189,6 +236,21 @@ module pulp_cluster_wrap (
.async_data_slave_r_data_o,
.async_data_slave_b_wptr_o,
.async_data_slave_b_rptr_i,
- .async_data_slave_b_data_o
+ .async_data_slave_b_data_o,
+ .async_wide_master_aw_wptr_o,
+ .async_wide_master_aw_data_o,
+ .async_wide_master_aw_rptr_i,
+ .async_wide_master_ar_wptr_o,
+ .async_wide_master_ar_data_o,
+ .async_wide_master_ar_rptr_i,
+ .async_wide_master_w_wptr_o,
+ .async_wide_master_w_data_o,
+ .async_wide_master_w_rptr_i,
+ .async_wide_master_r_wptr_i,
+ .async_wide_master_r_data_i,
+ .async_wide_master_r_rptr_o,
+ .async_wide_master_b_wptr_i,
+ .async_wide_master_b_data_i,
+ .async_wide_master_b_rptr_o
);
endmodule
diff --git a/scripts/wave.tcl b/scripts/wave.tcl
index 09f55650..8689538f 100644
--- a/scripts/wave.tcl
+++ b/scripts/wave.tcl
@@ -7,32 +7,56 @@
onerror {resume}
quietly WaveActivateNextPane {} 0
-set CORE_TYPE [examine sim:/pulp_cluster_tb/PulpClusterCfg.CoreType]
-set NUM_CORES [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfg.NumCores]
-set HMR_PRESENT [examine sim:/pulp_cluster_tb/PulpClusterCfg.HMRPresent]
-set HWPE_PRESENT [examine sim:/pulp_cluster_tb/PulpClusterCfg.HwpePresent]
-set HWPE_LIST [examine sim:/pulp_cluster_tb/PulpClusterCfg.HwpeCfg.HwpeList]
-set NUM_HWPE [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfg.HwpeCfg.NumHwpes]
+set CORE_TYPE [examine -radix symbolic sim:/pulp_cluster_tb/PulpClusterCfgVis.CoreType]
+set NUM_CORES [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.NumCores]
+set HMR_PRESENT [examine -radix hex sim:/pulp_cluster_tb/PulpClusterCfgVis.HMRPresent]
+set HWPE_PRESENT [examine -radix hex sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpePresent]
+set HWPE_LIST [examine sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpeCfg.HwpeList]
+set NUM_HWPE [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpeCfg.NumHwpes]
+
+set ENABLE_WIDE [examine -radix hex sim:/pulp_cluster_tb/PulpClusterCfgVis.EnableWidePort]
+if { $ENABLE_WIDE == 1 } {
+ set CLUSTER_PATH /pulp_cluster_tb/gen_dma_buses/cluster_i
+} else {
+ set CLUSTER_PATH /pulp_cluster_tb/gen_dma_stubs/cluster_i
+}
# Cluster
-add wave -noupdate -group cluster /pulp_cluster_tb/cluster_i/*
-# HMR
-add wave -noupdate -group hmr /pulp_cluster_tb/cluster_i/gen_hmr_unit/i_hmr_unit/*
+add wave -noupdate -group cluster $CLUSTER_PATH/*
+
+# HMR (if present)
+if { $HMR_PRESENT != 0 } {
+ set hmr_path "$CLUSTER_PATH/gen_hmr_unit/i_hmr_unit/*"
+ set hmr_matches [find signals $hmr_path]
+ if { [llength $hmr_matches] > 0 } {
+ add wave -noupdate -group hmr $hmr_path
+ }
+}
+
# Peripherals
-add wave -noupdate -group cluster_peripherals /pulp_cluster_tb/cluster_i/cluster_peripherals_i/*
+add wave -noupdate -group cluster_peripherals $CLUSTER_PATH/cluster_peripherals_i/*
# Control Unit
-add wave -noupdate -group cluster_control_unit /pulp_cluster_tb/cluster_i/cluster_peripherals_i/cluster_control_unit_i/*
-# HWPE Subsystem
-add wave -noupdate -group hwpe_subsystem /pulp_cluster_tb/cluster_i/hwpe_gen/hwpe_subsystem_i/*
+add wave -noupdate -group cluster_control_unit $CLUSTER_PATH/cluster_peripherals_i/cluster_control_unit_i/*
+# HWPE Subsystem (if present)
+if { $HWPE_PRESENT != 0 } {
+ add wave -noupdate -group hwpe_subsystem $CLUSTER_PATH/hwpe_gen/hwpe_subsystem_i/*
+}
for {set i 0} {$i < $NUM_HWPE} {incr i} {
- #set HWPE_NAME [tolower $HWPE_LIST($i)]
- set HWPE_NAME [string tolower [examine sim:/pulp_cluster_tb/PulpClusterCfg.HwpeCfg.HwpeList[$i]]]
- add wave -noupdate -group hwpe_subsystem -group $HWPE_NAME /pulp_cluster_tb/cluster_i/hwpe_gen/hwpe_subsystem_i/gen_hwpe[$i]/gen_$HWPE_NAME/i_$HWPE_NAME/*
+ if { $HWPE_PRESENT != 0 } {
+ # Probe each known HWPE kind and add waves only for the instance that exists at this index
+ foreach hwpe_kind {redmule neureka softex} {
+ set inst_path "$CLUSTER_PATH/hwpe_gen/hwpe_subsystem_i/gen_hwpe[$i]/gen_${hwpe_kind}/i_${hwpe_kind}/*"
+ set matches [find signals $inst_path]
+ if { [llength $matches] > 0 } {
+ add wave -noupdate -group hwpe_subsystem -group $hwpe_kind $inst_path
+ }
+ }
+ }
}
# Cores
for {set i 0} {$i < $NUM_CORES} {incr i} {
- add wave -noupdate -group Core[$i] -group core_region /pulp_cluster_tb/cluster_i/CORE[$i]/core_region_i/*
- add wave -noupdate -group Core[$i] -group core_region -group core /pulp_cluster_tb/cluster_i/CORE[$i]/core_region_i/${CORE_TYPE}_CORE/${CORE_TYPE}_CORE/*
+ add wave -noupdate -group Core[$i] -group core_region $CLUSTER_PATH/CORE[$i]/core_region_i/*
+ add wave -noupdate -group Core[$i] -group core_region -group core $CLUSTER_PATH/CORE[$i]/core_region_i/${CORE_TYPE}_CORE/${CORE_TYPE}_CORE/*
}
configure wave -timelineunits ns
diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv
index 0b333c89..c4688a3b 100644
--- a/tb/pulp_cluster_tb.sv
+++ b/tb/pulp_cluster_tb.sv
@@ -25,7 +25,13 @@ import "DPI-C" function get_entry(output longint entry_ret);
import "DPI-C" function byte get_section(output longint address, output longint len);
import "DPI-C" context function byte read_section(input longint address, inout byte buffer[], input longint len);
+// Testbench flavor: the cluster wide AXI master port is enabled unless TB_ENABLE_WIDE_PORT is overridden
+`ifndef TB_ENABLE_WIDE_PORT
+ `define TB_ENABLE_WIDE_PORT 1
+`endif
+
module pulp_cluster_tb;
+ localparam bit EnableWidePort = `TB_ENABLE_WIDE_PORT;
import pulp_cluster_package::*;
import uvm_pkg::*;
@@ -50,13 +56,16 @@ module pulp_cluster_tb;
localparam AxiAw = 32;
localparam AxiDw = 64;
localparam AxiIw = 6;
- localparam NMst = 2;
+ localparam NMst = 3;
localparam NSlv = 3;
localparam AxiIwMst = AxiIw + $clog2(NMst);
localparam AxiWideBeWidth = AxiDw/8;
localparam AxiWideByteOffset = $clog2(AxiWideBeWidth);
localparam AxiUw = 10;
+ localparam DmaAxiDw = EnableWidePort ? 256 : 64;
+ localparam DmaAxiIw = EnableWidePort ? 1 : AxiIw;
+
localparam bit[AxiAw-1:0] ClustBase = 'h10000000;
localparam bit[AxiAw-1:0] ClustPeriphOffs = 'h00200000;
localparam bit[AxiAw-1:0] ClustExtOffs = 'h00400000;
@@ -74,23 +83,42 @@ module pulp_cluster_tb;
typedef logic [AxiIw-1:0] axi_id_t;
typedef logic [AxiIwMst-1:0] axi_m_id_t;
+ typedef logic [DmaAxiDw-1:0] dma_axi_data_t;
+ typedef logic [DmaAxiDw/8-1:0] dma_axi_strb_t;
+ typedef logic [DmaAxiIw-1:0] dma_axi_id_t;
+
+ // Narrow AXI port type
`AXI_TYPEDEF_W_CHAN_T(w_chan_t, axi_data_t, axi_strb_t, axi_user_t)
`AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, axi_addr_t, axi_id_t, axi_user_t)
`AXI_TYPEDEF_B_CHAN_T(b_chan_t, axi_id_t, axi_user_t)
`AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, axi_addr_t, axi_id_t, axi_user_t)
`AXI_TYPEDEF_R_CHAN_T(r_chan_t, axi_data_t, axi_id_t, axi_user_t)
+
`AXI_TYPEDEF_REQ_T(axi_req_t, aw_chan_t, w_chan_t, ar_chan_t)
`AXI_TYPEDEF_RESP_T(axi_resp_t, b_chan_t, r_chan_t)
+ // DMA AXI port type (DmaAxiDw/DmaAxiIw: wide when EnableWidePort is set, same as narrow otherwise)
+ `AXI_TYPEDEF_W_CHAN_T(dma_w_chan_t, dma_axi_data_t, dma_axi_strb_t, axi_user_t)
+ `AXI_TYPEDEF_AW_CHAN_T(dma_aw_chan_t, axi_addr_t, dma_axi_id_t, axi_user_t)
+ `AXI_TYPEDEF_B_CHAN_T(dma_b_chan_t, dma_axi_id_t, axi_user_t)
+ `AXI_TYPEDEF_AR_CHAN_T(dma_ar_chan_t, axi_addr_t, dma_axi_id_t, axi_user_t)
+ `AXI_TYPEDEF_R_CHAN_T(dma_r_chan_t, dma_axi_data_t, dma_axi_id_t, axi_user_t)
+
+ `AXI_TYPEDEF_REQ_T(dma_axi_req_t, dma_aw_chan_t, dma_w_chan_t, dma_ar_chan_t)
+ `AXI_TYPEDEF_RESP_T(dma_axi_resp_t, dma_b_chan_t, dma_r_chan_t)
+
+ // Memory-side AXI port type
`AXI_TYPEDEF_AW_CHAN_T(aw_m_chan_t, axi_addr_t, axi_m_id_t, axi_user_t)
`AXI_TYPEDEF_B_CHAN_T(b_m_chan_t, axi_m_id_t, axi_user_t)
`AXI_TYPEDEF_AR_CHAN_T(ar_m_chan_t, axi_addr_t, axi_m_id_t, axi_user_t)
`AXI_TYPEDEF_R_CHAN_T(r_m_chan_t, axi_data_t, axi_m_id_t, axi_user_t)
+
`AXI_TYPEDEF_REQ_T(axi_m_req_t, aw_m_chan_t, w_chan_t, ar_m_chan_t)
`AXI_TYPEDEF_RESP_T(axi_m_resp_t, b_m_chan_t, r_m_chan_t)
typedef logic [AxiAw-1:0] addr_t;
typedef logic [AxiDw-1:0] data_t;
+ typedef logic [DmaAxiDw-1:0] dma_data_t;
data_t memory [bit [31:0]];
int sections [bit [31:0]];
@@ -113,13 +141,342 @@ module pulp_cluster_tb;
.AXI_DATA_WIDTH( AxiDw ),
.AXI_ID_WIDTH ( AxiIw ),
.AXI_USER_WIDTH( AxiUw )
- ) axi_slave[NMst-1:0]();
+ ) axi_slave[NMst-1:0]();
+
+ localparam pulp_cluster_cfg_t PulpClusterCfg = '{
+ CoreType: pulp_cluster_package::RI5CY,
+ NumCores: `NB_CORES,
+ DmaNumPlugs: `NB_DMAS,
+ DmaNumOutstandingBursts: 8,
+ DmaBurstLength: 256,
+ DmaUseHwpePort: `DMA_USE_HWPE_PORT,
+ NumMstPeriphs: `NB_MPERIPHS,
+ NumSlvPeriphs: `NB_SPERIPHS,
+ ClusterAlias: 1,
+ ClusterAliasBase: 'h0,
+ NumSyncStages: 3,
+ UseHci: 1,
+ TcdmSize: 128*1024,
+ TcdmNumBank: 16,
+ HwpePresent: 1,
+ HwpeCfg: '{NumHwpes: 3, HwpeList: {SOFTEX, NEUREKA, REDMULE}},
+ HwpeNumPorts: 9,
+ HMRPresent: 1,
+ HMRDmrEnabled: 1,
+ HMRTmrEnabled: 1,
+ HMRDmrFIxed: 0,
+ HMRTmrFIxed: 0,
+ HMRInterleaveGrps: 1,
+ HMREnableRapidRecovery: 1,
+ HMRSeparateDataVoters: 1,
+ HMRSeparateAxiBus: 0,
+ HMRNumBusVoters: 1,
+ EnableECC: 0,
+ ECCInterco: 0,
+ iCacheNumBanks: 2,
+ iCacheNumLines: 1,
+ iCacheNumWays: 4,
+ iCacheSharedSize: 4*1024,
+ iCachePrivateSize: 512,
+ iCachePrivateDataWidth: 32,
+ EnableReducedTag: 1,
+ L2Size: 1000*1024,
+ DmBaseAddr: 'h60203000,
+ BootRomBaseAddr: BootAddr,
+ BootAddr: BootAddr,
+ EnablePrivateFpu: 1,
+ EnablePrivateFpDivSqrt: 0,
+ NumAxiIn: NumAxiSubordinatePorts,
+ NumAxiOut: NumAxiManagerPorts,
+ AxiIdInWidth: AxiIw-2,
+ AxiIdOutWidth: AxiIw,
+ AxiIdOutWideWidth: 1,
+ AxiAddrWidth: AxiAw,
+ AxiDataInWidth: AxiDw,
+ AxiDataOutWidth: AxiDw,
+ AxiDataOutWideWidth: DmaAxiDw,
+ AxiUserWidth: AxiUw,
+ AxiMaxInTrans: 64,
+ AxiMaxOutTrans: 64,
+ AxiCdcLogDepth: 3,
+ AxiCdcSyncStages: 3,
+ SyncStages: 3,
+ ClusterBaseAddr: ClustBaseAddr,
+ ClusterPeriphOffs: ClustPeriphOffs,
+ ClusterExternalOffs: ClustExtOffs,
+ EnableRemapAddress: 0,
+ EnableWidePort: EnableWidePort,
+ SnitchICache: 0,
+ default: '0
+ };
+
+ // Copy of the configuration kept visible in QuestaSim; the keep attribute prevents it from being optimized away
+ (* keep = "true" *) pulp_cluster_package::pulp_cluster_cfg_t PulpClusterCfgVis = PulpClusterCfg;
+
+ initial begin
+ if (EnableWidePort) begin
+ $display("[TB] Instantiating cluster with wide ports");
+ end else begin
+ $display("[TB] Instantiating cluster with narrow ports");
+ end
+ end
+
+ generate
+ if (EnableWidePort) begin : gen_dma_buses
+ AXI_BUS #(
+ .AXI_ADDR_WIDTH( AxiAw ),
+ .AXI_DATA_WIDTH( DmaAxiDw ),
+ .AXI_ID_WIDTH ( DmaAxiIw ),
+ .AXI_USER_WIDTH( AxiUw )
+ ) dma_slave();
+
+ AXI_BUS #(
+ .AXI_ADDR_WIDTH( AxiAw ),
+ .AXI_DATA_WIDTH( DmaAxiDw ),
+ .AXI_ID_WIDTH ( AxiIw ),
+ .AXI_USER_WIDTH( AxiUw )
+ ) dma_slave_iw();
+
+ AXI_BUS_ASYNC_GRAY #(
+ .AXI_ADDR_WIDTH ( AxiAw ),
+ .AXI_DATA_WIDTH ( DmaAxiDw ),
+ .AXI_ID_WIDTH ( DmaAxiIw ),
+ .AXI_USER_WIDTH ( AxiUw ),
+ .LOG_DEPTH ( 3 )
+ ) async_dma_axi_bus();
+ // Data-width converter: dma_slave_iw (DmaAxiDw-bit data) -> axi_slave[2] (AxiDw-bit data)
+ axi_dw_converter_intf #(
+ .AXI_ID_WIDTH ( AxiIw ),
+ .AXI_ADDR_WIDTH ( AxiAw ),
+ .AXI_SLV_PORT_DATA_WIDTH ( DmaAxiDw ),
+ .AXI_MST_PORT_DATA_WIDTH ( AxiDw ),
+ .AXI_USER_WIDTH ( AxiUw ),
+ .AXI_MAX_READS ( 3 )
+ ) i_dma_dw_conv (
+ .clk_i ( s_clk ),
+ .rst_ni ( s_rstn ),
+ .slv ( dma_slave_iw ),
+ .mst ( axi_slave[2] )
+ );
+ // ID-width converter: dma_slave (DmaAxiIw-bit IDs) -> dma_slave_iw (AxiIw-bit IDs)
+ axi_iw_converter_intf #(
+ .AXI_SLV_PORT_ID_WIDTH ( DmaAxiIw ),
+ .AXI_MST_PORT_ID_WIDTH ( AxiIw ),
+ .AXI_SLV_PORT_MAX_UNIQ_IDS ( 5 ),
+ .AXI_SLV_PORT_MAX_TXNS_PER_ID ( 5 ),
+ .AXI_SLV_PORT_MAX_TXNS ( 5 ),
+ .AXI_MST_PORT_MAX_UNIQ_IDS ( 5 ),
+ .AXI_MST_PORT_MAX_TXNS_PER_ID ( 5 ),
+ .AXI_ADDR_WIDTH ( AxiAw ),
+ .AXI_DATA_WIDTH ( DmaAxiDw ),
+ .AXI_USER_WIDTH ( AxiUw )
+ ) i_dma_iw_conv (
+ .clk_i ( s_clk ),
+ .rst_ni ( s_rstn ),
+ .slv ( dma_slave ),
+ .mst ( dma_slave_iw )
+ );
+ // CDC destination side: async_dma_axi_bus (wired to cluster_i's async_wide_master_* ports) -> dma_slave, clocked by s_clk
+ axi_cdc_dst_intf #(
+ .AXI_ADDR_WIDTH ( AxiAw ),
+ .AXI_DATA_WIDTH ( DmaAxiDw ),
+ .AXI_ID_WIDTH ( DmaAxiIw ),
+ .AXI_USER_WIDTH ( AxiUw ),
+ .LOG_DEPTH ( 3 )
+ ) cluster_to_soc_dma_dst_cdc_fifo_i (
+ .dst_clk_i ( s_clk ),
+ .dst_rst_ni ( s_rstn ),
+ .src ( async_dma_axi_bus ),
+ .dst ( dma_slave )
+ );
+
+ pulp_cluster
+ `ifdef USE_PULP_PARAMETERS
+ #( .Cfg ( PulpClusterCfg ) )
+ `endif
+ cluster_i (
+ .clk_i ( s_clk ),
+ .rst_ni ( s_rstn ),
+ .pwr_on_rst_ni ( s_rstn ),
+ .ref_clk_i ( s_clk ),
+ .axi_isolate_i ( '0 ),
+ .axi_isolated_o ( ),
+ .axi_isolated_wide_o ( ),
+
+ .pmu_mem_pwdn_i ( 1'b0 ),
+
+ .base_addr_i ( ClustBase[31:28] ),
+
+ .dma_pe_evt_ack_i ( '1 ),
+ .dma_pe_evt_valid_o ( ),
+
+ .dma_pe_irq_ack_i ( 1'b1 ),
+ .dma_pe_irq_valid_o ( ),
+
+ .dbg_irq_valid_i ( '0 ),
+ .mbox_irq_i ( '0 ),
+
+ .pf_evt_ack_i ( 1'b1 ),
+ .pf_evt_valid_o ( ),
+
+ .async_cluster_events_wptr_i ( '0 ),
+ .async_cluster_events_rptr_o ( ),
+ .async_cluster_events_data_i ( '0 ),
+
+ .en_sa_boot_i ( s_cluster_en_sa_boot ),
+ .test_mode_i ( 1'b0 ),
+ .fetch_en_i ( s_cluster_fetch_en ),
+ .eoc_o ( s_cluster_eoc ),
+ .busy_o ( s_cluster_busy ),
+ .cluster_id_i ( ClustIdx ),
+
+ .async_data_master_aw_wptr_o ( async_cluster_to_soc_axi_bus.aw_wptr ),
+ .async_data_master_aw_rptr_i ( async_cluster_to_soc_axi_bus.aw_rptr ),
+ .async_data_master_aw_data_o ( async_cluster_to_soc_axi_bus.aw_data ),
+ .async_data_master_ar_wptr_o ( async_cluster_to_soc_axi_bus.ar_wptr ),
+ .async_data_master_ar_rptr_i ( async_cluster_to_soc_axi_bus.ar_rptr ),
+ .async_data_master_ar_data_o ( async_cluster_to_soc_axi_bus.ar_data ),
+ .async_data_master_w_data_o ( async_cluster_to_soc_axi_bus.w_data ),
+ .async_data_master_w_wptr_o ( async_cluster_to_soc_axi_bus.w_wptr ),
+ .async_data_master_w_rptr_i ( async_cluster_to_soc_axi_bus.w_rptr ),
+ .async_data_master_r_wptr_i ( async_cluster_to_soc_axi_bus.r_wptr ),
+ .async_data_master_r_rptr_o ( async_cluster_to_soc_axi_bus.r_rptr ),
+ .async_data_master_r_data_i ( async_cluster_to_soc_axi_bus.r_data ),
+ .async_data_master_b_wptr_i ( async_cluster_to_soc_axi_bus.b_wptr ),
+ .async_data_master_b_rptr_o ( async_cluster_to_soc_axi_bus.b_rptr ),
+ .async_data_master_b_data_i ( async_cluster_to_soc_axi_bus.b_data ),
+
+ .async_wide_master_aw_wptr_o ( async_dma_axi_bus.aw_wptr ),
+ .async_wide_master_aw_rptr_i ( async_dma_axi_bus.aw_rptr ),
+ .async_wide_master_aw_data_o ( async_dma_axi_bus.aw_data ),
+ .async_wide_master_ar_wptr_o ( async_dma_axi_bus.ar_wptr ),
+ .async_wide_master_ar_rptr_i ( async_dma_axi_bus.ar_rptr ),
+ .async_wide_master_ar_data_o ( async_dma_axi_bus.ar_data ),
+ .async_wide_master_w_data_o ( async_dma_axi_bus.w_data ),
+ .async_wide_master_w_wptr_o ( async_dma_axi_bus.w_wptr ),
+ .async_wide_master_w_rptr_i ( async_dma_axi_bus.w_rptr ),
+ .async_wide_master_r_wptr_i ( async_dma_axi_bus.r_wptr ),
+ .async_wide_master_r_rptr_o ( async_dma_axi_bus.r_rptr ),
+ .async_wide_master_r_data_i ( async_dma_axi_bus.r_data ),
+ .async_wide_master_b_wptr_i ( async_dma_axi_bus.b_wptr ),
+ .async_wide_master_b_rptr_o ( async_dma_axi_bus.b_rptr ),
+ .async_wide_master_b_data_i ( async_dma_axi_bus.b_data ),
+
+ .async_data_slave_aw_wptr_i ( async_soc_to_cluster_axi_bus.aw_wptr ),
+ .async_data_slave_aw_rptr_o ( async_soc_to_cluster_axi_bus.aw_rptr ),
+ .async_data_slave_aw_data_i ( async_soc_to_cluster_axi_bus.aw_data ),
+ .async_data_slave_ar_wptr_i ( async_soc_to_cluster_axi_bus.ar_wptr ),
+ .async_data_slave_ar_rptr_o ( async_soc_to_cluster_axi_bus.ar_rptr ),
+ .async_data_slave_ar_data_i ( async_soc_to_cluster_axi_bus.ar_data ),
+ .async_data_slave_w_data_i ( async_soc_to_cluster_axi_bus.w_data ),
+ .async_data_slave_w_wptr_i ( async_soc_to_cluster_axi_bus.w_wptr ),
+ .async_data_slave_w_rptr_o ( async_soc_to_cluster_axi_bus.w_rptr ),
+ .async_data_slave_r_wptr_o ( async_soc_to_cluster_axi_bus.r_wptr ),
+ .async_data_slave_r_rptr_i ( async_soc_to_cluster_axi_bus.r_rptr ),
+ .async_data_slave_r_data_o ( async_soc_to_cluster_axi_bus.r_data ),
+ .async_data_slave_b_wptr_o ( async_soc_to_cluster_axi_bus.b_wptr ),
+ .async_data_slave_b_rptr_i ( async_soc_to_cluster_axi_bus.b_rptr ),
+ .async_data_slave_b_data_o ( async_soc_to_cluster_axi_bus.b_data )
+ );
+
+ end else begin : gen_dma_stubs
+ pulp_cluster
+ `ifdef USE_PULP_PARAMETERS
+ #( .Cfg ( PulpClusterCfg ) )
+ `endif
+ cluster_i (
+ .clk_i ( s_clk ),
+ .rst_ni ( s_rstn ),
+ .pwr_on_rst_ni ( s_rstn ),
+ .ref_clk_i ( s_clk ),
+ .axi_isolate_i ( '0 ),
+ .axi_isolated_o ( ),
+ .axi_isolated_wide_o ( ),
+
+ .pmu_mem_pwdn_i ( 1'b0 ),
+
+ .base_addr_i ( ClustBase[31:28] ),
+
+ .dma_pe_evt_ack_i ( '1 ),
+ .dma_pe_evt_valid_o ( ),
+
+ .dma_pe_irq_ack_i ( 1'b1 ),
+ .dma_pe_irq_valid_o ( ),
+
+ .dbg_irq_valid_i ( '0 ),
+ .mbox_irq_i ( '0 ),
+
+ .pf_evt_ack_i ( 1'b1 ),
+ .pf_evt_valid_o ( ),
+
+ .async_cluster_events_wptr_i ( '0 ),
+ .async_cluster_events_rptr_o ( ),
+ .async_cluster_events_data_i ( '0 ),
+
+ .en_sa_boot_i ( s_cluster_en_sa_boot ),
+ .test_mode_i ( 1'b0 ),
+ .fetch_en_i ( s_cluster_fetch_en ),
+ .eoc_o ( s_cluster_eoc ),
+ .busy_o ( s_cluster_busy ),
+ .cluster_id_i ( ClustIdx ),
+
+ .async_data_master_aw_wptr_o ( async_cluster_to_soc_axi_bus.aw_wptr ),
+ .async_data_master_aw_rptr_i ( async_cluster_to_soc_axi_bus.aw_rptr ),
+ .async_data_master_aw_data_o ( async_cluster_to_soc_axi_bus.aw_data ),
+ .async_data_master_ar_wptr_o ( async_cluster_to_soc_axi_bus.ar_wptr ),
+ .async_data_master_ar_rptr_i ( async_cluster_to_soc_axi_bus.ar_rptr ),
+ .async_data_master_ar_data_o ( async_cluster_to_soc_axi_bus.ar_data ),
+ .async_data_master_w_data_o ( async_cluster_to_soc_axi_bus.w_data ),
+ .async_data_master_w_wptr_o ( async_cluster_to_soc_axi_bus.w_wptr ),
+ .async_data_master_w_rptr_i ( async_cluster_to_soc_axi_bus.w_rptr ),
+ .async_data_master_r_wptr_i ( async_cluster_to_soc_axi_bus.r_wptr ),
+ .async_data_master_r_rptr_o ( async_cluster_to_soc_axi_bus.r_rptr ),
+ .async_data_master_r_data_i ( async_cluster_to_soc_axi_bus.r_data ),
+ .async_data_master_b_wptr_i ( async_cluster_to_soc_axi_bus.b_wptr ),
+ .async_data_master_b_rptr_o ( async_cluster_to_soc_axi_bus.b_rptr ),
+ .async_data_master_b_data_i ( async_cluster_to_soc_axi_bus.b_data ),
+
+ // Wide master ports tied off when wide port disabled
+ .async_wide_master_aw_wptr_o ( ),
+ .async_wide_master_aw_rptr_i ( '0 ),
+ .async_wide_master_aw_data_o ( ),
+ .async_wide_master_ar_wptr_o ( ),
+ .async_wide_master_ar_rptr_i ( '0 ),
+ .async_wide_master_ar_data_o ( ),
+ .async_wide_master_w_data_o ( ),
+ .async_wide_master_w_wptr_o ( ),
+ .async_wide_master_w_rptr_i ( '0 ),
+ .async_wide_master_r_wptr_i ( '0 ),
+ .async_wide_master_r_rptr_o ( ),
+ .async_wide_master_r_data_i ( 'x ),
+ .async_wide_master_b_wptr_i ( '0 ),
+ .async_wide_master_b_rptr_o ( ),
+ .async_wide_master_b_data_i ( 'x ),
+
+ .async_data_slave_aw_wptr_i ( async_soc_to_cluster_axi_bus.aw_wptr ),
+ .async_data_slave_aw_rptr_o ( async_soc_to_cluster_axi_bus.aw_rptr ),
+ .async_data_slave_aw_data_i ( async_soc_to_cluster_axi_bus.aw_data ),
+ .async_data_slave_ar_wptr_i ( async_soc_to_cluster_axi_bus.ar_wptr ),
+ .async_data_slave_ar_rptr_o ( async_soc_to_cluster_axi_bus.ar_rptr ),
+ .async_data_slave_ar_data_i ( async_soc_to_cluster_axi_bus.ar_data ),
+ .async_data_slave_w_data_i ( async_soc_to_cluster_axi_bus.w_data ),
+ .async_data_slave_w_wptr_i ( async_soc_to_cluster_axi_bus.w_wptr ),
+ .async_data_slave_w_rptr_o ( async_soc_to_cluster_axi_bus.w_rptr ),
+ .async_data_slave_r_wptr_o ( async_soc_to_cluster_axi_bus.r_wptr ),
+ .async_data_slave_r_rptr_i ( async_soc_to_cluster_axi_bus.r_rptr ),
+ .async_data_slave_r_data_o ( async_soc_to_cluster_axi_bus.r_data ),
+ .async_data_slave_b_wptr_o ( async_soc_to_cluster_axi_bus.b_wptr ),
+ .async_data_slave_b_rptr_i ( async_soc_to_cluster_axi_bus.b_rptr ),
+ .async_data_slave_b_data_o ( async_soc_to_cluster_axi_bus.b_data )
+ );
+ end
+ endgenerate
AXI_BUS #(
- .AXI_ADDR_WIDTH( AxiAw ),
- .AXI_DATA_WIDTH( AxiDw ),
- .AXI_ID_WIDTH ( AxiIw-2 ),
- .AXI_USER_WIDTH( AxiUw )
+ .AXI_ADDR_WIDTH( AxiAw ),
+ .AXI_DATA_WIDTH( AxiDw ),
+ .AXI_ID_WIDTH ( AxiIw-2 ),
+ .AXI_USER_WIDTH( AxiUw )
) soc_to_cluster_axi_bus();
AXI_BUS_ASYNC_GRAY #(
@@ -131,11 +488,11 @@ module pulp_cluster_tb;
) async_soc_to_cluster_axi_bus();
AXI_BUS_ASYNC_GRAY #(
- .AXI_ADDR_WIDTH ( AxiAw ),
- .AXI_DATA_WIDTH ( AxiDw ),
- .AXI_ID_WIDTH ( AxiIw ),
- .AXI_USER_WIDTH ( AxiUw ),
- .LOG_DEPTH ( 3 )
+ .AXI_ADDR_WIDTH ( AxiAw ),
+ .AXI_DATA_WIDTH ( AxiDw ),
+ .AXI_ID_WIDTH ( AxiIw ),
+ .AXI_USER_WIDTH ( AxiUw ),
+ .LOG_DEPTH ( 3 )
) async_cluster_to_soc_axi_bus();
// Behavioural slaves
@@ -280,156 +637,20 @@ module pulp_cluster_tb;
.AXI_ID_WIDTH ( AxiIw ),
.AXI_USER_WIDTH ( AxiUw ),
.LOG_DEPTH ( 3 )
- ) cluster_to_soc_dst_cdc_fifo_i (
+ ) cluster_to_soc_dst_cdc_fifo_i (
.dst_clk_i ( s_clk ),
.dst_rst_ni ( s_rstn ),
.src ( async_cluster_to_soc_axi_bus ),
.dst ( axi_slave[1] )
);
- localparam pulp_cluster_cfg_t PulpClusterCfg = '{
- CoreType: pulp_cluster_package::RI5CY,
- NumCores: `NB_CORES,
- DmaNumPlugs: `NB_DMAS,
- DmaNumOutstandingBursts: 8,
- DmaBurstLength: 256,
- NumMstPeriphs: `NB_MPERIPHS,
- NumSlvPeriphs: `NB_SPERIPHS,
- ClusterAlias: 1,
- ClusterAliasBase: 'h0,
- NumSyncStages: 3,
- UseHci: 1,
- TcdmSize: 128*1024,
- TcdmNumBank: 16,
- HwpePresent: 1,
- HwpeCfg: '{NumHwpes: 3, HwpeList: {SOFTEX, NEUREKA, REDMULE}},
- HwpeNumPorts: 9,
- HMRPresent: 1,
- HMRDmrEnabled: 1,
- HMRTmrEnabled: 1,
- HMRDmrFIxed: 0,
- HMRTmrFIxed: 0,
- HMRInterleaveGrps: 1,
- HMREnableRapidRecovery: 1,
- HMRSeparateDataVoters: 1,
- HMRSeparateAxiBus: 0,
- HMRNumBusVoters: 1,
- EnableECC: 1,
- ECCInterco: 1,
- iCacheNumBanks: 2,
- iCacheNumLines: 1,
- iCacheNumWays: 4,
- iCacheSharedSize: 4*1024,
- iCachePrivateSize: 512,
- iCachePrivateDataWidth: 32,
- EnableReducedTag: 1,
- L2Size: 1000*1024,
- DmBaseAddr: 'h60203000,
- BootRomBaseAddr: BootAddr,
- BootAddr: BootAddr,
- EnablePrivateFpu: 1,
- EnablePrivateFpDivSqrt: 0,
- NumAxiIn: NumAxiSubordinatePorts,
- NumAxiOut: NumAxiManagerPorts,
- AxiIdInWidth: AxiIw-2,
- AxiIdOutWidth:AxiIw,
- AxiAddrWidth: AxiAw,
- AxiDataInWidth: AxiDw,
- AxiDataOutWidth: AxiDw,
- AxiUserWidth: AxiUw,
- AxiMaxInTrans: 64,
- AxiMaxOutTrans: 64,
- AxiCdcLogDepth: 3,
- AxiCdcSyncStages: 3,
- SyncStages: 3,
- ClusterBaseAddr: ClustBaseAddr,
- ClusterPeriphOffs: ClustPeriphOffs,
- ClusterExternalOffs: ClustExtOffs,
- EnableRemapAddress: 0,
- SnitchICache: 0,
- default: '0
- };
-
- pulp_cluster
-`ifdef USE_PULP_PARAMETERS
- #(
- .Cfg ( PulpClusterCfg )
- )
-`endif
- cluster_i (
- .clk_i ( s_clk ),
- .rst_ni ( s_rstn ),
- .pwr_on_rst_ni ( s_rstn ),
- .ref_clk_i ( s_clk ),
- .axi_isolate_i ( '0 ),
- .axi_isolated_o ( ),
-
- .pmu_mem_pwdn_i ( 1'b0 ),
-
- .base_addr_i ( ClustBase[31:28] ),
-
- .dma_pe_evt_ack_i ( '1 ),
- .dma_pe_evt_valid_o ( ),
-
- .dma_pe_irq_ack_i ( 1'b1 ),
- .dma_pe_irq_valid_o ( ),
-
- .dbg_irq_valid_i ( '0 ),
- .mbox_irq_i ( '0 ),
-
- .pf_evt_ack_i ( 1'b1 ),
- .pf_evt_valid_o ( ),
-
- .async_cluster_events_wptr_i ( '0 ),
- .async_cluster_events_rptr_o ( ),
- .async_cluster_events_data_i ( '0 ),
-
- .en_sa_boot_i ( s_cluster_en_sa_boot ),
- .test_mode_i ( 1'b0 ),
- .fetch_en_i ( s_cluster_fetch_en ),
- .eoc_o ( s_cluster_eoc ),
- .busy_o ( s_cluster_busy ),
- .cluster_id_i ( ClustIdx ),
-
- .async_data_master_aw_wptr_o ( async_cluster_to_soc_axi_bus.aw_wptr ),
- .async_data_master_aw_rptr_i ( async_cluster_to_soc_axi_bus.aw_rptr ),
- .async_data_master_aw_data_o ( async_cluster_to_soc_axi_bus.aw_data ),
- .async_data_master_ar_wptr_o ( async_cluster_to_soc_axi_bus.ar_wptr ),
- .async_data_master_ar_rptr_i ( async_cluster_to_soc_axi_bus.ar_rptr ),
- .async_data_master_ar_data_o ( async_cluster_to_soc_axi_bus.ar_data ),
- .async_data_master_w_data_o ( async_cluster_to_soc_axi_bus.w_data ),
- .async_data_master_w_wptr_o ( async_cluster_to_soc_axi_bus.w_wptr ),
- .async_data_master_w_rptr_i ( async_cluster_to_soc_axi_bus.w_rptr ),
- .async_data_master_r_wptr_i ( async_cluster_to_soc_axi_bus.r_wptr ),
- .async_data_master_r_rptr_o ( async_cluster_to_soc_axi_bus.r_rptr ),
- .async_data_master_r_data_i ( async_cluster_to_soc_axi_bus.r_data ),
- .async_data_master_b_wptr_i ( async_cluster_to_soc_axi_bus.b_wptr ),
- .async_data_master_b_rptr_o ( async_cluster_to_soc_axi_bus.b_rptr ),
- .async_data_master_b_data_i ( async_cluster_to_soc_axi_bus.b_data ),
-
- .async_data_slave_aw_wptr_i ( async_soc_to_cluster_axi_bus.aw_wptr ),
- .async_data_slave_aw_rptr_o ( async_soc_to_cluster_axi_bus.aw_rptr ),
- .async_data_slave_aw_data_i ( async_soc_to_cluster_axi_bus.aw_data ),
- .async_data_slave_ar_wptr_i ( async_soc_to_cluster_axi_bus.ar_wptr ),
- .async_data_slave_ar_rptr_o ( async_soc_to_cluster_axi_bus.ar_rptr ),
- .async_data_slave_ar_data_i ( async_soc_to_cluster_axi_bus.ar_data ),
- .async_data_slave_w_data_i ( async_soc_to_cluster_axi_bus.w_data ),
- .async_data_slave_w_wptr_i ( async_soc_to_cluster_axi_bus.w_wptr ),
- .async_data_slave_w_rptr_o ( async_soc_to_cluster_axi_bus.w_rptr ),
- .async_data_slave_r_wptr_o ( async_soc_to_cluster_axi_bus.r_wptr ),
- .async_data_slave_r_rptr_i ( async_soc_to_cluster_axi_bus.r_rptr ),
- .async_data_slave_r_data_o ( async_soc_to_cluster_axi_bus.r_data ),
- .async_data_slave_b_wptr_o ( async_soc_to_cluster_axi_bus.b_wptr ),
- .async_data_slave_b_rptr_i ( async_soc_to_cluster_axi_bus.b_rptr ),
- .async_data_slave_b_data_o ( async_soc_to_cluster_axi_bus.b_data )
- );
-
// Load ELF binary file
task load_binary;
input string binary; // File name
output addr_t entry_point;
addr_t section_addr, section_len;
byte buffer[];
+ addr_t section_addr_offset;
// Read ELF
void'(read_elf(binary));
@@ -437,16 +658,21 @@ module pulp_cluster_tb;
$display("[TB] Reading %s", binary);
while (get_section(section_addr, section_len)) begin
// Read Sections
- automatic int num_words = (section_len + AxiWideBeWidth - 1)/AxiWideBeWidth;
- $display("[TB] Reading section %x with %0d words", section_addr, num_words);
+ automatic int num_start_unaligned_bytes = section_addr%AxiWideBeWidth;
+ automatic int num_wide_words = (num_start_unaligned_bytes + section_len + AxiWideBeWidth - 1)/AxiWideBeWidth;
+
+ $display("[TB] Reading section %x with %0d words", section_addr, num_wide_words);
- sections[section_addr >> AxiWideByteOffset] = num_words;
- buffer = new[num_words * AxiWideBeWidth];
+ sections[section_addr >> AxiWideByteOffset] = num_wide_words;
+ buffer = new[num_wide_words * AxiWideBeWidth];
void'(read_section(section_addr, buffer, section_len));
- for (int i = 0; i < num_words; i++) begin
+ for (int i = 0; i < num_wide_words; i++) begin
automatic logic [AxiWideBeWidth-1:0][7:0] word = '0;
for (int j = 0; j < AxiWideBeWidth; j++) begin
- word[j] = buffer[i * AxiWideBeWidth + j];
+ automatic int index = i * AxiWideBeWidth + j - num_start_unaligned_bytes;
+ if(index >= 0) begin
+ word[j] = buffer[index];
+ end
end
memory[section_addr/AxiWideBeWidth + i] = word;
end