From 05492977c8cefdd2e50d5c68e3a556486f7385e0 Mon Sep 17 00:00:00 2001
From: Wonhyuk Yang <wonhyuk@postech.ac.kr>
Date: Sat, 23 May 2026 01:13:48 +0900
Subject: [PATCH] [Tests] Reorganize tests/ into ops/<family>, models, system

tests/ was a flat mix of op-level files, single-file model tests, and
multi-file model directories with inconsistent capitalization. This PR
introduces a hierarchy:

  tests/
    _pytorchsim_utils.py
    ops/
      elementwise/  reduce/  gemm/  conv/  attention/
      view/         sort/    sparsity/  misc/  fusion/
    models/
      DeepSeek/  Diffusion/  Llama/  MLP/  MoE/
      MobileNet/  Mixtral8x7B/  Yolov5/
      test_mlp.py  test_resnet.py  test_single_perceptron.py
      test_transformer.py  test_vit.py
    system/
      test_eager.py  test_hetro.py  test_scheduler.py
      test_stonne.py  test_vectorops.py

Mixtral_8x7B renamed to Mixtral8x7B for consistency with the other
PascalCase model dirs. Existing single-file model dirs are kept as
dirs (they may grow companion files like the Mixtral model.py).

All file moves use git mv to preserve history. External path
references rewritten across .github/workflows/pytorchsim_test.yml,
README.md, CLAUDE.md, .github/ISSUE_TEMPLATE/bug_report.md, and the
scripts/{sparsity,stonne}_experiment/ shell scripts.

Cross-test imports updated to drop the `tests.` prefix (because PR #234
puts `<repo>/tests` on sys.path[0] to avoid the ultralytics top-level
`tests` package collision):
- tests/system/test_hetro.py
- tests/system/test_scheduler.py
- tests/system/test_vectorops.py
- tests/ops/sparsity/test_sparsity.py

__init__.py added to each new subdir so e.g. `from ops.elementwise.test_add
import test_vectoradd` resolves.

Six files (test_resnet, test_vit, test_stonne, test_sparse_core,
test_gqa, test_diffusion) had their `sys.path.append` inside an
`if __name__ == "__main__":` block, which made the PR-A migration
script insert the import at the wrong indent level. Moved the
sys.path/import block to module top.

Sample-verified locally on tests/ops/elementwise/test_add.py and
tests/ops/fusion/test_matmul_vector.py (both pass) plus import-only
checks on the cross-import files.

Stacks on top of PR #234 (tests/_pytorchsim_utils.py extraction).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/ISSUE_TEMPLATE/bug_report.md          |  2 +-
 .github/workflows/pytorchsim_test.yml         | 80 +++++++++----------
 CLAUDE.md                                     | 18 ++---
 README.md                                     | 16 ++--
 scripts/sparsity_experiment/run.sh            | 60 +++++++-------
 scripts/stonne_experiment/run.sh              |  6 +-
 scripts/stonne_experiment/run_trace.sh        |  2 +-
 .../DeepSeek/test_deepseek_v3_base.py         |  0
 .../{ => models}/Diffusion/test_diffusion.py  |  0
 tests/{ => models}/Llama/test_llama.py        |  0
 tests/{ => models}/MLP/test_mlp.py            |  0
 tests/{ => models}/MLP/test_mlp_cpu.py        |  0
 .../Mixtral8x7B}/model.py                     |  0
 .../Mixtral8x7B}/test_attention.py            |  0
 tests/{ => models}/MoE/test_moe.py            |  0
 tests/{ => models}/MoE/test_moe_cpu.py        |  0
 .../{ => models}/MobileNet/test_mobilenet.py  |  0
 tests/{ => models}/Yolov5/test_yolov5.py      |  0
 tests/{Fusion => models}/__init__.py          |  0
 tests/{ => models}/test_mlp.py                |  0
 tests/{ => models}/test_resnet.py             |  0
 tests/{ => models}/test_single_perceptron.py  |  0
 tests/{ => models}/test_transformer.py        |  0
 tests/{ => models}/test_vit.py                |  0
 tests/ops/__init__.py                         |  0
 tests/ops/attention/__init__.py               |  0
 tests/{ => ops/attention}/test_gqa.py         |  0
 tests/{ => ops/attention}/test_gqa_decode.py  |  0
 tests/{ => ops/attention}/test_sdpa.py        |  0
 tests/ops/conv/__init__.py                    |  0
 tests/{ => ops/conv}/test_cnn.py              |  0
 tests/{ => ops/conv}/test_conv2d.py           |  0
 tests/{ => ops/conv}/test_group_conv.py       |  0
 tests/{ => ops/conv}/test_pool.py             |  0
 tests/ops/elementwise/__init__.py             |  0
 .../{ => ops/elementwise}/test_activation.py  |  0
 tests/{ => ops/elementwise}/test_add.py       |  0
 tests/{ => ops/elementwise}/test_exponent.py  |  0
 .../elementwise}/test_transcendental.py       |  0
 tests/ops/fusion/__init__.py                  |  0
 .../fusion}/test_addmm_residual.py            |  0
 .../fusion}/test_attention_fusion.py          |  0
 .../fusion}/test_bmm_reduction.py             |  0
 .../fusion}/test_conv_fusion.py               |  0
 .../fusion}/test_matmul_activation.py         |  0
 .../fusion}/test_matmul_reduction.py          |  0
 .../fusion}/test_matmul_scalar.py             |  0
 .../fusion}/test_matmul_vector.py             |  0
 .../fusion}/test_prologue_fusion.py           |  0
 .../fusion}/test_transformer_fusion.py        |  0
 tests/ops/gemm/__init__.py                    |  0
 tests/{ => ops/gemm}/test_bmm.py              |  0
 tests/{ => ops/gemm}/test_matmul.py           |  0
 tests/ops/misc/__init__.py                    |  0
 tests/{ => ops/misc}/test_expert_mask.py      |  0
 tests/{ => ops/misc}/test_indirect_access.py  |  0
 tests/ops/reduce/__init__.py                  |  0
 tests/{ => ops/reduce}/test_batchnorm.py      |  0
 tests/{ => ops/reduce}/test_layernorm.py      |  0
 tests/{ => ops/reduce}/test_reduce.py         |  0
 tests/{ => ops/reduce}/test_softmax.py        |  0
 tests/ops/sort/__init__.py                    |  0
 tests/{ => ops/sort}/test_sort.py             |  0
 tests/{ => ops/sort}/test_topk.py             |  0
 tests/ops/sparsity/__init__.py                |  0
 tests/{ => ops/sparsity}/test_sparse_core.py  |  0
 tests/{ => ops/sparsity}/test_sparsity.py     |  7 +-
 tests/ops/view/__init__.py                    |  0
 tests/{ => ops/view}/test_cat.py              |  0
 tests/{ => ops/view}/test_transpose2D.py      |  0
 tests/{ => ops/view}/test_transpose3D.py      |  0
 tests/{ => ops/view}/test_view3D_2D.py        |  0
 tests/system/__init__.py                      |  0
 tests/{ => system}/test_eager.py              |  0
 tests/{ => system}/test_hetro.py              |  4 +-
 tests/{ => system}/test_scheduler.py          |  7 +-
 tests/{ => system}/test_stonne.py             |  0
 tests/{ => system}/test_vectorops.py          | 17 ++--
 78 files changed, 114 insertions(+), 105 deletions(-)
 rename tests/{ => models}/DeepSeek/test_deepseek_v3_base.py (100%)
 rename tests/{ => models}/Diffusion/test_diffusion.py (100%)
 rename tests/{ => models}/Llama/test_llama.py (100%)
 rename tests/{ => models}/MLP/test_mlp.py (100%)
 rename tests/{ => models}/MLP/test_mlp_cpu.py (100%)
 rename tests/{Mixtral_8x7B => models/Mixtral8x7B}/model.py (100%)
 rename tests/{Mixtral_8x7B => models/Mixtral8x7B}/test_attention.py (100%)
 rename tests/{ => models}/MoE/test_moe.py (100%)
 rename tests/{ => models}/MoE/test_moe_cpu.py (100%)
 rename tests/{ => models}/MobileNet/test_mobilenet.py (100%)
 rename tests/{ => models}/Yolov5/test_yolov5.py (100%)
 rename tests/{Fusion => models}/__init__.py (100%)
 rename tests/{ => models}/test_mlp.py (100%)
 rename tests/{ => models}/test_resnet.py (100%)
 rename tests/{ => models}/test_single_perceptron.py (100%)
 rename tests/{ => models}/test_transformer.py (100%)
 rename tests/{ => models}/test_vit.py (100%)
 create mode 100644 tests/ops/__init__.py
 create mode 100644 tests/ops/attention/__init__.py
 rename tests/{ => ops/attention}/test_gqa.py (100%)
 rename tests/{ => ops/attention}/test_gqa_decode.py (100%)
 rename tests/{ => ops/attention}/test_sdpa.py (100%)
 create mode 100644 tests/ops/conv/__init__.py
 rename tests/{ => ops/conv}/test_cnn.py (100%)
 rename tests/{ => ops/conv}/test_conv2d.py (100%)
 rename tests/{ => ops/conv}/test_group_conv.py (100%)
 rename tests/{ => ops/conv}/test_pool.py (100%)
 create mode 100644 tests/ops/elementwise/__init__.py
 rename tests/{ => ops/elementwise}/test_activation.py (100%)
 rename tests/{ => ops/elementwise}/test_add.py (100%)
 rename tests/{ => ops/elementwise}/test_exponent.py (100%)
 rename tests/{ => ops/elementwise}/test_transcendental.py (100%)
 create mode 100644 tests/ops/fusion/__init__.py
 rename tests/{Fusion => ops/fusion}/test_addmm_residual.py (100%)
 rename tests/{Fusion => ops/fusion}/test_attention_fusion.py (100%)
 rename tests/{Fusion => ops/fusion}/test_bmm_reduction.py (100%)
 rename tests/{Fusion => ops/fusion}/test_conv_fusion.py (100%)
 rename tests/{Fusion => ops/fusion}/test_matmul_activation.py (100%)
 rename tests/{Fusion => ops/fusion}/test_matmul_reduction.py (100%)
 rename tests/{Fusion => ops/fusion}/test_matmul_scalar.py (100%)
 rename tests/{Fusion => ops/fusion}/test_matmul_vector.py (100%)
 rename tests/{Fusion => ops/fusion}/test_prologue_fusion.py (100%)
 rename tests/{Fusion => ops/fusion}/test_transformer_fusion.py (100%)
 create mode 100644 tests/ops/gemm/__init__.py
 rename tests/{ => ops/gemm}/test_bmm.py (100%)
 rename tests/{ => ops/gemm}/test_matmul.py (100%)
 create mode 100644 tests/ops/misc/__init__.py
 rename tests/{ => ops/misc}/test_expert_mask.py (100%)
 rename tests/{ => ops/misc}/test_indirect_access.py (100%)
 create mode 100644 tests/ops/reduce/__init__.py
 rename tests/{ => ops/reduce}/test_batchnorm.py (100%)
 rename tests/{ => ops/reduce}/test_layernorm.py (100%)
 rename tests/{ => ops/reduce}/test_reduce.py (100%)
 rename tests/{ => ops/reduce}/test_softmax.py (100%)
 create mode 100644 tests/ops/sort/__init__.py
 rename tests/{ => ops/sort}/test_sort.py (100%)
 rename tests/{ => ops/sort}/test_topk.py (100%)
 create mode 100644 tests/ops/sparsity/__init__.py
 rename tests/{ => ops/sparsity}/test_sparse_core.py (100%)
 rename tests/{ => ops/sparsity}/test_sparsity.py (94%)
 create mode 100644 tests/ops/view/__init__.py
 rename tests/{ => ops/view}/test_cat.py (100%)
 rename tests/{ => ops/view}/test_transpose2D.py (100%)
 rename tests/{ => ops/view}/test_transpose3D.py (100%)
 rename tests/{ => ops/view}/test_view3D_2D.py (100%)
 create mode 100644 tests/system/__init__.py
 rename tests/{ => system}/test_eager.py (100%)
 rename tests/{ => system}/test_hetro.py (94%)
 rename tests/{ => system}/test_scheduler.py (91%)
 rename tests/{ => system}/test_stonne.py (100%)
 rename tests/{ => system}/test_vectorops.py (64%)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 7022ebba..36a5f4f5 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -19,7 +19,7 @@ If the issue occurs while running a Python workload or involves a simulator cras
 
 For example:
 ```
-python3 tests/test_add.py
+python3 tests/ops/elementwise/test_add.py
 ...
 [SpikeSimulator] cmd> spike --isa rv64gcv --varch=vlen:256,elen:64 --vectorlane-size=128 \
   -m0x80000000:0x1900000000,0x2000000000:0x1000000 \
diff --git a/.github/workflows/pytorchsim_test.yml b/.github/workflows/pytorchsim_test.yml
index 4b4fab80..6db998ae 100644
--- a/.github/workflows/pytorchsim_test.yml
+++ b/.github/workflows/pytorchsim_test.yml
@@ -33,7 +33,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_add.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_add.py
 
   test_transcendental:
     name: Run test_transcendental.py
@@ -52,7 +52,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transcendental.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_transcendental.py
 
   test_activation:
     name: Run test_activation.py
@@ -71,7 +71,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_activation.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_activation.py
 
   test_batchnorm:
     name: Run test_batchnorm.py
@@ -90,7 +90,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_batchnorm.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_batchnorm.py
 
   test_bmm:
     name: Run test_bmm.py
@@ -109,7 +109,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_bmm.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/gemm/test_bmm.py
 
   test_cnn:
     name: Run test_cnn.py
@@ -128,7 +128,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_cnn.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_cnn.py
 
   test_conv2d:
     name: Run test_conv2d.py
@@ -147,7 +147,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_conv2d.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_conv2d.py
 
   test_cat:
     name: Run test_cat.py
@@ -166,7 +166,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_cat.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_cat.py
 
   test_matmul:
     name: Run test_matmul.py
@@ -185,7 +185,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_matmul.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/gemm/test_matmul.py
 
   test_reduce:
     name: Run test_reduce.py
@@ -204,7 +204,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_reduce.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_reduce.py
 
   test_softmax:
     name: Run test_softmax.py
@@ -223,7 +223,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_softmax.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_softmax.py
 
   test_transpose2D:
     name: Run test_transpose2D.py
@@ -242,7 +242,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transpose2D.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_transpose2D.py
 
   test_view3D_2D:
     name: Run test_view3D_2D.py
@@ -261,7 +261,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_view3D_2D.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_view3D_2D.py
 
   test_layernorm:
     name: Run test_layernorm.py
@@ -280,7 +280,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_layernorm.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_layernorm.py
 
   test_mlp:
     name: Run test_mlp.py
@@ -299,7 +299,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_mlp.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_mlp.py
 
   test_resnet:
     name: Run test_resnet.py
@@ -318,7 +318,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_resnet.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_resnet.py
 
       - name: Run test_resnet50.py
         run: |
@@ -326,7 +326,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_resnet.py --model_type resnet50
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_resnet.py --model_type resnet50
 
   test_mobilenet:
     name: Run test_mobilenet.py
@@ -345,7 +345,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/MobileNet/test_mobilenet.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/MobileNet/test_mobilenet.py
 
   test_transformer:
     name: Run test_transformer.py
@@ -364,7 +364,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transformer.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_transformer.py
 
   test_transpose3D:
     name: Run test_transpose3D.py
@@ -383,7 +383,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transpose3D.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_transpose3D.py
 
   test_sparsity:
     name: Run test_sparsity.py
@@ -402,7 +402,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_sparsity.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/sparsity/test_sparsity.py
 
   test_pool:
     name: Run test_pool.py
@@ -421,7 +421,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_pool.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_pool.py
 
   test_perceptron:
     name: Run test_perceptron.py
@@ -440,7 +440,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_single_perceptron.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_single_perceptron.py
 
   test_fusion:
     name: Run test_fusion
@@ -459,7 +459,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_addmm_residual.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_addmm_residual.py
 
       - name: Run test_matmul_activation.py
         run: |
@@ -467,7 +467,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_activation.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_activation.py
 
       - name: Run test_matmul_scalar.py
         run: |
@@ -475,7 +475,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_scalar.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_scalar.py
 
       - name: Run test_matmul_reduction.py
         run: |
@@ -483,7 +483,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_reduction.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_reduction.py
 
       - name: Run test_bmm_reduction.py
         run: |
@@ -491,7 +491,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_bmm_reduction.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_bmm_reduction.py
 
       - name: Run test_prologue_fusion.py
         run: |
@@ -499,7 +499,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_prologue_fusion.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_prologue_fusion.py
 
       - name: Run test_transformer_fusion.py
         run: |
@@ -507,7 +507,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_transformer_fusion.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_transformer_fusion.py
 
       - name: Run test_conv_fusion.py
         run: |
@@ -515,7 +515,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_conv_fusion.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_conv_fusion.py
 
   test_moe:
     name: Run test_moe
@@ -534,7 +534,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/MoE/test_moe.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/MoE/test_moe.py
 
   test_mistral:
     name: Run test_mistral
@@ -553,7 +553,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Mixtral_8x7B/test_attention.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Mixtral8x7B/test_attention.py
 
   test_vit:
     name: Run test_vit
@@ -572,7 +572,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_vit.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_vit.py
 
   test_diffusion:
     name: Run test_diffusion
@@ -591,7 +591,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Diffusion/test_diffusion.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Diffusion/test_diffusion.py
 
   test_indirect:
     name: Run test_indirect
@@ -610,7 +610,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_indirect_access.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/misc/test_indirect_access.py
 
   test_scheduler:
     name: Run test_scheduler
@@ -629,7 +629,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/test_scheduler.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/system/test_scheduler.py
 
   test_llama:
     name: Run test_llama1&2
@@ -648,7 +648,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Llama/test_llama.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Llama/test_llama.py
 
   test_yolov5:
     name: Run test_yolov5
@@ -667,7 +667,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/Yolov5/test_yolov5.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Yolov5/test_yolov5.py
 
   test_deepseek:
     name: Run test_deepseek
@@ -686,7 +686,7 @@ jobs:
           docker run --rm \
             -e vpu_num_lanes="${{ inputs.vector_lane }}" \
             -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
-            ${{ inputs.image_name }} python3 PyTorchSim/tests/DeepSeek/test_deepseek_v3_base.py
+            ${{ inputs.image_name }} python3 PyTorchSim/tests/models/DeepSeek/test_deepseek_v3_base.py
 
   test_accuracy:
     name: Run test_accuracy and test_speedup
diff --git a/CLAUDE.md b/CLAUDE.md
index 8eb99c93..4a4e7424 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -23,7 +23,7 @@ The pipeline runs in that order on every `torch.compile` invocation; you'll see
 | `TOGSim/` | C++ TOGSim source. `src/Simulator.cc`, `Core.cc`, `Dram.cc`, `Interconnect.cc`, `L2Cache.cc`, `Tile.cc`, `TileGraph.cc` are the core models. Externals: ramulator2, booksim, stonneCore, onnx, protobuf, spdlog, yaml-cpp |
 | `AsmParser/` | `tog_generator.py`, `onnx_utility.py` — TOG generation from ONNX/ASM |
 | `configs/` | TOGSim hardware configs (YAML). The default is `systolic_ws_128x128_c1_simple_noc_tpuv3.yml`. Naming pattern: `systolic_ws_<size>_c<cores>_<noc>_<target>.yml` |
-| `tests/` | ~36 op- and model-level tests. Subdirs `DeepSeek/`, `Diffusion/`, `Llama/`, `MLP/`, `Mixtral_8x7B/`, `MoE/`, `Yolov5/`, `Fusion/` for whole-model workloads |
+| `tests/` | Op- and model-level tests organized under `ops/<family>/` (elementwise, reduce, gemm, conv, attention, view, sort, sparsity, misc, fusion), `models/<name>/` (Llama, Mixtral8x7B, DeepSeek, Diffusion, MoE, MLP, MobileNet, Yolov5) plus single-file model tests (test_resnet, test_transformer, test_vit, test_mlp, test_single_perceptron), and `system/` (scheduler, eager, hetro, stonne, vectorops). Shared helper: `tests/_pytorchsim_utils.py` |
 | `experiments/artifact/` | Paper reproduction scripts (`cycle_validation/run_cycle.sh`, `speedup/run_speedup.sh`) |
 | `scripts/` | One-off experiment runners (CompilerOpt, ILS, batch, chiplet, sparsity, stonne, end2end). `build_from_source.sh` builds gem5/llvm/spike |
 | `gem5_script/` | gem5 wrapper scripts called by `CycleSimulator` |
@@ -36,16 +36,16 @@ The pipeline runs in that order on every `torch.compile` invocation; you'll see
 Most tests follow the same pattern: build CPU reference, compile via `torch.compile` on `npu:0`, compare with `torch.allclose` (rtol=atol=1e-4). They all have `if __name__ == "__main__"` blocks.
 
 ```bash
-python tests/test_add.py        # vector add (smoke test, fastest)
-python tests/test_matmul.py     # GEMM
-python tests/test_mlp.py        # MLP forward + backward (training path)
-python tests/test_scheduler.py  # multi-tenant launch_model
-python tests/test_eager.py      # eager-fallback registration
+python tests/ops/elementwise/test_add.py  # vector add (smoke test, fastest)
+python tests/ops/gemm/test_matmul.py      # GEMM
+python tests/models/test_mlp.py           # MLP forward + backward (training path)
+python tests/system/test_scheduler.py     # multi-tenant launch_model
+python tests/system/test_eager.py         # eager-fallback registration
 ```
 
-Run a model from `tests/Llama/`, `tests/DeepSeek/`, etc. similarly.
+Run a model from `tests/models/Llama/`, `tests/models/DeepSeek/`, etc. similarly.
 
-**CI coverage:** the GitHub Actions workflow `.github/workflows/pytorchsim_test.yml` runs an **explicit allowlist** of `tests/*.py` files (~40 jobs, one Docker container per test). Adding a new file under `tests/` does *not* automatically gate PRs — register it in `pytorchsim_test.yml` if you want CI to exercise it. Conversely, files like `tests/test_gqa.py`, `tests/test_gqa_decode.py`, and `tests/test_eager.py` exist in the repo but are *not* in CI, so local validation is the only safety net for them.
+**CI coverage:** the GitHub Actions workflow `.github/workflows/pytorchsim_test.yml` runs an **explicit allowlist** of test files under `tests/` (~40 jobs, one Docker container per test). Adding a new file under `tests/` does *not* automatically gate PRs — register it in `pytorchsim_test.yml` if you want CI to exercise it. Conversely, files like `tests/ops/attention/test_gqa.py`, `tests/ops/attention/test_gqa_decode.py`, and `tests/system/test_eager.py` exist in the repo but are *not* in CI, so local validation is the only safety net for them.
 
 **For fast iteration** (skip functional check):
 ```bash
@@ -123,7 +123,7 @@ Conan deps for TOGSim: `boost/1.79.0`, `robin-hood-hashing/3.11.5`, `spdlog/1.11
 - **Adding a PyTorch device op:** `PyTorchSimDevice/csrc/aten/native/*` (Minimal/Extra split mirrors `torch_openreg`).
 - **TOGSim hardware model changes:** `TOGSim/src/{Core,Dram,Interconnect,L2Cache,Tile,TileGraph}.cc` + matching `include/*.h`.
 - **TOG generation:** `AsmParser/tog_generator.py` builds the raw graph and serializes it via `AsmParser/onnx_utility.py` to **ONNX, which is the on-disk TOG format** consumed by TOGSim.
-- **Eager fallback registration:** `torch.npu.register_eager_to_compile([...])` — see `tests/test_eager.py`.
+- **Eager fallback registration:** `torch.npu.register_eager_to_compile([...])` — see `tests/system/test_eager.py`.
 - **Per-run results:** `togsim_results/<YYYYMMDD_HHMMSS_<hash>>.log` (stats) and `.trace` (instruction trace). The path is also printed at the end of every run.
 - **Wrapper codegen path:** printed as `Wrapper Codegen Path = /tmp/torchinductor_<user>/<hash>/...py` — useful for inspecting generated kernel code and tensor names for `SRAM_BUFFER_PLAN_PATH`.
 
diff --git a/README.md b/README.md
index 800f4761..f0bdc772 100644
--- a/README.md
+++ b/README.md
@@ -40,15 +40,15 @@ PyTorchSim **supports**:
 |---|:-:|:-:|---|
 | ResNet-18 | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | channel last format |
 | ResNet-50 | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | channel last format |
-| MobileNet-v2 | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | `tests/MobileNet/` (torchvision) |
-| YOLOv5 | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | `tests/Yolov5/` |
+| MobileNet-v2 | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | `tests/models/MobileNet/` (torchvision) |
+| YOLOv5 | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | `tests/models/Yolov5/` |
 | BERT | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ |  |
 | GPT-2 | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ |  |
-| ViT | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | `tests/test_vit.py` |
+| ViT | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | `tests/models/test_vit.py` |
 | Mistral | <img src="https://avatars.githubusercontent.com/u/21003710?s=48&v=4" width="20"/> | ✅ | |
 | Stable-diffusion v1 | 🤗 | ✅ |  |
-| Llama 2/3 | 🤗 | ✅ | `tests/Llama/` (blocks & decode-style paths) |
-| DeepSeek-V3 (base) | 🤗 | ✅ | `tests/DeepSeek/` — several ops(e.g., gate ops) are not cycle-modeled |
+| Llama 2/3 | 🤗 | ✅ | `tests/models/Llama/` (blocks & decode-style paths) |
+| DeepSeek-V3 (base) | 🤗 | ✅ | `tests/models/DeepSeek/` — several ops(e.g., gate ops) are not cycle-modeled |
 | Llama-4 | 🤗 | ⏳ | In development |
 | Broader model support | — | ⏳ | In development |
 <!-- ## Requirements
@@ -104,7 +104,7 @@ The script clones each dep at the tag pinned in [`thirdparty/github-releases.jso
 ### Run Examples
 The `tests` directory contains several AI workload examples.
 ```bash
-python tests/test_matmul.py 
+python tests/ops/gemm/test_matmul.py 
 ```
 The result is written to `${TORCHSIM_LOG_PATH}/togsim_result/XXX.log`. The log file contains detailed core, memory, and interconnect stats.
 
@@ -201,7 +201,7 @@ optimizer.zero_grad()
 loss.backward()
 compiled_step()
 ```
-`tests/test_mlp.py` provides an example of MLP training.
+`tests/models/test_mlp.py` provides an example of MLP training.
 
 ## One TOGSim session, one continuous log
 
@@ -243,7 +243,7 @@ with TOGSimulator(config_path=config):
 Here `synchronize()` acts as a barrier: it does not return until every `launch_model` issued **above** it has finished in the simulator. The later pair of `launch_model` calls therefore runs only after those earlier models have fully completed—so the sync is the point in the timeline where **all preceding launches are done**.
 
 ```bash
-python tests/test_scheduler.py
+python tests/system/test_scheduler.py
 ```
 
 Use a TOGSim config(`.yml`) that defines **partitions** when mapping queues to cores, for example:
diff --git a/scripts/sparsity_experiment/run.sh b/scripts/sparsity_experiment/run.sh
index 7996b5ab..d349ac6b 100755
--- a/scripts/sparsity_experiment/run.sh
+++ b/scripts/sparsity_experiment/run.sh
@@ -6,48 +6,48 @@ export TORCHSIM_FORCE_TIME_N=8
 
 OUTPUT_DIR="12GB"
 export TOGSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c1_12G_simple_noc.yml"
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
 
 OUTPUT_DIR="24GB"
 export TOGSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c1_24G_simple_noc.yml"
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
 
 OUTPUT_DIR="48GB"
 export TOGSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c1_48G_simple_noc.yml"
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
 
 OUTPUT_DIR="12GB_2core"
 export TOGSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c2_12G_simple_noc.yml"
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
 
 OUTPUT_DIR="24GB_2core"
 export TOGSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c2_24G_simple_noc.yml"
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
 
 OUTPUT_DIR="48GB_2core"
 export TOGSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c2_48G_simple_noc.yml"
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
-python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.0  > ${OUTPUT_DIR}/0.0
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.2  > ${OUTPUT_DIR}/0.2
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.4  > ${OUTPUT_DIR}/0.4
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.6  > ${OUTPUT_DIR}/0.6
+python3 /workspace/PyTorchSim/tests/ops/sparsity/test_sparsity.py --sparsity  0.8  > ${OUTPUT_DIR}/0.8
diff --git a/scripts/stonne_experiment/run.sh b/scripts/stonne_experiment/run.sh
index 2e386d9c..e6479aa6 100755
--- a/scripts/stonne_experiment/run.sh
+++ b/scripts/stonne_experiment/run.sh
@@ -2,8 +2,8 @@
 export TORCHSIM_FORCE_TIME_M=1024
 export TORCHSIM_FORCE_TIME_K=1024
 export TORCHSIM_FORCE_TIME_N=1024
-python3 ../../tests/test_hetro.py --M 1024 --N 1024 --K 1024 --sparsity 0.9 --config stonne_big_c1_simple_noc.yml --mode 0 > hetero/big_sparse.log
-python3 ../../tests/test_hetro.py --M 1024 --N 1024 --K 1024 --sparsity 0.9 --config systolic_ws_128x128_c1_simple_noc_tpuv3_half.yml --mode 1 > hetero/big.log
-python3 ../../tests/test_hetro.py --M 1024 --N 1024 --K 1024 --sparsity 0.9 --config heterogeneous_c2_simple_noc.yml --mode 2 > hetero/hetero.log
+python3 ../../tests/system/test_hetro.py --M 1024 --N 1024 --K 1024 --sparsity 0.9 --config stonne_big_c1_simple_noc.yml --mode 0 > hetero/big_sparse.log
+python3 ../../tests/system/test_hetro.py --M 1024 --N 1024 --K 1024 --sparsity 0.9 --config systolic_ws_128x128_c1_simple_noc_tpuv3_half.yml --mode 1 > hetero/big.log
+python3 ../../tests/system/test_hetro.py --M 1024 --N 1024 --K 1024 --sparsity 0.9 --config heterogeneous_c2_simple_noc.yml --mode 2 > hetero/hetero.log
 
 echo "All processes completed!"
diff --git a/scripts/stonne_experiment/run_trace.sh b/scripts/stonne_experiment/run_trace.sh
index 5a4ff890..f959b07d 100755
--- a/scripts/stonne_experiment/run_trace.sh
+++ b/scripts/stonne_experiment/run_trace.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-SCRIPT="/workspace/PyTorchSim/tests/test_stonne.py"
+SCRIPT="/workspace/PyTorchSim/tests/system/test_stonne.py"
 
 SIZES=(32 64 128)
 SPARSITIES=(0.0 0.2 0.4 0.6 0.8)
diff --git a/tests/DeepSeek/test_deepseek_v3_base.py b/tests/models/DeepSeek/test_deepseek_v3_base.py
similarity index 100%
rename from tests/DeepSeek/test_deepseek_v3_base.py
rename to tests/models/DeepSeek/test_deepseek_v3_base.py
diff --git a/tests/Diffusion/test_diffusion.py b/tests/models/Diffusion/test_diffusion.py
similarity index 100%
rename from tests/Diffusion/test_diffusion.py
rename to tests/models/Diffusion/test_diffusion.py
diff --git a/tests/Llama/test_llama.py b/tests/models/Llama/test_llama.py
similarity index 100%
rename from tests/Llama/test_llama.py
rename to tests/models/Llama/test_llama.py
diff --git a/tests/MLP/test_mlp.py b/tests/models/MLP/test_mlp.py
similarity index 100%
rename from tests/MLP/test_mlp.py
rename to tests/models/MLP/test_mlp.py
diff --git a/tests/MLP/test_mlp_cpu.py b/tests/models/MLP/test_mlp_cpu.py
similarity index 100%
rename from tests/MLP/test_mlp_cpu.py
rename to tests/models/MLP/test_mlp_cpu.py
diff --git a/tests/Mixtral_8x7B/model.py b/tests/models/Mixtral8x7B/model.py
similarity index 100%
rename from tests/Mixtral_8x7B/model.py
rename to tests/models/Mixtral8x7B/model.py
diff --git a/tests/Mixtral_8x7B/test_attention.py b/tests/models/Mixtral8x7B/test_attention.py
similarity index 100%
rename from tests/Mixtral_8x7B/test_attention.py
rename to tests/models/Mixtral8x7B/test_attention.py
diff --git a/tests/MoE/test_moe.py b/tests/models/MoE/test_moe.py
similarity index 100%
rename from tests/MoE/test_moe.py
rename to tests/models/MoE/test_moe.py
diff --git a/tests/MoE/test_moe_cpu.py b/tests/models/MoE/test_moe_cpu.py
similarity index 100%
rename from tests/MoE/test_moe_cpu.py
rename to tests/models/MoE/test_moe_cpu.py
diff --git a/tests/MobileNet/test_mobilenet.py b/tests/models/MobileNet/test_mobilenet.py
similarity index 100%
rename from tests/MobileNet/test_mobilenet.py
rename to tests/models/MobileNet/test_mobilenet.py
diff --git a/tests/Yolov5/test_yolov5.py b/tests/models/Yolov5/test_yolov5.py
similarity index 100%
rename from tests/Yolov5/test_yolov5.py
rename to tests/models/Yolov5/test_yolov5.py
diff --git a/tests/Fusion/__init__.py b/tests/models/__init__.py
similarity index 100%
rename from tests/Fusion/__init__.py
rename to tests/models/__init__.py
diff --git a/tests/test_mlp.py b/tests/models/test_mlp.py
similarity index 100%
rename from tests/test_mlp.py
rename to tests/models/test_mlp.py
diff --git a/tests/test_resnet.py b/tests/models/test_resnet.py
similarity index 100%
rename from tests/test_resnet.py
rename to tests/models/test_resnet.py
diff --git a/tests/test_single_perceptron.py b/tests/models/test_single_perceptron.py
similarity index 100%
rename from tests/test_single_perceptron.py
rename to tests/models/test_single_perceptron.py
diff --git a/tests/test_transformer.py b/tests/models/test_transformer.py
similarity index 100%
rename from tests/test_transformer.py
rename to tests/models/test_transformer.py
diff --git a/tests/test_vit.py b/tests/models/test_vit.py
similarity index 100%
rename from tests/test_vit.py
rename to tests/models/test_vit.py
diff --git a/tests/ops/__init__.py b/tests/ops/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/ops/attention/__init__.py b/tests/ops/attention/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_gqa.py b/tests/ops/attention/test_gqa.py
similarity index 100%
rename from tests/test_gqa.py
rename to tests/ops/attention/test_gqa.py
diff --git a/tests/test_gqa_decode.py b/tests/ops/attention/test_gqa_decode.py
similarity index 100%
rename from tests/test_gqa_decode.py
rename to tests/ops/attention/test_gqa_decode.py
diff --git a/tests/test_sdpa.py b/tests/ops/attention/test_sdpa.py
similarity index 100%
rename from tests/test_sdpa.py
rename to tests/ops/attention/test_sdpa.py
diff --git a/tests/ops/conv/__init__.py b/tests/ops/conv/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_cnn.py b/tests/ops/conv/test_cnn.py
similarity index 100%
rename from tests/test_cnn.py
rename to tests/ops/conv/test_cnn.py
diff --git a/tests/test_conv2d.py b/tests/ops/conv/test_conv2d.py
similarity index 100%
rename from tests/test_conv2d.py
rename to tests/ops/conv/test_conv2d.py
diff --git a/tests/test_group_conv.py b/tests/ops/conv/test_group_conv.py
similarity index 100%
rename from tests/test_group_conv.py
rename to tests/ops/conv/test_group_conv.py
diff --git a/tests/test_pool.py b/tests/ops/conv/test_pool.py
similarity index 100%
rename from tests/test_pool.py
rename to tests/ops/conv/test_pool.py
diff --git a/tests/ops/elementwise/__init__.py b/tests/ops/elementwise/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_activation.py b/tests/ops/elementwise/test_activation.py
similarity index 100%
rename from tests/test_activation.py
rename to tests/ops/elementwise/test_activation.py
diff --git a/tests/test_add.py b/tests/ops/elementwise/test_add.py
similarity index 100%
rename from tests/test_add.py
rename to tests/ops/elementwise/test_add.py
diff --git a/tests/test_exponent.py b/tests/ops/elementwise/test_exponent.py
similarity index 100%
rename from tests/test_exponent.py
rename to tests/ops/elementwise/test_exponent.py
diff --git a/tests/test_transcendental.py b/tests/ops/elementwise/test_transcendental.py
similarity index 100%
rename from tests/test_transcendental.py
rename to tests/ops/elementwise/test_transcendental.py
diff --git a/tests/ops/fusion/__init__.py b/tests/ops/fusion/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/Fusion/test_addmm_residual.py b/tests/ops/fusion/test_addmm_residual.py
similarity index 100%
rename from tests/Fusion/test_addmm_residual.py
rename to tests/ops/fusion/test_addmm_residual.py
diff --git a/tests/Fusion/test_attention_fusion.py b/tests/ops/fusion/test_attention_fusion.py
similarity index 100%
rename from tests/Fusion/test_attention_fusion.py
rename to tests/ops/fusion/test_attention_fusion.py
diff --git a/tests/Fusion/test_bmm_reduction.py b/tests/ops/fusion/test_bmm_reduction.py
similarity index 100%
rename from tests/Fusion/test_bmm_reduction.py
rename to tests/ops/fusion/test_bmm_reduction.py
diff --git a/tests/Fusion/test_conv_fusion.py b/tests/ops/fusion/test_conv_fusion.py
similarity index 100%
rename from tests/Fusion/test_conv_fusion.py
rename to tests/ops/fusion/test_conv_fusion.py
diff --git a/tests/Fusion/test_matmul_activation.py b/tests/ops/fusion/test_matmul_activation.py
similarity index 100%
rename from tests/Fusion/test_matmul_activation.py
rename to tests/ops/fusion/test_matmul_activation.py
diff --git a/tests/Fusion/test_matmul_reduction.py b/tests/ops/fusion/test_matmul_reduction.py
similarity index 100%
rename from tests/Fusion/test_matmul_reduction.py
rename to tests/ops/fusion/test_matmul_reduction.py
diff --git a/tests/Fusion/test_matmul_scalar.py b/tests/ops/fusion/test_matmul_scalar.py
similarity index 100%
rename from tests/Fusion/test_matmul_scalar.py
rename to tests/ops/fusion/test_matmul_scalar.py
diff --git a/tests/Fusion/test_matmul_vector.py b/tests/ops/fusion/test_matmul_vector.py
similarity index 100%
rename from tests/Fusion/test_matmul_vector.py
rename to tests/ops/fusion/test_matmul_vector.py
diff --git a/tests/Fusion/test_prologue_fusion.py b/tests/ops/fusion/test_prologue_fusion.py
similarity index 100%
rename from tests/Fusion/test_prologue_fusion.py
rename to tests/ops/fusion/test_prologue_fusion.py
diff --git a/tests/Fusion/test_transformer_fusion.py b/tests/ops/fusion/test_transformer_fusion.py
similarity index 100%
rename from tests/Fusion/test_transformer_fusion.py
rename to tests/ops/fusion/test_transformer_fusion.py
diff --git a/tests/ops/gemm/__init__.py b/tests/ops/gemm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_bmm.py b/tests/ops/gemm/test_bmm.py
similarity index 100%
rename from tests/test_bmm.py
rename to tests/ops/gemm/test_bmm.py
diff --git a/tests/test_matmul.py b/tests/ops/gemm/test_matmul.py
similarity index 100%
rename from tests/test_matmul.py
rename to tests/ops/gemm/test_matmul.py
diff --git a/tests/ops/misc/__init__.py b/tests/ops/misc/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_expert_mask.py b/tests/ops/misc/test_expert_mask.py
similarity index 100%
rename from tests/test_expert_mask.py
rename to tests/ops/misc/test_expert_mask.py
diff --git a/tests/test_indirect_access.py b/tests/ops/misc/test_indirect_access.py
similarity index 100%
rename from tests/test_indirect_access.py
rename to tests/ops/misc/test_indirect_access.py
diff --git a/tests/ops/reduce/__init__.py b/tests/ops/reduce/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_batchnorm.py b/tests/ops/reduce/test_batchnorm.py
similarity index 100%
rename from tests/test_batchnorm.py
rename to tests/ops/reduce/test_batchnorm.py
diff --git a/tests/test_layernorm.py b/tests/ops/reduce/test_layernorm.py
similarity index 100%
rename from tests/test_layernorm.py
rename to tests/ops/reduce/test_layernorm.py
diff --git a/tests/test_reduce.py b/tests/ops/reduce/test_reduce.py
similarity index 100%
rename from tests/test_reduce.py
rename to tests/ops/reduce/test_reduce.py
diff --git a/tests/test_softmax.py b/tests/ops/reduce/test_softmax.py
similarity index 100%
rename from tests/test_softmax.py
rename to tests/ops/reduce/test_softmax.py
diff --git a/tests/ops/sort/__init__.py b/tests/ops/sort/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_sort.py b/tests/ops/sort/test_sort.py
similarity index 100%
rename from tests/test_sort.py
rename to tests/ops/sort/test_sort.py
diff --git a/tests/test_topk.py b/tests/ops/sort/test_topk.py
similarity index 100%
rename from tests/test_topk.py
rename to tests/ops/sort/test_topk.py
diff --git a/tests/ops/sparsity/__init__.py b/tests/ops/sparsity/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_sparse_core.py b/tests/ops/sparsity/test_sparse_core.py
similarity index 100%
rename from tests/test_sparse_core.py
rename to tests/ops/sparsity/test_sparse_core.py
diff --git a/tests/test_sparsity.py b/tests/ops/sparsity/test_sparsity.py
similarity index 94%
rename from tests/test_sparsity.py
rename to tests/ops/sparsity/test_sparsity.py
index eaa7c63c..50253243 100644
--- a/tests/test_sparsity.py
+++ b/tests/ops/sparsity/test_sparsity.py
@@ -7,9 +7,10 @@
 import torch
 import torch._dynamo
 import torch.utils.cpp_extension
-sys.path.append(os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim'))
-from test_transformer import EncoderBlock, test_result
-from test_mlp import MLP
+sys.path.insert(0, os.path.join(os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim'), 'tests'))
+from _pytorchsim_utils import test_result
+from models.test_transformer import EncoderBlock
+from models.test_mlp import MLP
 
 def apply_random_zero(tensor, zero_prob, block_size=8):
     if not 0 <= zero_prob <= 1:
diff --git a/tests/ops/view/__init__.py b/tests/ops/view/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_cat.py b/tests/ops/view/test_cat.py
similarity index 100%
rename from tests/test_cat.py
rename to tests/ops/view/test_cat.py
diff --git a/tests/test_transpose2D.py b/tests/ops/view/test_transpose2D.py
similarity index 100%
rename from tests/test_transpose2D.py
rename to tests/ops/view/test_transpose2D.py
diff --git a/tests/test_transpose3D.py b/tests/ops/view/test_transpose3D.py
similarity index 100%
rename from tests/test_transpose3D.py
rename to tests/ops/view/test_transpose3D.py
diff --git a/tests/test_view3D_2D.py b/tests/ops/view/test_view3D_2D.py
similarity index 100%
rename from tests/test_view3D_2D.py
rename to tests/ops/view/test_view3D_2D.py
diff --git a/tests/system/__init__.py b/tests/system/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_eager.py b/tests/system/test_eager.py
similarity index 100%
rename from tests/test_eager.py
rename to tests/system/test_eager.py
diff --git a/tests/test_hetro.py b/tests/system/test_hetro.py
similarity index 94%
rename from tests/test_hetro.py
rename to tests/system/test_hetro.py
index eaf145d4..38cdd41c 100644
--- a/tests/test_hetro.py
+++ b/tests/system/test_hetro.py
@@ -3,10 +3,10 @@
 import torch
 import argparse
 
-sys.path.append(os.environ.get("TORCHSIM_DIR", default="/workspace/PyTorchSim"))
+sys.path.insert(0, os.path.join(os.environ.get("TORCHSIM_DIR", default="/workspace/PyTorchSim"), "tests"))
 
 from Simulator.simulator import TOGSimulator
-from test_stonne import sparse_matmul
+from system.test_stonne import sparse_matmul
 
 
 def custom_matmul(a, b):
diff --git a/tests/test_scheduler.py b/tests/system/test_scheduler.py
similarity index 91%
rename from tests/test_scheduler.py
rename to tests/system/test_scheduler.py
index beab8054..68abac20 100644
--- a/tests/test_scheduler.py
+++ b/tests/system/test_scheduler.py
@@ -1,10 +1,16 @@
 import os
+import sys
 import torch
 from torchvision.models import resnet18 as model1
-from test_transformer import EncoderBlock as model2
-from Simulator.simulator import TOGSimulator
 
 base_path = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')
+# `models.*` lives under <repo>/tests, so that directory (not only the repo
+# root) must be importable when this file is run directly as a script.
+sys.path.insert(0, os.path.join(base_path, 'tests'))
+sys.path.append(base_path)
+from models.test_transformer import EncoderBlock as model2
+from Simulator.simulator import TOGSimulator
+
 config = f'{base_path}/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.yml'
 
 target_model1 = model1().eval()
diff --git a/tests/test_stonne.py b/tests/system/test_stonne.py
similarity index 100%
rename from tests/test_stonne.py
rename to tests/system/test_stonne.py
diff --git a/tests/test_vectorops.py b/tests/system/test_vectorops.py
similarity index 64%
rename from tests/test_vectorops.py
rename to tests/system/test_vectorops.py
index 90e9c0f5..b83f3cf6 100644
--- a/tests/test_vectorops.py
+++ b/tests/system/test_vectorops.py
@@ -1,16 +1,21 @@
+import os
+import sys
+
 import torch
 
+sys.path.insert(0, os.path.join(os.environ.get("TORCHSIM_DIR", default="/workspace/PyTorchSim"), "tests"))
+
 if __name__ == "__main__":
     device = torch.device("npu:0")
-    
+
     # Target shape
     seq_list = [1,128,512,2048,8192]
     d_model = 768
-    from tests.test_add import test_vectoradd
-    from tests.test_activation import test_GeLU
-    from tests.test_reduce import test_reduce_sum2
-    from tests.test_layernorm import test_LayerNorm
-    from tests.test_softmax import test_softmax
+    from ops.elementwise.test_add import test_vectoradd
+    from ops.elementwise.test_activation import test_GeLU
+    from ops.reduce.test_reduce import test_reduce_sum2
+    from ops.reduce.test_layernorm import test_LayerNorm
+    from ops.reduce.test_softmax import test_softmax
     func_list = [test_vectoradd, test_GeLU, test_reduce_sum2, test_LayerNorm, test_softmax]
     for test_func in func_list:
         for seq in seq_list: