-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patharchitecture_analysis.c
More file actions
143 lines (118 loc) · 4.35 KB
/
Copy patharchitecture_analysis.c
File metadata and controls
143 lines (118 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*
* Architecture Analysis - Memory Hierarchy Benchmark
* COMPS XII - Unit 8: Architecture Decisions That Shape Performance
*
* This program measures memory access times at different data sizes
* to reveal cache boundaries on your machine.
*
* Uses pointer-chasing to defeat CPU prefetching.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define ITERATIONS 50000000
// Prevents compiler from optimizing away our reads
volatile int sink;
// Fisher-Yates shuffle to randomize access pattern
void shuffle(int* arr, int size) {
for (int i = size - 1; i > 0; i--) {
int j = rand() % (i + 1);
int temp = arr[i];
arr[i] = arr[j];
arr[j] = temp;
}
}
double measure_access_time(int* arr, int size) {
clock_t start, end;
// Create a pointer-chase: each element contains the index of the next
int* chase = (int*)malloc(size * sizeof(int));
for (int i = 0; i < size; i++) {
chase[i] = i;
}
shuffle(chase, size);
// Build the chain: arr[i] points to the next index to visit
int* chain = (int*)malloc(size * sizeof(int));
for (int i = 0; i < size - 1; i++) {
chain[chase[i]] = chase[i + 1];
}
chain[chase[size - 1]] = chase[0]; // Complete the loop
// Warm up
int idx = 0;
for (int i = 0; i < size; i++) {
idx = chain[idx];
}
sink = idx;
// Measure pointer-chasing access
long total_accesses = ITERATIONS;
idx = 0;
start = clock();
for (long i = 0; i < total_accesses; i++) {
idx = chain[idx];
}
end = clock();
sink = idx;
free(chase);
free(chain);
double seconds = (double)(end - start) / CLOCKS_PER_SEC;
return (seconds / total_accesses) * 1e9; // nanoseconds per access
}
int main() {
srand(42); // Fixed seed for reproducibility
printf("=== Memory Hierarchy Benchmark ===\n\n");
printf("This benchmark reveals your CPU's cache boundaries by measuring\n");
printf("how long it takes to access data at different array sizes.\n\n");
printf("Uses random pointer-chasing to defeat CPU prefetching.\n");
printf("This may take 2-3 minutes to complete.\n\n");
// Test sizes from 4KB to 128MB (powers of 2)
int sizes[] = {
1024, // 4 KB
2048, // 8 KB
4096, // 16 KB
8192, // 32 KB
16384, // 64 KB
32768, // 128 KB
65536, // 256 KB
131072, // 512 KB
262144, // 1 MB
524288, // 2 MB
1048576, // 4 MB
2097152, // 8 MB
4194304, // 16 MB
8388608, // 32 MB
16777216, // 64 MB
33554432 // 128 MB
};
int num_sizes = sizeof(sizes) / sizeof(sizes[0]);
printf("%-12s %-15s %-20s\n", "Array Size", "Elements", "Avg Access Time (ns)");
printf("%-12s %-15s %-20s\n", "----------", "--------", "--------------------");
double prev_time = 0;
for (int s = 0; s < num_sizes; s++) {
int num_elements = sizes[s];
int size_bytes = num_elements * sizeof(int);
// Measure access time
double ns_per_access = measure_access_time(NULL, num_elements);
// Format size string
char size_str[20];
if (size_bytes >= 1048576) {
sprintf(size_str, "%d MB", size_bytes / 1048576);
} else if (size_bytes >= 1024) {
sprintf(size_str, "%d KB", size_bytes / 1024);
} else {
sprintf(size_str, "%d B", size_bytes);
}
// Mark significant jumps (likely cache boundary)
char marker[10] = "";
if (prev_time > 0 && ns_per_access > prev_time * 1.5) {
sprintf(marker, " <-- JUMP");
}
printf("%-12s %-15d %.2f%s\n", size_str, num_elements, ns_per_access, marker);
prev_time = ns_per_access;
}
printf("\n=== Interpretation Guide ===\n");
printf("- Consistent low times (1-4 ns): Data fits in L1 cache\n");
printf("- First jump (~10-20 ns): Data exceeded L1, now hitting L2\n");
printf("- Second jump (~20-40 ns): Data exceeded L2, now hitting L3\n");
printf("- Third jump (~50-100+ ns): Data exceeded L3, now hitting main RAM\n");
printf("\nCompare these jump points to your CPU's cache sizes from Unit 7.\n");
return 0;
}