Skip to content

Commit 7ae7ff8

Browse files
committed
GPU (Linux): detect EU count & vmem size of Intel GPUs
1 parent 842ba9f commit 7ae7ff8

File tree

2 files changed

+1567
-12
lines changed

2 files changed

+1567
-12
lines changed

src/detection/gpu/gpu_linux.c

Lines changed: 190 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "common/library.h"
77
#include "common/properties.h"
88
#include "util/stringUtils.h"
9+
#include "util/mallocHelper.h"
910

1011
#include <inttypes.h>
1112

@@ -15,6 +16,13 @@
1516
#include <fcntl.h>
1617
#endif
1718

19+
#ifdef FF_HAVE_DRM
20+
#include <i915_drm.h>
21+
#include "xe_drm.h"
22+
#include <fcntl.h>
23+
#include <sys/ioctl.h>
24+
#endif
25+
1826
#include "gpu_asahi.h"
1927

2028
#define FF_STR_INDIR(x) #x
@@ -222,36 +230,182 @@ static void pciDetectAmdSpecific(const FFGPUOptions* options, FFGPUResult* gpu,
222230
}
223231
}
224232

225-
/**
 * Fills in Intel-specific GPU details that are exposed through sysfs.
 *
 * Sets gpu->type from the PCI address and, when a DRM card name is known,
 * reads the maximum GPU frequency into gpu->frequency.
 *
 * @param gpu     GPU entry to update; gpu->deviceId and gpu->driver must already be set.
 * @param pciDir  Path of the PCI device directory. A file path is appended to it and
 *                NOT removed again; the caller is expected to truncate it afterwards.
 * @param buffer  Scratch buffer that receives the content of the frequency file.
 * @param drmKey  Name of the DRM card node of this device (e.g. an entry of
 *                `<pciDir>/drm/` starting with "card"), or NULL if unknown.
 */
static void pciDetectIntelSpecific(FFGPUResult* gpu, FFstrbuf* pciDir, FFstrbuf* buffer, const char* drmKey)
{
    // Works for Intel GPUs
    // https://patchwork.kernel.org/project/intel-gfx/patch/1422039866-11572-3-git-send-email-ville.syrjala@linux.intel.com/

    // 0000:00:02.0 is reserved for Intel integrated graphics
    gpu->type = gpu->deviceId == 20 ? FF_GPU_TYPE_INTEGRATED : FF_GPU_TYPE_DISCRETE;

    // Without a card name there is nothing more to look up
    if (!drmKey) return;

    if (ffStrbufEqualS(&gpu->driver, "xe"))
    {
        ffStrbufAppendS(pciDir, "/tile0/gt0/freq0/max_freq");
    }
    else
    {
        // The card node lives in the `drm` subdirectory of the PCI device
        // directory, so the path must be `<pciDir>/drm/<card>/gt_max_freq_mhz`.
        ffStrbufAppendS(pciDir, "/drm/");
        ffStrbufAppendS(pciDir, drmKey);
        ffStrbufAppendS(pciDir, "/gt_max_freq_mhz");
    }

    if (ffReadFileBuffer(pciDir->chars, buffer))
        gpu->frequency = (uint32_t) ffStrbufToUInt(buffer, 0);
}
254256

257+
/**
 * Counts the bits that are set in a byte buffer.
 *
 * @param bytes   Start of the buffer; no particular alignment is required.
 * @param length  Number of bytes to inspect; 0 yields 0.
 * @return        Total number of 1-bits in bytes[0 .. length-1].
 */
static inline int popcountBytes(const uint8_t* bytes, uint32_t length)
{
    int count = 0;

    // Consume the buffer in 64-bit words. The word is loaded with memcpy
    // instead of dereferencing a casted pointer: the buffer is not guaranteed
    // to be 8-byte aligned, and reading it through a uint64_t lvalue would
    // be undefined behavior.
    while (length >= sizeof(uint64_t))
    {
        uint64_t word;
        __builtin_memcpy(&word, bytes, sizeof(word));
        count += __builtin_popcountll(word);
        bytes += sizeof(word);
        length -= (uint32_t) sizeof(word);
    }

    // At most 7 bytes remain; count them one by one
    for (; length > 0; --length)
        count += __builtin_popcount(*bytes++);

    return count;
}
284+
285+
/**
 * Queries an Intel GPU through its DRM device node for the EU (core) count
 * and the video memory statistics.
 *
 * Depending on gpu->driver, either the `xe` or the `i915` ioctl interface is
 * used; for any other driver nothing is queried. Every individual query is
 * best effort: if it fails, the corresponding fields of `gpu` are left
 * untouched and no error is reported.
 *
 * @param gpu     GPU entry to update (coreCount, dedicated, shared); gpu->driver must be set.
 * @param drmKey  Name of the device node below `/dev/dri/`; must not be NULL.
 * @param buffer  Scratch buffer; it is overwritten with the device node path.
 * @return        NULL if the device node could be opened, otherwise a static
 *                error message (also when built without DRM support).
 */
static const char* drmDetectIntelSpecific(FFGPUResult* gpu, const char* drmKey, FFstrbuf* buffer)
{
#if FF_HAVE_DRM
    ffStrbufSetS(buffer, "/dev/dri/");
    ffStrbufAppendS(buffer, drmKey);
    FF_AUTO_CLOSE_FD int fd = open(buffer->chars, O_RDONLY);
    if (fd < 0) return "Failed to open drm device";

    if (ffStrbufEqualS(&gpu->driver, "xe"))
    {
        // EU count: number of DSS multiplied by the number of EUs per DSS
        {
            struct drm_xe_device_query query = {
                .extensions = 0,
                .query = DRM_XE_DEVICE_QUERY_GT_TOPOLOGY,
                .size = 0,
                .data = 0,
            };
            // First call with size == 0 only asks for the required buffer size
            if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) >= 0 && query.size > 0)
            {
                FF_AUTO_FREE uint8_t* topoData = malloc(query.size);
                if (topoData)
                {
                    query.data = (uintptr_t) topoData;
                    if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) >= 0)
                    {
                        int dssCount = 0, euPerDssCount = 0;
                        // The result is a sequence of variable-length records;
                        // each record is followed directly by the next one.
                        for (struct drm_xe_query_topology_mask* topo = (void*) topoData;
                            (uint8_t*) topo < topoData + query.size;
                            topo = (void*) (topo->mask + topo->num_bytes)
                        ) {
                            switch (topo->type)
                            {
                                case DRM_XE_TOPO_DSS_COMPUTE:
                                case DRM_XE_TOPO_DSS_GEOMETRY:
                                    dssCount += popcountBytes(topo->mask, topo->num_bytes);
                                    break;
                                case DRM_XE_TOPO_EU_PER_DSS:
                                    euPerDssCount += popcountBytes(topo->mask, topo->num_bytes);
                                    break;
                            }
                        }
                        gpu->coreCount = dssCount * euPerDssCount;
                    }
                }
            }
        }

        // Memory: sum up all system (shared) and VRAM (dedicated) regions
        {
            struct drm_xe_device_query query = {
                .query = DRM_XE_DEVICE_QUERY_MEM_REGIONS,
            };
            if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) >= 0 && query.size > 0)
            {
                FF_AUTO_FREE uint8_t* regionData = malloc(query.size);
                if (regionData)
                {
                    query.data = (uintptr_t) regionData;
                    if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) >= 0)
                    {
                        gpu->dedicated.total = gpu->shared.total = gpu->dedicated.used = gpu->shared.used = 0;
                        struct drm_xe_query_mem_regions* regionInfo = (void*) regionData;
                        for (uint32_t i = 0; i < regionInfo->num_mem_regions; i++)
                        {
                            struct drm_xe_mem_region* region = regionInfo->mem_regions + i;
                            switch (region->mem_class)
                            {
                                case DRM_XE_MEM_REGION_CLASS_SYSMEM:
                                    gpu->shared.total += region->total_size;
                                    gpu->shared.used += region->used;
                                    break;
                                case DRM_XE_MEM_REGION_CLASS_VRAM:
                                    gpu->dedicated.total += region->total_size;
                                    gpu->dedicated.used += region->used;
                                    break;
                            }
                        }
                    }
                }
            }
        }
    }
    else if (ffStrbufEqualS(&gpu->driver, "i915"))
    {
        // EU count is available directly as a driver parameter
        {
            int value;
            drm_i915_getparam_t getparam = { .param = I915_PARAM_EU_TOTAL, .value = &value };
            if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &getparam) >= 0)
                gpu->coreCount = value;
        }

        // Memory: sum up all system (shared) and device (dedicated) regions
        {
            struct drm_i915_query_item queryItem = {
                .query_id = DRM_I915_QUERY_MEMORY_REGIONS,
            };
            struct drm_i915_query query = {
                .items_ptr = (uintptr_t) &queryItem,
                .num_items = 1,
            };
            // The ioctl itself can succeed while the item failed: per-item
            // errors are reported as a negative value in queryItem.length.
            // Only a positive length is a usable buffer size / valid result.
            if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query) >= 0 && queryItem.length > 0)
            {
                FF_AUTO_FREE uint8_t* regionData = calloc(1, (size_t) queryItem.length);
                if (regionData)
                {
                    queryItem.data_ptr = (uintptr_t) regionData;
                    if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query) >= 0 && queryItem.length > 0)
                    {
                        gpu->dedicated.total = gpu->shared.total = gpu->dedicated.used = gpu->shared.used = 0;
                        struct drm_i915_query_memory_regions* regionInfo = (void*) regionData;
                        for (uint32_t i = 0; i < regionInfo->num_regions; i++)
                        {
                            struct drm_i915_memory_region_info* region = regionInfo->regions + i;
                            switch (region->region.memory_class)
                            {
                                case I915_MEMORY_CLASS_SYSTEM:
                                    gpu->shared.total += region->probed_size;
                                    gpu->shared.used += region->probed_size - region->unallocated_size;
                                    break;
                                case I915_MEMORY_CLASS_DEVICE:
                                    gpu->dedicated.total += region->probed_size;
                                    gpu->dedicated.used += region->probed_size - region->unallocated_size;
                                    break;
                            }
                        }
                    }
                }
            }
        }
    }
    return NULL;
#else
    return "Fastfetch is not compiled with drm support";
#endif
}
408+
255409
static const char* detectPci(const FFGPUOptions* options, FFlist* gpus, FFstrbuf* buffer, FFstrbuf* deviceDir, const char* drmKey)
256410
{
257411
const uint32_t drmDirPathLength = deviceDir->length;
@@ -300,6 +454,28 @@ static const char* detectPci(const FFGPUOptions* options, FFlist* gpus, FFstrbuf
300454
gpu->deviceId = (pciDomain * 100000ull) + (pciBus * 1000ull) + (pciDevice * 10ull) + pciFunc;
301455
gpu->frequency = FF_GPU_FREQUENCY_UNSET;
302456

457+
char drmKeyBuffer[8];
458+
if (options->driverSpecific && !drmKey)
459+
{
460+
ffStrbufAppendS(deviceDir, "/drm");
461+
FF_AUTO_CLOSE_DIR DIR* dirp = opendir(deviceDir->chars);
462+
if (dirp)
463+
{
464+
struct dirent* entry;
465+
while ((entry = readdir(dirp)) != NULL)
466+
{
467+
if (ffStrStartsWith(entry->d_name, "card"))
468+
{
469+
strncpy(drmKeyBuffer, entry->d_name, sizeof(drmKeyBuffer) - 1);
470+
drmKeyBuffer[sizeof(drmKeyBuffer) - 1] = '\0';
471+
drmKey = drmKeyBuffer;
472+
break;
473+
}
474+
}
475+
}
476+
ffStrbufSubstrBefore(deviceDir, drmDirPathLength);
477+
}
478+
303479
if (drmKey) ffStrbufSetF(&gpu->platformApi, "DRM (%s)", drmKey);
304480

305481
pciDetectDriver(&gpu->driver, deviceDir, buffer, drmKey);
@@ -337,8 +513,10 @@ static const char* detectPci(const FFGPUOptions* options, FFlist* gpus, FFstrbuf
337513
}
338514
else if (gpu->vendor.chars == FF_GPU_VENDOR_NAME_INTEL)
339515
{
340-
pciDetectIntelSpecific(gpu, deviceDir, buffer);
516+
pciDetectIntelSpecific(gpu, deviceDir, buffer, drmKey);
341517
ffStrbufSubstrBefore(deviceDir, drmDirPathLength);
518+
if (options->driverSpecific && drmKey)
519+
drmDetectIntelSpecific(gpu, drmKey, buffer);
342520
}
343521
else
344522
{

0 commit comments

Comments
 (0)