Skip to content

Commit 5dfd267

Browse files
committed
Binary: add comments
1 parent d650f67 commit 5dfd267

File tree

4 files changed

+108
-0
lines changed

4 files changed

+108
-0
lines changed

src/util/binary.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,17 @@
22

33
#include "fastfetch.h"
44

5+
/**
6+
* Extracts string literals from a binary file
7+
*
8+
* @param file Path to the binary file to extract strings from
9+
* @param cb Callback function that will be called for each string found
10+
* Return false from callback to stop extraction
11+
* @param userdata User-provided data passed to the callback function
12+
* @param minLength Minimum length of strings to extract
13+
*
14+
* @return NULL on success, error message on failure.
15+
* @note This function won't return an error if no strings are found.
16+
* Always check if strings are correctly extracted after this function call.
17+
*/
518
const char* ffBinaryExtractStrings(const char* file, bool (*cb)(const char* str, uint32_t len, void* userdata), void* userdata, uint32_t minLength);

src/util/binary_apple.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,28 @@
1414

1515
// Ref: https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
1616

17+
/**
 * Helper function to read data from a file at a specific offset
 *
 * @param objFile File handle to read from
 * @param buf Destination buffer; must have room for at least `size` bytes
 * @param size Number of bytes to read
 * @param offset Absolute offset (from the start of the file) to read at
 *
 * @return true only if the seek succeeded and exactly `size` bytes were read
 */
static inline bool readData(FILE *objFile, void *buf, size_t size, off_t offset)
{
    // A failed seek leaves the stream at its previous position; reading from
    // there would silently return bytes from the wrong place, so bail out.
    if (fseek(objFile, (long) offset, SEEK_SET) != 0)
        return false;

    return fread(buf, 1, size, objFile) == size;
}
2225

26+
/**
27+
* Handles a Mach-O section by extracting strings from the __cstring section
28+
*
29+
* @param objFile File handle to the Mach-O object file
30+
* @param name Section name to check
31+
* @param offset Offset of the section in the file
32+
* @param size Size of the section
33+
* @param cb Callback function to process strings
34+
* @param userdata User data for the callback
35+
* @param minLength Minimum string length to extract
36+
*
37+
* @return true to continue processing, false to stop
38+
*/
2339
static bool handleMachSection(FILE *objFile, const char *name, off_t offset, size_t size, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
2440
{
2541
if (!ffStrEquals(name, "__cstring")) return true;
@@ -43,6 +59,22 @@ static bool handleMachSection(FILE *objFile, const char *name, off_t offset, siz
4359
return true;
4460
}
4561

62+
/**
63+
* Processes a Mach-O header (32-bit or 64-bit)
64+
*
65+
* This function parses the load commands in a Mach-O header, looking for
66+
* LC_SEGMENT or LC_SEGMENT_64 commands that contain the __TEXT segment.
67+
* It then processes the sections within that segment to extract strings.
68+
*
69+
* @param objFile File handle to the Mach-O object file
70+
* @param offset Offset of the Mach header in the file
71+
* @param is_64 Whether this is a 64-bit Mach-O header
72+
* @param cb Callback function to process strings
73+
* @param userdata User data for the callback
74+
* @param minLength Minimum string length to extract
75+
*
76+
* @return NULL on success, error message on failure
77+
*/
4678
static const char* dumpMachHeader(FILE *objFile, off_t offset, bool is_64, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
4779
{
4880
uint32_t ncmds;
@@ -117,6 +149,20 @@ static const char* dumpMachHeader(FILE *objFile, off_t offset, bool is_64, bool
117149
return NULL;
118150
}
119151

152+
/**
153+
* Processes a Fat binary header (Universal binary)
154+
*
155+
* This function handles the fat header of a universal binary, which can contain
156+
* multiple Mach-O binaries for different architectures. It extracts and processes
157+
* each embedded Mach-O file.
158+
*
159+
* @param objFile File handle to the universal binary
160+
* @param cb Callback function to process strings
161+
* @param userdata User data for the callback
162+
* @param minLength Minimum string length to extract
163+
*
164+
* @return NULL on success, error message on failure
165+
*/
120166
static const char* dumpFatHeader(FILE *objFile, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
121167
{
122168
struct fat_header header;
@@ -165,21 +211,32 @@ static const char* dumpFatHeader(FILE *objFile, bool (*cb)(const char *str, uint
165211
return "Unsupported fat header";
166212
}
167213

214+
/**
215+
* Extracts string literals from a Mach-O (Apple) binary file
216+
*
217+
* This function supports both single-architecture Mach-O files and
218+
* universal binaries (fat binaries) containing multiple architectures.
219+
* It locates the __cstring section in the __TEXT segment which contains
220+
* the string literals used in the program.
221+
*/
168222
const char *ffBinaryExtractStrings(const char *machoFile, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
169223
{
170224
FF_AUTO_CLOSE_FILE FILE *objFile = fopen(machoFile, "rb");
171225
if (objFile == NULL)
172226
return "File could not be opened";
173227

228+
// Read the magic number to determine the type of binary
174229
uint32_t magic;
175230
if (!readData(objFile, &magic, sizeof(magic), 0))
176231
return "read magic number failed";
177232

233+
// Check for supported formats
178234
// MH_CIGAM and MH_CIGAM_64 seem to be no longer used, as `swap_mach_header` is marked as deprecated.
179235
// However FAT_CIGAM and FAT_CIGAM_64 are still used (/usr/bin/vim).
180236
if (magic != MH_MAGIC && magic != MH_MAGIC_64 && magic != FAT_CIGAM && magic != FAT_CIGAM_64 && magic != FAT_MAGIC && magic != FAT_MAGIC_64)
181237
return "Unsupported format or big endian mach-o file";
182238

239+
// Process either a fat binary or a regular Mach-O binary
183240
if (magic == FAT_MAGIC || magic == FAT_MAGIC_64 || magic == FAT_CIGAM || magic == FAT_CIGAM_64)
184241
return dumpFatHeader(objFile, cb, userdata, minLength);
185242
else

src/util/binary_linux.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
#include <libelf.h> // #1254
1010
#include <fcntl.h>
1111

12+
/**
13+
* Structure to hold dynamically loaded libelf function pointers
14+
*/
1215
struct FFElfData {
1316
FF_LIBRARY_SYMBOL(elf_version)
1417
FF_LIBRARY_SYMBOL(elf_begin)
@@ -23,15 +26,27 @@ struct FFElfData {
2326
bool inited;
2427
} elfData;
2528

29+
/**
30+
* Extracts string literals from an ELF (Linux/Unix) binary file
31+
*
32+
* This function loads the libelf library dynamically, opens the ELF file,
33+
* locates the .rodata section (which contains string literals), and
34+
* scans it for valid strings. Each string found is passed to the
35+
* callback function for processing.
36+
*
37+
* The function supports both 32-bit and 64-bit ELF formats.
38+
*/
2639
const char* ffBinaryExtractStrings(const char* elfFile, bool (*cb)(const char* str, uint32_t len, void* userdata), void* userdata, uint32_t minLength)
2740
{
41+
// Initialize libelf if not already done
2842
if (!elfData.inited)
2943
{
3044
elfData.inited = true;
3145
FF_LIBRARY_LOAD(libelf, "dlopen libelf" FF_LIBRARY_EXTENSION " failed", "libelf" FF_LIBRARY_EXTENSION, 1);
3246
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libelf, elfData, elf_version)
3347
if (elfData.ffelf_version(EV_CURRENT) == EV_NONE) return "elf_version() failed";
3448

49+
// Load all required libelf functions
3550
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libelf, elfData, elf_begin)
3651
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libelf, elfData, elf_getshdrstrndx)
3752
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libelf, elfData, elf_nextscn)
@@ -47,22 +62,26 @@ const char* ffBinaryExtractStrings(const char* elfFile, bool (*cb)(const char* s
4762
if (elfData.ffelf_end == NULL)
4863
return "load libelf failed";
4964

65+
// Open the ELF file
5066
FF_AUTO_CLOSE_FD int fd = open(elfFile, O_RDONLY, 0);
5167
if (fd < 0) return "open() failed";
5268

5369
Elf* elf = elfData.ffelf_begin(fd, ELF_C_READ, NULL);
5470
if (elf == NULL) return "elf_begin() failed";
5571

72+
// Get the section header string table index
5673
size_t shstrndx = 0;
5774
if (elfData.ffelf_getshdrstrndx(elf, &shstrndx) < 0)
5875
{
5976
elfData.ffelf_end(elf);
6077
return "elf_getshdrstrndx() failed";
6178
}
6279

80+
// Iterate through all sections, looking for .rodata which contains string literals
6381
Elf_Scn* scn = NULL;
6482
while ((scn = elfData.ffelf_nextscn(elf, scn)) != NULL)
6583
{
84+
// Try 64-bit section header first, then 32-bit if that fails
6685
Elf64_Shdr* shdr64 = elfData.ffelf64_getshdr(scn);
6786
Elf32_Shdr* shdr32 = NULL;
6887
if (shdr64 == NULL)
@@ -71,18 +90,22 @@ const char* ffBinaryExtractStrings(const char* elfFile, bool (*cb)(const char* s
7190
if (shdr32 == NULL) continue;
7291
}
7392

93+
// Get the section name and check if it's .rodata
7494
const char* name = elfData.ffelf_strptr(elf, shstrndx, shdr64 ? shdr64->sh_name : shdr32->sh_name);
7595
if (name == NULL || !ffStrEquals(name, ".rodata")) continue;
7696

97+
// Get the section data
7798
Elf_Data* data = elfData.ffelf_getdata(scn, NULL);
7899
if (data == NULL) continue;
79100

101+
// Scan the section for string literals
80102
for (size_t off = 0; off < data->d_size; ++off)
81103
{
82104
const char* p = (const char*) data->d_buf + off;
83105
if (*p == '\0') continue;
84106
uint32_t len = (uint32_t) strlen(p);
85107
if (len < minLength) continue;
108+
// Only pass strings whose first character is printable ASCII to the callback
86109
if (*p >= ' ' && *p <= '~') // Ignore control characters
87110
{
88111
if (!cb(p, len, userdata)) break;
@@ -99,6 +122,9 @@ const char* ffBinaryExtractStrings(const char* elfFile, bool (*cb)(const char* s
99122

100123
#else
101124

125+
/**
126+
* Fallback implementation when libelf is not available
127+
*/
102128
const char* ffBinaryExtractStrings(const char* file, bool (*cb)(const char* str, uint32_t len, void* userdata), void* userdata, uint32_t minLength)
103129
{
104130
FF_UNUSED(file, cb, userdata, minLength);

src/util/binary_windows.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,37 @@
88
#include <stdlib.h>
99
#include <string.h>
1010

11+
/**
12+
* Extracts string literals from a PE (Windows) executable
13+
*
14+
* This function maps the PE file into memory, locates the .rdata section
15+
* (which typically contains string literals), and scans it for valid strings.
16+
* Each string found is passed to the callback function for processing.
17+
*/
1118
const char* ffBinaryExtractStrings(const char *peFile, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
1219
{
20+
// Use MapAndLoad with cleanup attribute to ensure proper unloading
1321
__attribute__((__cleanup__(UnMapAndLoad))) LOADED_IMAGE loadedImage = {};
1422
if (!MapAndLoad(peFile, NULL, &loadedImage, FALSE, TRUE))
1523
return "File could not be loaded";
1624

25+
// Iterate through all sections in the PE file
1726
for (ULONG i = 0; i < loadedImage.NumberOfSections; ++i)
1827
{
1928
PIMAGE_SECTION_HEADER section = &loadedImage.Sections[i];
29+
// Look for initialized data sections with the name ".rdata" which typically contains string literals
2030
if ((section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) && ffStrEquals((const char*) section->Name, ".rdata"))
2131
{
2232
uint8_t *data = (uint8_t *) loadedImage.MappedAddress + section->PointerToRawData;
2333

34+
// Scan the section for string literals
2435
for (size_t off = 0; off < section->SizeOfRawData; ++off)
2536
{
2637
const char* p = (const char*) data + off;
2738
if (*p == '\0') continue;
2839
uint32_t len = (uint32_t) strlen(p);
2940
if (len < minLength) continue;
41+
// Only pass strings whose first character is printable ASCII to the callback
3042
if (*p >= ' ' && *p <= '~') // Ignore control characters
3143
{
3244
if (!cb(p, len, userdata)) break;

0 commit comments

Comments
 (0)