1414
1515// Ref: https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
1616
/**
 * Helper function to read exactly `size` bytes from a file at a specific offset
 *
 * @param objFile Open stream to read from; its file position is changed by this call
 * @param buf Destination buffer, must be able to hold at least `size` bytes
 * @param size Number of bytes to read
 * @param offset Absolute byte offset, measured from the start of the file
 *
 * @return true if the seek succeeded and all `size` bytes were read, false otherwise
 */
static inline bool readData(FILE* objFile, void* buf, size_t size, off_t offset)
{
    // A failed seek leaves the stream at its previous position; reading from
    // there would hand back unrelated bytes, so report failure instead.
    if (fseek(objFile, offset, SEEK_SET) != 0)
        return false;

    return fread(buf, 1, size, objFile) == size;
}
2225
26+ /**
27+ * Handles a Mach-O section by extracting strings from the __cstring section
28+ *
29+ * @param objFile File handle to the Mach-O object file
30+ * @param name Section name to check
31+ * @param offset Offset of the section in the file
32+ * @param size Size of the section
33+ * @param cb Callback function to process strings
34+ * @param userdata User data for the callback
35+ * @param minLength Minimum string length to extract
36+ *
37+ * @return true to continue processing, false to stop
38+ */
2339static bool handleMachSection (FILE * objFile , const char * name , off_t offset , size_t size , bool (* cb )(const char * str , uint32_t len , void * userdata ), void * userdata , uint32_t minLength )
2440{
2541 if (!ffStrEquals (name , "__cstring" )) return true;
@@ -43,6 +59,22 @@ static bool handleMachSection(FILE *objFile, const char *name, off_t offset, siz
4359 return true;
4460}
4561
62+ /**
63+ * Processes a Mach-O header (32-bit or 64-bit)
64+ *
65+ * This function parses the load commands in a Mach-O header, looking for
66+ * LC_SEGMENT or LC_SEGMENT_64 commands that contain the __TEXT segment.
67+ * It then processes the sections within that segment to extract strings.
68+ *
69+ * @param objFile File handle to the Mach-O object file
70+ * @param offset Offset of the Mach header in the file
71+ * @param is_64 Whether this is a 64-bit Mach-O header
72+ * @param cb Callback function to process strings
73+ * @param userdata User data for the callback
74+ * @param minLength Minimum string length to extract
75+ *
76+ * @return NULL on success, error message on failure
77+ */
4678static const char * dumpMachHeader (FILE * objFile , off_t offset , bool is_64 , bool (* cb )(const char * str , uint32_t len , void * userdata ), void * userdata , uint32_t minLength )
4779{
4880 uint32_t ncmds ;
@@ -117,6 +149,20 @@ static const char* dumpMachHeader(FILE *objFile, off_t offset, bool is_64, bool
117149 return NULL ;
118150}
119151
152+ /**
153+ * Processes a Fat binary header (Universal binary)
154+ *
155+ * This function handles the fat header of a universal binary, which can contain
156+ * multiple Mach-O binaries for different architectures. It extracts and processes
157+ * each embedded Mach-O file.
158+ *
159+ * @param objFile File handle to the universal binary
160+ * @param cb Callback function to process strings
161+ * @param userdata User data for the callback
162+ * @param minLength Minimum string length to extract
163+ *
164+ * @return NULL on success, error message on failure
165+ */
120166static const char * dumpFatHeader (FILE * objFile , bool (* cb )(const char * str , uint32_t len , void * userdata ), void * userdata , uint32_t minLength )
121167{
122168 struct fat_header header ;
@@ -165,21 +211,32 @@ static const char* dumpFatHeader(FILE *objFile, bool (*cb)(const char *str, uint
165211 return "Unsupported fat header" ;
166212}
167213
214+ /**
215+ * Extracts string literals from a Mach-O (Apple) binary file
216+ *
217+ * This function supports both single-architecture Mach-O files and
218+ * universal binaries (fat binaries) containing multiple architectures.
219+ * It locates the __cstring section in the __TEXT segment which contains
220+ * the string literals used in the program.
221+ */
168222const char * ffBinaryExtractStrings (const char * machoFile , bool (* cb )(const char * str , uint32_t len , void * userdata ), void * userdata , uint32_t minLength )
169223{
170224 FF_AUTO_CLOSE_FILE FILE * objFile = fopen (machoFile , "rb" );
171225 if (objFile == NULL )
172226 return "File could not be opened" ;
173227
228+ // Read the magic number to determine the type of binary
174229 uint32_t magic ;
175230 if (!readData (objFile , & magic , sizeof (magic ), 0 ))
176231 return "read magic number failed" ;
177232
233+ // Check for supported formats
178234 // MH_CIGAM and MH_CIGAM_64 seem to be no longer used, as `swap_mach_header` is marked as deprecated.
179235 // However FAT_CIGAM and FAT_CIGAM_64 are still used (/usr/bin/vim).
180236 if (magic != MH_MAGIC && magic != MH_MAGIC_64 && magic != FAT_CIGAM && magic != FAT_CIGAM_64 && magic != FAT_MAGIC && magic != FAT_MAGIC_64 )
181237 return "Unsupported format or big endian mach-o file" ;
182238
239+ // Process either a fat binary or a regular Mach-O binary
183240 if (magic == FAT_MAGIC || magic == FAT_MAGIC_64 || magic == FAT_CIGAM || magic == FAT_CIGAM_64 )
184241 return dumpFatHeader (objFile , cb , userdata , minLength );
185242 else
0 commit comments