diff --git a/.gitignore b/.gitignore
index 24bdea7..758e24a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,8 @@ testfiles/renderer-output/
 testfiles/testpdfio
 testfiles/test-codes/
 source/tools/pdf2cairo/pdf2cairo
+
+out.png
+out.jpg
+debug.jpg
+debug.png
diff --git a/source/cairo/cairo-device-private.h b/source/cairo/cairo-device-private.h
index 7841a1e..b505981 100644
--- a/source/cairo/cairo-device-private.h
+++ b/source/cairo/cairo-device-private.h
@@ -67,4 +67,8 @@ void device_show_text(p2c_device_t *dev, const char *str);
 void device_show_text_kerning(p2c_device_t *dev, operand_t *operands, int num_operands);
 void device_set_text_rendering_mode(p2c_device_t *dev, int mode);
 void device_get_current_point(p2c_device_t *dev, double *x, double *y);
+
+// --- Image/XObject Rendering ---
+void device_draw_image(p2c_device_t *dev, pdfio_obj_t *xobj);
+
 #endif // CAIRO_DEVICE_PRIVATE_H
diff --git a/source/cairo/cairo-device.c b/source/cairo/cairo-device.c
index 54aaabd..e962e87 100644
--- a/source/cairo/cairo-device.c
+++ b/source/cairo/cairo-device.c
@@ -7,6 +7,154 @@
 //
 
 #include "cairo-private.h"
+#include <setjmp.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <jpeglib.h> // After <stdio.h>: jpeglib.h relies on FILE and size_t
+
+// libjpeg error manager extended with a jump target, so that a fatal
+// decoder error returns to decode_jpeg_surface() through longjmp().
+typedef struct p2c_jpeg_error_s
+{
+  struct jpeg_error_mgr pub; // First member: cinfo->err is cast back to this struct
+  jmp_buf jmp;               // Set by setjmp() in decode_jpeg_surface()
+} p2c_jpeg_error_t;
+
+// Key under which a decoded pixel buffer is attached to its surface
+static cairo_user_data_key_t p2c_surface_data_key;
+
+//
+// 'p2c_jpeg_error_exit()' - Emit the libjpeg message and jump back to the decoder.
+//
+
+static void
+p2c_jpeg_error_exit(j_common_ptr cinfo)
+{
+  p2c_jpeg_error_t *error = (p2c_jpeg_error_t *)cinfo->err;
+
+  (*cinfo->err->output_message)(cinfo);
+  longjmp(error->jmp, 1);
+}
+
+//
+// 'decode_jpeg_surface()' - Decode in-memory JPEG data into a Cairo surface.
+//
+// Returns a CAIRO_FORMAT_RGB24 image surface that owns its pixel buffer, or
+// NULL when the data is empty or cannot be decoded, memory runs out, or the
+// decoded image has neither 1 (gray) nor 3 (RGB) components per pixel.
+//
+
+static cairo_surface_t *
+decode_jpeg_surface(const unsigned char *data, size_t length)
+{
+  struct jpeg_decompress_struct cinfo;
+  p2c_jpeg_error_t jerr;
+  cairo_surface_t *surface;
+  // Assigned after setjmp() and read in its error branch, so both must be
+  // volatile to hold defined values once longjmp() has been taken.
+  unsigned char *volatile surface_data = NULL;
+  JSAMPLE *volatile buffer = NULL;
+  int stride;
+  int width, height, components;
+
+  if (!data || length == 0)
+    return (NULL);
+
+  cinfo.err = jpeg_std_error(&jerr.pub);
+  jerr.pub.error_exit = p2c_jpeg_error_exit;
+
+  if (setjmp(jerr.jmp))
+  {
+    // Reached through p2c_jpeg_error_exit() when a libjpeg call below fails.
+    jpeg_destroy_decompress(&cinfo);
+    free(buffer);
+    free(surface_data);
+    return (NULL);
+  }
+
+  jpeg_create_decompress(&cinfo);
+  jpeg_mem_src(&cinfo, (unsigned char *)data, (unsigned long)length);
+  jpeg_read_header(&cinfo, TRUE);
+  jpeg_start_decompress(&cinfo);
+
+  width = (int)cinfo.output_width;
+  height = (int)cinfo.output_height;
+  components = cinfo.output_components;
+  stride = cairo_format_stride_for_width(CAIRO_FORMAT_RGB24, width);
+
+  // The conversion loop below only handles gray and RGB scanlines.
+  if (stride <= 0 || height <= 0 || (components != 1 && components != 3))
+  {
+    jpeg_destroy_decompress(&cinfo);
+    return (NULL);
+  }
+
+  surface_data = calloc((size_t)height, (size_t)stride);
+  buffer = malloc((size_t)width * (size_t)components);
+  if (!surface_data || !buffer)
+  {
+    jpeg_destroy_decompress(&cinfo);
+    free(buffer);
+    free(surface_data);
+    return (NULL);
+  }
+
+  while (cinfo.output_scanline < cinfo.output_height)
+  {
+    // Take the row address before reading, which advances output_scanline.
+    uint32_t *dst = (uint32_t *)(surface_data + (size_t)stride * cinfo.output_scanline);
+    const JSAMPLE *px = buffer;
+    JSAMPROW src = buffer;
+
+    jpeg_read_scanlines(&cinfo, &src, 1);
+
+    for (int x = 0; x < width; x ++)
+    {
+      uint32_t r, g, b;
+
+      if (components == 3)
+      {
+        r = px[0];
+        g = px[1];
+        b = px[2];
+      }
+      else
+      {
+        r = g = b = px[0];
+      }
+
+      // RGB24 pixels are native-endian 32-bit words, so assemble the word
+      // rather than storing bytes in a fixed order.
+      dst[x] = 0xff000000u | (r << 16) | (g << 8) | b;
+      px += components;
+    }
+  }
+
+  jpeg_finish_decompress(&cinfo);
+  jpeg_destroy_decompress(&cinfo);
+  free(buffer);
+
+  surface = cairo_image_surface_create_for_data(surface_data,
+                                                CAIRO_FORMAT_RGB24,
+                                                width,
+                                                height,
+                                                stride);
+  if (cairo_surface_status(surface) != CAIRO_STATUS_SUCCESS ||
+      cairo_surface_set_user_data(surface, &p2c_surface_data_key,
+                                  surface_data, free) != CAIRO_STATUS_SUCCESS)
+  {
+    // The buffer was never handed to the surface, so release it here.
+    cairo_surface_destroy(surface);
+    free(surface_data);
+    return (NULL);
+  }
+
+  cairo_surface_mark_dirty(surface);
+
+  return (surface);
+}
 
 // --- Device LifeCycle Functions ---
 
@@ -90,6 +238,167 @@ device_create(pdfrip_page_t *page, // I - Data related to PDF page
   return (dev);
 }
 
+//
+// 'read_image_stream()' - Read everything left in a PDF stream into memory.
+//
+// Returns 1 with the malloc'd bytes in '*data' (NULL for an empty stream) and
+// their count in '*length', or 0 after reporting a read or allocation
+// failure.  The caller frees '*data' and closes the stream.
+//
+
+static int
+read_image_stream(pdfio_stream_t *stream, unsigned char **data, size_t *length)
+{
+  unsigned char *bytes = NULL;
+  size_t used = 0, capacity = 0;
+
+  *data = NULL;
+  *length = 0;
+
+  for (;;)
+  {
+    unsigned char buffer[8192];
+    ssize_t count = pdfioStreamRead(stream, buffer, sizeof(buffer));
+
+    if (count < 0)
+    {
+      fprintf(stderr, "ERROR: Cannot read image stream\n");
+      free(bytes);
+      return (0);
+    }
+
+    if (count == 0)
+      break;
+
+    if (used + (size_t)count > capacity)
+    {
+      size_t new_capacity = capacity ? capacity * 2 : 8192;
+      unsigned char *temp;
+
+      while (new_capacity < used + (size_t)count)
+        new_capacity *= 2;
+
+      temp = realloc(bytes, new_capacity);
+      if (!temp)
+      {
+        fprintf(stderr, "ERROR: Out of memory reading image stream\n");
+        free(bytes);
+        return (0);
+      }
+
+      bytes = temp;
+      capacity = new_capacity;
+    }
+
+    memcpy(bytes + used, buffer, (size_t)count);
+    used += (size_t)count;
+  }
+
+  *data = bytes;
+  *length = used;
+
+  return (1);
+}
+
+//
+// 'device_draw_image()' - Paint an image XObject onto the device.
+//
+// Only images whose Filter name is DCTDecode (JPEG) are drawn; they are
+// mapped onto the unit square of the current user space.  Any other image is
+// read, reported as unsupported and skipped.
+//
+
+void
+device_draw_image(p2c_device_t *dev, pdfio_obj_t *xobj)
+{
+  pdfio_dict_t *dict = pdfioObjGetDict(xobj);
+  pdfio_stream_t *stream;
+  cairo_surface_t *image;
+  unsigned char *data = NULL;
+  size_t length = 0;
+  const char *colorspace, *filter;
+  int width, height, bpc;
+  int image_width, image_height;
+  int is_jpeg, ok;
+  FILE *f;
+
+  if (!dict)
+  {
+    fprintf(stderr, "ERROR: Image XObject has no dictionary\n");
+    return;
+  }
+
+  // These dictionary values are only reported; drawing uses the decoded size.
+  width = (int)pdfioDictGetNumber(dict, "Width");
+  height = (int)pdfioDictGetNumber(dict, "Height");
+  bpc = (int)pdfioDictGetNumber(dict, "BitsPerComponent");
+  colorspace = pdfioDictGetName(dict, "ColorSpace");
+  filter = pdfioDictGetName(dict, "Filter");
+  is_jpeg = filter && !strcmp(filter, "DCTDecode");
+
+  fprintf(stderr, "DEBUG: Image %dx%d, bpc=%d, cs=%s, filter=%s\n",
+          width, height, bpc, colorspace ? colorspace : "NULL",
+          filter ? filter : "NONE");
+
+  // JPEG data is read undecoded so that libjpeg can decode it below.
+  stream = pdfioObjOpenStream(xobj, !is_jpeg);
+  if (!stream)
+  {
+    fprintf(stderr, "ERROR: Cannot open image stream\n");
+    return;
+  }
+
+  ok = read_image_stream(stream, &data, &length);
+  pdfioStreamClose(stream);
+  if (!ok)
+    return;
+
+  fprintf(stderr, "DEBUG: Got %zu bytes\n", length);
+
+  if (!is_jpeg)
+  {
+    fprintf(stderr, "WARNING: Unsupported image format\n");
+    free(data);
+    return;
+  }
+
+  // Temporary debug path for raw JPEG image data.
+  f = fopen("debug.jpg", "wb");
+  if (f)
+  {
+    fwrite(data, 1, length, f);
+    fclose(f);
+    fprintf(stderr, "DEBUG: Saved image as debug.jpg\n");
+  }
+
+  image = decode_jpeg_surface(data, length);
+  free(data); // The surface keeps its own pixels; the encoded bytes are done
+  if (!image)
+  {
+    fprintf(stderr, "ERROR: Unable to decode JPEG image\n");
+    return;
+  }
+
+  image_width = cairo_image_surface_get_width(image);
+  image_height = cairo_image_surface_get_height(image);
+
+  if (image_width > 0 && image_height > 0)
+  {
+    cairo_save(dev->cr);
+    // Flip vertically and shrink so the image's pixel grid, whose first row
+    // is the top one, covers the unit square.
+    cairo_translate(dev->cr, 0.0, 1.0);
+    cairo_scale(dev->cr, 1.0 / image_width, -1.0 / image_height);
+    cairo_set_source_surface(dev->cr, image, 0.0, 0.0);
+    cairo_pattern_set_filter(cairo_get_source(dev->cr), CAIRO_FILTER_BEST);
+    cairo_rectangle(dev->cr, 0.0, 0.0, image_width, image_height);
+    cairo_fill(dev->cr);
+    cairo_restore(dev->cr);
+  }
+
+  cairo_surface_destroy(image);
+}
+
 //
 // 'device_destroy()' - frees all allocated resources
 //
diff --git a/source/pdf/parser.c b/source/pdf/parser.c
index d9a0d04..a499058 100644
--- a/source/pdf/parser.c
+++ b/source/pdf/parser.c
@@ -575,6 +575,103 @@ handle_Tr(p2c_device_t *dev,
   }
 }
 
+//
+// 'process_form_xobject()' - Interpret the content of a form XObject.
+//
+// The XObject is wrapped in a temporary page record that carries the form's
+// own Resources dictionary and is handed to process_content_stream().
+//
+// NOTE(review): process_content_stream() opens its input with
+// pdfioPageOpenStream(); confirm that call can open the stream of an object
+// that is not a page.
+//
+
+void
+process_form_xobject(p2c_device_t *dev, pdfio_obj_t *xobj)
+{
+  pdfio_dict_t *dict = pdfioObjGetDict(xobj);
+
+  if (!dict)
+  {
+    fprintf(stderr, "ERROR: Form XObject has no dictionary\n");
+    return;
+  }
+
+  // No stream is opened here: process_content_stream() opens what it reads
+  // and closes its own stream before an XObject is handled.
+  pdfrip_page_t fake_page = {
+    .object = xobj,
+    .resources_dict = pdfioDictGetDict(dict, "Resources"),
+    .num_streams = 1
+  };
+
+  process_content_stream(dev, &fake_page);
+}
+
+//
+// 'handle_Do()' - Paint the XObject named by the single operand.
+//
+// Image XObjects go to device_draw_image() and form XObjects to
+// process_form_xobject(); any other subtype is skipped.
+//
+
+static void
+handle_Do(p2c_device_t *dev, pdfio_dict_t *resources)
+{
+  const char *name, *subtype;
+  pdfio_dict_t *xobject_dict;
+  pdfio_obj_t *xobj;
+
+  if (operand_stack_ptr != 1 || operand_stack[0].type != OP_TYPE_NAME)
+    return;
+
+  // Skip the first character of the stored name (presumably the leading '/').
+  name = operand_stack[0].value.name + 1;
+
+  if (g_verbose)
+    fprintf(stderr, "DEBUG: Operator Do (XObject): %s\n", name);
+
+  xobject_dict = pdfioDictGetDict(resources, "XObject");
+  if (!xobject_dict)
+  {
+    fprintf(stderr, "ERROR: No XObject dictionary\n");
+    return;
+  }
+
+  xobj = pdfioDictGetObj(xobject_dict, name);
+  if (!xobj)
+  {
+    fprintf(stderr, "ERROR: XObject %s not found\n", name);
+    return;
+  }
+
+  subtype = pdfioDictGetName(pdfioObjGetDict(xobj), "Subtype");
+  if (!subtype)
+  {
+    fprintf(stderr, "ERROR: Missing subtype\n");
+    return;
+  }
+
+  if (!strcmp(subtype, "Image"))
+  {
+    if (g_verbose)
+      fprintf(stderr, "DEBUG: Rendering Image XObject\n");
+
+    device_draw_image(dev, xobj);
+  }
+  else if (!strcmp(subtype, "Form"))
+  {
+    if (g_verbose)
+      fprintf(stderr, "DEBUG: Rendering Form XObject\n");
+
+    process_form_xobject(dev, xobj);
+  }
+  else if (g_verbose)
+  {
+    fprintf(stderr, "DEBUG: Unsupported XObject subtype: %s\n", subtype);
+  }
+}
+
 // --- Dispatch Table and Logic ---
 
 // type for our handler functions
@@ -595,6 +692,7 @@ static const pdf_operator_t operator_table[] =
   {"B*", handle_B_star},
   {"BT", handle_BT},
   {"CS", handle_CS},
+  {"Do", handle_Do},
   {"ET", handle_ET},
   {"G", handle_G},
   {"K", handle_K},
@@ -645,6 +743,34 @@ compare_operators(const void *a,
   return strcmp(token, op->name);
 }
 
+//
+// 'reopen_page_stream_at_token()' - Reopen a content stream and skip tokens.
+//
+// Returns the stream positioned after its first 'token_count' tokens, or
+// NULL when it cannot be opened or holds fewer tokens than that.
+//
+
+static pdfio_stream_t *
+reopen_page_stream_at_token(pdfio_obj_t *page, size_t stream_index, size_t token_count)
+{
+  pdfio_stream_t *st = pdfioPageOpenStream(page, stream_index, true);
+  char token[1024];
+
+  if (!st)
+    return (NULL);
+
+  for (; token_count > 0; token_count --)
+  {
+    if (!pdfioStreamGetToken(st, token, sizeof(token)))
+    {
+      pdfioStreamClose(st);
+      return (NULL);
+    }
+  }
+
+  return (st);
+}
+
 void
 process_content_stream(p2c_device_t *dev,
                        pdfrip_page_t *page_data)
@@ -657,8 +783,13 @@ process_content_stream(p2c_device_t *dev,
   for(size_t i=0; i<page_data->num_streams; i++)
   {
     pdfio_stream_t *st = pdfioPageOpenStream(page_data->object, i, true);
+    // Tokens read from this stream so far; used to resume after a Do operator
+    size_t token_count = 0;
+
     while (pdfioStreamGetToken(st, token, sizeof(token)))
     {
+      token_count ++;
+
       //fprintf(stderr, "DEBUG: Token: '%s'\n", token);
       if (isdigit(token[0]) || token[0] == '-' || token[0] == '+' || token[0] == '.')
       {
@@ -717,19 +848,42 @@ process_content_stream(p2c_device_t *dev,
         //fprintf(stderr, "hello DEBUG: Token: '%s'\n", token);
         const pdf_operator_t *op = bsearch(token, operator_table, operator_table_size,
                                            sizeof(pdf_operator_t), compare_operators);
         if (op)
         {
-          op->handler(dev, page_data->resources_dict);
+          if (!strcmp(token, "Do"))
+          {
+            // The Do handler opens streams of its own, so close this one
+            // first and resume afterwards from the same token position.
+            pdfioStreamClose(st);
+            st = NULL;
+
+            op->handler(dev, page_data->resources_dict);
+
+            st = reopen_page_stream_at_token(page_data->object, i, token_count);
+            if (!st)
+            {
+              fprintf(stderr, "ERROR: Unable to resume page content stream after Do\n");
+              operand_stack_ptr = 0;
+              return;
+            }
+          }
+          else
+          {
+            op->handler(dev, page_data->resources_dict);
+          }
         }
         else
         {
           if (g_verbose)
             printf("DEBUG: Unhandled operator: %s\n", token);
         }
 
         // Clear the operand stack for the next command
         operand_stack_ptr = 0;
       }
     }
+
+    if (st)
+      pdfioStreamClose(st);
   }
 }
diff --git a/testfiles/input/xobject/two-images.pdf b/testfiles/input/xobject/two-images.pdf
new file mode 100644
index 0000000..e6bf9e8
Binary files /dev/null and b/testfiles/input/xobject/two-images.pdf differ