From 9cf0013afa12e6e7f4648b29965c41d49e1cf73f Mon Sep 17 00:00:00 2001 From: Andreas Gey Date: Fri, 30 Jan 2026 14:47:48 +0100 Subject: [PATCH] Add support for PDF RunLengthDecode filter Implemented RunLengthDecode filter for PDF streams per PDF 1.7 spec. Added a new RunLengthDecode class with a Decode method, registered it in Filtering.cs, and updated GetFilter to return it. This enables decompression of streams using the RunLengthDecode filter, improving PDF compatibility. --- .../src/PdfSharp/Pdf.Filters/Filtering.cs | 4 ++ .../PdfSharp/Pdf.Filters/RunLengthDecode.cs | 69 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/RunLengthDecode.cs diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/Filtering.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/Filtering.cs index 1f48f8f8..2244982b 100644 --- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/Filtering.cs +++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/Filtering.cs @@ -170,6 +170,8 @@ public static Filter GetFilter(string filterName) case PdfFilterNames.RunLengthDecode: + return _runLengthDecode ??= new RunLengthDecode(); + case PdfFilterNames.CcittFaxDecode: case PdfFilterNames.Jbig2Decode: case PdfFilterNames.JpxDecode: @@ -220,6 +222,8 @@ public static AsciiHexDecode AsciiHexDecode static DctDecode? _dctDecode; + static RunLengthDecode? 
_runLengthDecode;
+
 //jpxDecode
 //crypt
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/RunLengthDecode.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/RunLengthDecode.cs
new file mode 100644
index 00000000..1a1c3f1e
--- /dev/null
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/RunLengthDecode.cs
@@ -0,0 +1,108 @@
+namespace PdfSharp.Pdf.Filters
+{
+    /// <summary>
+    /// Decoder for the PDF RunLengthDecode stream filter, as described in the PDF Reference 1.7:
+    /// https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/pdfreference1.7old.pdf
+    /// <para>
+    /// The RunLengthDecode filter decodes data that has been encoded in a simple
+    /// byte-oriented format based on run length. The encoded data is a sequence of
+    /// runs, where each run consists of a length byte followed by 1 to 128 bytes of data. If
+    /// the length byte is in the range 0 to 127, the following length + 1 (1 to 128) bytes
+    /// are copied literally during decompression. If length is in the range 129 to 255, the
+    /// following single byte is to be copied 257 − length (2 to 128) times during
+    /// decompression. A length value of 128 denotes EOD.
+    /// </para>
+    /// <para>
+    /// The compression achieved by run-length encoding depends on the input data. In
+    /// the best case (all zeros), a compression of approximately 64:1 is achieved for long
+    /// files. The worst case (the hexadecimal sequence 00 alternating with FF) results in
+    /// an expansion of 127:128.
+    /// </para>
+    /// </summary>
+    internal class RunLengthDecode : Filter
+    {
+        /// <summary>
+        /// Length byte value that marks the end of the encoded data (EOD).
+        /// </summary>
+        const int EndOfData = 128;
+
+        /// <summary>
+        /// Run-length encoding is not implemented; this filter only decodes.
+        /// </summary>
+        /// <param name="data">The data to encode. Not used.</param>
+        /// <returns>Never returns normally.</returns>
+        /// <exception cref="NotImplementedException">Always thrown.</exception>
+        public override byte[] Encode(byte[] data)
+        {
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// Decodes run-length encoded data.
+        /// </summary>
+        /// <param name="compressedData">The run-length encoded bytes.</param>
+        /// <param name="parms">Filter parameters. Not used by this filter.</param>
+        /// <returns>The decoded bytes.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="compressedData"/> is null.</exception>
+        /// <remarks>
+        /// Truncated input is tolerated: if the data ends without an EOD marker, or in the
+        /// middle of a run, everything decoded up to that point is returned instead of
+        /// failing with an <see cref="IndexOutOfRangeException"/>.
+        /// </remarks>
+        public override byte[] Decode(byte[] compressedData, FilterParms? parms)
+        {
+            if (compressedData == null)
+            {
+                throw new ArgumentNullException(nameof(compressedData));
+            }
+
+            // Pre-size the output using the input length as a starting estimate.
+            var decompressedData = new List<byte>(compressedData.Length);
+            var i = 0;
+
+            while (i < compressedData.Length)
+            {
+                int lengthByte = compressedData[i];
+                i++;
+
+                if (lengthByte == EndOfData)
+                {
+                    break;
+                }
+
+                if (lengthByte < EndOfData)
+                {
+                    // Literal run: copy the next (lengthByte + 1) bytes, clamped to the bytes
+                    // actually available so that a truncated stream cannot read out of bounds.
+                    var length = Math.Min(lengthByte + 1, compressedData.Length - i);
+                    for (var j = 0; j < length; j++)
+                    {
+                        decompressedData.Add(compressedData[i]);
+                        i++;
+                    }
+                }
+                else
+                {
+                    // Repeat run: the length byte must be followed by the byte to repeat.
+                    // If the stream ends right after the length byte, stop decoding.
+                    if (i >= compressedData.Length)
+                    {
+                        break;
+                    }
+
+                    // Repeat the next byte (257 - lengthByte) times.
+                    var repeatCount = 257 - lengthByte;
+                    var value = compressedData[i];
+                    i++;
+
+                    for (var j = 0; j < repeatCount; j++)
+                    {
+                        decompressedData.Add(value);
+                    }
+                }
+            }
+
+            return decompressedData.ToArray();
+        }
+    }
+}
\ No newline at end of file