-
Notifications
You must be signed in to change notification settings - Fork 833
boundary handling and decompression limits in CompressedRTF #1071
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,9 +22,11 @@ Licensed to the Apache Software Foundation (ASF) under one or more | |
| import java.io.OutputStream; | ||
| import java.nio.charset.StandardCharsets; | ||
|
|
||
| import org.apache.commons.io.input.BoundedInputStream; | ||
| import org.apache.poi.util.IOUtils; | ||
| import org.apache.poi.util.LZWDecompresser; | ||
| import org.apache.poi.util.LittleEndian; | ||
| import org.apache.poi.util.RecordFormatException; | ||
|
|
||
|
|
||
| /** | ||
|
|
@@ -52,6 +54,16 @@ public final class CompressedRTF extends LZWDecompresser { | |
| "\\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" + | ||
| "{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"; | ||
|
|
||
| private static final int DEFAULT_MAX_RECORD_LENGTH = 50_000_000; | ||
| /** | ||
| * MS-OXRTFCP uses 8-item chunks per control byte. If the data ends mid-chunk, | ||
| * the remaining bits in the control byte are processed. If they are literal bits, | ||
| * they produce trailing literal bytes from the stream. A chunk can thus emit | ||
| * up to 7 bytes of padding beyond the declared uncompressed size. | ||
| */ | ||
| private static final int MAX_PADDING_LENGTH = 7; | ||
| private static int MAX_RECORD_LENGTH = DEFAULT_MAX_RECORD_LENGTH; | ||
|
|
||
| private int compressedSize; | ||
| private int decompressedSize; | ||
|
|
||
|
|
@@ -62,6 +74,20 @@ public CompressedRTF() { | |
| super(true, 2, true); | ||
| } | ||
|
|
||
| /** | ||
| * @param length the max decompressed record length allowed for CompressedRTF | ||
| */ | ||
| public static void setMaxRecordLength(int length) { | ||
| MAX_RECORD_LENGTH = length; | ||
| } | ||
|
|
||
| /** | ||
| * @return the max decompressed record length allowed for CompressedRTF | ||
| */ | ||
| public static int getMaxRecordLength() { | ||
| return MAX_RECORD_LENGTH; | ||
| } | ||
|
|
||
| /** | ||
| * Decompresses the whole of the compressed RTF | ||
| * stream, outputting the resulting RTF bytes. | ||
|
|
@@ -79,19 +105,28 @@ public void decompress(InputStream src, OutputStream res) throws IOException { | |
| /* int dataCRC = */ LittleEndian.readInt(src); | ||
|
|
||
| // TODO - Handle CRC checking on the output side | ||
| IOUtils.safelyAllocateCheck(decompressedSize, MAX_RECORD_LENGTH); | ||
| LimitedOutputStream limited = new LimitedOutputStream(res, decompressedSize + MAX_PADDING_LENGTH); | ||
|
|
||
| // Do we need to do anything? | ||
| if(compressionType == UNCOMPRESSED_SIGNATURE_INT) { | ||
| // Nope, nothing fancy to do | ||
| IOUtils.copy(src, res); | ||
| copyCompressedPayload(src, limited); | ||
| return; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't use return like this - also copyCompressedPayload has its own limit checks, quite untidy to do this |
||
| } else if(compressionType == COMPRESSED_SIGNATURE_INT) { | ||
| // We need to decompress it below | ||
| } else { | ||
| throw new IllegalArgumentException("Invalid compression signature " + compressionType); | ||
| } | ||
|
|
||
| // Have it processed | ||
| super.decompress(src, res); | ||
| try (InputStream bounded = BoundedInputStream.builder() | ||
| .setInputStream(src) | ||
| .setMaxCount(getCompressedSize()) | ||
| .setPropagateClose(false) | ||
| .get()) { | ||
| super.decompress(bounded, limited); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -127,4 +162,48 @@ protected int populateDictionary(byte[] dict) { | |
| // Start adding new codes after the constants | ||
| return preload.length; | ||
| } | ||
|
|
||
| private void copyCompressedPayload(InputStream src, OutputStream res) throws IOException { | ||
| long remaining = getCompressedSize(); | ||
| byte[] buffer = IOUtils.safelyAllocate(Math.min(8192L, remaining), MAX_RECORD_LENGTH); | ||
| while (remaining > 0) { | ||
| int read = src.read(buffer, 0, (int)Math.min(buffer.length, remaining)); | ||
| if (read < 0) { | ||
| throw new IOException("Not enough data to read " + getCompressedSize() + " bytes of uncompressed RTF"); | ||
| } | ||
| res.write(buffer, 0, read); | ||
| remaining -= read; | ||
| } | ||
| } | ||
|
|
||
| private static final class LimitedOutputStream extends OutputStream { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://github.com/apache/commons-io/blob/master/src/main/java/org/apache/commons/io/output/CountingOutputStream.java should be usable - don't really like declaring our own class for this |
||
| private final OutputStream delegate; | ||
| private final int limit; | ||
| private int count; | ||
|
|
||
| private LimitedOutputStream(OutputStream delegate, int limit) { | ||
| this.delegate = delegate; | ||
| this.limit = limit; | ||
| } | ||
|
|
||
| @Override | ||
| public void write(int b) throws IOException { | ||
| checkLimit(1); | ||
| delegate.write(b); | ||
| count++; | ||
| } | ||
|
|
||
| @Override | ||
| public void write(byte[] b, int off, int len) throws IOException { | ||
| checkLimit(len); | ||
| delegate.write(b, off, len); | ||
| count += len; | ||
| } | ||
|
|
||
| private void checkLimit(int len) { | ||
| if (count + (long)len > limit) { | ||
| throw new RecordFormatException("Compressed RTF expands beyond declared size " + limit); | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why this length as opposed to some other value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mainly wanted a limit that was large enough to avoid rejecting legitimate larger RTF payloads, while still enforcing a reasonable upper bound for malformed or inconsistent size declarations.
The 50MB value was meant as a conservative default rather than a format-defined maximum. Happy to align it with an existing POI-wide limit pattern, or reduce/remove the configurable default if you’d prefer.