@@ -47,8 +47,7 @@ def initialize(io_stream, filename, repair_pdf: false)
4747 end
4848
4949 if filename . end_with? ( '.pdf' ) && repair_pdf
50- rescue_broken_pdf ( @io_stream )
51- @file_mimetype = Marcel ::MimeType . for @io_stream
50+ fix_pdf!
5251
5352 logger . debug ( "Loaded new input #{ @filename } from #{ self . class } " )
5453 return if ALLOWED_MIME_TYPES . include? @file_mimetype
@@ -57,27 +56,40 @@ def initialize(io_stream, filename, repair_pdf: false)
5756 raise Errors ::MindeeMimeTypeError , @file_mimetype . to_s
5857 end
5958
# Legacy entry point for repairing a PDF whose header is preceded by junk bytes.
# Kept only so existing callers keep working; it simply delegates to {#fix_pdf!}.
# @deprecated Use {#fix_pdf!} or {.fix_pdf} instead.
# @param _ [Object] Ignored. The repair always operates on the instance's own stream.
def rescue_broken_pdf(_)
  fix_pdf!
end
7563
# Shorthand for PDF mimetype validation.
# @return [Boolean] true when the detected mimetype is exactly 'application/pdf'.
def pdf?
  # to_s lets the comparison work whether the mimetype is stored as a String,
  # some other object, or nil.
  @file_mimetype.to_s == 'application/pdf'
end
8068
# Attempts to fix the PDF data in the file.
# Replaces the instance's stream with the repaired one and re-detects its mimetype.
# @param maximum_offset [Integer] Maximum offset to look for the PDF header.
# @return [void]
# @raise [Mindee::Errors::MindeePDFError]
def fix_pdf!(maximum_offset: 500)
  @io_stream = LocalInputSource.fix_pdf(@io_stream, maximum_offset: maximum_offset)
  # Start from the beginning so mimetype detection sees the PDF header.
  @io_stream.rewind
  @file_mimetype = Marcel::MimeType.for(@io_stream)
end
78+
# Attempt to fix the PDF data in the given stream.
# Scans forward from the stream's current position for the first '%PDF-' marker
# and copies everything from that marker onward into a new in-memory stream.
# @param stream [StringIO, File] The stream to fix. Its read position is advanced.
# @param maximum_offset [Integer] Maximum offset to look for the PDF header.
#   The check is made against the position just after the marker.
# @return [StringIO] The fixed stream, rewound to its beginning.
# @raise [Mindee::Errors::MindeePDFError] if no marker is found, if nothing follows it,
#   or if it lies beyond +maximum_offset+.
def self.fix_pdf(stream, maximum_offset: 500)
  # Consume everything up to and including the first '%PDF-' marker.
  stream.gets('%PDF-')
  raise Errors::MindeePDFError if stream.eof? || stream.pos > maximum_offset

  # Step back over the 5-byte marker so the copy starts at the header itself.
  stream.pos = stream.pos - 5
  out_stream = StringIO.new
  out_stream << stream.read
  # Writing leaves the cursor at the end; rewind so callers can read immediately.
  out_stream.rewind
  out_stream
end
92+
8193 # Cuts a PDF file according to provided options.
8294 # @param options [PageOptions, nil] Page cutting/merge options:
8395 #
0 commit comments