From 308b85852e7c99dfe61fe2495dec6f29386c46aa Mon Sep 17 00:00:00 2001 From: Elliot Crosby-McCullough Date: Tue, 21 Apr 2026 09:55:38 +0100 Subject: [PATCH] Add a patch to handle badly formed XLS files --- lib/spreadsheet/excel/reader.rb | 9 +++++++++ lib/spreadsheet/excel/worksheet.rb | 2 +- test/data/test_row_record_empty_range.xls | Bin 0 -> 5632 bytes test/integration.rb | 14 ++++++++++++++ 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 test/data/test_row_record_empty_range.xls diff --git a/lib/spreadsheet/excel/reader.rb b/lib/spreadsheet/excel/reader.rb index ac034f1..54a6972 100755 --- a/lib/spreadsheet/excel/reader.rb +++ b/lib/spreadsheet/excel/reader.rb @@ -1309,6 +1309,15 @@ def set_row_address worksheet, work, pos, len xf = (flags & 0x0fff0000) >> 16 attrs.store :default_format, @workbook.format(xf) end + # When a ROW record claims no cells (`first_used` == `first_unused`) but + # `set_missing_row_address` already recorded a valid offset from cell + # records found earlier in the stream, preserve the original offset. + # Some XLS writers emit ROW records after cell data with zeroed column + # ranges; without this fix, `read_row` would seek to the wrong position. + if first_used == first_unused && (existing = worksheet.row_addresses[index]) + attrs[:offset] = existing[:offset] + attrs[:row_block] = existing[:row_block] + end # TODO: Row spacing worksheet.set_row_address index, attrs end diff --git a/lib/spreadsheet/excel/worksheet.rb b/lib/spreadsheet/excel/worksheet.rb index 0797edc..5854c9d 100644 --- a/lib/spreadsheet/excel/worksheet.rb +++ b/lib/spreadsheet/excel/worksheet.rb @@ -12,7 +12,7 @@ class Worksheet < Spreadsheet::Worksheet include Spreadsheet::Excel::Offset offset :dimensions - attr_reader :offset, :ole, :links, :guts, :notes + attr_reader :offset, :ole, :links, :guts, :notes, :row_addresses def initialize opts = {} @row_addresses = nil super diff --git a/test/data/test_row_record_empty_range.xls b/test/data/test_row_record_empty_range.xls new file mode 100644 index 0000000000000000000000000000000000000000..c5b2b998cb19ba2eb9d2c0306088eceac72e2d9f GIT binary patch literal 5632 zcmeHLO-NKx6#m}m_*4FksgQvTLRMxtGyX4h)GSwtKMNy*8l6`S+Kfyu!WLyFlrG9f!4BBP|!k~&UfC7sgO(rQt{5rJNNwD``vrbz4yF3^LpO1 zIP;)<2^**l8Tg*aMb;X-A#!d?FM{|J396|wM`STM^bbWKH%AsS%Z;b`ud=z8T<;~g za6j{faNvVH$1#SUSk&}xA$95)LI%gsD%`*wu@qpZj;dJsu<{+s?`h3v%G=d)O?fMd zv1lB_e6XjwS+F2TM_rZbD-dqrk+xY-iyx(UDRmMP*SXon>qbXB5*qzG9Tk~*n1!rc zw;46#q0vQ@$O6vdYr+eB;RHy}y^eir<6yuGd}E&^JO0_SE*tGPU75fn5+`+oPs{e6 z5Cae4z!}rD{L5H?98Z13(GxmrN^vk2Hn#&^=DFc`WXy`hqLP&R%~NLFj1IFxSI9CY z7x$eHMXiW6F5+M$eA=|U9c~d1_gpRBo0?mE zwzLiK`D^7H@C6!vVs$350>8rQ6s(r!rpAW)x`5wjOM>NZsP#1}mME~(Ptm&clds1z zPk0!GDtq*B79j`AcR#;5GSnM9p_WOpka$*iGnPo;GFwRF0y^X+Mq*N{))xDfUtt~W zQn_4ti5zrEHC4T(N#XF6*14+QD(R1}Y&8hNkC)1UetS4@8l?h|>gd+B1y+`Hfm zD&<8PmOl8ILXA;l+`dJ65=@nbBqk=e_sZ?Gwl<-yfbju5%5BXMjdQg%muN(=wG7cX zTU*N%jq$KGw`j6lsP%bnM!eoqWtHYfL zI1z9n;6%WQfD-{H0!{?dhybsHylL_3$O|K{iM%fJO343pR^(yn(Ek~Me#GQs!HYwP zN*TwvtbF=Z2Dg!l4EItxl={DrbqA&QI7Tp};}}V&`MNstBWksOj$@s3GbQy~VzWBw T`F^|qwewG}i%o0&QU5;xv7!DI literal 0 HcmV?d00001 diff --git a/test/integration.rb b/test/integration.rb index a82d49c..a9ecc08 100644 --- a/test/integration.rb +++ b/test/integration.rb @@ -1217,6 +1217,20 @@ def test_missing_row_op assert_not_nil sheet[2, 1] end + def test_row_record_with_empty_cell_range + # Some XLS writers emit ROW records with `first_used` == `first_unused` (claiming + # no cells) even though cell records (e.g. `LABELSST`) exist for that row + # earlier in the stream. Previously this caused `read_row` to seek to the wrong + # offset, returning an empty row despite valid cell data being present. + path = File.join @data, "test_row_record_empty_range.xls" + book = Spreadsheet.open path + sheet = book.worksheet 0 + row0 = sheet.row(0).to_a.compact + assert_operator row0.length, :>, 0, "Row 0 should not be empty" + assert_equal "Name", row0[0] + assert_equal ["Name", "Code", "Description", "Reference", "Date", "Quantity"], row0 + end + def test_changes path = File.join @data, "test_changes.xls" book = Spreadsheet.open path