diff --git a/src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php b/src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php index 746d836b9..24ed29eb0 100644 --- a/src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php +++ b/src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php @@ -76,38 +76,31 @@ public function extract(FlowContext $context) : \Generator $shouldPutInputIntoRows = $context->config->shouldPutInputIntoRows(); while ([] !== $values) { - $rows = \array_map( - function (array $rowData) use ($headers, $headersCount, $shouldPutInputIntoRows) { - $rowDataCount = \count($rowData); + foreach ($values as $rowData) { + $rowDataCount = \count($rowData); - // Expand columns to the size of the previous row - for ($i = $rowDataCount; $i < $headersCount; $i++) { - $rowData[$i] = null; - } - - if ($rowDataCount > $headersCount) { - if (!$this->dropExtraColumns) { - throw InvalidArgumentException::because('Row has more columns (%d) than headers (%d)', $rowDataCount, $headersCount); - } + // Expand columns to the size of the previous row + for ($i = $rowDataCount; $i < $headersCount; $i++) { + $rowData[$i] = null; + } - $rowData = \array_slice($rowData, 0, $headersCount); + if ($rowDataCount > $headersCount) { + if (!$this->dropExtraColumns) { + throw InvalidArgumentException::because('Row has more columns (%d) than headers (%d)', $rowDataCount, $headersCount); } - $row = \array_combine($headers, $rowData); + $rowData = \array_slice($rowData, 0, $headersCount); + } - if ($shouldPutInputIntoRows) { - $row['_spread_sheet_id'] = $this->spreadsheetId; - $row['_sheet_name'] = $this->columnRange->sheetName; - } + $row = \array_combine($headers, $rowData); - return $row; - }, - $values - ); + if ($shouldPutInputIntoRows) { + $row['_spread_sheet_id'] = $this->spreadsheetId; + $row['_sheet_name'] = $this->columnRange->sheetName; + } - $totalRows += \count($rows); + $totalRows++; - foreach ($rows as $row) { $signal = yield array_to_rows($row, $context->entryFactory(), schema: $this->schema); $this->incrementReturnedRows(); diff --git a/src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Fixtures/missing-columns.json b/src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Fixtures/missing-columns.json new file mode 100644 index 000000000..26900ad28 --- /dev/null +++ b/src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Fixtures/missing-columns.json @@ -0,0 +1,11 @@ +{ + "range": "Sheet!A1:C5", + "majorDimension": "ROWS", + "values": [ + ["Header 1", "Header 2", "Header 3"], + ["A2", "B2", "C2"], + ["A3", "B3"], + ["A4", "B4"], + ["A5", "B5", "C5"] + ] +} diff --git a/src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Integration/GoogleSheetExtractorTest.php b/src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Integration/GoogleSheetExtractorTest.php index f97d7f73b..105f3542f 100644 --- a/src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Integration/GoogleSheetExtractorTest.php +++ b/src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Integration/GoogleSheetExtractorTest.php @@ -19,6 +19,24 @@ protected function setUp() : void $this->context = new GoogleSheetsContext(); } + public function test_extract_expand_missing_columns() : void + { + $rows = df() + ->extract( + from_google_sheet( + $this->context->sheets(__DIR__ . '/../Fixtures/missing-columns.json'), + '1234567890', + 'Sheet', + ) + ) + ->fetch() + ->toArray(); + + foreach ($rows as $row) { + self::assertCount(3, $row); + } + } + public function test_extract_puts_null_in_not_matching_schema_rows() : void { $rows = df() @@ -58,6 +76,8 @@ public function test_extract_skip_extra_empty_rows() : void ->fetch() ->toArray(); + self::assertCount(10, $rows); + foreach ($rows as $row) { self::assertNotSame([], $row); } @@ -81,6 +101,23 @@ public function test_extract_with_cut_extra_columns() : void } } + public function test_extract_with_limit() : void + { + $extractor = from_google_sheet( + $this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'), + '1234567890', + 'Sheet', + ); + $extractor->changeLimit(2); + + $rows = df() + ->extract($extractor) + ->fetch() + ->toArray(); + + self::assertCount(2, $rows); + } + public function test_extract_without_cut_extra_columns() : void { $this->expectException(InvalidArgumentException::class);