Skip to content

Commit 571b696

Browse files
REST: Fix serde of tasks with multiple deletes (apache#14573)
Co-authored-by: Prashant Kumar Singh <prashant.singh@snowflake.com>
1 parent a6c4e6a commit 571b696

3 files changed

Lines changed: 138 additions & 7 deletions

File tree

core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public static void serializeScanTasks(
9696
gen.writeArrayFieldStart(DELETE_FILES);
9797
for (int i = 0; i < deleteFiles.size(); i++) {
9898
DeleteFile deleteFile = deleteFiles.get(i);
99-
deleteFilePathToIndex.put(String.valueOf(deleteFile.path()), i);
99+
deleteFilePathToIndex.put(deleteFile.location(), i);
100100
ContentFileParser.toJson(deleteFiles.get(i), specsById.get(deleteFile.specId()), gen);
101101
}
102102

@@ -105,11 +105,11 @@ public static void serializeScanTasks(
105105

106106
if (fileScanTasks != null) {
107107
gen.writeArrayFieldStart(FILE_SCAN_TASKS);
108-
Set<Integer> deleteFileReferences = Sets.newHashSet();
109108
for (FileScanTask fileScanTask : fileScanTasks) {
109+
Set<Integer> deleteFileReferences = Sets.newHashSet();
110110
if (deleteFiles != null) {
111111
for (DeleteFile taskDelete : fileScanTask.deletes()) {
112-
deleteFileReferences.add(deleteFilePathToIndex.get(taskDelete.path().toString()));
112+
deleteFileReferences.add(deleteFilePathToIndex.get(taskDelete.location()));
113113
}
114114
}
115115

core/src/test/java/org/apache/iceberg/TestBase.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,15 +111,15 @@ public class TestBase {
111111
.withPartitionPath("data_bucket=0")
112112
.withRecordCount(1)
113113
.build();
114-
static final DataFile FILE_B =
114+
public static final DataFile FILE_B =
115115
DataFiles.builder(SPEC)
116116
.withPath("/path/to/data-b.parquet")
117117
.withFileSizeInBytes(10)
118118
.withPartitionPath("data_bucket=1") // easy way to set partition data for now
119119
.withRecordCount(1)
120120
.withSplitOffsets(ImmutableList.of(1L))
121121
.build();
122-
static final DeleteFile FILE_B_DELETES =
122+
public static final DeleteFile FILE_B_DELETES =
123123
FileMetadata.deleteFileBuilder(SPEC)
124124
.ofPositionDeletes()
125125
.withPath("/path/to/data-b-deletes.parquet")
@@ -138,15 +138,15 @@ public class TestBase {
138138
.withContentOffset(4)
139139
.withContentSizeInBytes(6)
140140
.build();
141-
static final DataFile FILE_C =
141+
public static final DataFile FILE_C =
142142
DataFiles.builder(SPEC)
143143
.withPath("/path/to/data-c.parquet")
144144
.withFileSizeInBytes(10)
145145
.withPartitionPath("data_bucket=2") // easy way to set partition data for now
146146
.withRecordCount(1)
147147
.withSplitOffsets(ImmutableList.of(2L, 8L))
148148
.build();
149-
static final DeleteFile FILE_C2_DELETES =
149+
public static final DeleteFile FILE_C2_DELETES =
150150
FileMetadata.deleteFileBuilder(SPEC)
151151
.ofEqualityDeletes(1)
152152
.withPath("/path/to/data-c-deletes.parquet")

core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020

2121
import static org.apache.iceberg.TestBase.FILE_A;
2222
import static org.apache.iceberg.TestBase.FILE_A_DELETES;
23+
import static org.apache.iceberg.TestBase.FILE_B;
24+
import static org.apache.iceberg.TestBase.FILE_B_DELETES;
25+
import static org.apache.iceberg.TestBase.FILE_C;
26+
import static org.apache.iceberg.TestBase.FILE_C2_DELETES;
2327
import static org.apache.iceberg.TestBase.PARTITION_SPECS_BY_ID;
2428
import static org.apache.iceberg.TestBase.SCHEMA;
2529
import static org.apache.iceberg.TestBase.SPEC;
@@ -265,6 +269,133 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() {
265269
assertThat(PlanTableScanResponseParser.toJson(copyResponse)).isEqualTo(expectedToJson);
266270
}
267271

272+
@Test
273+
public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() {
274+
ResidualEvaluator residualEvaluator =
275+
ResidualEvaluator.of(SPEC, Expressions.alwaysTrue(), true);
276+
277+
// Create three tasks, each with its own distinct delete file
278+
FileScanTask taskA =
279+
new BaseFileScanTask(
280+
FILE_A,
281+
new DeleteFile[] {FILE_A_DELETES},
282+
SchemaParser.toJson(SCHEMA),
283+
PartitionSpecParser.toJson(SPEC),
284+
residualEvaluator);
285+
286+
FileScanTask taskB =
287+
new BaseFileScanTask(
288+
FILE_B,
289+
new DeleteFile[] {FILE_B_DELETES},
290+
SchemaParser.toJson(SCHEMA),
291+
PartitionSpecParser.toJson(SPEC),
292+
residualEvaluator);
293+
294+
FileScanTask taskC =
295+
new BaseFileScanTask(
296+
FILE_C,
297+
new DeleteFile[] {FILE_C2_DELETES},
298+
SchemaParser.toJson(SCHEMA),
299+
PartitionSpecParser.toJson(SPEC),
300+
residualEvaluator);
301+
302+
PlanTableScanResponse response =
303+
PlanTableScanResponse.builder()
304+
.withPlanStatus(PlanStatus.COMPLETED)
305+
.withFileScanTasks(List.of(taskA, taskB, taskC))
306+
.withDeleteFiles(List.of(FILE_A_DELETES, FILE_B_DELETES, FILE_C2_DELETES))
307+
.withSpecsById(PARTITION_SPECS_BY_ID)
308+
.build();
309+
310+
String expectedJson =
311+
"{\n"
312+
+ " \"plan-status\" : \"completed\",\n"
313+
+ " \"delete-files\" : [ {\n"
314+
+ " \"spec-id\" : 0,\n"
315+
+ " \"content\" : \"POSITION_DELETES\",\n"
316+
+ " \"file-path\" : \"/path/to/data-a-deletes.parquet\",\n"
317+
+ " \"file-format\" : \"PARQUET\",\n"
318+
+ " \"partition\" : {\n"
319+
+ " \"1000\" : 0\n"
320+
+ " },\n"
321+
+ " \"file-size-in-bytes\" : 10,\n"
322+
+ " \"record-count\" : 1\n"
323+
+ " }, {\n"
324+
+ " \"spec-id\" : 0,\n"
325+
+ " \"content\" : \"POSITION_DELETES\",\n"
326+
+ " \"file-path\" : \"/path/to/data-b-deletes.parquet\",\n"
327+
+ " \"file-format\" : \"PARQUET\",\n"
328+
+ " \"partition\" : {\n"
329+
+ " \"1000\" : 1\n"
330+
+ " },\n"
331+
+ " \"file-size-in-bytes\" : 10,\n"
332+
+ " \"record-count\" : 1\n"
333+
+ " }, {\n"
334+
+ " \"spec-id\" : 0,\n"
335+
+ " \"content\" : \"EQUALITY_DELETES\",\n"
336+
+ " \"file-path\" : \"/path/to/data-c-deletes.parquet\",\n"
337+
+ " \"file-format\" : \"PARQUET\",\n"
338+
+ " \"partition\" : {\n"
339+
+ " \"1000\" : 2\n"
340+
+ " },\n"
341+
+ " \"file-size-in-bytes\" : 10,\n"
342+
+ " \"record-count\" : 1,\n"
343+
+ " \"equality-ids\" : [ 1 ],\n"
344+
+ " \"sort-order-id\" : 0\n"
345+
+ " } ],\n"
346+
+ " \"file-scan-tasks\" : [ {\n"
347+
+ " \"data-file\" : {\n"
348+
+ " \"spec-id\" : 0,\n"
349+
+ " \"content\" : \"DATA\",\n"
350+
+ " \"file-path\" : \"/path/to/data-a.parquet\",\n"
351+
+ " \"file-format\" : \"PARQUET\",\n"
352+
+ " \"partition\" : {\n"
353+
+ " \"1000\" : 0\n"
354+
+ " },\n"
355+
+ " \"file-size-in-bytes\" : 10,\n"
356+
+ " \"record-count\" : 1,\n"
357+
+ " \"sort-order-id\" : 0\n"
358+
+ " },\n"
359+
+ " \"delete-file-references\" : [ 0 ],\n"
360+
+ " \"residual-filter\" : true\n"
361+
+ " }, {\n"
362+
+ " \"data-file\" : {\n"
363+
+ " \"spec-id\" : 0,\n"
364+
+ " \"content\" : \"DATA\",\n"
365+
+ " \"file-path\" : \"/path/to/data-b.parquet\",\n"
366+
+ " \"file-format\" : \"PARQUET\",\n"
367+
+ " \"partition\" : {\n"
368+
+ " \"1000\" : 1\n"
369+
+ " },\n"
370+
+ " \"file-size-in-bytes\" : 10,\n"
371+
+ " \"record-count\" : 1,\n"
372+
+ " \"split-offsets\" : [ 1 ],\n"
373+
+ " \"sort-order-id\" : 0\n"
374+
+ " },\n"
375+
+ " \"delete-file-references\" : [ 1 ],\n"
376+
+ " \"residual-filter\" : true\n"
377+
+ " }, {\n"
378+
+ " \"data-file\" : {\n"
379+
+ " \"spec-id\" : 0,\n"
380+
+ " \"content\" : \"DATA\",\n"
381+
+ " \"file-path\" : \"/path/to/data-c.parquet\",\n"
382+
+ " \"file-format\" : \"PARQUET\",\n"
383+
+ " \"partition\" : {\n"
384+
+ " \"1000\" : 2\n"
385+
+ " },\n"
386+
+ " \"file-size-in-bytes\" : 10,\n"
387+
+ " \"record-count\" : 1,\n"
388+
+ " \"split-offsets\" : [ 2, 8 ],\n"
389+
+ " \"sort-order-id\" : 0\n"
390+
+ " },\n"
391+
+ " \"delete-file-references\" : [ 2 ],\n"
392+
+ " \"residual-filter\" : true\n"
393+
+ " } ]\n"
394+
+ "}";
395+
String json = PlanTableScanResponseParser.toJson(response, true);
396+
assertThat(json).isEqualTo(expectedJson);
397+
}
398+
268399
@Test
269400
public void roundTripSerdeWithoutDeleteFiles() {
270401
ResidualEvaluator residualEvaluator =

0 commit comments

Comments
 (0)