Skip to content

Commit e43f47b

Browse files
committed
fix absent statistics
1 parent 495a566 commit e43f47b

File tree

1 file changed

+38
-41
lines changed

1 file changed

+38
-41
lines changed

datafusion_iceberg/src/statistics.rs

Lines changed: 38 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -18,49 +18,46 @@ pub(crate) fn statistics_from_datafiles(
1818
datafiles
1919
.iter()
2020
.filter(|(_, manifest)| !matches!(manifest.status(), Status::Deleted))
21-
.fold(
21+
.map(|(_, manifest)| {
22+
let column_stats = column_statistics(schema, manifest);
2223
Statistics {
23-
num_rows: Precision::Exact(0),
24-
total_byte_size: Precision::Exact(0),
25-
column_statistics: vec![
26-
ColumnStatistics {
27-
null_count: Precision::Absent,
28-
max_value: Precision::Absent,
29-
min_value: Precision::Absent,
30-
distinct_count: Precision::Absent,
31-
sum_value: Precision::Absent,
32-
};
33-
schema.fields().len()
34-
],
35-
},
36-
|acc, (_, manifest)| {
37-
let column_stats = column_statistics(schema, manifest);
38-
Statistics {
39-
num_rows: acc.num_rows.add(&Precision::Exact(
40-
*manifest.data_file().record_count() as usize,
41-
)),
42-
total_byte_size: acc.total_byte_size.add(&Precision::Exact(
43-
*manifest.data_file().file_size_in_bytes() as usize,
44-
)),
45-
column_statistics: acc
46-
.column_statistics
47-
.into_iter()
48-
.zip(column_stats)
49-
.map(|(acc, x)| {
50-
let new_distinct_count = new_distinct_count(&acc, &x);
24+
num_rows: Precision::Exact(*manifest.data_file().record_count() as usize),
25+
total_byte_size: Precision::Exact(
26+
*manifest.data_file().file_size_in_bytes() as usize
27+
),
28+
column_statistics: column_stats
29+
.into_iter()
30+
.map(|x| ColumnStatistics {
31+
null_count: x.null_count,
32+
max_value: x.max_value,
33+
min_value: x.min_value,
34+
distinct_count: x.distinct_count,
35+
sum_value: x.sum_value,
36+
})
37+
.collect(),
38+
}
39+
})
40+
.reduce(|acc, x| Statistics {
41+
num_rows: acc.num_rows.add(&x.num_rows),
42+
total_byte_size: acc.total_byte_size.add(&x.total_byte_size),
43+
column_statistics: acc
44+
.column_statistics
45+
.into_iter()
46+
.zip(x.column_statistics)
47+
.map(|(acc, x)| {
48+
let new_distinct_count = new_distinct_count(&acc, &x);
5149

52-
ColumnStatistics {
53-
null_count: acc.null_count.add(&x.null_count),
54-
max_value: acc.max_value.max(&x.max_value),
55-
min_value: acc.min_value.min(&x.min_value),
56-
distinct_count: new_distinct_count,
57-
sum_value: acc.sum_value.add(&x.sum_value),
58-
}
59-
})
60-
.collect(),
61-
}
62-
},
63-
)
50+
ColumnStatistics {
51+
null_count: acc.null_count.add(&x.null_count),
52+
max_value: acc.max_value.max(&x.max_value),
53+
min_value: acc.min_value.min(&x.min_value),
54+
distinct_count: new_distinct_count,
55+
sum_value: acc.sum_value.add(&x.sum_value),
56+
}
57+
})
58+
.collect(),
59+
})
60+
.unwrap_or_default()
6461
}
6562

6663
fn column_statistics<'a>(

0 commit comments

Comments (0)