2020
2121use crate :: file_groups:: FileGroup ;
2222use crate :: {
23- display:: FileGroupsDisplay , file:: FileSource ,
23+ PartitionedFile , display:: FileGroupsDisplay , file:: FileSource ,
2424 file_compression_type:: FileCompressionType , file_stream:: FileStream ,
25- source:: DataSource , statistics:: MinMaxStatistics , PartitionedFile ,
25+ source:: DataSource , statistics:: MinMaxStatistics ,
2626} ;
2727use arrow:: datatypes:: FieldRef ;
2828use arrow:: datatypes:: { DataType , Schema , SchemaRef } ;
2929use datafusion_common:: config:: ConfigOptions ;
3030use datafusion_common:: {
31- internal_datafusion_err , internal_err , Constraints , Result , ScalarValue , Statistics ,
31+ Constraints , Result , ScalarValue , Statistics , internal_datafusion_err , internal_err ,
3232} ;
3333use datafusion_execution:: {
34- object_store :: ObjectStoreUrl , SendableRecordBatchStream , TaskContext ,
34+ SendableRecordBatchStream , TaskContext , object_store :: ObjectStoreUrl ,
3535} ;
3636use datafusion_expr:: Operator ;
3737use datafusion_physical_expr:: expressions:: BinaryExpr ;
3838use datafusion_physical_expr:: projection:: ProjectionExprs ;
3939use datafusion_physical_expr:: utils:: reassign_expr_columns;
40- use datafusion_physical_expr:: { split_conjunction , EquivalenceProperties , Partitioning } ;
40+ use datafusion_physical_expr:: { EquivalenceProperties , Partitioning , split_conjunction } ;
4141use datafusion_physical_expr_adapter:: PhysicalExprAdapterFactory ;
4242use datafusion_physical_expr_common:: physical_expr:: PhysicalExpr ;
4343use datafusion_physical_expr_common:: sort_expr:: LexOrdering ;
4444use datafusion_physical_plan:: {
45- display:: { display_orderings, ProjectSchemaDisplay } ,
45+ DisplayAs , DisplayFormatType ,
46+ display:: { ProjectSchemaDisplay , display_orderings} ,
4647 filter_pushdown:: FilterPushdownPropagation ,
4748 metrics:: ExecutionPlanMetricsSet ,
48- DisplayAs , DisplayFormatType ,
4949} ;
5050use std:: { any:: Any , fmt:: Debug , fmt:: Formatter , fmt:: Result as FmtResult , sync:: Arc } ;
5151
@@ -303,7 +303,9 @@ impl FileScanConfigBuilder {
303303 match self . clone ( ) . with_projection_indices ( indices) {
304304 Ok ( builder) => builder,
305305 Err ( e) => {
306- warn ! ( "Failed to push down projection in FileScanConfigBuilder::with_projection: {e}" ) ;
306+ warn ! (
307+ "Failed to push down projection in FileScanConfigBuilder::with_projection: {e}"
308+ ) ;
307309 self
308310 }
309311 }
@@ -643,16 +645,16 @@ impl DataSource for FileScanConfig {
643645 fn partition_statistics ( & self , partition : Option < usize > ) -> Result < Statistics > {
644646 if let Some ( partition) = partition {
645647 // Get statistics for a specific partition
646- if let Some ( file_group) = self . file_groups . get ( partition) {
647- if let Some ( stat) = file_group. file_statistics ( None ) {
648- // Project the statistics based on the projection
649- let output_schema = self . projected_schema ( ) ? ;
650- return if let Some ( projection ) = self . file_source . projection ( ) {
651- projection . project_statistics ( stat . clone ( ) , & output_schema )
652- } else {
653- Ok ( stat . clone ( ) )
654- } ;
655- }
648+ if let Some ( file_group) = self . file_groups . get ( partition)
649+ && let Some ( stat) = file_group. file_statistics ( None )
650+ {
651+ // Project the statistics based on the projection
652+ let output_schema = self . projected_schema ( ) ? ;
653+ return if let Some ( projection ) = self . file_source . projection ( ) {
654+ projection . project_statistics ( stat . clone ( ) , & output_schema )
655+ } else {
656+ Ok ( stat . clone ( ) )
657+ } ;
656658 }
657659 // If no statistics available for this partition, return unknown
658660 Ok ( Statistics :: new_unknown ( self . projected_schema ( ) ?. as_ref ( ) ) )
@@ -1217,16 +1219,16 @@ mod tests {
12171219 use std:: collections:: HashMap ;
12181220
12191221 use super :: * ;
1220- use crate :: test_util:: col;
12211222 use crate :: TableSchema ;
1223+ use crate :: test_util:: col;
12221224 use crate :: {
12231225 generate_test_files, test_util:: MockSource , tests:: aggr_test_schema,
12241226 verify_sort_integrity,
12251227 } ;
12261228
12271229 use arrow:: datatypes:: Field ;
12281230 use datafusion_common:: stats:: Precision ;
1229- use datafusion_common:: { internal_err , ColumnStatistics } ;
1231+ use datafusion_common:: { ColumnStatistics , internal_err } ;
12301232 use datafusion_expr:: { Operator , SortExpr } ;
12311233 use datafusion_physical_expr:: create_physical_sort_expr;
12321234 use datafusion_physical_expr:: expressions:: { BinaryExpr , Column , Literal } ;
@@ -1267,7 +1269,7 @@ mod tests {
12671269 use chrono:: TimeZone ;
12681270 use datafusion_common:: DFSchema ;
12691271 use datafusion_expr:: execution_props:: ExecutionProps ;
1270- use object_store:: { path:: Path , ObjectMeta } ;
1272+ use object_store:: { ObjectMeta , path:: Path } ;
12711273
12721274 struct File {
12731275 name : & ' static str ,
@@ -1368,12 +1370,16 @@ mod tests {
13681370 true ,
13691371 ) ] ) ,
13701372 files: vec![
1371- File :: new_nullable( "0" , "2023-01-01" , vec![ Some ( ( Some ( 0.00 ) , Some ( 0.49 ) ) ) ] ) ,
1373+ File :: new_nullable(
1374+ "0" ,
1375+ "2023-01-01" ,
1376+ vec![ Some ( ( Some ( 0.00 ) , Some ( 0.49 ) ) ) ] ,
1377+ ) ,
13721378 File :: new_nullable( "1" , "2023-01-01" , vec![ Some ( ( Some ( 0.50 ) , None ) ) ] ) ,
13731379 File :: new_nullable( "2" , "2023-01-02" , vec![ Some ( ( Some ( 0.00 ) , None ) ) ] ) ,
13741380 ] ,
13751381 sort: vec![ col( "value" ) . sort( true , false ) ] ,
1376- expected_result: Ok ( vec![ vec![ "0" , "1" ] , vec![ "2" ] ] )
1382+ expected_result: Ok ( vec![ vec![ "0" , "1" ] , vec![ "2" ] ] ) ,
13771383 } ,
13781384 TestCase {
13791385 name: "nullable sort columns, nulls first" ,
@@ -1384,11 +1390,15 @@ mod tests {
13841390 ) ] ) ,
13851391 files: vec![
13861392 File :: new_nullable( "0" , "2023-01-01" , vec![ Some ( ( None , Some ( 0.49 ) ) ) ] ) ,
1387- File :: new_nullable( "1" , "2023-01-01" , vec![ Some ( ( Some ( 0.50 ) , Some ( 1.00 ) ) ) ] ) ,
1393+ File :: new_nullable(
1394+ "1" ,
1395+ "2023-01-01" ,
1396+ vec![ Some ( ( Some ( 0.50 ) , Some ( 1.00 ) ) ) ] ,
1397+ ) ,
13881398 File :: new_nullable( "2" , "2023-01-02" , vec![ Some ( ( None , Some ( 1.00 ) ) ) ] ) ,
13891399 ] ,
13901400 sort: vec![ col( "value" ) . sort( true , true ) ] ,
1391- expected_result: Ok ( vec![ vec![ "0" , "1" ] , vec![ "2" ] ] )
1401+ expected_result: Ok ( vec![ vec![ "0" , "1" ] , vec![ "2" ] ] ) ,
13921402 } ,
13931403 TestCase {
13941404 name: "all three non-overlapping" ,
@@ -1444,7 +1454,9 @@ mod tests {
14441454 File :: new( "2" , "2023-01-02" , vec![ None ] ) ,
14451455 ] ,
14461456 sort: vec![ col( "value" ) . sort( true , false ) ] ,
1447- expected_result: Err ( "construct min/max statistics for split_groups_by_statistics\n caused by\n collect min/max values\n caused by\n get min/max for column: 'value'\n caused by\n Error during planning: statistics not found" ) ,
1457+ expected_result: Err (
1458+ "construct min/max statistics for split_groups_by_statistics\n caused by\n collect min/max values\n caused by\n get min/max for column: 'value'\n caused by\n Error during planning: statistics not found" ,
1459+ ) ,
14481460 } ,
14491461 ] ;
14501462
@@ -1621,10 +1633,12 @@ mod tests {
16211633 "test.parquet" . to_string( ) ,
16221634 1024 ,
16231635 ) ] ) ] )
1624- . with_output_ordering ( vec ! [ [ PhysicalSortExpr :: new_default( Arc :: new(
1625- Column :: new( "date" , 0 ) ,
1626- ) ) ]
1627- . into( ) ] )
1636+ . with_output_ordering ( vec ! [
1637+ [ PhysicalSortExpr :: new_default( Arc :: new( Column :: new(
1638+ "date" , 0 ,
1639+ ) ) ) ]
1640+ . into( ) ,
1641+ ] )
16281642 . with_file_compression_type ( FileCompressionType :: UNCOMPRESSED )
16291643 . with_newlines_in_values ( true )
16301644 . build ( ) ;
0 commit comments