Skip to content

Commit 9e92efd

Browse files
committed
Fix header
1 parent 95e6854 commit 9e92efd

File tree

3 files changed

+17
-5
lines changed

3 files changed

+17
-5
lines changed

datafusion/vcf/examples/datafusion_integration.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ async fn main() -> datafusion::error::Result<()> {
77
env_logger::init();
88
// let path = "gs://gcp-public-data--gnomad/release/4.1/vcf/exomes/gnomad.exomes.v4.1.sites.chr21.vcf.bgz".to_string();
99
// let path = "gs://gcp-public-data--gnomad/release/4.1/genome_sv/gnomad.v4.1.sv.sites.vcf.gz".to_string();
10-
let path = "gs://genomics-public-data/platinum-genomes/vcf/NA12878_S1.genome.vcf".to_string();
10+
// let path = "gs://genomics-public-data/platinum-genomes/vcf/NA12878_S1.genome.vcf".to_string();
1111
// let path ="/tmp/gnomad.exomes.v4.1.sites.chr21.vcf.bgz".to_string();
12-
// let path ="/tmp/gnomad.v4.1.sv.sites.vcf.gz".to_string();
12+
let path ="/tmp/gnomad.v4.1.sv.sites.vcf.gz".to_string();
1313
// let infos = Some(Vec::from(["AC".to_string(), "AF".to_string(), "AN".to_string(), "FS".to_string(), "AN_raw".to_string(), "variant_type".to_string(), "AS_culprit".to_string(), "only_het".to_string()]));
1414
// let infos = Some(Vec::from(["SVTYPE".to_string()]));
1515
// let infos = Some(Vec::from(["AF".to_string()]));

datafusion/vcf/src/storage.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,19 @@ pub async fn get_remote_vcf_header(file_path: String) -> Result<vcf::Header, Err
225225
Ok(header)
226226
}
227227

228+
/// Reads the VCF header for `file_path`, dispatching on its storage type.
///
/// `StorageType::LOCAL` paths go through `get_local_vcf_header`; every other
/// storage type falls through to `get_remote_vcf_header`.
///
/// # Errors
///
/// An error returned by either helper is propagated to the caller via `?`.
pub async fn get_header(file_path: String) -> Result<vcf::Header, Error> {
229+
// Clone here because `file_path` itself is moved into the header helper below.
let storage_type = get_storage_type(file_path.clone());
230+
let header = match storage_type {
231+
StorageType::LOCAL => {
232+
// NOTE(review): the literal `1` is presumably a thread/worker count —
// confirm against the signature of `get_local_vcf_header`.
get_local_vcf_header(file_path, 1).await?
233+
}
234+
// Catch-all arm: any storage type other than LOCAL is handled as remote.
_ => {
235+
get_remote_vcf_header(file_path).await?
236+
}
237+
};
238+
Ok(header)
239+
}
240+
228241
pub enum VcfRemoteReader {
229242
BGZF( vcf::r#async::io::Reader<AsyncReader<StreamReader<FuturesBytesStream, Bytes>>>),
230243
PLAIN( vcf::r#async::io::Reader<StreamReader<FuturesBytesStream, Bytes>>)

datafusion/vcf/src/table_provider.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,15 @@ use noodles::vcf::Header;
1717
use noodles::vcf::header::Infos;
1818
use noodles::vcf::header::record::value::map::info::{Number, Type};
1919
use crate::physical_exec::VcfExec;
20-
use crate::storage::{get_local_vcf_bgzf_reader, get_local_vcf_header, get_remote_vcf_bgzf_reader, get_remote_vcf_header, get_storage_type, StorageType};
20+
use crate::storage::{get_header, get_local_vcf_bgzf_reader, get_local_vcf_header, get_remote_vcf_bgzf_reader, get_remote_vcf_header, get_storage_type, StorageType};
2121

2222
async fn determine_schema_from_header(
2323
file_path: &str,
2424
info_fields: &Option<Vec<String>>,
2525
format_fields: &Option<Vec<String>>,
2626
) -> datafusion::common::Result<SchemaRef> {
2727

28-
let storage_type = get_storage_type(file_path.to_string());
29-
let header = get_remote_vcf_header(file_path.to_string()).await?;
28+
let header = get_header(file_path.to_string()).await?;
3029
let header_infos = header.infos();
3130

3231
let mut fields = vec![

0 commit comments

Comments
 (0)