Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions arrow-array/src/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,10 +443,10 @@ impl RecordBatch {
/// // Initially, the metadata is empty
/// assert!(batch.schema().metadata().get("key").is_none());
/// // Insert a key-value pair into the metadata
/// batch.schema_metadata_mut().insert("key".into(), "value".into());
/// batch.schema_metadata_mut().insert("key", "value");
/// assert_eq!(batch.schema().metadata().get("key"), Some(&String::from("value")));
/// ```
pub fn schema_metadata_mut(&mut self) -> &mut std::collections::HashMap<String, String> {
pub fn schema_metadata_mut(&mut self) -> &mut arrow_schema::Metadata {
let schema = Arc::make_mut(&mut self.schema);
&mut schema.metadata
}
Expand Down Expand Up @@ -1709,9 +1709,7 @@ mod tests {
batch.clone().with_schema(required_schema).unwrap_err();

// Can add metadata
let metadata = vec![("foo".to_string(), "bar".to_string())]
.into_iter()
.collect();
let metadata = arrow_schema::Metadata::from([("foo", "bar")]);
let metadata_schema = nullable_schema.as_ref().clone().with_metadata(metadata);
let batch = batch.with_schema(Arc::new(metadata_schema)).unwrap();

Expand Down
8 changes: 4 additions & 4 deletions arrow-avro/src/reader/async_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ mod tests {
use arrow_array::cast::AsArray;
use arrow_array::types::{Int32Type, Int64Type};
use arrow_array::*;
use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit};
use arrow_schema::{DataType, Field, Metadata, Schema, SchemaRef, TimeUnit};
use futures::{StreamExt, TryStreamExt};
use object_store::local::LocalFileSystem;
use object_store::path::Path;
Expand Down Expand Up @@ -1630,7 +1630,7 @@ mod tests {
let expected_schema = get_alltypes_schema()
.as_ref()
.clone()
.with_metadata(Default::default());
.with_metadata(Metadata::default());

// Build reader without providing reader schema - should use writer schema from file
let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024)
Expand All @@ -1657,7 +1657,7 @@ mod tests {
let schema = get_alltypes_schema()
.project(&[0, 1, 7])
.unwrap()
.with_metadata(Default::default());
.with_metadata(Metadata::default());
let reader_schema = AvroSchema::try_from(&schema).unwrap();
let expected_schema = schema.clone();

Expand Down Expand Up @@ -1690,7 +1690,7 @@ mod tests {
let expected_schema = get_nested_records_schema()
.as_ref()
.clone()
.with_metadata(Default::default());
.with_metadata(Metadata::default());

let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024)
.try_build()
Expand Down
40 changes: 10 additions & 30 deletions arrow-avro/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5482,34 +5482,14 @@ mod test {
#[cfg(not(feature = "avro_custom_types"))]
{
let schema = Arc::new(Schema::new(vec![
Field::new("duration_time_nanos", DataType::Int64, false).with_metadata(
[(
"logicalType".to_string(),
"arrow.duration-nanos".to_string(),
)]
.into(),
),
Field::new("duration_time_micros", DataType::Int64, false).with_metadata(
[(
"logicalType".to_string(),
"arrow.duration-micros".to_string(),
)]
.into(),
),
Field::new("duration_time_millis", DataType::Int64, false).with_metadata(
[(
"logicalType".to_string(),
"arrow.duration-millis".to_string(),
)]
.into(),
),
Field::new("duration_time_seconds", DataType::Int64, false).with_metadata(
[(
"logicalType".to_string(),
"arrow.duration-seconds".to_string(),
)]
.into(),
),
Field::new("duration_time_nanos", DataType::Int64, false)
.with_metadata([("logicalType", "arrow.duration-nanos")]),
Field::new("duration_time_micros", DataType::Int64, false)
.with_metadata([("logicalType", "arrow.duration-micros")]),
Field::new("duration_time_millis", DataType::Int64, false)
.with_metadata([("logicalType", "arrow.duration-millis")]),
Field::new("duration_time_seconds", DataType::Int64, false)
.with_metadata([("logicalType", "arrow.duration-seconds")]),
Comment on lines +5485 to +5492

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a very nice cleanup.

]));

let nanos =
Expand Down Expand Up @@ -8431,7 +8411,7 @@ mod test {
const UUID_EXT_KEY: &str = "ARROW:extension:name";
const UUID_LOGICAL_KEY: &str = "logicalType";

let uuid_md_top: Option<HashMap<String, String>> = batch
let uuid_md_top: Option<arrow_schema::Metadata> = batch
.schema()
.field_with_name("uuid_str")
.ok()
Expand All @@ -8449,7 +8429,7 @@ mod test {
}
});

let uuid_md_union: Option<HashMap<String, String>> = batch
let uuid_md_union: Option<arrow_schema::Metadata> = batch
.schema()
.field_with_name("union_uuid_or_fixed10")
.ok()
Expand Down
13 changes: 5 additions & 8 deletions arrow-avro/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#[cfg(feature = "canonical_extension_types")]
use arrow_schema::extension::ExtensionType;
use arrow_schema::{
ArrowError, DataType, Field as ArrowField, IntervalUnit, Schema as ArrowSchema, TimeUnit,
UnionMode,
ArrowError, DataType, Field as ArrowField, IntervalUnit, Metadata, Schema as ArrowSchema,
TimeUnit, UnionMode,
};
use serde::{Deserialize, Serialize};
use serde_json::{Map as JsonMap, Value, json};
Expand Down Expand Up @@ -1155,10 +1155,7 @@ fn is_internal_arrow_key(key: &str) -> bool {
/// skipping keys that are Avro-reserved, internal Arrow keys, or
/// nested under the `avro.schema.` namespace. Values that parse as
/// JSON are inserted as JSON; otherwise the raw string is preserved.
fn extend_with_passthrough_metadata(
target: &mut JsonMap<String, Value>,
metadata: &HashMap<String, String>,
) {
fn extend_with_passthrough_metadata(target: &mut JsonMap<String, Value>, metadata: &Metadata) {
for (meta_key, meta_val) in metadata {
if meta_key.starts_with("avro.") || is_internal_arrow_key(meta_key) {
continue;
Expand Down Expand Up @@ -1318,7 +1315,7 @@ fn union_branch_signature(branch: &Value) -> Result<String, ArrowError> {
fn datatype_to_avro(
dt: &DataType,
field_name: &str,
metadata: &HashMap<String, String>,
metadata: &Metadata,
name_gen: &mut NameGenerator,
null_order: Nullability,
strip: bool,
Expand Down Expand Up @@ -1915,7 +1912,7 @@ fn datatype_to_avro(
fn process_datatype(
dt: &DataType,
field_name: &str,
metadata: &HashMap<String, String>,
metadata: &Metadata,
name_gen: &mut NameGenerator,
null_order: Nullability,
is_nullable: bool,
Expand Down
4 changes: 2 additions & 2 deletions arrow-avro/src/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1883,8 +1883,8 @@ mod tests {
/// Checks that `actual_meta` contains all of `expected_meta`, and any additional
/// keys in `actual_meta` are from a permitted set.
fn assert_metadata_is_superset(
expected_meta: &HashMap<String, String>,
actual_meta: &HashMap<String, String>,
expected_meta: &arrow_schema::Metadata,
actual_meta: &arrow_schema::Metadata,
context: &str,
) {
let allowed_additions: HashSet<&str> =
Expand Down
4 changes: 2 additions & 2 deletions arrow-csv/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ impl Decoder {
fn parse(
rows: &StringRecords<'_>,
fields: &Fields,
metadata: Option<std::collections::HashMap<String, String>>,
metadata: Option<Metadata>,
projection: Option<&Vec<usize>>,
line_number: usize,
null_regex: &NullRegex,
Expand Down Expand Up @@ -1326,7 +1326,7 @@ mod tests {
assert_eq!(37, batch.num_rows());
assert_eq!(3, batch.num_columns());

assert_eq!(&metadata, batch.schema().metadata());
assert_eq!(batch.schema().metadata(), &metadata);
}

#[test]
Expand Down
16 changes: 4 additions & 12 deletions arrow-integration-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1341,18 +1341,10 @@ mod tests {
let nanos_tz = Some("Africa/Johannesburg".into());

let schema = Schema::new(vec![
Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata(
[("k".to_string(), "v".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools-with-metadata-vec", DataType::Boolean, true).with_metadata(
[("k2".to_string(), "v2".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools-with-metadata-map", DataType::Boolean, true)
.with_metadata([("k", "v")]),
Field::new("bools-with-metadata-vec", DataType::Boolean, true)
.with_metadata([("k2", "v2")]),
Field::new("bools", DataType::Boolean, true),
Field::new("int8s", DataType::Int8, true),
Field::new("int16s", DataType::Int16, true),
Expand Down
7 changes: 6 additions & 1 deletion arrow-integration-test/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@ use std::collections::HashMap;

/// Generate a JSON representation of the `Schema`.
pub fn schema_to_json(schema: &Schema) -> serde_json::Value {
let metadata: serde_json::Map<String, serde_json::Value> = schema
.metadata()
.iter()
.map(|(k, v)| (k.clone(), serde_json::Value::String(v.clone())))
.collect();
serde_json::json!({
"fields": schema.fields().iter().map(|f| field_to_json(f.as_ref())).collect::<Vec<_>>(),
"metadata": serde_json::to_value(schema.metadata()).unwrap()
"metadata": metadata
})
}

Expand Down
12 changes: 5 additions & 7 deletions arrow-ipc/src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,12 @@ impl<'a> IpcSchemaEncoder<'a> {
/// Push a key-value metadata into a FlatBufferBuilder and return [WIPOffset]
pub fn metadata_to_fb<'a>(
fbb: &mut FlatBufferBuilder<'a>,
metadata: &HashMap<String, String>,
metadata: &Metadata,
) -> WIPOffset<Vector<'a, ForwardsUOffset<KeyValue<'a>>>> {
let mut ordered_keys = metadata.keys().collect::<Vec<_>>();
ordered_keys.sort();
let custom_metadata = ordered_keys
.into_iter()
.map(|k| {
let v = metadata.get(k).unwrap();
// `Metadata` iterates in deterministic (sorted) key order
let custom_metadata = metadata
.iter()
.map(|(k, v)| {
let fb_key_name = fbb.create_string(k);
let fb_val_name = fbb.create_string(v);

Expand Down
4 changes: 2 additions & 2 deletions arrow-ipc/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ pub struct FileWriter<W> {
/// Keeps track of dictionaries that have been written
dictionary_tracker: DictionaryTracker,
/// User level customized metadata
custom_metadata: HashMap<String, String>,
custom_metadata: Metadata,

data_gen: IpcDataGenerator,

Expand Down Expand Up @@ -1146,7 +1146,7 @@ impl<W: Write> FileWriter<W> {
record_blocks: vec![],
finished: false,
dictionary_tracker,
custom_metadata: HashMap::new(),
custom_metadata: Default::default(),
data_gen,
compression_context: CompressionContext::default(),
})
Expand Down
15 changes: 8 additions & 7 deletions arrow-schema/src/datatype_display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
// under the License.

use crate::DataType;
use crate::Metadata;
use std::fmt;
use std::fmt::Display;
use std::{collections::HashMap, fmt};

impl Display for DataType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn format_metadata(metadata: &HashMap<String, String>) -> String {
fn format_metadata(metadata: &Metadata) -> String {
format!("{}", FormatMetadata(metadata))
}

Expand Down Expand Up @@ -183,26 +184,26 @@ impl Display for DataType {
}
}

/// Adapter to format a metadata HashMap consistently.
struct FormatMetadata<'a>(&'a HashMap<String, String>);
/// Adapter to format [`Metadata`] consistently.
struct FormatMetadata<'a>(&'a Metadata);

impl fmt::Display for FormatMetadata<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let metadata = self.0;
if metadata.is_empty() {
Ok(())
} else {
let mut entries: Vec<(&String, &String)> = metadata.iter().collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
// `Metadata` iterates in sorted key order
write!(f, ", metadata: ")?;
f.debug_map().entries(entries).finish()
f.debug_map().entries(metadata.iter()).finish()
}
}
}

#[cfg(test)]
mod tests {

use std::collections::HashMap;
use std::sync::Arc;

use crate::Field;
Expand Down
29 changes: 8 additions & 21 deletions arrow-schema/src/extension/canonical/bool8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,8 @@ mod tests {
#[test]
#[should_panic(expected = "Extension type name missing")]
fn missing_name() {
let field = Field::new("", DataType::Int8, false).with_metadata(
[(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]
.into_iter()
.collect(),
);
let field = Field::new("", DataType::Int8, false)
.with_metadata([(EXTENSION_TYPE_METADATA_KEY, "")]);
field.extension_type::<Bool8>();
}

Expand All @@ -119,28 +116,18 @@ mod tests {
#[test]
#[should_panic(expected = "Bool8 extension type expects an empty string as metadata")]
fn missing_metadata() {
let field = Field::new("", DataType::Int8, false).with_metadata(
[(EXTENSION_TYPE_NAME_KEY.to_owned(), Bool8::NAME.to_owned())]
.into_iter()
.collect(),
);
let field = Field::new("", DataType::Int8, false)
.with_metadata([(EXTENSION_TYPE_NAME_KEY, Bool8::NAME)]);
field.extension_type::<Bool8>();
}

#[test]
#[should_panic(expected = "Bool8 extension type expects an empty string as metadata")]
fn invalid_metadata() {
let field = Field::new("", DataType::Int8, false).with_metadata(
[
(EXTENSION_TYPE_NAME_KEY.to_owned(), Bool8::NAME.to_owned()),
(
EXTENSION_TYPE_METADATA_KEY.to_owned(),
"non-empty".to_owned(),
),
]
.into_iter()
.collect(),
);
let field = Field::new("", DataType::Int8, false).with_metadata([
(EXTENSION_TYPE_NAME_KEY, Bool8::NAME),
(EXTENSION_TYPE_METADATA_KEY, "non-empty"),
]);
field.extension_type::<Bool8>();
}
}
Loading
Loading