@@ -22,6 +22,7 @@ use std::collections::HashMap;
2222use std:: fmt:: Debug ;
2323use std:: marker:: PhantomData ;
2424use std:: sync:: Arc ;
25+ use std:: sync:: OnceLock ;
2526
2627use arrow_data:: ArrayData ;
2728use arrow_data:: ArrayDataBuilder ;
@@ -115,20 +116,24 @@ pub struct BinaryViewColumnGeneric<T: ViewType + ?Sized> {
115116 buffers : Arc < [ Buffer < u8 > ] > ,
116117 phantom : PhantomData < T > ,
117118 /// Total bytes length if we would concat them all
118- total_bytes_len : usize ,
119+ /// Initialized lazily when needed.
120+ total_bytes_len : OnceLock < usize > ,
119121 /// Total bytes in the buffer (exclude remaining capacity)
120- total_buffer_len : usize ,
122+ /// Initialized lazily when needed.
123+ total_buffer_len : OnceLock < usize > ,
121124}
122125
123126impl < T : ViewType + ?Sized > Clone for BinaryViewColumnGeneric < T > {
124127 fn clone ( & self ) -> Self {
128+ let total_bytes_len = self . total_bytes_len . get ( ) . copied ( ) ;
129+ let total_buffer_len = self . total_buffer_len . get ( ) . copied ( ) ;
125130 Self {
126131 views : self . views . clone ( ) ,
127132 buffers : self . buffers . clone ( ) ,
128133
129134 phantom : Default :: default ( ) ,
130- total_bytes_len : self . total_bytes_len ,
131- total_buffer_len : self . total_buffer_len ,
135+ total_bytes_len : Self :: init_cache ( total_bytes_len) ,
136+ total_buffer_len : Self :: init_cache ( total_buffer_len) ,
132137 }
133138 }
134139}
@@ -138,29 +143,41 @@ unsafe impl<T: ViewType + ?Sized> Send for BinaryViewColumnGeneric<T> {}
138143unsafe impl < T : ViewType + ?Sized > Sync for BinaryViewColumnGeneric < T > { }
139144
140145impl < T : ViewType + ?Sized > BinaryViewColumnGeneric < T > {
146+ fn init_cache ( value : Option < usize > ) -> OnceLock < usize > {
147+ let cache = OnceLock :: new ( ) ;
148+ if let Some ( v) = value {
149+ let _ = cache. set ( v) ;
150+ }
151+ cache
152+ }
153+
141154 pub fn new_unchecked (
142155 views : Buffer < View > ,
143156 buffers : Arc < [ Buffer < u8 > ] > ,
144157
145- total_bytes_len : usize ,
146- total_buffer_len : usize ,
158+ total_bytes_len : Option < usize > ,
159+ total_buffer_len : Option < usize > ,
147160 ) -> Self {
148161 #[ cfg( debug_assertions) ]
149162 {
150- let total = views. iter ( ) . map ( |v| v. length as usize ) . sum :: < usize > ( ) ;
151- assert_eq ! ( total, total_bytes_len) ;
163+ if let Some ( total_bytes_len) = total_bytes_len {
164+ let total = views. iter ( ) . map ( |v| v. length as usize ) . sum :: < usize > ( ) ;
165+ assert_eq ! ( total, total_bytes_len) ;
166+ }
152167
153- let total = buffers. iter ( ) . map ( |v| v. len ( ) ) . sum :: < usize > ( ) ;
154- assert_eq ! ( total, total_buffer_len) ;
168+ if let Some ( total_buffer_len) = total_buffer_len {
169+ let total = buffers. iter ( ) . map ( |v| v. len ( ) ) . sum :: < usize > ( ) ;
170+ assert_eq ! ( total, total_buffer_len) ;
171+ }
155172 }
156173
157174 Self {
158175 views,
159176 buffers,
160177
161178 phantom : Default :: default ( ) ,
162- total_bytes_len,
163- total_buffer_len,
179+ total_bytes_len : Self :: init_cache ( total_bytes_len ) ,
180+ total_buffer_len : Self :: init_cache ( total_buffer_len ) ,
164181 }
165182 }
166183
@@ -173,9 +190,8 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
173190 total_buffer_len : Option < usize > ,
174191 ) -> Self {
175192 let total_buffer_len =
176- total_buffer_len. unwrap_or ( buffers. iter ( ) . map ( |v| v. len ( ) ) . sum :: < usize > ( ) ) ;
177- let total_bytes_len = views. iter ( ) . map ( |v| v. length as usize ) . sum :: < usize > ( ) ;
178- Self :: new_unchecked ( views, buffers, total_bytes_len, total_buffer_len)
193+ total_buffer_len. unwrap_or_else ( || buffers. iter ( ) . map ( |v| v. len ( ) ) . sum :: < usize > ( ) ) ;
194+ Self :: new_unchecked ( views, buffers, None , Some ( total_buffer_len) )
179195 }
180196
181197 pub fn data_buffers ( & self ) -> & Arc < [ Buffer < u8 > ] > {
@@ -212,7 +228,7 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
212228 /// Creates an empty [`BinaryViewColumnGeneric`], i.e. whose `.len` is zero.
213229 #[ inline]
214230 pub fn new_empty ( ) -> Self {
215- Self :: new_unchecked ( Buffer :: new ( ) , Arc :: from ( [ ] ) , 0 , 0 )
231+ Self :: new_unchecked ( Buffer :: new ( ) , Arc :: from ( [ ] ) , Some ( 0 ) , Some ( 0 ) )
216232 }
217233
218234 /// Returns the element at index `i`
@@ -293,11 +309,13 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
293309
294310 /// Get the total length of bytes that it would take to concatenate all binary/str values in this array.
295311 pub fn total_bytes_len ( & self ) -> usize {
296- self . total_bytes_len
312+ * self
313+ . total_bytes_len
314+ . get_or_init ( || self . views . iter ( ) . map ( |v| v. length as usize ) . sum :: < usize > ( ) )
297315 }
298316
299317 pub fn memory_size ( & self ) -> usize {
300- self . total_buffer_len + self . len ( ) * 16
318+ self . total_buffer_len ( ) + self . len ( ) * 16
301319 }
302320
303321 fn total_unshared_buffer_len ( & self ) -> usize {
@@ -317,7 +335,9 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
317335
318336 /// Get the length of bytes that are stored in the variadic buffers.
319337 pub fn total_buffer_len ( & self ) -> usize {
320- self . total_buffer_len
338+ * self
339+ . total_buffer_len
340+ . get_or_init ( || self . buffers . iter ( ) . map ( |v| v. len ( ) ) . sum :: < usize > ( ) )
321341 }
322342
323343 #[ inline( always) ]
@@ -397,15 +417,15 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
397417 unsafe fn slice_unchecked ( & mut self , offset : usize , length : usize ) {
398418 debug_assert ! ( offset + length <= self . len( ) ) ;
399419 self . views . slice_unchecked ( offset, length) ;
400- self . total_bytes_len = self . views . iter ( ) . map ( |v| v . length as usize ) . sum :: < usize > ( ) ;
420+ self . total_bytes_len = OnceLock :: new ( ) ;
401421 }
402422
403423 impl_sliced ! ( ) ;
404424
405425 pub fn maybe_gc ( self ) -> Self {
406426 const GC_MINIMUM_SAVINGS : usize = 16 * 1024 ; // At least 16 KiB.
407427
408- if self . total_buffer_len <= GC_MINIMUM_SAVINGS {
428+ if self . total_buffer_len ( ) <= GC_MINIMUM_SAVINGS {
409429 return self ;
410430 }
411431
@@ -435,6 +455,8 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
435455 }
436456
437457 pub fn make_mut ( self ) -> BinaryViewColumnBuilder < T > {
458+ let total_bytes_len = self . total_bytes_len ( ) ;
459+ let total_buffer_len = self . total_buffer_len ( ) ;
438460 let views = self . views . make_mut ( ) ;
439461 let completed_buffers = self . buffers . to_vec ( ) ;
440462
@@ -444,8 +466,8 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
444466 in_progress_buffer : vec ! [ ] ,
445467
446468 phantom : Default :: default ( ) ,
447- total_bytes_len : self . total_bytes_len ,
448- total_buffer_len : self . total_buffer_len ,
469+ total_bytes_len,
470+ total_buffer_len,
449471 }
450472 }
451473
@@ -454,26 +476,29 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
454476 use Either :: * ;
455477 let is_unique = ( Arc :: strong_count ( & self . buffers ) + Arc :: weak_count ( & self . buffers ) ) == 1 ;
456478
479+ let total_bytes_len = self . total_bytes_len ( ) ;
480+ let total_buffer_len = self . total_buffer_len ( ) ;
481+
457482 match ( self . views . into_mut ( ) , is_unique) {
458483 ( Right ( views) , true ) => Right ( BinaryViewColumnBuilder {
459484 views,
460485 completed_buffers : self . buffers . to_vec ( ) ,
461486 in_progress_buffer : vec ! [ ] ,
462487 phantom : Default :: default ( ) ,
463- total_bytes_len : self . total_bytes_len ,
464- total_buffer_len : self . total_buffer_len ,
488+ total_bytes_len,
489+ total_buffer_len,
465490 } ) ,
466491 ( Right ( views) , false ) => Left ( Self :: new_unchecked (
467492 views. into ( ) ,
468493 self . buffers ,
469- self . total_bytes_len ,
470- self . total_buffer_len ,
494+ Some ( total_bytes_len) ,
495+ Some ( total_buffer_len) ,
471496 ) ) ,
472497 ( Left ( views) , _) => Left ( Self :: new_unchecked (
473498 views,
474499 self . buffers ,
475- self . total_bytes_len ,
476- self . total_buffer_len ,
500+ Some ( total_bytes_len) ,
501+ Some ( total_buffer_len) ,
477502 ) ) ,
478503 }
479504 }
@@ -538,8 +563,8 @@ impl BinaryViewColumn {
538563 Utf8ViewColumn :: new_unchecked (
539564 self . views . clone ( ) ,
540565 self . buffers . clone ( ) ,
541- self . total_bytes_len ,
542- self . total_buffer_len ,
566+ Some ( self . total_bytes_len ( ) ) ,
567+ Some ( self . total_buffer_len ( ) ) ,
543568 )
544569 }
545570}
@@ -549,8 +574,8 @@ impl Utf8ViewColumn {
549574 BinaryViewColumn :: new_unchecked (
550575 self . views . clone ( ) ,
551576 self . buffers . clone ( ) ,
552- self . total_bytes_len ,
553- self . total_buffer_len ,
577+ Some ( self . total_bytes_len ( ) ) ,
578+ Some ( self . total_buffer_len ( ) ) ,
554579 )
555580 }
556581
0 commit comments