@@ -93,7 +93,11 @@ def initialize(self):
9393 self .write_to = None
9494 self .img = None
9595
96+ # Collecting char buffer fragments
97+ self ._char_blocks = None
98+
9699 def StartElementHandler (self , name , attrs ):
100+ self .flush_chardata ()
97101 if DEBUG_PRINT :
98102 print 'Start element:\n \t ' , repr (name ), attrs
99103 if name == 'GIFTI' :
@@ -195,6 +199,7 @@ def StartElementHandler(self, name, attrs):
195199 self .write_to = 'Data'
196200
197201 def EndElementHandler (self , name ):
202+ self .flush_chardata ()
198203 if DEBUG_PRINT :
199204 print 'End element:\n \t ' , repr (name )
200205 if name == 'GIFTI' :
@@ -249,6 +254,30 @@ def EndElementHandler(self, name):
249254 self .write_to = None
250255
def CharacterDataHandler(self, data):
    """ Stash one chunk of character data until it can be collated

    The parser hands character data over in pieces whose size depends on
    its buffer_size, so a single large run of text may arrive over many
    calls to this handler.  Each piece is only appended to the pending list
    here; the pieces are joined and processed together when a start or end
    tag is reached.

    Parameters
    ----------
    data : str
        chunk of character data passed in by the parser
    """
    pending = self._char_blocks
    if pending is None:
        # First chunk since the collector was last reset
        pending = self._char_blocks = []
    pending.append(data)
268+
269+ def flush_chardata (self ):
270+ """ Collate and process collected character data
271+ """
272+ if self ._char_blocks is None :
273+ return
274+ # Just join the strings to get the data. Maybe there are some memory
275+ # optimizations we could do by passing the list of strings to the
276+ # read_data_block function.
277+ data = '' .join (self ._char_blocks )
278+ # Reset the char collector
279+ self ._char_blocks = None
280+ # Process data
252281 if self .write_to == 'Name' :
253282 data = data .strip ()
254283 self .nvpair .name = data
@@ -277,25 +306,40 @@ def CharacterDataHandler(self, data):
277306 elif self .write_to == 'Label' :
278307 self .label .label = data .strip ()
279308
@property
def pending_data(self):
    """ True if there is character data pending for processing

    Data counts as pending from the first collected chunk until the
    collector is reset to None.
    """
    return self._char_blocks is not None
313+
280314
281- def parse_gifti_file (fname , buffer_size = 35000000 ):
315+ def parse_gifti_file (fname , buffer_size = None ):
282316 """ Parse gifti file named `fname`, return image
283317
284318 Parameters
285319 ----------
286320 fname : str
287321 filename of gifti file
288- buffer_size: int, optional
289- size of read buffer.
322+ buffer_size: None or int, optional
323+ size of read buffer. None gives default of 35000000 unless on python <
324+ 2.6, in which case it is read only in the parser. In that case values
325+ other than None cause a ValueError on execution
290326
291327 Returns
292328 -------
293329 img : gifti image
294330 """
331+ if buffer_size is None :
332+ buffer_sz_val = 35000000
333+ else :
334+ buffer_sz_val = buffer_size
295335 datasource = open (fname ,'rb' )
296336 parser = ParserCreate ()
297337 parser .buffer_text = True
298- parser .buffer_size = buffer_size
338+ try :
339+ parser .buffer_size = buffer_sz_val
340+ except AttributeError :
341+ if not buffer_size is None :
342+ raise ValueError ('Cannot set buffer size for parser' )
299343 HANDLER_NAMES = ['StartElementHandler' ,
300344 'EndElementHandler' ,
301345 'CharacterDataHandler' ]
@@ -306,6 +350,8 @@ def parse_gifti_file(fname, buffer_size = 35000000):
306350 parser .ParseFile (datasource )
307351 except ExpatError :
308352 print 'An expat error occured while parsing the Gifti file.'
353+ # Reality check for pending data
354+ assert out .pending_data is False
309355 # update filename
310356 out .img .filename = fname
311357 return out .img
0 commit comments