1717
1818__version__ = "1.0.0"
1919
def _generate_hash_key(row):
    """
    Derive a deterministic hash key from the contents of a row.

    The row is serialized to JSON with its keys sorted, so rows holding the
    same data produce the same key regardless of key insertion order.

    Args:
        row: Row data dictionary (must be serializable by json.dumps)

    Returns:
        Hexadecimal xxh64 digest of the serialized row, as a string
    """
    serialized = json.dumps(row, sort_keys=True)
    digest = xxhash.xxh64(serialized.encode("utf8"))
    return digest.hexdigest()
31+
2032def load_csv (fp , key = None , dialect = None ):
2133 """
2234 Load a CSV file into a dictionary keyed by the given column or hash.
@@ -45,7 +57,7 @@ def load_csv(fp, key=None, dialect=None):
4557 if key :
4658 keyfn = lambda r : r [key ]
4759 else :
48- keyfn = lambda r : xxhash . xxh64 ( json . dumps ( r , sort_keys = True ). encode ( "utf8" )). hexdigest ()
60+ keyfn = _generate_hash_key
4961 return {keyfn (r ): r for r in rows }
5062
5163def load_json (fp , key = None ):
@@ -79,7 +91,7 @@ def load_json(fp, key=None):
7991 if key :
8092 keyfn = lambda r : r .get (key , str (id (r )))
8193 else :
82- keyfn = lambda r : xxhash . xxh64 ( json . dumps ( r , sort_keys = True ). encode ( "utf8" )). hexdigest ()
94+ keyfn = _generate_hash_key
8395
8496 return {keyfn (r ): _simplify_json_row (r , common_keys ) for r in raw_list }
8597
@@ -108,6 +120,31 @@ def _simplify_json_row(r, common_keys):
108120
109121 return result
110122
123+ def _determine_columns_to_compare (prev_columns , curr_columns , fields = None , ignorefields = None ):
124+ """
125+ Determine which columns to compare based on fields and ignorefields.
126+
127+ Args:
128+ prev_columns: Set of column names from previous data
129+ curr_columns: Set of column names from current data
130+ fields: List of specific fields to compare (optional)
131+ ignorefields: List of fields to ignore in comparison (optional)
132+
133+ Returns:
134+ Tuple of (compare_columns, ignore_columns)
135+ """
136+ if fields :
137+ compare_columns = set (fields )
138+ elif ignorefields :
139+ compare_columns = prev_columns | curr_columns
140+ compare_columns -= set (ignorefields )
141+ else :
142+ compare_columns = prev_columns | curr_columns
143+
144+ ignore_columns = (prev_columns | curr_columns ) - compare_columns
145+
146+ return compare_columns , ignore_columns
147+
111148def compare (previous , current , show_unchanged = False , fields = None , ignorefields = None ):
112149 """
113150 Compare two dictionaries of rows and return a diff summary.
@@ -143,18 +180,12 @@ def compare(previous, current, show_unchanged=False, fields=None, ignorefields=N
143180 current_columns = set (next (iter (current .values ())).keys ())
144181
145182 # Apply fields/ignorefields filtering
146- if fields :
147- compare_columns = set (fields )
148- elif ignorefields :
149- compare_columns = previous_columns | current_columns
150- compare_columns -= set (ignorefields )
151- else :
152- compare_columns = previous_columns | current_columns
183+ compare_columns , ignore_columns = _determine_columns_to_compare (previous_columns , current_columns , fields , ignorefields )
153184
154185 # Adjust columns_added/removed based on compare_columns
155186 result ["columns_added" ] = [c for c in current_columns if c not in previous_columns and c in compare_columns ]
156187 result ["columns_removed" ] = [c for c in previous_columns if c not in current_columns and c in compare_columns ]
157- ignore_columns = (previous_columns | current_columns ) - compare_columns
188+ # ignore_columns is now returned by _determine_columns_to_compare above.
158189
159190 # Have any rows been removed or added?
160191 removed = [id for id in previous if id not in current ]
@@ -249,12 +280,7 @@ def streaming_compare_csv(prev_path, curr_path, key, compare_columns=None, ignor
249280 curr_columns = set (reader2 .fieldnames )
250281
251282 # Determine columns to compare
252- if compare_columns :
253- compare_columns = set (compare_columns )
254- elif ignorefields :
255- compare_columns = (prev_columns | curr_columns ) - set (ignorefields )
256- else :
257- compare_columns = prev_columns | curr_columns
283+ compare_columns , _ = _determine_columns_to_compare (prev_columns , curr_columns , compare_columns , ignorefields )
258284
259285 result ["columns_added" ] = [c for c in curr_columns if c not in prev_columns and c in compare_columns ]
260286 result ["columns_removed" ] = [c for c in prev_columns if c not in curr_columns and c in compare_columns ]
@@ -298,15 +324,13 @@ def streaming_compare_csv(prev_path, curr_path, key, compare_columns=None, ignor
298324
299325 return result
300326
301- def human_text (result , key = None , current = None , extras = None ):
327+ def human_text (result , key = None ):
302328 """
303329 Render the diff result as a human-readable string.
304330
305331 Args:
306332 result: Diff result dictionary
307333 key: Name of the key column (optional)
308- current: Current data dictionary (optional)
309- extras: Extra fields to display (optional)
310334
311335 Returns:
312336 Formatted string with diff information
@@ -345,10 +369,6 @@ def human_text(result, key=None, current=None, extras=None):
345369 block .append (f" { key } : { details ['key' ]} " )
346370 for field , (prev_value , current_value ) in details ["changes" ].items ():
347371 block .append (f' { field } : "{ prev_value } " => "{ current_value } "' )
348- if extras and current :
349- current_item = current .get (details ["key" ])
350- if current_item :
351- block .append (human_extras (current_item , extras ))
352372 block .append ("" )
353373 change_blocks .append ("\n " .join (block ))
354374 if details .get ("unchanged" ):
@@ -369,8 +389,6 @@ def human_text(result, key=None, current=None, extras=None):
369389 rows = []
370390 for row in result ["added" ]:
371391 to_append = human_row (row , prefix = " " )
372- if extras :
373- to_append += "\n " + human_extras (row , extras )
374392 rows .append (to_append )
375393 summary .append ("\n \n " .join (rows ))
376394 summary .append ("" )
@@ -384,8 +402,6 @@ def human_text(result, key=None, current=None, extras=None):
384402 rows = []
385403 for row in result ["removed" ]:
386404 to_append = human_row (row , prefix = " " )
387- if extras :
388- to_append += "\n " + human_extras (row , extras )
389405 rows .append (to_append )
390406 summary .append ("\n \n " .join (rows ))
391407 summary .append ("" )
@@ -404,22 +420,3 @@ def human_row(row, prefix=""):
404420 Formatted string representation of the row
405421 """
406422 return "\n " .join (f"{ prefix } { key } : { value } " for key , value in row .items ())
407-
408- def human_extras (row , extras ):
409- """
410- Render extra fields for a row.
411-
412- Args:
413- row: Row data dictionary
414- extras: List of (key, format) tuples
415-
416- Returns:
417- Formatted string with extra information
418- """
419- bits = [" extras:" ]
420- for key , fmt in extras :
421- try :
422- bits .append (f" { key } : { fmt .format (** row )} " )
423- except (KeyError , ValueError ) as e :
424- bits .append (f" { key } : <error formatting: { str (e )} >" )
425- return "\n " .join (bits )
0 commit comments