From 1674602c3ffeaea82e2b8f3fd5973cca6c986817 Mon Sep 17 00:00:00 2001 From: Yacine Petitprez Date: Sat, 24 Jun 2017 18:04:45 +0800 Subject: [PATCH 1/2] Implementation in progress of progressive hashing for accelerating style testing --- lib/rubyXL/objects/ooxml_object.rb | 96 ++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 5 deletions(-) diff --git a/lib/rubyXL/objects/ooxml_object.rb b/lib/rubyXL/objects/ooxml_object.rb index 9a9d3e9ee..e22e72365 100644 --- a/lib/rubyXL/objects/ooxml_object.rb +++ b/lib/rubyXL/objects/ooxml_object.rb @@ -18,6 +18,70 @@ def obtain_class_variable(var_name, default = {}) self.class_variable_set(var_name, default) end + # This is like attr_accessor method, creating the getter and setter + # However, in this case the setter update the hash of the object containing it. + # It also trigger the parent in case of change + def hash_attr_accessor symbol + class_eval <<-RUBY, __FILE__, __LINE__ + def #{symbol} + @#{symbol} + end + + def #{symbol}= value + str_hash = "#{symbol}".hash + + before_hash = @hash + @hash ^= @#{symbol}.hash ^ str_hash # Outmerge the hash component + @#{symbol} = value + @hash ^= @#{symbol}.hash ^ str_hash # Merge again the new hash component + + if @parent + @parent.force_update_hash @parent_attribute, before_hash, @hash + end + + value + end + RUBY + end + + # This is like attr_accessor method, creating the getter and setter + # However, in this case the setter update the hash of the object containing it. 
+ # It also register the parent link on setup + def hash_attr_accessor_child_node symbol + class_eval <<-RUBY, __FILE__, __LINE__ + def #{symbol} + @#{symbol} + end + + def #{symbol}= value + str_hash = "#{symbol}".hash + + old_value = @#{symbol} + + if old_value && old_value.is_a?(OOXMLObjectInstanceMethods) + old_value.parent = nil + old_value.parent_attribute = nil + end + + if value && value.is_a?(OOXMLObjectInstanceMethods) + value.parent = self + value.parent_attribute = "#{symbol}" + end + + before_hash = @hash + @hash ^= @#{symbol}.hash ^ str_hash # Outmerge the hash component + @#{symbol} = value + @hash ^= @#{symbol}.hash ^ str_hash # Merge again the new hash component + + if @parent + @parent.force_update_hash @parent_attribute, before_hash, @hash + end + + value + end + RUBY + end + # Defines an attribute of OOXML object. # === Parameters # * +attribute_name+ - Name of the element attribute as seen in the source XML. Can be either "String" or :Symbol @@ -50,7 +114,7 @@ def define_attribute(attr_name, attr_type, extra_params = {}) attr_hash = extra_params.merge({ :attr_type => attr_type }) attr_hash[:accessor] ||= accessorize(attr_name) attrs[attr_name.to_s] = attr_hash - self.send(:attr_accessor, attr_hash[:accessor]) unless attr_hash[:computed] + self.send(:hash_attr_accessor, attr_hash[:accessor]) unless attr_hash[:computed] end # Defines a child node of OOXML object. @@ -86,7 +150,7 @@ def define_child_node(klass, extra_params = {}) define_count_attribute if extra_params[:collection] == :with_count - self.send(:attr_accessor, accessor) + self.send(:hash_attr_accessor_child_node, accessor) end def define_count_attribute @@ -208,6 +272,12 @@ def process_attribute(obj, raw_value, params) module OOXMLObjectInstanceMethods attr_accessor :local_namespaces + # Accessor used to get back to the parent from this node, + # and notify in case of hashing update + # Parent attribute show on which attribute the node is connected. 
+ # In case of container parent (array like), parent_attribute remain "nil" + attr_accessor :parent, :parent_attribute + def self.included(klass) klass.extend RubyXL::OOXMLObjectClassMethods end @@ -225,6 +295,7 @@ def initialize(params = {}) } init_child_nodes(params) + recompute_hash end def init_child_nodes(params) @@ -246,10 +317,25 @@ def preserve_whitespace end private :preserve_whitespace + # Manually update a hashing part. + def force_update_hash attribute, before_hash, after_hash + @hash ^= before_hash ^ attribute.hash # Outmerge the hash component + @hash ^= after_hash ^ attribute.hash # Merge back the hash component + end + + # Recompute the hash with segment given by the childrens + def recompute_hash + @hash = 0 + @hash = obtain_class_variable(:@@ooxml_attributes).inject(@hash) { |h, (k, v)| h ^ k.hash ^ self.send(v[:accessor]).hash } + @hash = obtain_class_variable(:@@ooxml_child_nodes).inject(@hash) { |h, (k, v)| h ^ k.hash ^ self.send(v[:accessor]).hash } + end + + def hash + @hash + end + def ==(other) - other.is_a?(self.class) && - obtain_class_variable(:@@ooxml_attributes).all? { |k, v| self.send(v[:accessor]) == other.send(v[:accessor]) } && - obtain_class_variable(:@@ooxml_child_nodes).all? { |k, v| self.send(v[:accessor]) == other.send(v[:accessor]) } + self.hash == other.hash && other.is_a?(self.class) end # Recursively write the OOXML object and all its children out as Nokogiri::XML. 
Immediately before the actual From 19fbb5fc6af770049a06ba02c6a71adb5855fb89 Mon Sep 17 00:00:00 2001 From: Yacine Petitprez Date: Sat, 24 Jun 2017 19:30:02 +0800 Subject: [PATCH 2/2] Using dirty strategy to update the parent hash; performance is still OK. No more bugs on my production Excel test set, and ~25x generation speed improvement :P --- lib/rubyXL/objects/ooxml_object.rb | 53 ++++++++++++------------------ 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/lib/rubyXL/objects/ooxml_object.rb b/lib/rubyXL/objects/ooxml_object.rb index e22e72365..2d47cb257 100644 --- a/lib/rubyXL/objects/ooxml_object.rb +++ b/lib/rubyXL/objects/ooxml_object.rb @@ -31,13 +31,11 @@ def #{symbol}= value str_hash = "#{symbol}".hash before_hash = @hash - @hash ^= @#{symbol}.hash ^ str_hash # Outmerge the hash component + @hash ^= @#{symbol}.hash * str_hash # Outmerge the hash component @#{symbol} = value - @hash ^= @#{symbol}.hash ^ str_hash # Merge again the new hash component + @hash ^= value.hash * str_hash # Merge again the new hash component - if @parent - @parent.force_update_hash @parent_attribute, before_hash, @hash - end + @parent.hash_dirty! 
if @parent value end @@ -53,31 +51,22 @@ def #{symbol} @#{symbol} end - def #{symbol}= value + def #{symbol}= new_value str_hash = "#{symbol}".hash old_value = @#{symbol} - if old_value && old_value.is_a?(OOXMLObjectInstanceMethods) - old_value.parent = nil - old_value.parent_attribute = nil - end - - if value && value.is_a?(OOXMLObjectInstanceMethods) - value.parent = self - value.parent_attribute = "#{symbol}" - end + old_value.parent = nil if old_value.is_a?(OOXMLObjectInstanceMethods) + new_value.parent = self if new_value.is_a?(OOXMLObjectInstanceMethods) before_hash = @hash - @hash ^= @#{symbol}.hash ^ str_hash # Outmerge the hash component - @#{symbol} = value - @hash ^= @#{symbol}.hash ^ str_hash # Merge again the new hash component + @hash ^= old_value.hash * str_hash # Outmerge the hash component + @#{symbol} = new_value + @hash ^= new_value.hash * str_hash # Merge again the new hash component - if @parent - @parent.force_update_hash @parent_attribute, before_hash, @hash - end + @parent.hash_dirty! if @parent - value + new_value end RUBY end @@ -273,10 +262,8 @@ module OOXMLObjectInstanceMethods attr_accessor :local_namespaces # Accessor used to get back to the parent from this node, - # and notify in case of hashing update - # Parent attribute show on which attribute the node is connected. - # In case of container parent (array like), parent_attribute remain "nil" - attr_accessor :parent, :parent_attribute + # and mark the parent dirty in case of hash update. + attr_accessor :parent def self.included(klass) klass.extend RubyXL::OOXMLObjectClassMethods @@ -318,24 +305,26 @@ def preserve_whitespace private :preserve_whitespace # Manually update a hashing part. - def force_update_hash attribute, before_hash, after_hash - @hash ^= before_hash ^ attribute.hash # Outmerge the hash component - @hash ^= after_hash ^ attribute.hash # Merge back the hash component + def hash_dirty! 
+ @need_hash_recompute = true end # Recompute the hash with segment given by the childrens def recompute_hash @hash = 0 - @hash = obtain_class_variable(:@@ooxml_attributes).inject(@hash) { |h, (k, v)| h ^ k.hash ^ self.send(v[:accessor]).hash } - @hash = obtain_class_variable(:@@ooxml_child_nodes).inject(@hash) { |h, (k, v)| h ^ k.hash ^ self.send(v[:accessor]).hash } + @hash = obtain_class_variable(:@@ooxml_attributes).inject(@hash) { |h, (k, v)| h ^ ( v[:accessor].hash * self.send(v[:accessor]).hash ) } + @hash = obtain_class_variable(:@@ooxml_child_nodes).inject(@hash) { |h, (k, v)| h ^ ( v[:accessor].hash * self.send(v[:accessor]).hash ) } + @need_hash_recompute = false end def hash + recompute_hash if @need_hash_recompute @hash end def ==(other) - self.hash == other.hash && other.is_a?(self.class) + other.is_a?(self.class) && + self.hash == other.hash end # Recursively write the OOXML object and all its children out as Nokogiri::XML. Immediately before the actual