2020)
2121from executorch .backends .xnnpack .test .tester import Quantize as XNNPackQuantize , Tester
2222from executorch .backends .xnnpack .test .tester .tester import ToEdgeTransformAndLower
23+
24+ from executorch .exir import ExecutorchProgramManager
25+ from executorch .exir ._serialize import _deserialize_pte_binary
2326from executorch .exir .passes .external_constants_pass import (
2427 delegate_external_constants_pass_unlifted ,
2528)
29+ from executorch .extension .flat_tensor .serialize .serialize import (
30+ _deserialize_to_flat_tensor ,
31+ )
2632
2733from torchao .quantization .granularity import PerGroup
2834from torchao .quantization .quant_api import Int8DynamicActivationIntxWeightConfig
@@ -87,7 +93,7 @@ def _test_linear(
8793 self ,
8894 partitioner : XnnpackPartitioner ,
8995 quantization_stage : Union [BaseStages .Quantize , BaseStages .Quantize_ ],
90- ):
96+ ) -> ExecutorchProgramManager :
9197 eager_model = self .ModuleLinear (
9298 in_size = 1 ,
9399 input_channels = 32 ,
@@ -106,7 +112,7 @@ def _test_linear(
106112 exec = tester .get_artifact ()
107113 program_buffer = exec .buffer
108114 self .assertEqual (len (exec ._tensor_data ), 1 )
109- data_buffer = bytes (exec ._tensor_data . pop ( "model" ) )
115+ data_buffer = bytes (exec ._tensor_data [ "model" ] )
110116 self .assertTrue (len (data_buffer ) > 200 )
111117 from executorch .extension .pybindings import portable_lib as runtime
112118
@@ -122,6 +128,8 @@ def _test_linear(
122128 # test_inputs
123129 # )
124130
131+ return exec
132+
125133 def test_quantize_ (self ):
126134 # Quantize with torchao quantize_ API.
127135 DynamicallyQuantizedPartitioner = XnnpackPartitioner (
@@ -132,9 +140,16 @@ def test_quantize_(self):
132140 weight_dtype = torch .int4 ,
133141 weight_granularity = PerGroup (32 ),
134142 )
135- self ._test_linear (
143+ exec = self ._test_linear (
136144 DynamicallyQuantizedPartitioner , BaseStages .Quantize_ (config = linear_config )
137145 )
146+ # PTE file has no named data.
147+ pte_file = _deserialize_pte_binary (exec .buffer )
148+ self .assertEqual (pte_file .named_data , None )
149+
150+ # PTD file contains quantized weight and scale.
151+ ptd_file = _deserialize_to_flat_tensor (bytes (exec ._tensor_data ["model" ]))
152+ self .assertEqual (len (ptd_file .named_data ), 2 )
138153
139154 def test_pt2e_quantize (self ):
140155 # Quantize with pt2e quantize.
@@ -156,6 +171,15 @@ def test_pt2e_quantize(self):
156171 partitioner = XnnpackPartitioner (
157172 config_precisions = precision , per_op_mode = per_op_mode
158173 )
159- self ._test_linear (
174+ exec = self ._test_linear (
160175 partitioner , XNNPackQuantize (quantization_config = quant_config )
161176 )
177+ # PTE file has no named data.
178+ pte_file = _deserialize_pte_binary (exec .buffer )
179+ self .assertEqual (pte_file .named_data , None )
180+
181+ # PTD file contains quantized weight, and potentially scale.
182+ ptd_file = _deserialize_to_flat_tensor (
183+ bytes (exec ._tensor_data ["model" ])
184+ )
185+ self .assertTrue (len (ptd_file .named_data ) >= 1 )
0 commit comments