kahrendt · kahrendt · Dec 21, 2024 · Dec 21, 2024
diff --git a/microwakeword/utils.py b/microwakeword/utils.py
@@ -327,6 +327,11 @@ def representative_dataset_gen():
     converter = tf.lite.TFLiteConverter.from_saved_model(path_to_model)
     converter.optimizations = {tf.lite.Optimize.DEFAULT}
 
+    # Without this flag, the Streaming layer `state` variables are left as float32,
+    # resulting in Quantize and Dequantize operations before and after every `ReadVariable`
+    # and `AssignVariable` operation.
+    converter._experimental_variable_quantization = True
+
     if quantize:
         converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8}
         converter.inference_input_type = tf.int8