Adapt the change of brevitas custom op name (#1772)

2026-01-09 13:57:54 -05:00 · 2023-08-17 14:24:43 -07:00
parent a4c354ce54
commit 8738571d1e
5 changed files with 38 additions and 66 deletions
--- a/apps/language_models/langchain/h2oai_pipeline.py
+++ b/apps/language_models/langchain/h2oai_pipeline.py
@@ -29,14 +29,8 @@ from brevitas_examples.llm.llm_quant.quantize import quantize_model
 from brevitas_examples.llm.llm_quant.run_utils import get_model_impl


-def brevitas〇matmul_rhs_group_quant〡shape(
-    lhs: List[int],
-    rhs: List[int],
-    rhs_scale: List[int],
-    rhs_zero_point: List[int],
-    rhs_bit_width: int,
-    rhs_group_size: int,
-) -> List[int]:
+# fmt: off
+def quant〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rhs_scale: List[int], rhs_zero_point: List[int], rhs_bit_width: int, rhs_group_size: int) -> List[int]:
    if len(lhs) == 3 and len(rhs) == 2:
        return [lhs[0], lhs[1], rhs[0]]
    elif len(lhs) == 2 and len(rhs) == 2:
@@ -45,30 +39,21 @@ def brevitas〇matmul_rhs_group_quant〡shape(
        raise ValueError("Input shapes not supported.")


-def brevitas〇matmul_rhs_group_quant〡dtype(
-    lhs_rank_dtype: Tuple[int, int],
-    rhs_rank_dtype: Tuple[int, int],
-    rhs_scale_rank_dtype: Tuple[int, int],
-    rhs_zero_point_rank_dtype: Tuple[int, int],
-    rhs_bit_width: int,
-    rhs_group_size: int,
-) -> int:
+def quant〇matmul_rhs_group_quant〡dtype(lhs_rank_dtype: Tuple[int, int], rhs_rank_dtype: Tuple[int, int], rhs_scale_rank_dtype: Tuple[int, int], rhs_zero_point_rank_dtype: Tuple[int, int], rhs_bit_width: int, rhs_group_size: int) -> int:
    # output dtype is the dtype of the lhs float input
    lhs_rank, lhs_dtype = lhs_rank_dtype
    return lhs_dtype


-def brevitas〇matmul_rhs_group_quant〡has_value_semantics(
-    lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size
-) -> None:
+def quant〇matmul_rhs_group_quant〡has_value_semantics(lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size) -> None:
    return


 brevitas_matmul_rhs_group_quant_library = [
-    brevitas〇matmul_rhs_group_quant〡shape,
-    brevitas〇matmul_rhs_group_quant〡dtype,
-    brevitas〇matmul_rhs_group_quant〡has_value_semantics,
-]
+    quant〇matmul_rhs_group_quant〡shape,
+    quant〇matmul_rhs_group_quant〡dtype,
+    quant〇matmul_rhs_group_quant〡has_value_semantics]
+# fmt: on

 global_device = "cuda"
 global_precision = "fp16"
@@ -244,7 +229,7 @@ class H2OGPTSHARKModel(torch.nn.Module):
                ts_graph,
                [*h2ogptCompileInput],
                output_type=torch_mlir.OutputType.TORCH,
-                backend_legal_ops=["brevitas.matmul_rhs_group_quant"],
+                backend_legal_ops=["quant.matmul_rhs_group_quant"],
                extra_library=brevitas_matmul_rhs_group_quant_library,
                use_tracing=False,
                verbose=False,
--- a/apps/language_models/scripts/vicuna.py
+++ b/apps/language_models/scripts/vicuna.py
@@ -131,7 +131,7 @@ parser.add_argument(
 )

 # fmt: off
-def brevitas〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rhs_scale: List[int], rhs_zero_point: List[int], rhs_bit_width: int, rhs_group_size: int) -> List[int]:
+def quant〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rhs_scale: List[int], rhs_zero_point: List[int], rhs_bit_width: int, rhs_group_size: int) -> List[int]:
    if len(lhs) == 3 and len(rhs) == 2:
        return [lhs[0], lhs[1], rhs[0]]
    elif len(lhs) == 2 and len(rhs) == 2:
@@ -140,20 +140,20 @@ def brevitas〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rh
        raise ValueError("Input shapes not supported.")


-def brevitas〇matmul_rhs_group_quant〡dtype(lhs_rank_dtype: Tuple[int, int], rhs_rank_dtype: Tuple[int, int], rhs_scale_rank_dtype: Tuple[int, int], rhs_zero_point_rank_dtype: Tuple[int, int], rhs_bit_width: int, rhs_group_size: int) -> int:
+def quant〇matmul_rhs_group_quant〡dtype(lhs_rank_dtype: Tuple[int, int], rhs_rank_dtype: Tuple[int, int], rhs_scale_rank_dtype: Tuple[int, int], rhs_zero_point_rank_dtype: Tuple[int, int], rhs_bit_width: int, rhs_group_size: int) -> int:
    # output dtype is the dtype of the lhs float input
    lhs_rank, lhs_dtype = lhs_rank_dtype
    return lhs_dtype


-def brevitas〇matmul_rhs_group_quant〡has_value_semantics(lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size) -> None:
+def quant〇matmul_rhs_group_quant〡has_value_semantics(lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size) -> None:
    return


 brevitas_matmul_rhs_group_quant_library = [
-    brevitas〇matmul_rhs_group_quant〡shape,
-    brevitas〇matmul_rhs_group_quant〡dtype,
-    brevitas〇matmul_rhs_group_quant〡has_value_semantics]
+    quant〇matmul_rhs_group_quant〡shape,
+    quant〇matmul_rhs_group_quant〡dtype,
+    quant〇matmul_rhs_group_quant〡has_value_semantics]
 # fmt: on


@@ -838,7 +838,7 @@ class ShardedVicuna(VicunaBase):
                            inputs0[2],
                        ),
                        output_type="torch",
-                        backend_legal_ops=["brevitas.matmul_rhs_group_quant"],
+                        backend_legal_ops=["quant.matmul_rhs_group_quant"],
                        extra_library=brevitas_matmul_rhs_group_quant_library,
                        use_tracing=False,
                        verbose=False,
@@ -882,7 +882,7 @@ class ShardedVicuna(VicunaBase):
                            pkv1_placeholder,
                        ),
                        output_type="torch",
-                        backend_legal_ops=["brevitas.matmul_rhs_group_quant"],
+                        backend_legal_ops=["quant.matmul_rhs_group_quant"],
                        extra_library=brevitas_matmul_rhs_group_quant_library,
                        use_tracing=False,
                        verbose=False,
@@ -1469,7 +1469,7 @@ class UnshardedVicuna(VicunaBase):
                            [*firstVicunaCompileInput],
                            output_type=torch_mlir.OutputType.TORCH,
                            backend_legal_ops=[
-                                "brevitas.matmul_rhs_group_quant"
+                                "quant.matmul_rhs_group_quant"
                            ],
                            extra_library=brevitas_matmul_rhs_group_quant_library,
                            use_tracing=False,
@@ -1556,7 +1556,7 @@ class UnshardedVicuna(VicunaBase):
                            [*secondVicunaCompileInput],
                            output_type=torch_mlir.OutputType.TORCH,
                            backend_legal_ops=[
-                                "brevitas.matmul_rhs_group_quant"
+                                "quant.matmul_rhs_group_quant"
                            ],
                            extra_library=brevitas_matmul_rhs_group_quant_library,
                            use_tracing=False,
--- a/apps/language_models/src/pipelines/minigpt4_pipeline.py
+++ b/apps/language_models/src/pipelines/minigpt4_pipeline.py
@@ -136,7 +136,8 @@ from brevitas_examples.llm.llm_quant.quantize import quantize_model
 from brevitas_examples.llm.llm_quant.run_utils import get_model_impl


-def brevitas〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rhs_scale: List[int], rhs_zero_point: List[int], rhs_bit_width: int, rhs_group_size: int) -> List[int]:
+# fmt: off
+def quant〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rhs_scale: List[int], rhs_zero_point: List[int], rhs_bit_width: int, rhs_group_size: int) -> List[int]:
    if len(lhs) == 3 and len(rhs) == 2:
        return [lhs[0], lhs[1], rhs[0]]
    elif len(lhs) == 2 and len(rhs) == 2:
@@ -145,20 +146,21 @@ def brevitas〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rh
        raise ValueError("Input shapes not supported.")


-def brevitas〇matmul_rhs_group_quant〡dtype(lhs_rank_dtype: Tuple[int, int], rhs_rank_dtype: Tuple[int, int], rhs_scale_rank_dtype: Tuple[int, int], rhs_zero_point_rank_dtype: Tuple[int, int], rhs_bit_width: int, rhs_group_size: int) -> int:
+def quant〇matmul_rhs_group_quant〡dtype(lhs_rank_dtype: Tuple[int, int], rhs_rank_dtype: Tuple[int, int], rhs_scale_rank_dtype: Tuple[int, int], rhs_zero_point_rank_dtype: Tuple[int, int], rhs_bit_width: int, rhs_group_size: int) -> int:
    # output dtype is the dtype of the lhs float input
    lhs_rank, lhs_dtype = lhs_rank_dtype
    return lhs_dtype


-def brevitas〇matmul_rhs_group_quant〡has_value_semantics(lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size) -> None:
+def quant〇matmul_rhs_group_quant〡has_value_semantics(lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size) -> None:
    return


 brevitas_matmul_rhs_group_quant_library = [
-    brevitas〇matmul_rhs_group_quant〡shape,
-    brevitas〇matmul_rhs_group_quant〡dtype,
-    brevitas〇matmul_rhs_group_quant〡has_value_semantics]
+    quant〇matmul_rhs_group_quant〡shape,
+    quant〇matmul_rhs_group_quant〡dtype,
+    quant〇matmul_rhs_group_quant〡has_value_semantics]
+# fmt: on


 def load_vmfb(extended_model_name, device, mlir_dialect, extra_args=[]):
@@ -209,7 +211,7 @@ def compile_int_precision(
        torchscript_module,
        inputs,
        output_type="torch",
-        backend_legal_ops=["brevitas.matmul_rhs_group_quant"],
+        backend_legal_ops=["quant.matmul_rhs_group_quant"],
        extra_library=brevitas_matmul_rhs_group_quant_library,
        use_tracing=False,
        verbose=False,
--- a/shark/shark_compile.py
+++ b/shark/shark_compile.py
@@ -11,14 +11,8 @@ from brevitas_examples.llm.llm_quant.quantize import quantize_model
 from brevitas_examples.llm.llm_quant.run_utils import get_model_impl


-def brevitas〇matmul_rhs_group_quant〡shape(
-    lhs: List[int],
-    rhs: List[int],
-    rhs_scale: List[int],
-    rhs_zero_point: List[int],
-    rhs_bit_width: int,
-    rhs_group_size: int,
-) -> List[int]:
+# fmt: off
+def quant〇matmul_rhs_group_quant〡shape(lhs: List[int], rhs: List[int], rhs_scale: List[int], rhs_zero_point: List[int], rhs_bit_width: int, rhs_group_size: int) -> List[int]:
    if len(lhs) == 3 and len(rhs) == 2:
        return [lhs[0], lhs[1], rhs[0]]
    elif len(lhs) == 2 and len(rhs) == 2:
@@ -27,30 +21,21 @@ def brevitas〇matmul_rhs_group_quant〡shape(
        raise ValueError("Input shapes not supported.")


-def brevitas〇matmul_rhs_group_quant〡dtype(
-    lhs_rank_dtype: Tuple[int, int],
-    rhs_rank_dtype: Tuple[int, int],
-    rhs_scale_rank_dtype: Tuple[int, int],
-    rhs_zero_point_rank_dtype: Tuple[int, int],
-    rhs_bit_width: int,
-    rhs_group_size: int,
-) -> int:
+def quant〇matmul_rhs_group_quant〡dtype(lhs_rank_dtype: Tuple[int, int], rhs_rank_dtype: Tuple[int, int], rhs_scale_rank_dtype: Tuple[int, int], rhs_zero_point_rank_dtype: Tuple[int, int], rhs_bit_width: int, rhs_group_size: int) -> int:
    # output dtype is the dtype of the lhs float input
    lhs_rank, lhs_dtype = lhs_rank_dtype
    return lhs_dtype


-def brevitas〇matmul_rhs_group_quant〡has_value_semantics(
-    lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size
-) -> None:
+def quant〇matmul_rhs_group_quant〡has_value_semantics(lhs, rhs, rhs_scale, rhs_zero_point, rhs_bit_width, rhs_group_size) -> None:
    return


 brevitas_matmul_rhs_group_quant_library = [
-    brevitas〇matmul_rhs_group_quant〡shape,
-    brevitas〇matmul_rhs_group_quant〡dtype,
-    brevitas〇matmul_rhs_group_quant〡has_value_semantics,
-]
+    quant〇matmul_rhs_group_quant〡shape,
+    quant〇matmul_rhs_group_quant〡dtype,
+    quant〇matmul_rhs_group_quant〡has_value_semantics]
+# fmt: on


 def load_vmfb(extended_model_name, device, mlir_dialect, extra_args=[]):
@@ -122,7 +107,7 @@ def compile_int_precision(
        torchscript_module,
        inputs,
        output_type="torch",
-        backend_legal_ops=["brevitas.matmul_rhs_group_quant"],
+        backend_legal_ops=["quant.matmul_rhs_group_quant"],
        extra_library=brevitas_matmul_rhs_group_quant_library,
        use_tracing=False,
        verbose=False,
--- a/shark/shark_importer.py
+++ b/shark/shark_importer.py
@@ -612,7 +612,7 @@ def import_with_fx(
        replace_call_fn_target(
            fx_g,
            src=matmul_rhs_group_quant_placeholder,
-            target=torch.ops.brevitas.matmul_rhs_group_quant,
+            target=torch.ops.quant.matmul_rhs_group_quant,
        )

        fx_g.recompile()