```diff
@@ -17,17 +17,17 @@
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
     skip_annotation,
+    to_edge_transform_and_lower_to_qnn,
 )
 from executorch.examples.qualcomm.utils import (
     build_executorch_binary,
     make_output_dir,
     make_quantizer,
     parse_skip_delegation_node,
-    QnnPartitioner,
     setup_common_args_and_variables,
     SimpleADB,
 )
-from executorch.exir import to_edge
+from executorch.exir import ExecutorchBackendConfig
 from transformers import BertTokenizer, MobileBertForSequenceClassification
 
 
@@ -273,30 +273,42 @@ def calibrator(gm):
 
     quantizer = make_quantizer(quant_dtype=quant_dtype)
     backend_options = generate_htp_compiler_spec(quant_dtype is not None)
-    partitioner = QnnPartitioner(
-        generate_qnn_executorch_compiler_spec(
-            soc_model=getattr(QcomChipset, args.model),
-            backend_options=backend_options,
-        ),
-        skip_node_id_set=skip_node_id_set,
-        skip_node_op_set=skip_node_op_set,
+    # partitioner = QnnPartitioner(
+    #     generate_qnn_executorch_compiler_spec(
+    #         soc_model=getattr(QcomChipset, args.model),
+    #         backend_options=backend_options,
+    #     ),
+    #     skip_node_id_set=skip_node_id_set,
+    #     skip_node_op_set=skip_node_op_set,
+    # )
+    backend_options = generate_htp_compiler_spec(
+        use_fp16=False,
+    )
+    compile_spec = generate_qnn_executorch_compiler_spec(
+        soc_model=QcomChipset.SM8550,
+        backend_options=backend_options,
     )
     # skip embedding layer cause it's quantization sensitive
     graph_module, _ = skip_annotation(
         nn_module=model,
         quantizer=quantizer,
-        partitioner=partitioner,
+        compiler_specs=compile_spec,
         sample_input=inputs[0],
         calibration_cb=calibrator,
         fp_node_op_set={torch.ops.aten.embedding.default},
     )
     # lower all graph again, the skipped operators will be left in CPU
-    exec_prog = to_edge(
-        torch.export.export(graph_module, inputs[0], strict=True),
-    ).to_executorch()
-
+    # exec_prog = to_edge(
+    #     torch.export.export(graph_module, inputs[0], strict=True),
+    # ).to_executorch()
+    delegated_program = to_edge_transform_and_lower_to_qnn(
+        graph_module, inputs[0], compile_spec
+    )
+    executorch_program = delegated_program.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=True)
+    )
     with open(f"{args.artifact}/{pte_filename}.pte", "wb") as file:
-        file.write(exec_prog.buffer)
+        file.write(executorch_program.buffer)
 
     if args.compile_only:
         return
```