diff --git a/models/cv/classification/ixrt_common/export.py b/models/cv/classification/ixrt_common/export.py index dd10b13abba0028f7ccd180484c93d27fa772c3b..d324d8f8452e335371b763d0af71e46f13c17f90 100644 --- a/models/cv/classification/ixrt_common/export.py +++ b/models/cv/classification/ixrt_common/export.py @@ -75,7 +75,8 @@ def main(): input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, - opset_version=13 + opset_version=13, + dynamo=False ) print("Export onnx model successfully! ") diff --git a/models/cv/classification/ixrt_common/quant.py b/models/cv/classification/ixrt_common/quant.py index 2726bf02fdb838411e2e99fdee2c3513963a332f..583584c8b7cf4ec615a442e797cb7837e3d5a2da 100644 --- a/models/cv/classification/ixrt_common/quant.py +++ b/models/cv/classification/ixrt_common/quant.py @@ -1,28 +1,93 @@ import os -import cv2 import random import argparse import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize +import onnx +if not hasattr(onnx, "mapping"): + import types + onnx.mapping = types.ModuleType("onnx.mapping") + onnx.mapping.TENSOR_TYPE_TO_NP_TYPE = { + k: v.np_dtype for k, v in onnx._mapping.TENSOR_TYPE_MAP.items() + } import torch -import torchvision.datasets +from onnxruntime.quantization import ( + quantize_static, + QuantType, + CalibrationDataReader, + QuantFormat, + CalibrationMethod, +) from calibration_dataset import getdataloader +OBSERVER_MAP = { + "minmax": CalibrationMethod.MinMax, + "entropy": CalibrationMethod.Entropy, + "percentile": CalibrationMethod.Percentile, + "hist_percentile": CalibrationMethod.Percentile, + "ema": CalibrationMethod.MinMax, +} + + +class TorchCalibrationDataReader(CalibrationDataReader): + def __init__(self, dataloader, input_name): + self.dataloader = dataloader + self.input_name = input_name + self.iterator = iter(dataloader) + + def get_next(self): + try: + data, _ = next(self.iterator) + if isinstance(data, torch.Tensor): + return {self.input_name: data.cpu().numpy()} + return None + except StopIteration: + return None + + +def fix_quantize_axis_attribute(model_path, output_path): + """Reset axis to 0 on all QuantizeLinear/DequantizeLinear nodes. + + ONNX opset>=13 defaults axis to 1 when the attribute is absent, + which is out of range for 1-D tensors like biases. Setting axis=0 + is safe for both per-tensor (scalar scale/zp) and per-channel cases. + """ + + model = onnx.load(model_path) + quantize_node_types = {"QuantizeLinear", "DequantizeLinear", "DynamicQuantizeLinear"} + for node in model.graph.node: + if node.op_type in quantize_node_types: + found = False + for attr in node.attribute: + if attr.name == "axis": + attr.i = 0 + found = True + if not found: + node.attribute.append(onnx.helper.make_attribute("axis", 0)) + onnx.save(model, output_path) + + +def get_onnx_input_name(model_path): + model = onnx.load(model_path) + return model.graph.input[0].name + + def setseed(seed=42): random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--model_name", type=str) parser.add_argument("--model", type=str) parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--observer", type=str, + choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--save_dir", type=str, help="save path", default=None) parser.add_argument("--bsz", type=int, default=32) parser.add_argument("--step", type=int, default=20) parser.add_argument("--seed", type=int, default=42) @@ -32,13 +97,53 @@ def parse_args(): print(args.disable_quant_names) return args -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file + +def main(): + args = parse_args() + setseed(args.seed) + + model = onnx.load(args.model) + opset = model.opset_import[0].version + if opset < 13: + print(f"opset version: {opset}, converting to 13 for QDQ quantization") + model = onnx.version_converter.convert_version(model, 13) + onnx.save(model, args.model) + + input_name = get_onnx_input_name(args.model) + print(f"Model input name: {input_name}") + + calibration_dataloader = getdataloader( + args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz + ) + calib_reader = TorchCalibrationDataReader(calibration_dataloader, input_name) + + output_path = os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx") + calibrate_method = OBSERVER_MAP.get(args.observer, CalibrationMethod.MinMax) + + nodes_to_exclude = args.disable_quant_names or [] + + print(f"Calibration method: {calibrate_method}") + print(f"Nodes to exclude: {nodes_to_exclude}") + + quantize_static( + model_input=args.model, + model_output=output_path, + calibration_data_reader=calib_reader, + weight_type=QuantType.QInt8, + activation_type=QuantType.QInt8, + quant_format=QuantFormat.QDQ, + per_channel=True, + calibrate_method=calibrate_method, + nodes_to_exclude=nodes_to_exclude, + extra_options={ + "ActivationSymmetric": True, + "WeightSymmetric": True, + "QuantizeBias": False, + }, + ) + fix_quantize_axis_attribute(output_path, output_path) + print(f"Quantized model saved to: {output_path}") + + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/ixrt_common/requirements.txt b/models/cv/classification/ixrt_common/requirements.txt index 560b910c1ded29650e54726a15f1cedcf8c374a2..891449a843e3acc88f630ca38c696f8851eef21c 100644 --- a/models/cv/classification/ixrt_common/requirements.txt +++ b/models/cv/classification/ixrt_common/requirements.txt @@ -3,4 +3,5 @@ tabulate pycuda onnx onnxsim -opencv-python==4.6.0.66 \ No newline at end of file +opencv-python==4.6.0.66 +onnxscript diff --git a/models/cv/object_detection/ixrt_common/config/YOLOV7_CONFIG b/models/cv/object_detection/ixrt_common/config/YOLOV7_CONFIG index 4803e368f3e4fa20cf05576e1cd5f12594f5d102..a017ed57064656d7ff825d6d75d1d54510af7c45 100644 --- a/models/cv/object_detection/ixrt_common/config/YOLOV7_CONFIG +++ b/models/cv/object_detection/ixrt_common/config/YOLOV7_CONFIG @@ -18,7 +18,8 @@ MODEL_INPUT_NAMES=(images) LAYER_FUSION=1 DECODER_FASTER=1 DECODER_NUM_CLASS=80 -DECODER_INPUT_NAMES=(/model/model.105/m.0/Conv_output_0 /model/model.105/m.1/Conv_output_0 /model/model.105/m.2/Conv_output_0) +#DECODER_INPUT_NAMES=(/model/model.105/m.0/Conv_output_0 /model/model.105/m.1/Conv_output_0 /model/model.105/m.2/Conv_output_0) +DECODER_INPUT_NAMES=(/model.105/m.0/Conv_output_0 /model.105/m.1/Conv_output_0 /model.105/m.2/Conv_output_0) DECODER_8_ANCHOR=(12 16 19 36 40 28) DECODER_16_ANCHOR=(36 75 76 55 72 146) DECODER_32_ANCHOR=(142 110 192 243 459 401) @@ -46,4 +47,4 @@ QUANT_BATCHSIZE=1 QUANT_STEP=32 QUANT_SEED=42 DISABLE_QUANT_LIST=() -QUANT_EXIST_ONNX= \ No newline at end of file +QUANT_EXIST_ONNX= diff --git a/models/cv/object_detection/ixrt_common/modify_batchsize.py b/models/cv/object_detection/ixrt_common/modify_batchsize.py index 64a3243326f317f38e1433b1695bc5d68559e1bf..bc44b719c278bae0264f02eb789b82fcacfb4943 100644 --- a/models/cv/object_detection/ixrt_common/modify_batchsize.py +++ b/models/cv/object_detection/ixrt_common/modify_batchsize.py @@ -2,6 +2,7 @@ import onnx import argparse import numpy as np + def change_input_dim(model, bsz): batch_size = bsz @@ -37,6 +38,23 @@ def change_input_dim(model, bsz): raw_data[0] = batch_size data.raw_data = raw_data.tobytes() +def change_reshape_batch(model, bsz): + batch_size = int(bsz) if not isinstance(bsz, int) else bsz + initializer_map = {init.name: init for init in model.graph.initializer} + for node in model.graph.node: + if node.op_type == 'Reshape' and len(node.input) >= 2: + shape_name = node.input[1] + if shape_name not in initializer_map: + continue + init = initializer_map[shape_name] + shape_val = np.array(onnx.numpy_helper.to_array(init)) + if len(shape_val) >= 1 and shape_val[0] > 0 and shape_val[0] != batch_size: + old_val = shape_val[0] + shape_val[0] = batch_size + new_init = onnx.numpy_helper.from_array(shape_val, name=shape_name) + init.CopyFrom(new_init) + print(f" Reshape {node.name}: shape[0] {old_val} -> {batch_size}") + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--batch_size", type=int) @@ -48,4 +66,5 @@ def parse_args(): args = parse_args() model = onnx.load(args.origin_model) change_input_dim(model, args.batch_size) +change_reshape_batch(model, args.batch_size) onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/ixrt_common/requirements.txt b/models/cv/object_detection/ixrt_common/requirements.txt index 46ef4ec8d824eadb4dbce5a88a997abbd38a6747..42644615f22157bb23cece90a46dacde70ea567f 100644 --- a/models/cv/object_detection/ixrt_common/requirements.txt +++ b/models/cv/object_detection/ixrt_common/requirements.txt @@ -5,4 +5,5 @@ ultralytics pycocotools opencv-python==4.6.0.66 pycuda -seaborn \ No newline at end of file +seaborn +onnxscript diff --git a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh index 219cffa026e8c503c7214de8acc032cf2aa5f8d2..523078fa832bdbcf486e6b0c12d8eadc32d44f11 100644 --- a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh @@ -18,10 +18,13 @@ set -x pip3 install -r ../../ixrt_common/requirements.txt mkdir checkpoints -unzip -q /root/data/3rd_party/onnx_tflite_yolov3.zip -d ./ -cp /root/data/checkpoints/yolov3.weights onnx_tflite_yolov3/weights -cd onnx_tflite_yolov3 -python3 detect.py --cfg cfg/yolov3.cfg --weights weights/yolov3.weights -mv weights/export.onnx ../checkpoints/yolov3.onnx -cd .. -cp config/YOLOV3_CONFIG ../../ixrt_common/config/YOLOV3_CONFIG \ No newline at end of file +#unzip -q /root/data/3rd_party/onnx_tflite_yolov3.zip -d ./ +#cp /root/data/checkpoints/yolov3.weights onnx_tflite_yolov3/weights +#cd onnx_tflite_yolov3 +#python3 detect.py --cfg cfg/yolov3.cfg --weights weights/yolov3.weights +#mv weights/export.onnx ../checkpoints/yolov3.onnx +#cd .. +#cp config/YOLOV3_CONFIG ../../ixrt_common/config/YOLOV3_CONFIG +cp /root/data/3rd_party/yolov3/yolov3_caffe.onnx checkpoints/ +cp /root/data/3rd_party/yolov3/yolov3.onnx checkpoints/ +cp /root/data/3rd_party/yolov3/YOLOV3_CONFIG ../../ixrt_common/config/YOLOV3_CONFIG diff --git a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh index 88caeb5e381709808af81aef9086d19f07a411d5..a19a1707865ebe86e30553a64a4387e776fc6b0c 100644 --- a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh @@ -20,7 +20,7 @@ pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints cp -r /root/data/3rd_party/yolov7 ./ cd yolov7 -ln -s /root/data/checkpoints/yolov7.pt ./ -python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 16 -mv yolov7.onnx ../checkpoints/yolov7m.onnx +#ln -s /root/data/checkpoints/yolov7.pt ./ +#python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 16 +mv yolov7m.onnx ../checkpoints/yolov7m.onnx cd ..