diff --git a/models/cv/classification/mobilevit/igie/README.md b/models/cv/classification/mobilevit/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ff0461dc2e763055de581766ffbb3eae60653e9f
--- /dev/null
+++ b/models/cv/classification/mobilevit/igie/README.md
@@ -0,0 +1,59 @@
+# MobileViT_S (IGIE)
+
+## Model Description
+
+MobileViT-S is a lightweight, general-purpose vision transformer designed specifically for mobile devices. It introduces a novel perspective by treating Transformers as convolutions, effectively combining the local processing strengths of CNNs with the global representation capabilities of Transformers.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| MR-V100 | 4.4.0 | 26.03 |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+
+### Install Dependencies
+
+```bash
+pip3 install -r ../../igie_common/requirements.txt
+pip3 install timm
+```
+
+### Model Conversion
+
+```bash
+# export the onnx model from timm
+python3 export.py --model-name mobilevit_s --output mobilevit_s.onnx
+
+# use onnxsim to optimize the onnx model
+onnxsim mobilevit_s.onnx mobilevit_s_opt.onnx
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+export RUN_DIR=../../igie_common/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_mobilevit_s_fp16_accuracy.sh
+# Performance
+bash scripts/infer_mobilevit_s_fp16_performance.sh
+```
+
+## Model Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| mobilevit_s | 32 | FP16 | 1827.75 | 77.127 | 93.546 |
diff --git a/models/cv/classification/mobilevit/igie/ci/prepare.sh b/models/cv/classification/mobilevit/igie/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0651c29d7d9592f91c2952080acdc251fbc14453
--- /dev/null
+++ b/models/cv/classification/mobilevit/igie/ci/prepare.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+pip3 install -r ../../igie_common/requirements.txt
+pip3 install timm
+python3 export.py --model-name mobilevit_s --output mobilevit_s.onnx
+onnxsim mobilevit_s.onnx mobilevit_s_opt.onnx
\ No newline at end of file
diff --git a/models/cv/classification/mobilevit/igie/export.py b/models/cv/classification/mobilevit/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..451779832c2cf05824e14fee3858cebe34e8a581
--- /dev/null
+++ b/models/cv/classification/mobilevit/igie/export.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import timm
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--model-name",
+                        type=str,
+                        required=True,
+                        help="Name of the model from timm.")
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=False,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+    print(f"Loading model: {args.model_name}...")
+
+    model = timm.create_model(args.model_name, pretrained=True)
+
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names = input_names,
+        dynamic_axes = dynamic_axes,
+        output_names = output_names,
+        opset_version=17
+    )
+
+    print("Export onnx model successfully! ")
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/mobilevit/igie/inference.py b/models/cv/classification/mobilevit/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..20c21d5602288c67fddbe3ecb66845accc0a777e
--- /dev/null
+++ b/models/cv/classification/mobilevit/igie/inference.py
@@ -0,0 +1,181 @@
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
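+
+# A minimal usage sketch (illustrative; it mirrors scripts/infer_mobilevit_s_fp16_accuracy.sh
+# and assumes the engine was built beforehand with ../../igie_common/build_engine.py):
+#   python3 inference.py --engine mobilevit_s_bs_32_fp16.so --batchsize 32 \
+#       --input_name input --datasets /path/to/imagenet_val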
+ +import sys +import argparse +import tvm +import torch +import numpy as np +from tvm import relay +from tqdm import tqdm + +import timm +from timm.data import create_dataset, create_loader, resolve_data_config + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + model_name = 'mobilevit_s' + model = timm.create_model(model_name, pretrained=False) + config = resolve_data_config({}, model=model) + dataset = create_dataset('imagenet', root=args.datasets, split='validation') + dataloader = create_loader( + dataset, + input_size=(3, 224, 224), + batch_size=batch_size, + use_prefetcher=True, + interpolation=config['interpolation'], + mean=config['mean'], + std=config['std'], + crop_pct=config['crop_pct'] + ) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + image = image.cpu().numpy() + label = label.cpu().numpy() + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = 
get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/mobilevit/igie/scripts/infer_mobilevit_s_fp16_accuracy.sh b/models/cv/classification/mobilevit/igie/scripts/infer_mobilevit_s_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..32d8e63e4d3f83f3a6b0ed3970283cb28f7f57d8 --- /dev/null +++ b/models/cv/classification/mobilevit/igie/scripts/infer_mobilevit_s_fp16_accuracy.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="mobilevit_s_opt.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ${RUN_DIR}build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mobilevit_s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mobilevit_s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/mobilevit/igie/scripts/infer_mobilevit_s_fp16_performance.sh b/models/cv/classification/mobilevit/igie/scripts/infer_mobilevit_s_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..6620b0511c1d7efe31fc216006afe29e3258e17a --- /dev/null +++ b/models/cv/classification/mobilevit/igie/scripts/infer_mobilevit_s_fp16_performance.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
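+
+# Usage sketch (assumed invocation; RUN_DIR and DATASETS_DIR are exported as in
+# the README, and --bs overrides the default batch size of 32):
+#   bash scripts/infer_mobilevit_s_fp16_performance.sh --bs 16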
+
+batchsize=32
+model_path="mobilevit_s_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) batchsize=${arguments[index]};;
+    esac
+done
+
+echo "batch size is ${batchsize}"
+
+# build engine
+python3 ${RUN_DIR}build_engine.py \
+    --model_path ${model_path} \
+    --input input:${batchsize},3,224,224 \
+    --precision fp16 \
+    --engine_path mobilevit_s_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+    --engine mobilevit_s_bs_${batchsize}_fp16.so \
+    --batchsize ${batchsize} \
+    --input_name input \
+    --datasets ${datasets_path} \
+    --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/vit_b_32/igie/README.md b/models/cv/classification/vit_b_32/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..27b1452a05c6e1f2196df35432b5175f4c64723c
--- /dev/null
+++ b/models/cv/classification/vit_b_32/igie/README.md
@@ -0,0 +1,58 @@
+# ViT_B_32 (IGIE)
+
+## Model Description
+
+The model utilizes the Vision Transformer (ViT-B/32) architecture as its visual encoder. It partitions input images into 32x32 patches, which are processed through a feature extractor consisting of 12 Transformer layers. Finally, a linear projection head maps the features into a 512-dimensional latent space, achieving cross-modal alignment between image features and text semantics.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| MR-V100 | 4.4.0 | 26.03 |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+
+### Install Dependencies
+
+```bash
+pip3 install -r requirements.txt
+pip3 install open_clip_torch
+pip3 install timm
+```
+
+### Model Conversion
+
+```bash
+python3 export.py --model-name ViT-B-32 --weight ViT-B-32.pt --output vit_b_32.onnx
+
+# Use onnxsim to optimize the onnx model
+onnxsim vit_b_32.onnx vit_b_32_opt.onnx
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_vit_b_32_fp16_accuracy.sh
+# Performance
+bash scripts/infer_vit_b_32_fp16_performance.sh
+```
+
+## Model Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| :--------: | :----: | :----: | :----: | :----: | :----: |
+| ViT_B_32 | 32 | FP16 | 3303.136 | 58.16 | 85.337 |
diff --git a/models/cv/classification/vit_b_32/igie/ci/prepare.sh b/models/cv/classification/vit_b_32/igie/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..84234a6d7cb51ab8408ce6ffea74f967e691b4d5
--- /dev/null
+++ b/models/cv/classification/vit_b_32/igie/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install -r requirements.txt +pip3 install open_clip_torch +pip3 install timm + +python3 export.py --model-name ViT-B-32 --weight ViT-B-32.pt --output vit_b_32.onnx + +onnxsim vit_b_32.onnx vit_b_32_opt.onnx diff --git a/models/cv/classification/vit_b_32/igie/export.py b/models/cv/classification/vit_b_32/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..6fe52f5a62a15fadad7618aeea9aae3bdebd3677 --- /dev/null +++ b/models/cv/classification/vit_b_32/igie/export.py @@ -0,0 +1,71 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import torch +import open_clip + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model-name", + type=str, + required=True, + help="Name of the model from open_clip.") + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model, _, _ = open_clip.create_model_and_transforms( + args.model_name, + pretrained=args.weight + ) + + model.eval() + + image_model = model.visual + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + image_model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=17 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/vit_b_32/igie/inference.py b/models/cv/classification/vit_b_32/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..5a87832e89ec186a2ab6a28c3364fa5b01d4ed72 --- /dev/null +++ b/models/cv/classification/vit_b_32/igie/inference.py @@ -0,0 +1,185 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
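+
+# Note on the evaluation scheme: only the visual tower runs in the IGIE engine.
+# The open_clip text tower is evaluated once on the host to build zero-shot
+# classifier weights from ImageNet class prompts, and predictions are the scaled
+# cosine similarities between the normalized image and text embeddings.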
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+
+import open_clip
+from timm.data import create_dataset, create_loader
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--engine",
+                        type=str,
+                        required=True,
+                        help="igie engine path.")
+
+    parser.add_argument("--batchsize",
+                        type=int,
+                        required=True,
+                        help="inference batch size.")
+
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
+                        help="datasets path.")
+
+    parser.add_argument("--input_name",
+                        type=str,
+                        required=True,
+                        help="input name of the model.")
+
+    parser.add_argument("--warmup",
+                        type=int,
+                        default=3,
+                        help="number of warmup before test.")
+
+    parser.add_argument("--num_workers",
+                        type=int,
+                        default=16,
+                        help="number of workers used in pytorch dataloader.")
+
+    parser.add_argument("--acc_target",
+                        type=float,
+                        default=None,
+                        help="Model inference Accuracy target.")
+
+    parser.add_argument("--fps_target",
+                        type=float,
+                        default=None,
+                        help="Model inference FPS target.")
+
+    parser.add_argument("--perf_only",
+                        type=bool,
+                        default=False,
+                        help="Run performance test only")
+
+    args = parser.parse_args()
+
+    return args
+
+def get_topk_accuracy(pred, label):
+    if isinstance(pred, np.ndarray):
+        pred = torch.from_numpy(pred)
+
+    if isinstance(label, np.ndarray):
+        label = torch.from_numpy(label)
+
+    top1_acc = 0
+    top5_acc = 0
+    for idx in range(len(label)):
+        label_value = label[idx]
+        if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+            top1_acc += 1
+            top5_acc += 1
+
+        elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+            top5_acc += 1
+
+    return top1_acc, top5_acc
+
+def main():
+    args = parse_args()
+
+    batch_size = args.batchsize
+
+    # create iluvatar target & device
+    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+    device = tvm.device(target.kind.name, 0)
+
+    # load engine
+    lib = tvm.runtime.load_module(args.engine)
+
+    # create runtime from engine
+    module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+    # just run perf test
+    if args.perf_only:
+        ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+        prof_res = np.array(ftimer().results) * 1000
+        fps = batch_size * 1000 / np.mean(prof_res)
+        print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+    else:
+        # warm up
+        for _ in range(args.warmup):
+            module.run()
+
+        model_name = "ViT-B-32"
+        model, _, preprocess = open_clip.create_model_and_transforms(model_name, pretrained="openai")
+        tokenizer = open_clip.get_tokenizer(model_name)
+
+        from open_clip import IMAGENET_CLASSNAMES as imagenet_classnames
+
+        with torch.no_grad():
+            texts = tokenizer([f"a photo of a {c}" for c in imagenet_classnames])
+            text_features = model.encode_text(texts)
+            classifier_weights = (text_features / text_features.norm(dim=-1, keepdim=True)).cpu().numpy()
+
+        dataset = create_dataset('imagenet', root=args.datasets, split='validation', transform=preprocess)
+        dataloader = create_loader(dataset, input_size=(3, 224, 224), batch_size=batch_size, is_training=False, use_prefetcher=False)
+
+        top1_acc = 0
+        top5_acc = 0
+        total_num = 0
+
+        for image, label in tqdm(dataloader):
+
+            image = image.cpu().numpy()
+            label = label.cpu().numpy()
+
+            # pad the last batch
+            pad_batch = len(image) != batch_size
+
+            if pad_batch:
+                origin_size = len(image)
+                image = np.resize(image, (batch_size, *image.shape[1:]))
+
+            
module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + pred /= np.linalg.norm(pred, axis=-1, keepdims=True) + + pred = 100.0 * np.dot(pred, classifier_weights.T) + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/vit_b_32/igie/requirements.txt b/models/cv/classification/vit_b_32/igie/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7096be1e6da3fa1cf839b6cfc3687682b0107f50 --- /dev/null +++ b/models/cv/classification/vit_b_32/igie/requirements.txt @@ -0,0 +1,3 @@ +onnx +tqdm +onnxsim \ No newline at end of file diff --git a/models/cv/classification/vit_b_32/igie/scripts/infer_vit_b_32_fp16_accuracy.sh b/models/cv/classification/vit_b_32/igie/scripts/infer_vit_b_32_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..ad3b4cffe6b82a985432fc9e97e4d0336ef93284 --- /dev/null +++ b/models/cv/classification/vit_b_32/igie/scripts/infer_vit_b_32_fp16_accuracy.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="vit_b_32_opt.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path vit_b_32_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine vit_b_32_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/vit_b_32/igie/scripts/infer_vit_b_32_fp16_performance.sh b/models/cv/classification/vit_b_32/igie/scripts/infer_vit_b_32_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..46361bea689b3b596d76a8d19942fae390e2258b --- /dev/null +++ b/models/cv/classification/vit_b_32/igie/scripts/infer_vit_b_32_fp16_performance.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="vit_b_32_opt.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path vit_b_32_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine vit_b_32_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/vit_l_14/igie/README.md b/models/cv/classification/vit_l_14/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f612ddc3c354191d2badf1424520d9a497b07a99 --- /dev/null +++ b/models/cv/classification/vit_l_14/igie/README.md @@ -0,0 +1,58 @@ +# ViT_L_14 (IGIE) + +## Model Description + +The model utilizes the Vision Transformer (ViT-L/14) architecture as its visual encoder, representing a high-performance, large-scale variant in the CLIP family. It partitions input images into fine-grained 14x14 patches, enabling the capture of denser visual details compared to the base model. The architecture consists of 24 Transformer layers with a 1024-dimensional hidden width, eventually mapping features through a linear projection head into a 768-dimensional latent space for robust cross-modal alignment. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.4.0 | 26.03 | + + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the validation dataset. 
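+
+The validation set is expected in an ImageFolder-style layout, sketched below (illustrative; any layout accepted by `timm.data.create_dataset` with `split='validation'` should work):
+
+```bash
+imagenet_val/
+├── n01440764/
+│   ├── ILSVRC2012_val_00000293.JPEG
+│   └── ...
+└── ...
+```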
+
+### Install Dependencies
+
+```bash
+pip3 install -r requirements.txt
+pip3 install open_clip_torch
+pip3 install timm
+```
+
+### Model Conversion
+
+```bash
+python3 export.py --model-name ViT-L-14 --weight ViT-L-14.pt --output vit_l_14.onnx
+
+# Use onnxsim to optimize the onnx model
+onnxsim vit_l_14.onnx vit_l_14_opt.onnx
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_vit_l_14_fp16_accuracy.sh
+# Performance
+bash scripts/infer_vit_l_14_fp16_performance.sh
+```
+
+## Model Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| :--------: | :----: | :----: | :----: | :----: | :----: |
+| ViT_L_14 | 32 | FP16 | 135.778 | 71.15 | 92.25 |
diff --git a/models/cv/classification/vit_l_14/igie/ci/prepare.sh b/models/cv/classification/vit_l_14/igie/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..af8ca9886006b7c01c728984aeadb63845d7ab54
--- /dev/null
+++ b/models/cv/classification/vit_l_14/igie/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+pip3 install -r requirements.txt
+pip3 install open_clip_torch
+pip3 install timm
+
+python3 export.py --model-name ViT-L-14 --weight ViT-L-14.pt --output vit_l_14.onnx
+
+onnxsim vit_l_14.onnx vit_l_14_opt.onnx
\ No newline at end of file
diff --git a/models/cv/classification/vit_l_14/igie/export.py b/models/cv/classification/vit_l_14/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fe52f5a62a15fadad7618aeea9aae3bdebd3677
--- /dev/null
+++ b/models/cv/classification/vit_l_14/igie/export.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
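+
+# Example invocation (mirrors the README; ViT-L-14.pt is the checkpoint
+# downloaded in "Prepare Resources"):
+#   python3 export.py --model-name ViT-L-14 --weight ViT-L-14.pt --output vit_l_14.onnx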
+ +import argparse +import torch +import open_clip + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model-name", + type=str, + required=True, + help="Name of the model from open_clip.") + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model, _, _ = open_clip.create_model_and_transforms( + args.model_name, + pretrained=args.weight + ) + + model.eval() + + image_model = model.visual + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + image_model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=17 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/vit_l_14/igie/inference.py b/models/cv/classification/vit_l_14/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..7bea25fdda3cc340261c8f246c5c260294dc2789 --- /dev/null +++ b/models/cv/classification/vit_l_14/igie/inference.py @@ -0,0 +1,185 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
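+
+# A minimal usage sketch (illustrative; it mirrors scripts/infer_vit_l_14_fp16_accuracy.sh):
+#   python3 inference.py --engine vit_l_14_bs_32_fp16.so --batchsize 32 \
+#       --input_name input --datasets /path/to/imagenet_val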
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+
+import open_clip
+from timm.data import create_dataset, create_loader
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--engine",
+                        type=str,
+                        required=True,
+                        help="igie engine path.")
+
+    parser.add_argument("--batchsize",
+                        type=int,
+                        required=True,
+                        help="inference batch size.")
+
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
+                        help="datasets path.")
+
+    parser.add_argument("--input_name",
+                        type=str,
+                        required=True,
+                        help="input name of the model.")
+
+    parser.add_argument("--warmup",
+                        type=int,
+                        default=3,
+                        help="number of warmup before test.")
+
+    parser.add_argument("--num_workers",
+                        type=int,
+                        default=16,
+                        help="number of workers used in pytorch dataloader.")
+
+    parser.add_argument("--acc_target",
+                        type=float,
+                        default=None,
+                        help="Model inference Accuracy target.")
+
+    parser.add_argument("--fps_target",
+                        type=float,
+                        default=None,
+                        help="Model inference FPS target.")
+
+    parser.add_argument("--perf_only",
+                        type=bool,
+                        default=False,
+                        help="Run performance test only")
+
+    args = parser.parse_args()
+
+    return args
+
+def get_topk_accuracy(pred, label):
+    if isinstance(pred, np.ndarray):
+        pred = torch.from_numpy(pred)
+
+    if isinstance(label, np.ndarray):
+        label = torch.from_numpy(label)
+
+    top1_acc = 0
+    top5_acc = 0
+    for idx in range(len(label)):
+        label_value = label[idx]
+        if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+            top1_acc += 1
+            top5_acc += 1
+
+        elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+            top5_acc += 1
+
+    return top1_acc, top5_acc
+
+def main():
+    args = parse_args()
+
+    batch_size = args.batchsize
+
+    # create iluvatar target & device
+    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+    device = tvm.device(target.kind.name, 0)
+
+    # load engine
+    lib = tvm.runtime.load_module(args.engine)
+
+    # create runtime from engine
+    module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+    # just run perf test
+    if args.perf_only:
+        ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+        prof_res = np.array(ftimer().results) * 1000
+        fps = batch_size * 1000 / np.mean(prof_res)
+        print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+    else:
+        # warm up
+        for _ in range(args.warmup):
+            module.run()
+
+        model_name = "ViT-L-14"
+        model, _, preprocess = open_clip.create_model_and_transforms(model_name, pretrained="openai")
+        tokenizer = open_clip.get_tokenizer(model_name)
+
+        from open_clip import IMAGENET_CLASSNAMES as imagenet_classnames
+
+        with torch.no_grad():
+            texts = tokenizer([f"a photo of a {c}" for c in imagenet_classnames])
+            text_features = model.encode_text(texts)
+            classifier_weights = (text_features / text_features.norm(dim=-1, keepdim=True)).cpu().numpy()
+
+        dataset = create_dataset('imagenet', root=args.datasets, split='validation', transform=preprocess)
+        dataloader = create_loader(dataset, input_size=(3, 224, 224), batch_size=batch_size, is_training=False, use_prefetcher=False)
+
+        top1_acc = 0
+        top5_acc = 0
+        total_num = 0
+
+        for image, label in tqdm(dataloader):
+
+            image = image.cpu().numpy()
+            label = label.cpu().numpy()
+
+            # pad the last batch
+            pad_batch = len(image) != batch_size
+
+            if pad_batch:
+                origin_size = len(image)
+                image = np.resize(image, (batch_size, *image.shape[1:]))
+
+            
module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + pred /= np.linalg.norm(pred, axis=-1, keepdims=True) + + pred = 100.0 * np.dot(pred, classifier_weights.T) + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/vit_l_14/igie/requirements.txt b/models/cv/classification/vit_l_14/igie/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7096be1e6da3fa1cf839b6cfc3687682b0107f50 --- /dev/null +++ b/models/cv/classification/vit_l_14/igie/requirements.txt @@ -0,0 +1,3 @@ +onnx +tqdm +onnxsim \ No newline at end of file diff --git a/models/cv/classification/vit_l_14/igie/scripts/infer_vit_l_14_fp16_accuracy.sh b/models/cv/classification/vit_l_14/igie/scripts/infer_vit_l_14_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..51f90973fb4c65d63876b8e284247aeecedb3d9d --- /dev/null +++ b/models/cv/classification/vit_l_14/igie/scripts/infer_vit_l_14_fp16_accuracy.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="vit_l_14_opt.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path vit_l_14_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine vit_l_14_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/vit_l_14/igie/scripts/infer_vit_l_14_fp16_performance.sh b/models/cv/classification/vit_l_14/igie/scripts/infer_vit_l_14_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..fe12d833709b6ad8b01fd1f5505f417095e175b6 --- /dev/null +++ b/models/cv/classification/vit_l_14/igie/scripts/infer_vit_l_14_fp16_performance.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="vit_l_14_opt.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path vit_l_14_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine vit_l_14_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/yolov11m/igie/README.md b/models/cv/object_detection/yolov11m/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..57d55a342d2612829192a68aef5785ef8deb4044 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/README.md @@ -0,0 +1,100 @@ +# YOLOv11m (IGIE) + +## Model Description + +YOLOv11 is the latest generation of the YOLO (You Only Look Once) series object detection model released by Ultralytics. Building upon the advancements of previous YOLO models, such as YOLOv5 and YOLOv8, YOLOv11 introduces comprehensive upgrades to further enhance performance, flexibility, and usability. It is a versatile deep learning model designed for multi-task applications, supporting object detection, instance segmentation, image classification, keypoint pose estimation, and rotated object detection. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | +| MR-V100 | 4.2.0 | 25.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + +- to download the labels dataset. +- to download the validation dataset. +- to download the train dataset. 
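+
+Unpack the archives so that the directory matches the layout shown below: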
+
+```bash
+unzip -q -d ./ coco2017labels.zip
+unzip -q -d ./coco/images/ train2017.zip
+unzip -q -d ./coco/images/ val2017.zip
+
+coco
+├── annotations
+│   └── instances_val2017.json
+├── images
+│   ├── train2017
+│   └── val2017
+├── labels
+│   ├── train2017
+│   └── val2017
+├── LICENSE
+├── README.txt
+├── test-dev2017.txt
+├── train2017.cache
+├── train2017.txt
+├── val2017.cache
+└── val2017.txt
+```
+
+### Install Dependencies
+
+Contact the Iluvatar administrator to get the missing packages:
+
+- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl
+
+```bash
+pip3 install -r requirements.txt
+pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl
+```
+
+### Model Conversion
+
+```bash
+# Make sure numpy < 2.0
+python3 export.py --weight yolo11m.pt --batch 32
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov11m_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov11m_fp16_performance.sh
+```
+
+### INT8
+
+```bash
+# Accuracy
+bash scripts/infer_yolov11m_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov11m_int8_performance.sh
+```
+
+## Model Results
+
+| Model    | BatchSize | Precision | FPS    | IOU@0.5 | IOU@0.5:0.95 |
+| -------- | --------- | --------- | ------ | ------- | ------------ |
+| YOLOv11m | 32        | FP16      | 507.64 | 0.681   | 0.513        |
+| YOLOv11m | 32        | INT8      | 835.60 | 0.665   | 0.488        |
+
+## References
+
+YOLOv11: 
diff --git a/models/cv/object_detection/yolov11m/igie/ci/prepare.sh b/models/cv/object_detection/yolov11m/igie/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..422e91cfa7a374701c3f53372a411254574b46ee
--- /dev/null
+++ b/models/cv/object_detection/yolov11m/igie/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install -r requirements.txt
+
+python3 export.py --weight yolo11m.pt --batch 32
diff --git a/models/cv/object_detection/yolov11m/igie/export.py b/models/cv/object_detection/yolov11m/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..0eea848e5d46e4ea06ecde15ed543147dfb60037
--- /dev/null
+++ b/models/cv/object_detection/yolov11m/igie/export.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), optimize=True, simplify=True, opset=13) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov11m/igie/inference.py b/models/cv/object_detection/yolov11m/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..111c2afa451f8c94bf22e0f7db771b01767964f9 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/inference.py @@ -0,0 +1,140 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
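+
+# A minimal usage sketch (illustrative; it mirrors scripts/infer_yolov11m_fp16_accuracy.sh):
+#   python3 inference.py --engine yolo11m_bs_32_fp16.so --batchsize 32 \
+#       --input_name images --datasets /path/to/coco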
+ +import argparse +import os + +import tvm +from tvm import relay + +import numpy as np +from pathlib import Path +from ultralytics import YOLO +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from validator import IGIE_Validator + + + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + root_path = args.datasets + val_path = os.path.join(root_path, 'val2017.txt') + + overrides = {} + overrides['mode'] = 'val' + + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = args.batchsize + + cfg_args.data = { + 'path': Path(root_path), + 'val': val_path, + 'names': + { + 0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', + 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', + 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', + 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', + 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', + 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', + 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', + 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', + 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', + 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', + 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', + 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', + 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', + 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush' + }, + 'nc': 80} + cfg_args.save_json = True + + validator = IGIE_Validator(args=cfg_args, save_dir=Path('.')) + validator.stride = 32 + + stats = 
validator(module, device) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov11m/igie/quantize.py b/models/cv/object_detection/yolov11m/igie/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..e72c920fcb6364172716065b43e1443281624150 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/quantize.py @@ -0,0 +1,167 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import onnx +import psutil +import argparse +import numpy as np +from pathlib import Path + +import torch + +from onnxruntime.quantization import (CalibrationDataReader, QuantFormat, + quantize_static, QuantType, + CalibrationMethod) + +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.models.yolo.detect import DetectionValidator + +class CalibrationDataLoader(CalibrationDataReader): + def __init__(self, input_name, dataloader, cnt_limit=100): + self.cnt = 0 + self.input_name = input_name + self.cnt_limit = cnt_limit + self.dataloader = dataloader + self.iter = iter(dataloader) + + # avoid oom + @staticmethod + def _exceed_memory_upper_bound(upper_bound=80): + info = psutil.virtual_memory() + total_percent = info.percent + if total_percent >= upper_bound: + return True + return False + + def get_next(self): + if self._exceed_memory_upper_bound() or self.cnt >= self.cnt_limit: + return None + self.cnt += 1 + print(f"onnx calibration data count: {self.cnt}") + input_info = next(self.iter) + + ort_input = {self.input_name[0]: input_info.numpy()} + + return ort_input + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--out_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="calibration datasets path.") + + parser.add_argument("--batch", + type=int, + default=32, + help="batchsize of the model.") + + args = parser.parse_args() + + return args + +class PreProcessDatasets(DetectionValidator): + def __call__(self, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + + datasets = [] + length = 0 + + for batch in self.dataloader: + data = self.preprocess(batch)['img'] + datasets.append(data[0]) + length += data.shape[0] + + if length >= 200: + break + + return datasets + +class CalibrationDataset(torch.utils.data.Dataset): + def __init__(self, datasets): + self.datasets = datasets + + def __len__(self): + return len(self.datasets) + + def __getitem__(self, index): + return self.datasets[index] + + +def main(): + args = parse_args() + + model = onnx.load(args.model_path) + input_names = [input.name for input in model.graph.input] + + overrides = {'mode': 'val'} + 
cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = 1 + cfg_args.save_json = True + + data = { + 'path': Path(args.datasets), + 'val': os.path.join(args.datasets, 'val2017.txt') + } + + validator = PreProcessDatasets(args=cfg_args, save_dir=Path('.')) + + datasets = CalibrationDataset(validator(data)) + + data_loader = torch.utils.data.DataLoader(dataset=datasets, batch_size=args.batch) + + cnt_limit = int(20 / args.batch) + 1 + + calibration = CalibrationDataLoader(input_names, data_loader, cnt_limit=cnt_limit) + + quantize_static(args.model_path, + args.out_path, + calibration_data_reader=calibration, + quant_format=QuantFormat.QOperator, + per_channel=False, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + use_external_data_format=False, + nodes_to_exclude= [ + '/model.23/Add_1', '/model.23/Add_2', '/model.23/Concat_4', + '/model.23/Concat_5', '/model.23/Mul_2', '/model.10/m/m.0/attn/Softmax', + '/model.23/dfl/Softmax' + ], + calibrate_method=CalibrationMethod.Percentile, + extra_options = { + 'ActivationSymmetric': True, + 'WeightSymmetric': True + } + ) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov11m/igie/requirements.txt b/models/cv/object_detection/yolov11m/igie/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..210906370bb494a9de9ea61888e5a7e33a8b0f30 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx==1.16.0 +ultralytics==8.3.59 +pycocotools \ No newline at end of file diff --git a/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_fp16_accuracy.sh b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..d28109ede0ca3a559818886cf3cb03125f4bbad8 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_fp16_accuracy.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
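+
+# Usage sketch (assumed invocation; DATASETS_DIR must point at the coco root
+# prepared as in the README, and --bs overrides the default batch size of 32):
+#   export DATASETS_DIR=/path/to/coco
+#   bash scripts/infer_yolov11m_fp16_accuracy.sh --bs 32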
+ +batchsize=32 +model_path="yolo11m.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolo11m_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolo11m_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_fp16_performance.sh b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..d0bb4b0ca3abca9c920c2978f1a7b65c40d36b64 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_fp16_performance.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11m.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolo11m_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolo11m_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_int8_accuracy.sh b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..1eb7a2cd4864dccc17acafb6d79ba268abee774f --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_int8_accuracy.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11m.onnx" +quantized_model_path="yolo11m_int8.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolo11m_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolo11m_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} diff --git a/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_int8_performance.sh b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..5991dd5e29de19d4dd8d233b05353f842ed8a529 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/scripts/infer_yolov11m_int8_performance.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11m.onnx" +quantized_model_path="yolo11m_int8.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolo11m_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolo11m_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True diff --git a/models/cv/object_detection/yolov11m/igie/validator.py b/models/cv/object_detection/yolov11m/igie/validator.py new file mode 100644 index 0000000000000000000000000000000000000000..7590b398fcd335a9d4cd592bcbce8666de65cb56 --- /dev/null +++ b/models/cv/object_detection/yolov11m/igie/validator.py @@ -0,0 +1,89 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import tvm
+import json
+import torch
+import numpy as np
+
+from tqdm import tqdm
+
+from ultralytics.models.yolo.detect import DetectionValidator
+from ultralytics.data.utils import check_det_dataset
+from ultralytics.utils.metrics import ConfusionMatrix
+from ultralytics.data.converter import coco80_to_coco91_class
+
+class IGIE_Validator(DetectionValidator):
+    def __call__(self, engine, device):
+        self.data = self.args.data
+        self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch)
+        self.init_metrics()
+
+        self.stats = {'tp': [], 'conf': [], 'pred_cls': [], 'target_cls': [], 'target_img': []}
+
+        # warm up
+        for _ in range(3):
+            engine.run()
+
+        for batch in tqdm(self.dataloader):
+            batch = self.preprocess(batch)
+
+            imgs = batch['img']
+            # pad the tail batch up to the engine's fixed batch size
+            pad_batch = len(imgs) != self.args.batch
+            if pad_batch:
+                origin_size = len(imgs)
+                imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:]))
+
+            engine.set_input(0, tvm.nd.array(imgs, device))
+
+            engine.run()
+
+            outputs = engine.get_output(0).asnumpy()
+
+            # drop the padded predictions again
+            if pad_batch:
+                outputs = outputs[:origin_size]
+
+            outputs = torch.from_numpy(outputs)
+
+            preds = self.postprocess([outputs])
+
+            self.update_metrics(preds, batch)
+
+        stats = self.get_stats()
+
+        if self.args.save_json and self.jdict:
+            with open(str(self.save_dir / 'predictions.json'), 'w') as f:
+                print(f'Saving {f.name} ...')
+                json.dump(self.jdict, f)  # flatten and save
+
+            stats = self.eval_json(stats)
+
+        return stats
+
+    def init_metrics(self):
+        """Initialize evaluation metrics for YOLO."""
+        val = self.data.get(self.args.split, '')  # validation path
+        self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt')  # is COCO
+        self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000))
+        self.args.save_json |= self.is_coco and not self.training  # run on final val if training COCO
+        self.names = self.data['names']
+        self.nc = len(self.names)
+        self.metrics.names = self.names
+        self.confusion_matrix = ConfusionMatrix(nc=80)
+        self.seen = 0
+        self.jdict = []
+        self.stats = []
+
diff --git a/models/cv/object_detection/yolov11n/igie/README.md b/models/cv/object_detection/yolov11n/igie/README.md
index 1f17e71749537793183ef83e11cb21644b49d2d4..f970db704218280c6662aae05014f1e74a655dcb 100644
--- a/models/cv/object_detection/yolov11n/igie/README.md
+++ b/models/cv/object_detection/yolov11n/igie/README.md
@@ -88,8 +88,8 @@ bash scripts/infer_yolov11n_int8_performance.sh
 ## Model Results
 
-| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 |
-| ------- | --------- | --------- | ------- | ------- | ------------ |
+| Model    | BatchSize | Precision | FPS     | IOU@0.5 | IOU@0.5:0.95 |
+|----------|-----------|-----------|---------|---------|--------------|
 | YOLOv11n | 32 | FP16 | 1652.02 | 0.551 | 0.393 |
 | YOLOv11n | 32 | INT8 | 1995.32 | 0.507 | 0.349 |
diff --git a/models/cv/object_detection/yolov11s/igie/README.md b/models/cv/object_detection/yolov11s/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..eb48a67168b105b468d0fa81ab5224052827325a
--- /dev/null
+++ b/models/cv/object_detection/yolov11s/igie/README.md
@@ -0,0 +1,100 @@
+# YOLOv11s (IGIE)
+
+## Model Description
+
+YOLOv11 is the latest generation of the YOLO (You Only Look Once) series of object detection models released by Ultralytics. Building upon the advancements of previous YOLO models, such as YOLOv5 and YOLOv8, YOLOv11 introduces comprehensive upgrades to further enhance performance, flexibility, and usability. It is a versatile deep learning model designed for multi-task applications, supporting object detection, instance segmentation, image classification, keypoint pose estimation, and rotated object detection.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| MR-V100 | 4.3.0 | 25.12 |
+| MR-V100 | 4.2.0 | 25.03 |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: 
+
+Dataset:
+
+- to download the labels dataset.
+- to download the validation dataset.
+- to download the train dataset.
+
+```bash
+unzip -q -d ./ coco2017labels.zip
+unzip -q -d ./coco/images/ train2017.zip
+unzip -q -d ./coco/images/ val2017.zip
+
+# the decompressed directory should look like this:
+coco
+├── annotations
+│   └── instances_val2017.json
+├── images
+│   ├── train2017
+│   └── val2017
+├── labels
+│   ├── train2017
+│   └── val2017
+├── LICENSE
+├── README.txt
+├── test-dev2017.txt
+├── train2017.cache
+├── train2017.txt
+├── val2017.cache
+└── val2017.txt
+```
+
+### Install Dependencies
+
+Contact the Iluvatar administrator to get the missing packages:
+
+- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl
+
+```bash
+pip3 install -r requirements.txt
+pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl
+```
+
+### Model Conversion
+
+```bash
+# make sure numpy < 2.0 is installed before exporting
+python3 export.py --weight yolo11s.pt --batch 32
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov11s_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov11s_fp16_performance.sh
+```
+
+### INT8
+
+```bash
+# Accuracy
+bash scripts/infer_yolov11s_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov11s_int8_performance.sh
+```
+
+## Model Results
+
+| Model    | BatchSize | Precision | FPS      | IOU@0.5 | IOU@0.5:0.95 |
+| -------- | --------- | --------- | -------- | ------- | ------------ |
+| YOLOv11s | 32 | FP16 | 1006.34 | 0.635 | 0.465 |
+| YOLOv11s | 32 | INT8 | 1354.66 | 0.620 | 0.442 |
+
+## References
+
+YOLOv11: 
diff --git a/models/cv/object_detection/yolov11s/igie/ci/prepare.sh b/models/cv/object_detection/yolov11s/igie/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..17704c2a2a6bcfc07a961c4bc43ab2f094ae41e2
--- /dev/null
+++ b/models/cv/object_detection/yolov11s/igie/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install -r requirements.txt
+
+python3 export.py --weight yolo11s.pt --batch 32
diff --git a/models/cv/object_detection/yolov11s/igie/export.py b/models/cv/object_detection/yolov11s/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..0eea848e5d46e4ea06ecde15ed543147dfb60037
--- /dev/null
+++ b/models/cv/object_detection/yolov11s/igie/export.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+from ultralytics import YOLO
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="PyTorch model weight.")
+
+    parser.add_argument("--batch",
+                        type=int,
+                        required=True,
+                        help="batch size of the model.")
+    args = parser.parse_args()
+
+    return args
+
+def main():
+    args = parse_args()
+
+    model = YOLO(args.weight).cpu()
+
+    # export a static-batch ONNX model (simplified, opset 13)
+    model.export(format='onnx', batch=args.batch, imgsz=(640, 640), optimize=True, simplify=True, opset=13)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov11s/igie/inference.py b/models/cv/object_detection/yolov11s/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..111c2afa451f8c94bf22e0f7db771b01767964f9
--- /dev/null
+++ b/models/cv/object_detection/yolov11s/igie/inference.py
@@ -0,0 +1,140 @@
+# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
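+
+# Overview (explanatory comment added for clarity, not part of the upstream
+# file): this script loads a compiled IGIE engine (.so) as a TVM runtime
+# module, then either times the engine for a throughput report
+# (--perf_only True) or runs COCO mAP evaluation through the IGIE_Validator
+# defined in validator.py.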
+ +import argparse +import os + +import tvm +from tvm import relay + +import numpy as np +from pathlib import Path +from ultralytics import YOLO +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from validator import IGIE_Validator + + + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + root_path = args.datasets + val_path = os.path.join(root_path, 'val2017.txt') + + overrides = {} + overrides['mode'] = 'val' + + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = args.batchsize + + cfg_args.data = { + 'path': Path(root_path), + 'val': val_path, + 'names': + { + 0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', + 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', + 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', + 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', + 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', + 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', + 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', + 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', + 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', + 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', + 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', + 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', + 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', + 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush' + }, + 'nc': 80} + cfg_args.save_json = True + + validator = IGIE_Validator(args=cfg_args, save_dir=Path('.')) + validator.stride = 32 + + stats = 
validator(module, device) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov11s/igie/quantize.py b/models/cv/object_detection/yolov11s/igie/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..e72c920fcb6364172716065b43e1443281624150 --- /dev/null +++ b/models/cv/object_detection/yolov11s/igie/quantize.py @@ -0,0 +1,167 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import onnx +import psutil +import argparse +import numpy as np +from pathlib import Path + +import torch + +from onnxruntime.quantization import (CalibrationDataReader, QuantFormat, + quantize_static, QuantType, + CalibrationMethod) + +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.models.yolo.detect import DetectionValidator + +class CalibrationDataLoader(CalibrationDataReader): + def __init__(self, input_name, dataloader, cnt_limit=100): + self.cnt = 0 + self.input_name = input_name + self.cnt_limit = cnt_limit + self.dataloader = dataloader + self.iter = iter(dataloader) + + # avoid oom + @staticmethod + def _exceed_memory_upper_bound(upper_bound=80): + info = psutil.virtual_memory() + total_percent = info.percent + if total_percent >= upper_bound: + return True + return False + + def get_next(self): + if self._exceed_memory_upper_bound() or self.cnt >= self.cnt_limit: + return None + self.cnt += 1 + print(f"onnx calibration data count: {self.cnt}") + input_info = next(self.iter) + + ort_input = {self.input_name[0]: input_info.numpy()} + + return ort_input + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--out_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="calibration datasets path.") + + parser.add_argument("--batch", + type=int, + default=32, + help="batchsize of the model.") + + args = parser.parse_args() + + return args + +class PreProcessDatasets(DetectionValidator): + def __call__(self, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + + datasets = [] + length = 0 + + for batch in self.dataloader: + data = self.preprocess(batch)['img'] + datasets.append(data[0]) + length += data.shape[0] + + if length >= 200: + break + + return datasets + +class CalibrationDataset(torch.utils.data.Dataset): + def __init__(self, datasets): + self.datasets = datasets + + def __len__(self): + return len(self.datasets) + + def __getitem__(self, index): + return self.datasets[index] + + +def main(): + args = parse_args() + + model = onnx.load(args.model_path) + input_names = [input.name for input in model.graph.input] + + overrides = {'mode': 'val'} + 
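+    # 'mode': 'val' is merged into DEFAULT_CFG by get_cfg() below so that the
+    # PreProcessDatasets validator yields validation-style batches for
+    # calibration (explanatory comment; same flow as the yolov11m copy of
+    # this script).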
cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = 1 + cfg_args.save_json = True + + data = { + 'path': Path(args.datasets), + 'val': os.path.join(args.datasets, 'val2017.txt') + } + + validator = PreProcessDatasets(args=cfg_args, save_dir=Path('.')) + + datasets = CalibrationDataset(validator(data)) + + data_loader = torch.utils.data.DataLoader(dataset=datasets, batch_size=args.batch) + + cnt_limit = int(20 / args.batch) + 1 + + calibration = CalibrationDataLoader(input_names, data_loader, cnt_limit=cnt_limit) + + quantize_static(args.model_path, + args.out_path, + calibration_data_reader=calibration, + quant_format=QuantFormat.QOperator, + per_channel=False, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + use_external_data_format=False, + nodes_to_exclude= [ + '/model.23/Add_1', '/model.23/Add_2', '/model.23/Concat_4', + '/model.23/Concat_5', '/model.23/Mul_2', '/model.10/m/m.0/attn/Softmax', + '/model.23/dfl/Softmax' + ], + calibrate_method=CalibrationMethod.Percentile, + extra_options = { + 'ActivationSymmetric': True, + 'WeightSymmetric': True + } + ) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov11s/igie/requirements.txt b/models/cv/object_detection/yolov11s/igie/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..210906370bb494a9de9ea61888e5a7e33a8b0f30 --- /dev/null +++ b/models/cv/object_detection/yolov11s/igie/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx==1.16.0 +ultralytics==8.3.59 +pycocotools \ No newline at end of file diff --git a/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_fp16_accuracy.sh b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..8351e9c039bb9211c0358bf56929231e68e9965d --- /dev/null +++ b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_fp16_accuracy.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
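+
+# Usage sketch (assumed): bash scripts/infer_yolov11s_fp16_accuracy.sh --bs 32,
+# with DATASETS_DIR set to the prepared COCO root and yolo11s.onnx present.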
+ +batchsize=32 +model_path="yolo11s.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolo11s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolo11s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_fp16_performance.sh b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..e72de789f27e35c8ce9f44b3e3d552ff9f592916 --- /dev/null +++ b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_fp16_performance.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11s.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolo11s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolo11s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_int8_accuracy.sh b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..84e6f174f529e5ae68d06df95cc5ce367ec7ba02 --- /dev/null +++ b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_int8_accuracy.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11s.onnx" +quantized_model_path="yolo11s_int8.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolo11s_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolo11s_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} diff --git a/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_int8_performance.sh b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..5a52737f9eeea57cd0a78048646354c2015cea1e --- /dev/null +++ b/models/cv/object_detection/yolov11s/igie/scripts/infer_yolov11s_int8_performance.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11s.onnx" +quantized_model_path="yolo11s_int8.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolo11s_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolo11s_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True diff --git a/models/cv/object_detection/yolov11s/igie/validator.py b/models/cv/object_detection/yolov11s/igie/validator.py new file mode 100644 index 0000000000000000000000000000000000000000..7590b398fcd335a9d4cd592bcbce8666de65cb56 --- /dev/null +++ b/models/cv/object_detection/yolov11s/igie/validator.py @@ -0,0 +1,89 @@ +# Copyright (c) 2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import tvm
+import json
+import torch
+import numpy as np
+
+from tqdm import tqdm
+
+from ultralytics.models.yolo.detect import DetectionValidator
+from ultralytics.data.utils import check_det_dataset
+from ultralytics.utils.metrics import ConfusionMatrix
+from ultralytics.data.converter import coco80_to_coco91_class
+
+class IGIE_Validator(DetectionValidator):
+    def __call__(self, engine, device):
+        self.data = self.args.data
+        self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch)
+        self.init_metrics()
+
+        self.stats = {'tp': [], 'conf': [], 'pred_cls': [], 'target_cls': [], 'target_img': []}
+
+        # warm up
+        for _ in range(3):
+            engine.run()
+
+        for batch in tqdm(self.dataloader):
+            batch = self.preprocess(batch)
+
+            imgs = batch['img']
+            # pad the tail batch up to the engine's fixed batch size
+            pad_batch = len(imgs) != self.args.batch
+            if pad_batch:
+                origin_size = len(imgs)
+                imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:]))
+
+            engine.set_input(0, tvm.nd.array(imgs, device))
+
+            engine.run()
+
+            outputs = engine.get_output(0).asnumpy()
+
+            # drop the padded predictions again
+            if pad_batch:
+                outputs = outputs[:origin_size]
+
+            outputs = torch.from_numpy(outputs)
+
+            preds = self.postprocess([outputs])
+
+            self.update_metrics(preds, batch)
+
+        stats = self.get_stats()
+
+        if self.args.save_json and self.jdict:
+            with open(str(self.save_dir / 'predictions.json'), 'w') as f:
+                print(f'Saving {f.name} ...')
+                json.dump(self.jdict, f)  # flatten and save
+
+            stats = self.eval_json(stats)
+
+        return stats
+
+    def init_metrics(self):
+        """Initialize evaluation metrics for YOLO."""
+        val = self.data.get(self.args.split, '')  # validation path
+        self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt')  # is COCO
+        self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000))
+        self.args.save_json |= self.is_coco and not self.training  # run on final val if training COCO
+        self.names = self.data['names']
+        self.nc = len(self.names)
+        self.metrics.names = self.names
+        self.confusion_matrix = ConfusionMatrix(nc=80)
+        self.seen = 0
+        self.jdict = []
+        self.stats = []
+
diff --git a/tests/run_igie.py b/tests/run_igie.py
index b5cfba3f636565322d5915a0d26c452b9e23cf55..cbdabdff57a598fef4ba7165f0143207e890beeb 100644
--- a/tests/run_igie.py
+++ b/tests/run_igie.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# Copyright (c) 2024-2026, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
 # All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may