From 020f14d84f8ca5007cca818fd46a2d120655697e Mon Sep 17 00:00:00 2001 From: Raj Gite Date: Thu, 31 Oct 2024 23:38:13 +0530 Subject: [PATCH] Add model simplification step in onnx notebooks (#3454) Signed-off-by: Raj Gite --- Examples/onnx/quantization/AMP.ipynb | 37 +++++++++++++--- Examples/onnx/quantization/adaround.ipynb | 31 ++++++++++++-- Examples/onnx/quantization/cle.ipynb | 52 +++++++++++++++++++++-- Examples/onnx/quantization/quantsim.ipynb | 36 +++++++++++++--- 4 files changed, 137 insertions(+), 19 deletions(-) diff --git a/Examples/onnx/quantization/AMP.ipynb b/Examples/onnx/quantization/AMP.ipynb index 6a6c11a1e55..557d28fba3b 100644 --- a/Examples/onnx/quantization/AMP.ipynb +++ b/Examples/onnx/quantization/AMP.ipynb @@ -134,7 +134,8 @@ }, "source": [ "---\n", - "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy" + "\n", + "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy" ] }, { @@ -172,7 +173,7 @@ "torch.onnx.export(pt_model.eval(),\n", " dummy_input,\n", " filename,\n", - " training=torch.onnx.TrainingMode.PRESERVE,\n", + " training=torch.onnx.TrainingMode.EVAL,\n", " export_params=True,\n", " do_constant_folding=False,\n", " input_names=['input'],\n", @@ -186,6 +187,29 @@ "model = onnx.load_model(filename)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "It is recommended to simplify the model before using AIMET" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from onnxsim import simplify\n", + "\n", + "try:\n", + " model, _ = simplify(model)\n", + "except:\n", + " print('ONNX Simplifier failed. Proceeding with unsimplified model')" + ] + }, { "cell_type": "markdown", "metadata": { @@ -239,7 +263,7 @@ }, "outputs": [], "source": [ - "sess = ort.InferenceSession(filename, providers=providers)\n", + "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n", "accuracy = ImageNetDataPipeline.evaluate(sess)\n", "print(accuracy)" ] @@ -324,6 +348,9 @@ "cell_type": "markdown", "metadata": { "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, "pycharm": { "name": "#%% md\n" } @@ -670,9 +697,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.0" + "version": "3.10.12" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/Examples/onnx/quantization/adaround.ipynb b/Examples/onnx/quantization/adaround.ipynb index b2e4dc5ca92..fff9e734c8f 100644 --- a/Examples/onnx/quantization/adaround.ipynb +++ b/Examples/onnx/quantization/adaround.ipynb @@ -133,7 +133,7 @@ "metadata": {}, "source": [ "---\n", - "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy" + "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy" ] }, { @@ -184,7 +184,30 @@ "source": [ "---\n", "\n", - "**2.2 Decide whether to place the model on a CPU or CUDA device.** \n", + "**2.2 It is recommended to simplify the model before using AIMET**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from onnxsim import simplify\n", + "\n", + "try:\n", + " model, _ = simplify(model)\n", + "except:\n", + " print('ONNX Simplifier failed. Proceeding with unsimplified model')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n", "\n", "This example uses CUDA if it is available. You can change this logic and force a device placement if needed." ] @@ -209,7 +232,7 @@ "metadata": {}, "source": [ "---\n", - "**2.3 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**" + "**2.4 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**" ] }, { @@ -218,7 +241,7 @@ "metadata": {}, "outputs": [], "source": [ - "sess = ort.InferenceSession(filename, providers=providers)\n", + "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n", "accuracy = ImageNetDataPipeline.evaluate(sess)\n", "print(accuracy)" ] diff --git a/Examples/onnx/quantization/cle.ipynb b/Examples/onnx/quantization/cle.ipynb index 64b3be76472..939d49d0801 100644 --- a/Examples/onnx/quantization/cle.ipynb +++ b/Examples/onnx/quantization/cle.ipynb @@ -145,7 +145,7 @@ "source": [ "---\n", "\n", - "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy" + "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy" ] }, { @@ -215,7 +215,30 @@ "source": [ "---\n", "\n", - "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n", + "**2.3 It is recommended to simplify the model before using AIMET**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from onnxsim import simplify\n", + "\n", + "try:\n", + " model, _ = simplify(model)\n", + "except:\n", + " print('ONNX Simplifier failed. Proceeding with unsimplified model')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "**2.4 Decide whether to place the model on a CPU or CUDA device.** \n", "\n", "This example uses CUDA if it is available. You can change this logic and force a device placement if needed." ] @@ -245,7 +268,7 @@ "source": [ "---\n", "\n", - "**2.4 Create an ONNX runtime session and compute the floating point 32-bit (FP32) accuracy of this model using the evaluate() routine.**" + "**2.5 Create an ONNX runtime session and compute the floating point 32-bit (FP32) accuracy of this model using the evaluate() routine.**" ] }, { @@ -259,7 +282,7 @@ }, "outputs": [], "source": [ - "sess = ort.InferenceSession(filename, providers=providers)\n", + "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n", "accuracy = ImageNetDataPipeline.evaluate(sess)\n", "print(accuracy)" ] @@ -471,6 +494,27 @@ "model = onnx.load_model(filename)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**It is recommended to simplify the model before using AIMET**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from onnxsim import simplify\n", + "\n", + "try:\n", + " model, _ = simplify(model)\n", + "except:\n", + " print('ONNX Simplifier failed. Proceeding with unsimplified model')" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/Examples/onnx/quantization/quantsim.ipynb b/Examples/onnx/quantization/quantsim.ipynb index ce69d90c728..e475d15d2c7 100644 --- a/Examples/onnx/quantization/quantsim.ipynb +++ b/Examples/onnx/quantization/quantsim.ipynb @@ -146,7 +146,8 @@ }, "source": [ "---\n", - "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy" + "\n", + "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy" ] }, { @@ -183,7 +184,7 @@ "torch.onnx.export(pt_model.eval(),\n", " dummy_input,\n", " filename,\n", - " training=torch.onnx.TrainingMode.PRESERVE,\n", + " training=torch.onnx.TrainingMode.EVAL,\n", " export_params=True,\n", " do_constant_folding=False,\n", " input_names=['input'],\n", @@ -203,7 +204,30 @@ "source": [ "---\n", "\n", - "**2.2 Decide whether to place the model on a CPU or CUDA device.** \n", + "**2.2 It is recommended to simplify the model before using AIMET**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from onnxsim import simplify\n", + "\n", + "try:\n", + " model, _ = simplify(model)\n", + "except:\n", + " print('ONNX Simplifier failed. Proceeding with unsimplified model')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n", "\n", "This example uses CUDA if it is available. You can change this logic and force a device placement if needed." ] @@ -241,7 +265,7 @@ }, "source": [ "---\n", - "**2.3 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**" + "**2.4 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**" ] }, { @@ -255,7 +279,7 @@ }, "outputs": [], "source": [ - "sess = ort.InferenceSession(filename, providers=providers)\n", + "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n", "accuracy = ImageNetDataPipeline.evaluate(sess)\n", "print(accuracy)" ] @@ -456,7 +480,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" } }, "nbformat": 4,