From 020f14d84f8ca5007cca818fd46a2d120655697e Mon Sep 17 00:00:00 2001
From: Raj Gite <quic_rgite@quicinc.com>
Date: Thu, 31 Oct 2024 23:38:13 +0530
Subject: [PATCH] Add model simplification step in onnx notebooks (#3454)

Signed-off-by: Raj Gite <quic_rgite@quicinc.com>
---
 Examples/onnx/quantization/AMP.ipynb      | 37 +++++++++++++---
 Examples/onnx/quantization/adaround.ipynb | 31 ++++++++++++--
 Examples/onnx/quantization/cle.ipynb      | 52 +++++++++++++++++++++--
 Examples/onnx/quantization/quantsim.ipynb | 36 +++++++++++++---
 4 files changed, 137 insertions(+), 19 deletions(-)

diff --git a/Examples/onnx/quantization/AMP.ipynb b/Examples/onnx/quantization/AMP.ipynb
index 6a6c11a1e55..557d28fba3b 100644
--- a/Examples/onnx/quantization/AMP.ipynb
+++ b/Examples/onnx/quantization/AMP.ipynb
@@ -134,7 +134,8 @@
    },
    "source": [
     "---\n",
-    "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+    "\n",
+    "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy"
    ]
   },
   {
@@ -172,7 +173,7 @@
     "torch.onnx.export(pt_model.eval(),\n",
     "                  dummy_input,\n",
     "                  filename,\n",
-    "                  training=torch.onnx.TrainingMode.PRESERVE,\n",
+    "                  training=torch.onnx.TrainingMode.EVAL,\n",
     "                  export_params=True,\n",
     "                  do_constant_folding=False,\n",
     "                  input_names=['input'],\n",
@@ -186,6 +187,29 @@
     "model = onnx.load_model(filename)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "It is recommended to simplify the ONNX model before using AIMET."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from onnxsim import simplify\n",
+    "\n",
+    "try:\n",
+    "    model, _ = simplify(model)\n",
+    "except Exception as exc:  # best-effort step; not a bare except, so kernel interrupts still propagate\n",
+    "    print(f'ONNX Simplifier failed ({exc!r}). Proceeding with unsimplified model')"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -239,7 +263,7 @@
    },
    "outputs": [],
    "source": [
-    "sess = ort.InferenceSession(filename, providers=providers)\n",
+    "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
     "accuracy = ImageNetDataPipeline.evaluate(sess)\n",
     "print(accuracy)"
    ]
@@ -324,6 +348,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -670,9 +697,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/Examples/onnx/quantization/adaround.ipynb b/Examples/onnx/quantization/adaround.ipynb
index b2e4dc5ca92..fff9e734c8f 100644
--- a/Examples/onnx/quantization/adaround.ipynb
+++ b/Examples/onnx/quantization/adaround.ipynb
@@ -133,7 +133,7 @@
    "metadata": {},
    "source": [
     "---\n",
-    "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+    "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy"
    ]
   },
   {
@@ -184,7 +184,30 @@
    "source": [
     "---\n",
     "\n",
-    "**2.2 Decide whether to place the model on a CPU or CUDA device.** \n",
+    "**2.2 It is recommended to simplify the ONNX model before using AIMET.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from onnxsim import simplify\n",
+    "\n",
+    "try:\n",
+    "    model, _ = simplify(model)\n",
+    "except Exception as exc:  # best-effort step; not a bare except, so kernel interrupts still propagate\n",
+    "    print(f'ONNX Simplifier failed ({exc!r}). Proceeding with unsimplified model')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n",
     "\n",
     "This example uses CUDA if it is available. You can change this logic and force a device placement if needed."
    ]
@@ -209,7 +232,7 @@
    "metadata": {},
    "source": [
     "---\n",
-    "**2.3 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
+    "**2.4 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
    ]
   },
   {
@@ -218,7 +241,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sess = ort.InferenceSession(filename, providers=providers)\n",
+    "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
     "accuracy = ImageNetDataPipeline.evaluate(sess)\n",
     "print(accuracy)"
    ]
diff --git a/Examples/onnx/quantization/cle.ipynb b/Examples/onnx/quantization/cle.ipynb
index 64b3be76472..939d49d0801 100644
--- a/Examples/onnx/quantization/cle.ipynb
+++ b/Examples/onnx/quantization/cle.ipynb
@@ -145,7 +145,7 @@
    "source": [
     "---\n",
     "\n",
-    "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+    "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy"
    ]
   },
   {
@@ -215,7 +215,30 @@
    "source": [
     "---\n",
     "\n",
-    "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n",
+    "**2.3 It is recommended to simplify the ONNX model before using AIMET.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from onnxsim import simplify\n",
+    "\n",
+    "try:\n",
+    "    model, _ = simplify(model)\n",
+    "except Exception as exc:  # best-effort step; not a bare except, so kernel interrupts still propagate\n",
+    "    print(f'ONNX Simplifier failed ({exc!r}). Proceeding with unsimplified model')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "**2.4 Decide whether to place the model on a CPU or CUDA device.** \n",
     "\n",
     "This example uses CUDA if it is available. You can change this logic and force a device placement if needed."
    ]
@@ -245,7 +268,7 @@
    "source": [
     "---\n",
     "\n",
-    "**2.4 Create an ONNX runtime session and compute the floating point 32-bit (FP32) accuracy of this model using the evaluate() routine.**"
+    "**2.5 Create an ONNX runtime session and compute the floating point 32-bit (FP32) accuracy of this model using the evaluate() routine.**"
    ]
   },
   {
@@ -259,7 +282,7 @@
    },
    "outputs": [],
    "source": [
-    "sess = ort.InferenceSession(filename, providers=providers)\n",
+    "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
     "accuracy = ImageNetDataPipeline.evaluate(sess)\n",
     "print(accuracy)"
    ]
@@ -471,6 +494,27 @@
     "model = onnx.load_model(filename)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**It is recommended to simplify the ONNX model before using AIMET.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from onnxsim import simplify\n",
+    "\n",
+    "try:\n",
+    "    model, _ = simplify(model)\n",
+    "except Exception as exc:  # best-effort step; not a bare except, so kernel interrupts still propagate\n",
+    "    print(f'ONNX Simplifier failed ({exc!r}). Proceeding with unsimplified model')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/Examples/onnx/quantization/quantsim.ipynb b/Examples/onnx/quantization/quantsim.ipynb
index ce69d90c728..e475d15d2c7 100644
--- a/Examples/onnx/quantization/quantsim.ipynb
+++ b/Examples/onnx/quantization/quantsim.ipynb
@@ -146,7 +146,8 @@
    },
    "source": [
     "---\n",
-    "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+    "\n",
+    "## 2. Convert an FP32 PyTorch model to ONNX, simplify & then evaluate baseline FP32 accuracy"
    ]
   },
   {
@@ -183,7 +184,7 @@
     "torch.onnx.export(pt_model.eval(),\n",
     "                  dummy_input,\n",
     "                  filename,\n",
-    "                  training=torch.onnx.TrainingMode.PRESERVE,\n",
+    "                  training=torch.onnx.TrainingMode.EVAL,\n",
     "                  export_params=True,\n",
     "                  do_constant_folding=False,\n",
     "                  input_names=['input'],\n",
@@ -203,7 +204,30 @@
    "source": [
     "---\n",
     "\n",
-    "**2.2 Decide whether to place the model on a CPU or CUDA device.** \n",
+    "**2.2 It is recommended to simplify the ONNX model before using AIMET.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from onnxsim import simplify\n",
+    "\n",
+    "try:\n",
+    "    model, _ = simplify(model)\n",
+    "except Exception as exc:  # best-effort step; not a bare except, so kernel interrupts still propagate\n",
+    "    print(f'ONNX Simplifier failed ({exc!r}). Proceeding with unsimplified model')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n",
     "\n",
     "This example uses CUDA if it is available. You can change this logic and force a device placement if needed."
    ]
@@ -241,7 +265,7 @@
    },
    "source": [
     "---\n",
-    "**2.3 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
+    "**2.4 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
    ]
   },
   {
@@ -255,7 +279,7 @@
    },
    "outputs": [],
    "source": [
-    "sess = ort.InferenceSession(filename, providers=providers)\n",
+    "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
     "accuracy = ImageNetDataPipeline.evaluate(sess)\n",
     "print(accuracy)"
    ]
@@ -456,7 +480,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,