Add model simplification step in onnx notebooks (#3454)
Signed-off-by: Raj Gite <quic_rgite@quicinc.com>
quic-rgite authored and Gite, Raj committed Nov 1, 2024
1 parent c51b284 commit 8bdf55f
Showing 4 changed files with 137 additions and 19 deletions.
37 changes: 32 additions & 5 deletions Examples/onnx/quantization/AMP.ipynb
@@ -134,7 +134,8 @@
},
"source": [
"---\n",
- "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+ "\n",
+ "## 2. Convert an FP32 PyTorch model to ONNX, simplify it, and evaluate the baseline FP32 accuracy"
]
},
{
@@ -172,7 +173,7 @@
"torch.onnx.export(pt_model.eval(),\n",
" dummy_input,\n",
" filename,\n",
- " training=torch.onnx.TrainingMode.PRESERVE,\n",
+ " training=torch.onnx.TrainingMode.EVAL,\n",
" export_params=True,\n",
" do_constant_folding=False,\n",
" input_names=['input'],\n",
@@ -186,6 +187,29 @@
"model = onnx.load_model(filename)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "It is recommended to simplify the model before using AIMET."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from onnxsim import simplify\n",
+ "\n",
+ "try:\n",
+ "    model, _ = simplify(model)\n",
+ "except Exception:\n",
+ "    print('ONNX Simplifier failed. Proceeding with unsimplified model')"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -239,7 +263,7 @@
},
"outputs": [],
"source": [
- "sess = ort.InferenceSession(filename, providers=providers)\n",
+ "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
"accuracy = ImageNetDataPipeline.evaluate(sess)\n",
"print(accuracy)"
]
@@ -324,6 +348,9 @@
"cell_type": "markdown",
"metadata": {
"collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ },
"pycharm": {
"name": "#%% md\n"
}
@@ -670,9 +697,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.0"
+ "version": "3.10.12"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
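The same simplify-with-fallback cell is added to all four notebooks. Factored out, the pattern looks like this; a minimal plain-Python sketch, where `simplify_or_original` and the toy simplifiers are hypothetical names introduced for illustration, and the only assumed API is `onnxsim.simplify`'s convention of returning a `(model, check_ok)` tuple:

```python
def simplify_or_original(model, simplifier):
    """Return the simplified model, falling back to the original on any failure.

    `simplifier` follows onnxsim.simplify's convention of returning a
    (model, check_ok) tuple.
    """
    try:
        simplified, _ = simplifier(model)
        return simplified
    except Exception:
        # Any simplifier failure leaves the pipeline usable with the raw model.
        print('ONNX Simplifier failed. Proceeding with unsimplified model')
        return model


# Hypothetical stand-in simplifiers, so the sketch runs without onnx installed:
def toy_simplifier(m):
    return m + '-simplified', True

def failing_simplifier(m):
    raise RuntimeError('unsupported op')

print(simplify_or_original('model', toy_simplifier))      # prints: model-simplified
print(simplify_or_original('model', failing_simplifier))  # prints the warning, then: model
```

The design point the diff relies on: simplification is an optimization, not a correctness requirement, so the notebooks continue with the unsimplified in-memory model rather than aborting.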
31 changes: 27 additions & 4 deletions Examples/onnx/quantization/adaround.ipynb
@@ -133,7 +133,7 @@
"metadata": {},
"source": [
"---\n",
- "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+ "## 2. Convert an FP32 PyTorch model to ONNX, simplify it, and evaluate the baseline FP32 accuracy"
]
},
{
@@ -184,7 +184,30 @@
"source": [
"---\n",
"\n",
- "**2.2 Decide whether to place the model on a CPU or CUDA device.** \n",
+ "**2.2 Simplify the model before using AIMET (recommended).**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from onnxsim import simplify\n",
+ "\n",
+ "try:\n",
+ "    model, _ = simplify(model)\n",
+ "except Exception:\n",
+ "    print('ONNX Simplifier failed. Proceeding with unsimplified model')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n",
"\n",
"This example uses CUDA if it is available. You can change this logic and force a device placement if needed."
]
@@ -209,7 +232,7 @@
"metadata": {},
"source": [
"---\n",
- "**2.3 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
+ "**2.4 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
]
},
{
@@ -218,7 +241,7 @@
"metadata": {},
"outputs": [],
"source": [
- "sess = ort.InferenceSession(filename, providers=providers)\n",
+ "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
"accuracy = ImageNetDataPipeline.evaluate(sess)\n",
"print(accuracy)"
]
52 changes: 48 additions & 4 deletions Examples/onnx/quantization/cle.ipynb
@@ -145,7 +145,7 @@
"source": [
"---\n",
"\n",
- "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+ "## 2. Convert an FP32 PyTorch model to ONNX, simplify it, and evaluate the baseline FP32 accuracy"
]
},
{
@@ -215,7 +215,30 @@
"source": [
"---\n",
"\n",
- "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n",
+ "**2.3 Simplify the model before using AIMET (recommended).**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from onnxsim import simplify\n",
+ "\n",
+ "try:\n",
+ "    model, _ = simplify(model)\n",
+ "except Exception:\n",
+ "    print('ONNX Simplifier failed. Proceeding with unsimplified model')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**2.4 Decide whether to place the model on a CPU or CUDA device.** \n",
"\n",
"This example uses CUDA if it is available. You can change this logic and force a device placement if needed."
]
@@ -245,7 +268,7 @@
"source": [
"---\n",
"\n",
- "**2.4 Create an ONNX runtime session and compute the floating point 32-bit (FP32) accuracy of this model using the evaluate() routine.**"
+ "**2.5 Create an ONNX runtime session and compute the floating point 32-bit (FP32) accuracy of this model using the evaluate() routine.**"
]
},
{
@@ -259,7 +282,7 @@
},
"outputs": [],
"source": [
- "sess = ort.InferenceSession(filename, providers=providers)\n",
+ "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
"accuracy = ImageNetDataPipeline.evaluate(sess)\n",
"print(accuracy)"
]
@@ -471,6 +494,27 @@
"model = onnx.load_model(filename)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Simplify the model before using AIMET (recommended).**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from onnxsim import simplify\n",
+ "\n",
+ "try:\n",
+ "    model, _ = simplify(model)\n",
+ "except Exception:\n",
+ "    print('ONNX Simplifier failed. Proceeding with unsimplified model')"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
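A note on the fallback's breadth: each simplify cell guards `onnxsim.simplify` with a `try`/`except`. Catching `Exception` handles any simplifier failure while still letting process-control exceptions such as `SystemExit` and `KeyboardInterrupt` propagate, whereas a bare `except:` swallows those too. A standalone sketch of the difference, with illustrative function names not taken from the notebooks:

```python
def run_with_bare_except(fn):
    try:
        return fn()
    except:                  # also catches SystemExit and KeyboardInterrupt
        return 'swallowed'

def run_with_except_exception(fn):
    try:
        return fn()
    except Exception:        # lets SystemExit/KeyboardInterrupt propagate
        return 'handled'

def exits():
    raise SystemExit(1)

print(run_with_bare_except(exits))   # prints: swallowed -- Ctrl+C would be eaten too
try:
    run_with_except_exception(exits)
except SystemExit:
    print('SystemExit propagated')   # the narrower clause lets the exit through
```

This is why `except Exception` is the conventional breadth for a "proceed with the unsimplified model" fallback.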
36 changes: 30 additions & 6 deletions Examples/onnx/quantization/quantsim.ipynb
@@ -146,7 +146,8 @@
},
"source": [
"---\n",
- "## 2. Convert an FP32 PyTorch model to ONNX and evaluate the model's baseline FP32 accuracy"
+ "\n",
+ "## 2. Convert an FP32 PyTorch model to ONNX, simplify it, and evaluate the baseline FP32 accuracy"
]
},
{
@@ -183,7 +184,7 @@
"torch.onnx.export(pt_model.eval(),\n",
" dummy_input,\n",
" filename,\n",
- " training=torch.onnx.TrainingMode.PRESERVE,\n",
+ " training=torch.onnx.TrainingMode.EVAL,\n",
" export_params=True,\n",
" do_constant_folding=False,\n",
" input_names=['input'],\n",
@@ -203,7 +204,30 @@
"source": [
"---\n",
"\n",
- "**2.2 Decide whether to place the model on a CPU or CUDA device.** \n",
+ "**2.2 Simplify the model before using AIMET (recommended).**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from onnxsim import simplify\n",
+ "\n",
+ "try:\n",
+ "    model, _ = simplify(model)\n",
+ "except Exception:\n",
+ "    print('ONNX Simplifier failed. Proceeding with unsimplified model')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**2.3 Decide whether to place the model on a CPU or CUDA device.** \n",
"\n",
"This example uses CUDA if it is available. You can change this logic and force a device placement if needed."
]
@@ -241,7 +265,7 @@
},
"source": [
"---\n",
- "**2.3 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
+ "**2.4 Create an onnxruntime session and determine the FP32 accuracy of this model using the evaluate() routine.**"
]
},
{
@@ -255,7 +279,7 @@
},
"outputs": [],
"source": [
- "sess = ort.InferenceSession(filename, providers=providers)\n",
+ "sess = ort.InferenceSession(model.SerializeToString(), providers=providers)\n",
"accuracy = ImageNetDataPipeline.evaluate(sess)\n",
"print(accuracy)"
]
@@ -456,7 +480,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.10.12"
}
},
"nbformat": 4,