diff --git a/tutorials/mct_model_garden/models_pytorch/fastdepth/fastdepth.py b/tutorials/mct_model_garden/models_pytorch/fastdepth/fastdepth.py index 6787589d8..cbd674d6d 100644 --- a/tutorials/mct_model_garden/models_pytorch/fastdepth/fastdepth.py +++ b/tutorials/mct_model_garden/models_pytorch/fastdepth/fastdepth.py @@ -114,10 +114,9 @@ def conv_dw(inp, oup, stride, relu6): ) class FastDepth(nn.Module, PyTorchModelHubMixin): - def __init__(self, output_size): + def __init__(self): super(FastDepth, self).__init__() - self.output_size = output_size mobilenet = MobileNetBackbone() for i in range(14): diff --git a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb index 6f4f660ef..7eeb3a53d 100644 --- a/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb +++ b/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb @@ -7,7 +7,7 @@ "collapsed": false }, "source": [ - "# Fast-Depth Depth Estimation - Quantization for IMX500\n", + "# Fast-Depth Estimation - Quantization for IMX500\n", "\n", "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/imx500_notebooks/pytorch/pytorch_fastdepth_for_imx500.ipynb)\n", "\n", @@ -15,16 +15,7 @@ "\n", "In this tutorial, we will illustrate a basic and quick process of preparing a pre-trained model for deployment using MCT. Specifically, we will demonstrate how to download a pre-trained pytorch fast-depth model, compress it, and make it deployment-ready using MCT's post-training quantization techniques.\n", "\n", - "We will use an existing pre-trained Fast-Depth model based on [Fast-Depth](https://github.com/dwofk/fast-depth). We will quantize the model using MCT post training quantization technique and visualize some samples of the floating point model and the quantized model.\n", - "\n", - "\n", - "## Summary\n", - "\n", - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization (PTQ) using MCT of Fast-Depth depth estimation model.\n", - "2. Data preparation\n", - "3. Visualize samples from the floating-point and the quantized models." + "We will use an existing pre-trained Fast-Depth model based on [Fast-Depth](https://github.com/dwofk/fast-depth). 
We will quantize the model using MCT's post-training quantization technique and visualize some samples of the floating-point model and the quantized model.\n" ] }, { @@ -42,15 +33,86 @@ "cell_type": "code", "id": "7c7fa04c9903736f", "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-10-10T14:08:38.389433Z", + "start_time": "2024-10-10T14:08:22.709164Z" + } }, "source": [ + "import torch\n", "!pip install -q torch\n", "!pip install onnx\n", + "!pip install datasets\n", + "!pip install matplotlib\n", "!pip install 'huggingface-hub>=0.21.0'" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: onnx in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (1.16.1)\r\n", + "Requirement already satisfied: numpy>=1.20 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from onnx) (1.26.4)\r\n", + "Requirement already satisfied: protobuf>=3.20.2 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from onnx) (4.25.5)\r\n", + "Requirement already satisfied: datasets in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (3.0.1)\r\n", + "Requirement already satisfied: filelock in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (3.13.1)\r\n", + "Requirement already satisfied: numpy>=1.17 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (1.26.4)\r\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (17.0.0)\r\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (0.3.8)\r\n", + "Requirement already satisfied: pandas in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (2.2.2)\r\n", + "Requirement already satisfied: requests>=2.32.2 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (2.32.3)\r\n", + "Requirement already satisfied: tqdm>=4.66.3 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (4.66.5)\r\n", + "Requirement already satisfied: xxhash in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (3.5.0)\r\n", + "Requirement already satisfied: multiprocess in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (0.70.16)\r\n", + "Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets) (2024.6.1)\r\n", + "Requirement already satisfied: aiohttp in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (3.10.9)\r\n", + "Requirement already satisfied: huggingface-hub>=0.22.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (0.24.5)\r\n", + "Requirement already satisfied: packaging in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (24.1)\r\n", + "Requirement already satisfied: pyyaml>=5.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from datasets) (6.0.1)\r\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in 
/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from aiohttp->datasets) (2.4.3)\r\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from aiohttp->datasets) (1.3.1)\r\n", + "Requirement already satisfied: attrs>=17.3.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from aiohttp->datasets) (24.2.0)\r\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from aiohttp->datasets) (1.4.1)\r\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from aiohttp->datasets) (6.1.0)\r\n", + "Requirement already satisfied: yarl<2.0,>=1.12.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from aiohttp->datasets) (1.14.0)\r\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.22.0->datasets) (4.11.0)\r\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (3.3.2)\r\n", + "Requirement already satisfied: idna<4,>=2.5 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (3.7)\r\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (2.2.2)\r\n", + "Requirement already satisfied: certifi>=2017.4.17 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (2024.7.4)\r\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from pandas->datasets) (2.9.0.post0)\r\n", + "Requirement already satisfied: pytz>=2020.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from pandas->datasets) (2024.2)\r\n", + "Requirement already satisfied: tzdata>=2022.7 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from pandas->datasets) (2024.1)\r\n", + "Requirement already satisfied: six>=1.5 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\r\n", + "Requirement already satisfied: propcache>=0.2.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\r\n", + "Requirement already satisfied: matplotlib in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (3.9.2)\r\n", + "Requirement already satisfied: contourpy>=1.0.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (1.2.1)\r\n", + "Requirement already satisfied: cycler>=0.10 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (0.12.1)\r\n", + "Requirement already satisfied: fonttools>=4.22.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (4.53.1)\r\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (1.4.5)\r\n", + "Requirement already satisfied: numpy>=1.23 in 
/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (1.26.4)\r\n", + "Requirement already satisfied: packaging>=20.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (24.1)\r\n", + "Requirement already satisfied: pillow>=8 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (10.4.0)\r\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (3.1.2)\r\n", + "Requirement already satisfied: python-dateutil>=2.7 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from matplotlib) (2.9.0.post0)\r\n", + "Requirement already satisfied: six>=1.5 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\r\n", + "Requirement already satisfied: huggingface-hub>=0.21.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (0.24.5)\r\n", + "Requirement already satisfied: filelock in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.21.0) (3.13.1)\r\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.21.0) (2024.6.1)\r\n", + "Requirement already satisfied: packaging>=20.9 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.21.0) (24.1)\r\n", + "Requirement already satisfied: pyyaml>=5.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.21.0) (6.0.1)\r\n", + "Requirement already satisfied: requests in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.21.0) (2.32.3)\r\n", + "Requirement already satisfied: tqdm>=4.42.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.21.0) (4.66.5)\r\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from huggingface-hub>=0.21.0) (4.11.0)\r\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests->huggingface-hub>=0.21.0) (3.3.2)\r\n", + "Requirement already satisfied: idna<4,>=2.5 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests->huggingface-hub>=0.21.0) (3.7)\r\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests->huggingface-hub>=0.21.0) (2.2.2)\r\n", + "Requirement already satisfied: certifi>=2017.4.17 in /data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages (from requests->huggingface-hub>=0.21.0) (2024.7.4)\r\n" + ] + } + ], + "execution_count": 1 }, { "cell_type": "markdown", @@ -58,83 +120,60 @@ "metadata": { "collapsed": false }, - "source": [ - "Install MCT (if it’s not already installed). Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." - ] + "source": "Install MCT (if it’s not already installed). 
Additionally, in order to use all the necessary utility functions for this tutorial, we also copy [MCT tutorials folder](https://github.com/sony/model_optimization/tree/main/tutorials) and add it to the system path." }, { "cell_type": "code", "id": "9728247bc20d0600", "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-10-10T14:08:44.433155Z", + "start_time": "2024-10-10T14:08:38.390775Z" + } }, "source": [ - "import sys\n", "import importlib\n", + "import sys\n", "\n", "if not importlib.util.find_spec('model_compression_toolkit'):\n", " !pip install model_compression_toolkit\n", "!git clone https://github.com/sony/model_optimization.git temp_mct && mv temp_mct/tutorials . && \\rm -rf temp_mct\n", "sys.path.insert(0,\"tutorials\")" ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "7a1038b9fd98bba2", - "metadata": { - "collapsed": false - }, - "source": [ - "\n", - "## Representitive Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 15, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "'wget' is not recognized as an internal or external command,\n", - "operable program or batch file.\n", - "'unzip' is not recognized as an internal or external command,\n", - "operable program or batch file.\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Done loading office_kitchens images\n" + "Cloning into 'temp_mct'...\r\n", + "remote: Enumerating objects: 25277, done.\u001B[K\r\n", + "remote: Counting objects: 100% (4416/4416), done.\u001B[K\r\n", + "remote: Compressing objects: 100% (965/965), done.\u001B[K\r\n", + "remote: Total 25277 (delta 3791), reused 3716 (delta 3451), pack-reused 20861 (from 1)\u001B[K\r\n", + "Receiving objects: 100% (25277/25277), 11.00 MiB | 13.81 MiB/s, done.\r\n", + "Resolving deltas: 100% (19598/19598), done.\r\n", + "Updating files: 100% (1247/1247), done.\r\n", + "mv: cannot move 'temp_mct/tutorials' to './tutorials': Directory not empty\r\n" ] } ], - "source": [ - "!wget -nc http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/office_kitchens.zip\n", - "!unzip -q -o office_kitchens.zip -d ./office_kitchens\n", - "!echo Done loading office_kitchens images" - ], + "execution_count": 2 + }, + { + "cell_type": "markdown", + "id": "7a1038b9fd98bba2", "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-10-09T13:59:43.255064200Z", - "start_time": "2024-10-09T13:59:43.082651200Z" - } + "collapsed": false }, - "id": "91649564338144d6" + "source": "" }, { "cell_type": "markdown", "id": "084c2b8b-3175-4d46-a18a-7c4d8b6fcb38", "metadata": {}, "source": [ - "## Quantization\n", - "\n", - "### Download a Pre-Trained Model \n", + "## Download a Pre-Trained Model \n", "\n", "We begin by downloading a pre-trained Fast-Depth model. This implemetation is based on [Pytorch Fast-Depth](https://github.com/dwofk/fast-depth). 
" ] @@ -142,87 +181,404 @@ { "cell_type": "code", "id": "e8395b28-4732-4d18-b081-5d3bdf508691", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-10T14:09:17.693158Z", + "start_time": "2024-10-10T14:08:44.434067Z" + } + }, "source": [ "from tutorials.mct_model_garden.models_pytorch.fastdepth.fastdepth import FastDepth\n", + "from model_compression_toolkit.core.pytorch.utils import get_working_device\n", "model = FastDepth.from_pretrained(\"SSI-DNN/pytorch_fastdepth_224x224\")\n", - "model.eval()" + "model.eval()\n", + "\n", + "# Move to device\n", + "device = get_working_device()\n", + "model.to(device)" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-10 17:08:49.112701: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-10 17:08:49.112765: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-10 17:08:49.407355: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-10 17:08:49.958557: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-10-10 17:08:54.630107: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-10-10 17:09:11.420429: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.426388: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.426512: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.430648: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.430816: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.430930: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.667248: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.667440: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.667584: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-10-10 17:09:11.671781: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1593 MB memory: -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5\n" + ] + }, + { + "data": { + "text/plain": [ + "FastDepth(\n", + " (conv0): Sequential(\n", + " (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " )\n", + " (conv1): Sequential(\n", + " (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)\n", + " (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(16, 56, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv2): Sequential(\n", + " (0): Conv2d(56, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=56, bias=False)\n", + " (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(56, 88, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(88, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv3): Sequential(\n", + " (0): Conv2d(88, 88, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=88, bias=False)\n", + " (1): BatchNorm2d(88, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(88, 120, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv4): Sequential(\n", + " (0): Conv2d(120, 120, kernel_size=(3, 3), 
stride=(2, 2), padding=(1, 1), groups=120, bias=False)\n", + " (1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(120, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv5): Sequential(\n", + " (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)\n", + " (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(144, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv6): Sequential(\n", + " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256, bias=False)\n", + " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(256, 408, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(408, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv7): Sequential(\n", + " (0): Conv2d(408, 408, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=408, bias=False)\n", + " (1): BatchNorm2d(408, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(408, 376, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(376, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv8): Sequential(\n", + " (0): Conv2d(376, 376, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=376, bias=False)\n", + " (1): BatchNorm2d(376, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(376, 272, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv9): Sequential(\n", + " (0): Conv2d(272, 272, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=272, bias=False)\n", + " (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(272, 288, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv10): Sequential(\n", + " (0): Conv2d(288, 288, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=288, bias=False)\n", + " (1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(288, 296, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(296, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv11): Sequential(\n", + " (0): Conv2d(296, 296, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=296, bias=False)\n", + " (1): BatchNorm2d(296, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(296, 328, kernel_size=(1, 
1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(328, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv12): Sequential(\n", + " (0): Conv2d(328, 328, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=328, bias=False)\n", + " (1): BatchNorm2d(328, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(328, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (conv13): Sequential(\n", + " (0): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)\n", + " (1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU6(inplace=True)\n", + " (3): Conv2d(480, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU6(inplace=True)\n", + " )\n", + " (decode_conv1): Sequential(\n", + " (0): Sequential(\n", + " (0): Conv2d(512, 512, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=512, bias=False)\n", + " (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): Conv2d(512, 200, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (1): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (decode_conv2): Sequential(\n", + " (0): Sequential(\n", + " (0): Conv2d(200, 200, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=200, bias=False)\n", + " (1): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): Conv2d(200, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (decode_conv3): Sequential(\n", + " (0): Sequential(\n", + " (0): Conv2d(256, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=256, bias=False)\n", + " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): Conv2d(256, 120, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (decode_conv4): Sequential(\n", + " (0): Sequential(\n", + " (0): Conv2d(120, 120, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=120, bias=False)\n", + " (1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): Conv2d(120, 56, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (decode_conv5): Sequential(\n", + " (0): Sequential(\n", + " (0): Conv2d(56, 56, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=56, bias=False)\n", + " (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): Conv2d(56, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (decode_conv6): Sequential(\n", + " (0): Conv2d(16, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (1): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 3 }, { "cell_type": "markdown", "id": "3cde2f8e-0642-4374-a1f4-df2775fe7767", "metadata": {}, "source": [ + "## Quantization\n", + "\n", "### Post training quantization (PTQ) using Model Compression Toolkit (MCT)\n", "\n", - "Now, we are all set to use MCT's post-training quantization. To begin, we'll define a representative dataset and proceed with the model quantization. Please note that, for demonstration purposes, we'll use the evaluation dataset as our representative dataset. We'll calibrate the model using 80 representative images, divided into 30 iterations of 'batch_size' images each. \n" + "Now, we are all set to use MCT's post-training quantization. To begin, we'll use a representative dataset of lsun-bedrooms and proceed with the model quantization. We'll calibrate the model using 80 representative images, divided into 20 iterations of 'batch_size' images each. \n", + "\n", + "### Representitive Dataset" ] }, { "cell_type": "code", "id": "56393342-cecf-4f64-b9ca-2f515c765942", "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-10-10T14:09:22.290843Z", + "start_time": "2024-10-10T14:09:17.701423Z" + } }, "source": [ - "import model_compression_toolkit as mct\n", - "from tutorials.mct_model_garden.models_pytorch.fastdepth import fastdepth\n", + "from torch.utils.data import DataLoader, Dataset\n", + "from torchvision import transforms\n", + "from datasets import load_dataset\n", + "from typing import Iterator, Tuple, List\n", "\n", "BATCH_SIZE = 4\n", - "n_iters = 30\n", - "\n", - "# Load representative dataset\n", - "representative_dataset = coco_dataset_generator(dataset_folder=REPRESENTATIVE_DATASET_FOLDER,\n", - " annotation_file=REPRESENTATIVE_DATASET_ANNOTATION_FILE,\n", - " preprocess=yolox_preprocess_chw_transpose,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "\n", - "def get_representative_dataset(dataset: Iterator, n_iter: int):\n", + "n_iters = 20\n", + "\n", + "class ValDataset(Dataset):\n", + " def __init__(self, dataset):\n", + " super(ValDataset, self).__init__()\n", + " self.dataset = dataset\n", + " self.val_transform = transforms.Compose([\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor()])\n", + "\n", + " def __len__(self):\n", + " return len(self.dataset)\n", + "\n", + " def __getitem__(self, index):\n", + " img = self.dataset[index]['image']\n", + " tensor = self.val_transform(img)\n", + " return tensor\n", + "\n", + "dataset = load_dataset(\"pcuenq/lsun-bedrooms\",split=\"test\")\n", + "val_dataset = ValDataset(dataset)\n", + "val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)\n", + "\n", + "# Define representative dataset generator\n", + "def get_representative_dataset(n_iter: int, dataset_loader: Iterator[Tuple]):\n", " \"\"\"\n", " This function creates a representative dataset 
generator. The generator yields numpy\n", " arrays of batches of shape: [Batch, H, W ,C].\n", " Args:\n", - " dataset: dataset iterator\n", - " n_iter: number of iterations for MCT for calibration\n", + " n_iter: number of iterations for MCT to calibrate on\n", + " dataset_loader: iterator object of dataset loader\n", " Returns:\n", " A representative dataset generator\n", " \"\"\" \n", - " def _generator():\n", - " for _ind in range(n_iter):\n", - " batch, label = next(iter(dataset))\n", - " yield [batch]\n", + " def representative_dataset() -> Iterator[List]:\n", + " ds_iter = iter(dataset_loader)\n", + " for _ in range(n_iter):\n", + " yield [next(ds_iter)]\n", "\n", - " return _generator\n", + " return representative_dataset\n", "\n", "# Get representative dataset generator\n", - "representative_dataset_gen = get_representative_dataset(dataset=representative_dataset, n_iter=n_iters)\n", + "representative_dataset_gen = get_representative_dataset(n_iter=n_iters, dataset_loader=val_loader)\n" + ], + "outputs": [], + "execution_count": 4 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Post-Training Quantization (PTQ)", + "id": "bb1bfcca03ce55c" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-10T14:09:25.749346Z", + "start_time": "2024-10-10T14:09:22.291787Z" + } + }, + "cell_type": "code", + "source": [ + "import model_compression_toolkit as mct\n", "\n", "# Set IMX500 TPC\n", "tpc = mct.get_target_platform_capabilities(fw_name=\"pytorch\",\n", " target_platform_name='imx500',\n", " target_platform_version='v3')\n", "\n", - "# Define target Resource Utilization for mixed precision weights quantization.\n", - "# Number of parameters of YOLOx-Tiny is 5M and we set target memory (in Bytes) of 87% of 'standard' 8-bit quantization.\n", - "resource_utilization = mct.core.ResourceUtilization(weights_memory=5e6 * 0.87)\n", - "\n", "# Perform post training quantization\n", "quant_model, _ = mct.ptq.pytorch_post_training_quantization(in_module=model,\n", " representative_data_gen=representative_dataset_gen,\n", - " target_resource_utilization=resource_utilization,\n", " target_platform_capabilities=tpc)\n", "\n", - "# Integrate the quantized model with box decoder and NMS\n", - "quant_model = YOLOXPostProcess(quant_model)\n", "\n", "print('Quantized model is ready!')" ], - "outputs": [], - "execution_count": null + "id": "55177376aca838c0", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/passes/shape_prop.py\", line 153, in run_node\n", + " result = super().run_node(n)\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/interpreter.py\", line 203, in run_node\n", + " return getattr(self, n.op)(n.target, args, kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/interpreter.py\", line 320, in call_module\n", + " return submod(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1553, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 
1562, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/modules/batchnorm.py\", line 176, in forward\n", + " return F.batch_norm(\n", + " ^^^^^^^^^^^^^\n", + " File \"/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/functional.py\", line 2512, in batch_norm\n", + " return torch.batch_norm(\n", + " ^^^^^^^^^^^^^^^^^\n", + "torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 10.74 GiB of which 2.44 MiB is free. Process 3262902 has 8.81 GiB memory in use. Including non-PyTorch memory, this process has 1.92 GiB memory in use. Of the allocated memory 170.59 MiB is allocated by PyTorch, and 23.41 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "ShapeProp error for: node=%conv11_1 : [num_users=1] = call_module[target=conv11.1](args = (%conv11_0,), kwargs = {}) with meta={'nn_module_stack': OrderedDict([('conv11', ('conv11', )), ('conv11.1', ('conv11.1', ))])}\n\nWhile executing %conv11_1 : [num_users=1] = call_module[target=conv11.1](args = (%conv11_0,), kwargs = {})\nOriginal traceback:\nNone", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mOutOfMemoryError\u001B[0m Traceback (most recent call last)", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/passes/shape_prop.py:153\u001B[0m, in \u001B[0;36mShapeProp.run_node\u001B[0;34m(self, n)\u001B[0m\n\u001B[1;32m 152\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m--> 153\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_node\u001B[49m\u001B[43m(\u001B[49m\u001B[43mn\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 154\u001B[0m \u001B[38;5;28;01mfinally\u001B[39;00m:\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/interpreter.py:203\u001B[0m, in \u001B[0;36mInterpreter.run_node\u001B[0;34m(self, n)\u001B[0m\n\u001B[1;32m 202\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(kwargs, \u001B[38;5;28mdict\u001B[39m)\n\u001B[0;32m--> 203\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mgetattr\u001B[39;49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mn\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mop\u001B[49m\u001B[43m)\u001B[49m\u001B[43m(\u001B[49m\u001B[43mn\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtarget\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/interpreter.py:320\u001B[0m, in \u001B[0;36mInterpreter.call_module\u001B[0;34m(self, target, args, kwargs)\u001B[0m\n\u001B[1;32m 318\u001B[0m submod \u001B[38;5;241m=\u001B[39m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfetch_attr(target)\n\u001B[0;32m--> 320\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43msubmod\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1553\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call_impl\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[1;32m 1560\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[1;32m 1561\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[0;32m-> 1562\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mforward_call\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/modules/batchnorm.py:176\u001B[0m, in \u001B[0;36m_BatchNorm.forward\u001B[0;34m(self, input)\u001B[0m\n\u001B[1;32m 171\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124mr\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 172\u001B[0m \u001B[38;5;124;03mBuffers are only updated if they are to be tracked and we are in training mode. Thus they only need to be\u001B[39;00m\n\u001B[1;32m 173\u001B[0m \u001B[38;5;124;03mpassed when the update should occur (i.e. in training mode when they are tracked), or when buffer stats are\u001B[39;00m\n\u001B[1;32m 174\u001B[0m \u001B[38;5;124;03mused for normalization (i.e. 
in eval mode when buffers are not None).\u001B[39;00m\n\u001B[1;32m 175\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m--> 176\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mF\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbatch_norm\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 177\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 178\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;66;43;03m# If buffers are not to be tracked, ensure that they won't be updated\u001B[39;49;00m\n\u001B[1;32m 179\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrunning_mean\u001B[49m\n\u001B[1;32m 180\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mnot\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtraining\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtrack_running_stats\u001B[49m\n\u001B[1;32m 181\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43;01melse\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m 182\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrunning_var\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mnot\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtraining\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtrack_running_stats\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01melse\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m 183\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mweight\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 184\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbias\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 185\u001B[0m \u001B[43m \u001B[49m\u001B[43mbn_training\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 186\u001B[0m \u001B[43m \u001B[49m\u001B[43mexponential_average_factor\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 187\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43meps\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 188\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/nn/functional.py:2512\u001B[0m, in \u001B[0;36mbatch_norm\u001B[0;34m(input, running_mean, running_var, weight, bias, training, momentum, eps)\u001B[0m\n\u001B[1;32m 2510\u001B[0m _verify_batch_size(\u001B[38;5;28minput\u001B[39m\u001B[38;5;241m.\u001B[39msize())\n\u001B[0;32m-> 2512\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mtorch\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbatch_norm\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 2513\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mweight\u001B[49m\u001B[43m,\u001B[49m\u001B[43m 
\u001B[49m\u001B[43mbias\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mrunning_mean\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mrunning_var\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtraining\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmomentum\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43meps\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtorch\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbackends\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcudnn\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43menabled\u001B[49m\n\u001B[1;32m 2514\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n", + "\u001B[0;31mOutOfMemoryError\u001B[0m: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 10.74 GiB of which 2.44 MiB is free. Process 3262902 has 8.81 GiB memory in use. Including non-PyTorch memory, this process has 1.92 GiB memory in use. Of the allocated memory 170.59 MiB is allocated by PyTorch, and 23.41 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001B[0;31mRuntimeError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[5], line 9\u001B[0m\n\u001B[1;32m 4\u001B[0m tpc \u001B[38;5;241m=\u001B[39m mct\u001B[38;5;241m.\u001B[39mget_target_platform_capabilities(fw_name\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpytorch\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m 5\u001B[0m target_platform_name\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mimx500\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 6\u001B[0m target_platform_version\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mv3\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[1;32m 8\u001B[0m \u001B[38;5;66;03m# Perform post training quantization\u001B[39;00m\n\u001B[0;32m----> 9\u001B[0m quant_model, _ \u001B[38;5;241m=\u001B[39m \u001B[43mmct\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mptq\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mpytorch_post_training_quantization\u001B[49m\u001B[43m(\u001B[49m\u001B[43min_module\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 10\u001B[0m \u001B[43m \u001B[49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrepresentative_dataset_gen\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 11\u001B[0m \u001B[43m \u001B[49m\u001B[43mtarget_platform_capabilities\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtpc\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 14\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mQuantized model is ready!\u001B[39m\u001B[38;5;124m'\u001B[39m)\n", + "File \u001B[0;32m~/git/model_optimization/model_compression_toolkit/ptq/pytorch/quantization_facade.py:111\u001B[0m, in \u001B[0;36mpytorch_post_training_quantization\u001B[0;34m(in_module, representative_data_gen, target_resource_utilization, core_config, target_platform_capabilities)\u001B[0m\n\u001B[1;32m 108\u001B[0m fw_impl \u001B[38;5;241m=\u001B[39m PytorchImplementation()\n\u001B[1;32m 110\u001B[0m \u001B[38;5;66;03m# Ignore hessian info service as it 
is not used here yet.\u001B[39;00m\n\u001B[0;32m--> 111\u001B[0m tg, bit_widths_config, _, scheduling_info \u001B[38;5;241m=\u001B[39m \u001B[43mcore_runner\u001B[49m\u001B[43m(\u001B[49m\u001B[43min_model\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43min_module\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 112\u001B[0m \u001B[43m \u001B[49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 113\u001B[0m \u001B[43m \u001B[49m\u001B[43mcore_config\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcore_config\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 114\u001B[0m \u001B[43m \u001B[49m\u001B[43mfw_info\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mfw_info\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 115\u001B[0m \u001B[43m \u001B[49m\u001B[43mfw_impl\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mfw_impl\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 116\u001B[0m \u001B[43m \u001B[49m\u001B[43mtpc\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtarget_platform_capabilities\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 117\u001B[0m \u001B[43m \u001B[49m\u001B[43mtarget_resource_utilization\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtarget_resource_utilization\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 118\u001B[0m \u001B[43m \u001B[49m\u001B[43mtb_w\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtb_w\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 120\u001B[0m \u001B[38;5;66;03m# At this point, tg is a graph that went through substitutions (such as BN folding) and is\u001B[39;00m\n\u001B[1;32m 121\u001B[0m \u001B[38;5;66;03m# ready for quantization (namely, it holds quantization params, etc.) but the weights are\u001B[39;00m\n\u001B[1;32m 122\u001B[0m \u001B[38;5;66;03m# not quantized yet. 
For this reason, we use it to create a graph that acts as a \"float\" graph\u001B[39;00m\n\u001B[1;32m 123\u001B[0m \u001B[38;5;66;03m# for things like similarity analyzer (because the quantized and float graph should have the same\u001B[39;00m\n\u001B[1;32m 124\u001B[0m \u001B[38;5;66;03m# architecture to find the appropriate compare points for similarity computation).\u001B[39;00m\n\u001B[1;32m 125\u001B[0m similarity_baseline_graph \u001B[38;5;241m=\u001B[39m copy\u001B[38;5;241m.\u001B[39mdeepcopy(tg)\n", + "File \u001B[0;32m~/git/model_optimization/model_compression_toolkit/core/runner.py:114\u001B[0m, in \u001B[0;36mcore_runner\u001B[0;34m(in_model, representative_data_gen, core_config, fw_info, fw_impl, tpc, target_resource_utilization, running_gptq, tb_w)\u001B[0m\n\u001B[1;32m 111\u001B[0m core_config\u001B[38;5;241m.\u001B[39mmixed_precision_config\u001B[38;5;241m.\u001B[39mset_mixed_precision_enable()\n\u001B[1;32m 112\u001B[0m Logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mMixed precision enabled.\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m--> 114\u001B[0m graph \u001B[38;5;241m=\u001B[39m \u001B[43mgraph_preparation_runner\u001B[49m\u001B[43m(\u001B[49m\u001B[43min_model\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 115\u001B[0m \u001B[43m \u001B[49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 116\u001B[0m \u001B[43m \u001B[49m\u001B[43mcore_config\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mquantization_config\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 117\u001B[0m \u001B[43m \u001B[49m\u001B[43mfw_info\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 118\u001B[0m \u001B[43m \u001B[49m\u001B[43mfw_impl\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 119\u001B[0m \u001B[43m \u001B[49m\u001B[43mtpc\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 120\u001B[0m \u001B[43m \u001B[49m\u001B[43mcore_config\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbit_width_config\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 121\u001B[0m \u001B[43m \u001B[49m\u001B[43mtb_w\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 122\u001B[0m \u001B[43m \u001B[49m\u001B[43mmixed_precision_enable\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcore_config\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mis_mixed_precision_enabled\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 123\u001B[0m \u001B[43m \u001B[49m\u001B[43mrunning_gptq\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrunning_gptq\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 125\u001B[0m hessian_info_service \u001B[38;5;241m=\u001B[39m HessianInfoService(graph\u001B[38;5;241m=\u001B[39mgraph, representative_dataset_gen\u001B[38;5;241m=\u001B[39mrepresentative_data_gen,\n\u001B[1;32m 126\u001B[0m fw_impl\u001B[38;5;241m=\u001B[39mfw_impl)\n\u001B[1;32m 128\u001B[0m tg \u001B[38;5;241m=\u001B[39m quantization_preparation_runner(graph\u001B[38;5;241m=\u001B[39mgraph,\n\u001B[1;32m 129\u001B[0m representative_data_gen\u001B[38;5;241m=\u001B[39mrepresentative_data_gen,\n\u001B[1;32m 130\u001B[0m core_config\u001B[38;5;241m=\u001B[39mcore_config,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 133\u001B[0m tb_w\u001B[38;5;241m=\u001B[39mtb_w,\n\u001B[1;32m 134\u001B[0m hessian_info_service\u001B[38;5;241m=\u001B[39mhessian_info_service)\n", + "File \u001B[0;32m~/git/model_optimization/model_compression_toolkit/core/graph_prep_runner.py:72\u001B[0m, in \u001B[0;36mgraph_preparation_runner\u001B[0;34m(in_model, representative_data_gen, quantization_config, fw_info, fw_impl, 
tpc, bit_width_config, tb_w, mixed_precision_enable, running_gptq)\u001B[0m\n\u001B[1;32m 36\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mgraph_preparation_runner\u001B[39m(in_model: Any,\n\u001B[1;32m 37\u001B[0m representative_data_gen: Callable,\n\u001B[1;32m 38\u001B[0m quantization_config: QuantizationConfig,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 44\u001B[0m mixed_precision_enable: \u001B[38;5;28mbool\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[1;32m 45\u001B[0m running_gptq: \u001B[38;5;28mbool\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Graph:\n\u001B[1;32m 46\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 47\u001B[0m \u001B[38;5;124;03m Runs all required preparations in order to build a quantization graph from the given model,\u001B[39;00m\n\u001B[1;32m 48\u001B[0m \u001B[38;5;124;03m quantization configuration and target platform specifications.\u001B[39;00m\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 69\u001B[0m \u001B[38;5;124;03m An internal graph representation of the input model.\u001B[39;00m\n\u001B[1;32m 70\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[0;32m---> 72\u001B[0m graph \u001B[38;5;241m=\u001B[39m \u001B[43mread_model_to_graph\u001B[49m\u001B[43m(\u001B[49m\u001B[43min_model\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 73\u001B[0m \u001B[43m \u001B[49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 74\u001B[0m \u001B[43m \u001B[49m\u001B[43mtpc\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 75\u001B[0m \u001B[43m \u001B[49m\u001B[43mfw_info\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 76\u001B[0m \u001B[43m \u001B[49m\u001B[43mfw_impl\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 78\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m tb_w \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 79\u001B[0m tb_w\u001B[38;5;241m.\u001B[39madd_graph(graph, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124minitial_graph\u001B[39m\u001B[38;5;124m'\u001B[39m)\n", + "File \u001B[0;32m~/git/model_optimization/model_compression_toolkit/core/graph_prep_runner.py:207\u001B[0m, in \u001B[0;36mread_model_to_graph\u001B[0;34m(in_model, representative_data_gen, tpc, fw_info, fw_impl)\u001B[0m\n\u001B[1;32m 186\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mread_model_to_graph\u001B[39m(in_model: Any,\n\u001B[1;32m 187\u001B[0m representative_data_gen: Callable,\n\u001B[1;32m 188\u001B[0m tpc: TargetPlatformCapabilities,\n\u001B[1;32m 189\u001B[0m fw_info: FrameworkInfo \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 190\u001B[0m fw_impl: FrameworkImplementation \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Graph:\n\u001B[1;32m 192\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 193\u001B[0m \u001B[38;5;124;03m Read a model into a graph object.\u001B[39;00m\n\u001B[1;32m 194\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 205\u001B[0m \u001B[38;5;124;03m Graph object that represents the model.\u001B[39;00m\n\u001B[1;32m 206\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[0;32m--> 207\u001B[0m graph \u001B[38;5;241m=\u001B[39m 
\u001B[43mfw_impl\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmodel_reader\u001B[49m\u001B[43m(\u001B[49m\u001B[43min_model\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 208\u001B[0m \u001B[43m \u001B[49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 209\u001B[0m graph\u001B[38;5;241m.\u001B[39mset_fw_info(fw_info)\n\u001B[1;32m 210\u001B[0m graph\u001B[38;5;241m.\u001B[39mset_tpc(tpc)\n", + "File \u001B[0;32m~/git/model_optimization/model_compression_toolkit/core/pytorch/pytorch_implementation.py:151\u001B[0m, in \u001B[0;36mPytorchImplementation.model_reader\u001B[0;34m(self, module, representative_data_gen)\u001B[0m\n\u001B[1;32m 149\u001B[0m _module \u001B[38;5;241m=\u001B[39m deepcopy(module)\n\u001B[1;32m 150\u001B[0m _module\u001B[38;5;241m.\u001B[39meval()\n\u001B[0;32m--> 151\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mmodel_reader\u001B[49m\u001B[43m(\u001B[49m\u001B[43m_module\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mto_numpy\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mto_tensor\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/git/model_optimization/model_compression_toolkit/core/pytorch/reader/reader.py:153\u001B[0m, in \u001B[0;36mmodel_reader\u001B[0;34m(model, representative_data_gen, to_numpy, to_tensor)\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 141\u001B[0m \u001B[38;5;124;03mReads a Pytorch model and converts it to an FX Graph using the fx toolkit. Then, builds a base graph representing\u001B[39;00m\n\u001B[1;32m 142\u001B[0m \u001B[38;5;124;03mthe fx graph. 
Finally, we filter \"broken nodes\" (nodes without outputs, for example: \"assert\").\u001B[39;00m\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 150\u001B[0m \u001B[38;5;124;03m Base graph of the Pytorch model.\u001B[39;00m\n\u001B[1;32m 151\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 152\u001B[0m logging\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mStart Model Reading...\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m--> 153\u001B[0m fx_model \u001B[38;5;241m=\u001B[39m \u001B[43mfx_graph_module_generation\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mrepresentative_data_gen\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mto_tensor\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 154\u001B[0m graph \u001B[38;5;241m=\u001B[39m build_graph(fx_model, to_numpy)\n\u001B[1;32m 155\u001B[0m graph \u001B[38;5;241m=\u001B[39m remove_broken_nodes_from_graph(graph)\n", + "File \u001B[0;32m~/git/model_optimization/model_compression_toolkit/core/pytorch/reader/reader.py:96\u001B[0m, in \u001B[0;36mfx_graph_module_generation\u001B[0;34m(pytorch_model, representative_data_gen, to_tensor)\u001B[0m\n\u001B[1;32m 94\u001B[0m inputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mnext\u001B[39m(representative_data_gen())\n\u001B[1;32m 95\u001B[0m input_for_shape_infer \u001B[38;5;241m=\u001B[39m [to_tensor(i) \u001B[38;5;28;01mfor\u001B[39;00m i \u001B[38;5;129;01min\u001B[39;00m inputs]\n\u001B[0;32m---> 96\u001B[0m \u001B[43mShapeProp\u001B[49m\u001B[43m(\u001B[49m\u001B[43msymbolic_traced\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mpropagate\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43minput_for_shape_infer\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 97\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m symbolic_traced\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/passes/shape_prop.py:195\u001B[0m, in \u001B[0;36mShapeProp.propagate\u001B[0;34m(self, *args)\u001B[0m\n\u001B[1;32m 193\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 194\u001B[0m fake_args \u001B[38;5;241m=\u001B[39m args\n\u001B[0;32m--> 195\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mfake_args\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/interpreter.py:146\u001B[0m, in \u001B[0;36mInterpreter.run\u001B[0;34m(self, initial_env, enable_io_processing, *args)\u001B[0m\n\u001B[1;32m 143\u001B[0m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[1;32m 145\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m--> 146\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39menv[node] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_node\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnode\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 147\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[1;32m 148\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mextra_traceback:\n", + "File 
\u001B[0;32m/data/projects/swat/envs/eladco/conda_mct/lib/python3.11/site-packages/torch/fx/passes/shape_prop.py:158\u001B[0m, in \u001B[0;36mShapeProp.run_node\u001B[0;34m(self, n)\u001B[0m\n\u001B[1;32m 156\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[1;32m 157\u001B[0m traceback\u001B[38;5;241m.\u001B[39mprint_exc()\n\u001B[0;32m--> 158\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\n\u001B[1;32m 159\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mShapeProp error for: node=\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mn\u001B[38;5;241m.\u001B[39mformat_node()\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m with \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 160\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmeta=\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mn\u001B[38;5;241m.\u001B[39mmeta\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 161\u001B[0m ) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01me\u001B[39;00m\n\u001B[1;32m 163\u001B[0m found_tensor \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[1;32m 165\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mextract_tensor_meta\u001B[39m(obj):\n", + "\u001B[0;31mRuntimeError\u001B[0m: ShapeProp error for: node=%conv11_1 : [num_users=1] = call_module[target=conv11.1](args = (%conv11_0,), kwargs = {}) with meta={'nn_module_stack': OrderedDict([('conv11', ('conv11', )), ('conv11.1', ('conv11.1', ))])}\n\nWhile executing %conv11_1 : [num_users=1] = call_module[target=conv11.1](args = (%conv11_0,), kwargs = {})\nOriginal traceback:\nNone" + ] + } + ], + "execution_count": 5 }, { "cell_type": "markdown", @@ -240,7 +596,11 @@ "cell_type": "code", "id": "72dd885c7b92fa93", "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-10-10T14:09:25.753622Z", + "start_time": "2024-10-10T14:09:25.751902Z" + } }, "source": [ "mct.exporter.pytorch_export_model(model=quant_model,\n", @@ -257,44 +617,106 @@ "collapsed": false }, "source": [ - "## Visualize samples from NYUv2\n", - "\n", - "### Floating point model evaluation\n", - "Next, we evaluate the floating point model by using `cocoeval` library alongside additional dataset utilities. We can verify the mAP accuracy aligns with that of the original model. \n", - "Note that we set the preprocessing according to [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX).\n", - "Please ensure that the dataset path has been set correctly before running this code cell." + "## Visualize samples from lsun-bedrooms\n", + "Next, we visualize a sample of RGB image along with its depth image from the floating point and the quantized model." 
]
  },
  {
   "cell_type": "code",
   "id": "01e90967-594b-480f-b2e6-45e2c9ce9cee",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-10-10T14:10:55.776013Z",
+     "start_time": "2024-10-10T14:10:55.017073Z"
+    }
+   },
   "source": [
-    "from tutorials.mct_model_garden.evaluation_metrics.coco_evaluation import coco_evaluate\n",
-    "from tutorials.mct_model_garden.models_pytorch.yolox.yolox import model_predict\n",
-    "\n",
-    "EVAL_DATASET_FOLDER = './coco/val2017'\n",
-    "EVAL_DATASET_ANNOTATION_FILE = './coco/annotations/instances_val2017.json'\n",
-    "\n",
-    "# Define boxes resizing information to map between the model's output and the original image dimensions\n",
-    "output_resize = {'shape': yolox_tiny_cfg['img_size'], 'aspect_ratio_preservation': True, \"align_center\": False, 'normalized_coords': False}\n",
+    "import torch\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from PIL import Image\n",
     "\n",
-    "# Integrate the floating-point model with box decoder and NMS\n",
-    "model = YOLOXPostProcess(model)\n",
+    "cmap = plt.cm.viridis\n",
     "\n",
-    "# Evaluate the floating-point model\n",
-    "eval_results = coco_evaluate(model=model,\n",
-    "                             dataset_folder=EVAL_DATASET_FOLDER,\n",
-    "                             annotation_file=EVAL_DATASET_ANNOTATION_FILE,\n",
-    "                             preprocess=yolox_preprocess_chw_transpose,\n",
-    "                             output_resize=output_resize,\n",
-    "                             batch_size=BATCH_SIZE,\n",
-    "                             model_inference=model_predict)\n",
-    "\n",
-    "print(\"Floating-point model mAP: {:.4f}\".format(eval_results[0]))"
+    "def colored_depthmap(depth: np.ndarray, d_min: float = None, d_max: float = None) -> np.ndarray:\n",
+    "    \"\"\"\n",
+    "    This function creates a colored depth map for visualization.\n",
+    "    Args:\n",
+    "        depth: depth image\n",
+    "        d_min: minimum depth\n",
+    "        d_max: maximum depth\n",
+    "    Returns:\n",
+    "        A colored depth map\n",
+    "    \"\"\"\n",
+    "    if d_min is None:\n",
+    "        d_min = np.min(depth)\n",
+    "    if d_max is None:\n",
+    "        d_max = np.max(depth)\n",
+    "    depth_relative = (depth - d_min) / (d_max - d_min)\n",
+    "    return 255 * cmap(depth_relative)[:,:,:3] # H, W, C\n",
+    "\n",
+    "def merge_into_row(img: torch.Tensor, depth_float: torch.Tensor, depth_quant: torch.Tensor) -> np.ndarray:\n",
+    "    \"\"\"\n",
+    "    This function merges an RGB image with the depth maps of the floating-point and quantized models into a single row for visualization.\n",
+    "    Args:\n",
+    "        img: RGB image\n",
+    "        depth_float: Depth image of the floating-point model\n",
+    "        depth_quant: Depth image of the quantized model\n",
+    "    Returns:\n",
+    "        A merged image\n",
+    "    \"\"\"\n",
+    "    rgb = 255 * np.transpose(np.squeeze(img.detach().cpu().numpy()), (1,2,0)) # H, W, C\n",
+    "    depth_float = np.squeeze(depth_float.detach().cpu().numpy())\n",
+    "    depth_quant = np.squeeze(depth_quant.detach().cpu().numpy())\n",
+    "\n",
+    "    d_min = min(np.min(depth_float), np.min(depth_quant))\n",
+    "    d_max = max(np.max(depth_float), np.max(depth_quant))\n",
+    "    depth_float_col = colored_depthmap(depth_float, d_min, d_max)\n",
+    "    depth_quant_col = colored_depthmap(depth_quant, d_min, d_max)\n",
+    "    img_merge = np.hstack([rgb, depth_float_col, depth_quant_col])\n",
+    "\n",
+    "    return img_merge\n",
+    "\n",
+    "\n",
+    "# Take a sample\n",
+    "SAMPLE_IDX = 0\n",
+    "img = val_dataset[SAMPLE_IDX]\n",
+    "img = img.unsqueeze(0).to(device)  # add a batch dimension\n",
+    "\n",
+    "# Run inference with the floating-point and quantized models\n",
+    "depth_float = model(img)\n",
+    "depth_quant = quant_model(img)\n",
+    "\n",
+    "# Create and save an image for visualization\n",
+    "merge_img = merge_into_row(img, depth_float, depth_quant)\n",
+    "merge_img = Image.fromarray(merge_img.astype('uint8'))\n",
+    "merge_img.save(\"depth.png\")\n",
+    "print('Depth image is saved!')"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",