Merge remote-tracking branch 'upstream/main' into feat/comparison-and…

…-equality-operations
huggingface · Oct 26, 2023 · d094028 · d094028
2 parents 4fb19fa + c8e197f
commit d094028
Show file tree

Hide file tree

Showing 49 changed files with 3,718 additions and 105 deletions.
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
@@ -0,0 +1,62 @@
+name: PyO3-CI
+
+on: # runs manually, on pushes to main and on pull requests that touch candle-pyo3
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - candle-pyo3/**
+  pull_request:
+    paths:
+      - candle-pyo3/**
+
+jobs:
+  build_and_test:
+    name: Check everything builds & tests
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest] # For now, only test on Linux
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: actions-rs/toolchain@v1 # NOTE(review): appears unmaintained upstream - consider a maintained toolchain action
+        with:
+          toolchain: stable
+
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11" # quoted so YAML keeps it a string rather than a float
+          architecture: "x64"
+
+      - name: Cache Cargo Registry
+        uses: actions/cache@v4
+        with:
+          path: ~/.cargo/registry
+          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
+
+      - name: Install # builds the extension in release mode into a local virtualenv
+        working-directory: ./candle-pyo3
+        run: |
+          python -m venv .env
+          source .env/bin/activate
+          pip install -U pip
+          pip install pytest maturin black
+          python -m maturin develop -r
+
+      - name: Check style # checks the generated stubs and the black formatting
+        working-directory: ./candle-pyo3
+        run: |
+          source .env/bin/activate
+          python stub.py --check
+          black --check .
+
+      - name: Run tests
+        working-directory: ./candle-pyo3
+        run: |
+          source .env/bin/activate
+          python -m pytest -s -v tests
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,13 +7,7 @@ members = [
     "candle-nn",
     "candle-pyo3",
     "candle-transformers",
-    "candle-wasm-examples/llama2-c",
-    "candle-wasm-examples/segment-anything",
-    "candle-wasm-examples/whisper",
-    "candle-wasm-examples/yolo",
-    "candle-wasm-examples/bert",
-    "candle-wasm-examples/phi",
-    "candle-wasm-examples/t5",
+    "candle-wasm-examples/*",
     "candle-wasm-tests",
 ]
 exclude = ["candle-flash-attn", "candle-kernels"]

diff --git a/README.md b/README.md
@@ -56,6 +56,7 @@ These online demos run entirely in your browser:
 - [T5](https://huggingface.co/spaces/radames/Candle-T5-Generation-Wasm): text generation.
 - [Phi-v1.5](https://huggingface.co/spaces/radames/Candle-Phi-1.5-Wasm): text generation.
 - [Segment Anything Model](https://huggingface.co/spaces/radames/candle-segment-anything-wasm): Image segmentation.
+- [BLIP](https://huggingface.co/spaces/radames/Candle-BLIP-Image-Captioning): image captioning.
 
 We also provide some command line based examples using state of the art models:
 
@@ -99,6 +100,8 @@ We also provide a some command line based examples using state of the art models
 - [DINOv2](./candle-examples/examples/dinov2/): computer vision model trained
   using self-supervision (can be used for imagenet classification, depth
   evaluation, segmentation).
+- [BLIP](./candle-examples/examples/blip/): image to text model, can be used to
+  generate captions for an image.
 
 Run them using commands like:
 ```
@@ -133,6 +136,8 @@ And then head over to
 ## Useful External Resources
 - [`candle-tutorial`](https://github.com/ToluClassics/candle-tutorial): a
   very detailed tutorial showing how to convert a PyTorch model to Candle.
+- [`optimisers`](https://github.com/KGrewal1/optimisers): a collection of optimisers
+  including SGD with momentum, AdaGrad, AdaDelta, AdaMax, NAdam, RAdam, and RMSprop.
 - [`candle-lora`](https://github.com/EricLBuehler/candle-lora): a LoRA implementation
   that conforms to the official `peft` implementation.
 
@@ -163,8 +168,11 @@ If you have an addition to this list, please submit a pull request.
         - T5.
         - Bert.
     - Whisper (multi-lingual support).
-    - Stable Diffusion v1.5, v2.1, XL v1.0.
-    - Wurstchen v2.
+    - Text to image.
+        - Stable Diffusion v1.5, v2.1, XL v1.0.
+        - Wurstchen v2.
+    - Image to text.
+        - BLIP.
     - Computer Vision Models.
         - DINOv2, ConvMixer, EfficientNet, ResNet, ViT.
         - yolo-v3, yolo-v8.

diff --git a/candle-core/src/backprop.rs b/candle-core/src/backprop.rs
@@ -471,7 +471,15 @@ impl Tensor {
                     Op::Unary(_, UnaryOp::Round) => {
                         Err(Error::BackwardNotSupported { op: "round" })?
                     }
-                    Op::Unary(_, UnaryOp::Gelu) => Err(Error::BackwardNotSupported { op: "gelu" })?,
+                    Op::Unary(arg, UnaryOp::Gelu) => {
+                        let sum_grad = grads.or_insert(arg)?;
+                        let cube = arg.powf(3.)?;
+                        let tanh = (0.0356774 * &cube + (0.797885 * arg)?)?.tanh()?;
+                        let gelu_grad = (((0.5 * &tanh)?
+                            + (0.0535161 * cube + (0.398942 * arg)?)? * (1. - tanh.powf(2.)?))?
+                            + 0.5)?;
+                        *sum_grad = sum_grad.add(&(&grad * gelu_grad)?)?
+                    }
                     Op::Unary(_, UnaryOp::Erf) => Err(Error::BackwardNotSupported { op: "erf" })?,
                     Op::Unary(_, UnaryOp::GeluErf) => {
                         Err(Error::BackwardNotSupported { op: "gelu-erf" })?

diff --git a/candle-core/src/cuda_backend.rs b/candle-core/src/cuda_backend.rs
@@ -2171,7 +2171,7 @@ impl BackendStorage for CudaStorage {
                 if src_l.is_contiguous() {
                     dev.dtod_copy(&src, &mut dst).w()?
                 } else {
-                    let func = dev.get_or_load_func("ucopy_64", kernels::UNARY)?;
+                    let func = dev.get_or_load_func("ucopy_f64", kernels::UNARY)?;
                     // SAFETY: Set later by running the kernel.
                     let params = (el_count, dims.len(), &ds, &src, &mut dst);
                     // SAFETY: ffi.

diff --git a/candle-core/src/error.rs b/candle-core/src/error.rs
@@ -142,6 +142,9 @@ pub enum Error {
     #[error("{op} expects at least one tensor")]
     OpRequiresAtLeastOneTensor { op: &'static str },
 
+    #[error("{op} expects at least two tensors")]
+    OpRequiresAtLeastTwoTensors { op: &'static str },
+
     #[error("backward is not supported for {op}")]
     BackwardNotSupported { op: &'static str },
 

diff --git a/candle-core/src/pickle.rs b/candle-core/src/pickle.rs
@@ -609,6 +609,7 @@ fn rebuild_args(args: Object) -> Result<(Layout, DType, String, usize)> {
         "HalfStorage" => DType::F16,
         "BFloat16Storage" => DType::BF16,
         "ByteStorage" => DType::U8,
+        "LongStorage" => DType::I64,
         other => {
             crate::bail!("unsupported storage type {other}")
         }

diff --git a/candle-core/src/tensor.rs b/candle-core/src/tensor.rs
@@ -449,7 +449,7 @@ impl Tensor {
 
     /// Returns true if the computation graph should track this op, that is if it is
     /// a variable or if it has some variable as dependencies.
-    pub(crate) fn track_op(&self) -> bool {
+    pub fn track_op(&self) -> bool {
         self.is_variable || self.op.is_some()
     }
 
@@ -540,6 +540,73 @@ impl Tensor {
         Ok(inp)
     }
 
+    /// Creates grids of coordinates from the 1D inputs, one grid per input, in input order.
+    ///
+    /// # Arguments
+    ///
+    /// * `args` - A slice of at least two 1D tensors.
+    /// * `xy_indexing` - Whether to use xy indexing or ij indexing. If xy is selected, the
+    ///   grid dimensions follow the inputs in reverse order: with two inputs, the first
+    ///   dimension has the length of the second input and the second dimension the length of
+    ///   the first input. If ij is selected, the dimensions follow the order of the inputs.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use candle_core::{Tensor, Device, Shape};
+    /// let x = Tensor::new(&[1f32, 2., 3.], &Device::Cpu)?;
+    /// let y = Tensor::new(&[4f32, 5., 6.], &Device::Cpu)?;
+    ///
+    /// let grids_xy = Tensor::meshgrid(&[&x, &y], true)?;
+    ///
+    /// assert_eq!(grids_xy.len(), 2);
+    /// assert_eq!(grids_xy[0].dims(), &[3, 3]);
+    ///
+    /// assert_eq!(grids_xy[0].to_vec2::<f32>()?, &[[1., 2., 3.], [1., 2., 3.], [1., 2., 3.]]);
+    /// assert_eq!(grids_xy[1].to_vec2::<f32>()?, &[[4., 4., 4.], [5., 5., 5.], [6., 6., 6.]]);
+    ///
+    /// let grids_ij = Tensor::meshgrid(&[&x, &y], false)?;
+    ///
+    /// assert_eq!(grids_ij[0].to_vec2::<f32>()?, &[[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]);
+    /// assert_eq!(grids_ij[1].to_vec2::<f32>()?, &[[4., 5., 6.], [4., 5., 6.], [4., 5., 6.]]);
+    /// # Ok::<(), candle_core::Error>(())
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// * Will return `Err` if `args` contains fewer than 2 tensors.
+    /// * Errors from `dims1`, `reshape` and `repeat` on the inputs are propagated.
+    pub fn meshgrid<A: AsRef<Tensor>>(args: &[A], xy_indexing: bool) -> Result<Vec<Self>> {
+        if args.len() <= 1 {
+            Err(Error::OpRequiresAtLeastTwoTensors { op: "meshgrid" }.bt())?
+        }
+        let args: Vec<_> = if xy_indexing {
+            args.iter().rev().collect() // xy: build the grids over the reversed inputs
+        } else {
+            args.iter().collect()
+        };
+
+        let mut shape = Vec::with_capacity(args.len()); // length of each (reordered) input
+        for arg in args.iter() {
+            shape.push(arg.as_ref().dims1()?)
+        }
+
+        let mut grids = Vec::with_capacity(args.len());
+        for idx in 0..args.len() {
+            let mut ones = vec![1usize; args.len()];
+            ones[idx] = shape[idx]; // size 1 everywhere except `idx`, which keeps the length
+            let arg = args[idx].as_ref().reshape(ones)?;
+            let mut repeats = shape.clone();
+            repeats[idx] = 1; // repeat along every dimension but the input's own
+            let repeated_tensor = arg.repeat(repeats)?;
+            grids.push(repeated_tensor);
+        }
+        if xy_indexing {
+            grids.reverse(); // back to the caller's input order
+        }
+        Ok(grids)
+    }
+
     /// This operation multiplies the input tensor by `mul` then adds `add` and return the result.
     /// The input values `mul` and `add` are casted to the appropriate type so some rounding might
     /// be performed.
@@ -1598,6 +1665,24 @@ impl Tensor {
         }
     }
 
+    /// Returns the sub-tensor at `index` along dimension `dim`, with that dimension removed.
+    ///
+    /// ```rust
+    /// use candle_core::{Tensor, Device};
+    /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
+    /// let t = tensor.get_on_dim(1, 0)?;
+    /// assert_eq!(t.to_vec1::<f32>()?, &[0., 2., 4.]);
+    /// let t = tensor.get_on_dim(1, 1)?;
+    /// assert_eq!(t.to_vec1::<f32>()?, &[1., 3., 5.]);
+    /// let t = tensor.get_on_dim(0, 1)?;
+    /// assert_eq!(t.to_vec1::<f32>()?, &[2., 3.]);
+    /// # Ok::<(), candle_core::Error>(())
+    /// ```
+    pub fn get_on_dim<D: Dim>(&self, dim: D, index: usize) -> Result<Tensor> {
+        let dim = dim.to_index(self.shape(), "get_on_dim")?; // resolve `dim` against this shape
+        self.narrow(dim, index, 1)?.squeeze(dim) // keep one slice, then drop that dimension
+    }
+
     /// Returns a tensor that is a transposed version of the input, the two last dimensions of the
     /// input are swapped.
     ///

diff --git a/candle-core/tests/grad_tests.rs b/candle-core/tests/grad_tests.rs
@@ -192,6 +192,19 @@ fn unary_grad(device: &Device) -> Result<()> {
         test_utils::to_vec1_round(grad_x, 2)?,
         [0.01, 0.42, 0.0, 0.98],
     );
+
+    // testing compared to pytorch nn.GELU(approximate = 'tanh')
+    let y = x.gelu()?;
+    let grads = y.backward()?;
+    let grad_x = grads.get(&x).context("no grad for x")?;
+    assert_eq!(
+        test_utils::to_vec1_round(&y, 4)?,
+        [2.9964, 0.8412, 3.9999, 0.0839]
+    );
+    assert_eq!(
+        test_utils::to_vec1_round(grad_x, 4)?,
+        [1.0116, 1.0830, 1.0003, 0.6188],
+    );
     Ok(())
 }
 

diff --git a/candle-examples/examples/bert/main.rs b/candle-examples/examples/bert/main.rs
@@ -5,11 +5,11 @@ extern crate intel_mkl_src;
 extern crate accelerate_src;
 use candle_transformers::models::bert::{BertModel, Config, DTYPE};
 
-use anyhow::{anyhow, Error as E, Result};
+use anyhow::{Error as E, Result};
 use candle::Tensor;
 use candle_nn::VarBuilder;
 use clap::Parser;
-use hf_hub::{api::sync::Api, Cache, Repo, RepoType};
+use hf_hub::{api::sync::Api, Repo, RepoType};
 use tokenizers::{PaddingParams, Tokenizer};
 
 #[derive(Parser, Debug)]
@@ -19,10 +19,6 @@ struct Args {
     #[arg(long)]
     cpu: bool,
 
-    /// Run offline (you must have the files already cached)
-    #[arg(long)]
-    offline: bool,
-
     /// Enable tracing (generates a trace-timestamp.json file).
     #[arg(long)]
     tracing: bool,
@@ -38,6 +34,10 @@ struct Args {
     #[arg(long)]
     prompt: Option<String>,
 
+    /// Use the pytorch weights rather than the safetensors ones
+    #[arg(long)]
+    use_pth: bool,
+
     /// The number of times to run the prompt.
     #[arg(long, default_value = "1")]
     n: usize,
@@ -60,34 +60,27 @@ impl Args {
         };
 
         let repo = Repo::with_revision(model_id, RepoType::Model, revision);
-        let (config_filename, tokenizer_filename, weights_filename) = if self.offline {
-            let cache = Cache::default().repo(repo);
-            (
-                cache
-                    .get("config.json")
-                    .ok_or(anyhow!("Missing config file in cache"))?,
-                cache
-                    .get("tokenizer.json")
-                    .ok_or(anyhow!("Missing tokenizer file in cache"))?,
-                cache
-                    .get("model.safetensors")
-                    .ok_or(anyhow!("Missing weights file in cache"))?,
-            )
-        } else {
+        let (config_filename, tokenizer_filename, weights_filename) = {
             let api = Api::new()?;
             let api = api.repo(repo);
-            (
-                api.get("config.json")?,
-                api.get("tokenizer.json")?,
-                api.get("model.safetensors")?,
-            )
+            let config = api.get("config.json")?;
+            let tokenizer = api.get("tokenizer.json")?;
+            let weights = if self.use_pth {
+                api.get("pytorch_model.bin")?
+            } else {
+                api.get("model.safetensors")?
+            };
+            (config, tokenizer, weights)
         };
         let config = std::fs::read_to_string(config_filename)?;
         let config: Config = serde_json::from_str(&config)?;
         let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;
 
-        let vb =
-            unsafe { VarBuilder::from_mmaped_safetensors(&[weights_filename], DTYPE, &device)? };
+        let vb = if self.use_pth {
+            VarBuilder::from_pth(&weights_filename, DTYPE, &device)?
+        } else {
+            unsafe { VarBuilder::from_mmaped_safetensors(&[weights_filename], DTYPE, &device)? }
+        };
         let model = BertModel::load(vb, &config)?;
         Ok((model, tokenizer))
     }

diff --git a/candle-examples/examples/blip/README.md b/candle-examples/examples/blip/README.md
@@ -0,0 +1,19 @@
+# candle-blip
+
+The
+[blip-image-captioning](https://huggingface.co/Salesforce/blip-image-captioning-base)
+model can generate captions for an input image.
+
+## Running on an example
+
+```bash
+cargo run --example blip --release -- --image candle-examples/examples/yolo-v8/assets/bike.jpg
+```
+
+```
+Running on CPU, to run on GPU, build this example with `--features cuda`
+loaded image Tensor[dims 3, 384, 384; f32]
+model built
+several cyclists are riding down a road with cars behind them%
+```
+![Leading group, Giro d'Italia 2021](../yolo-v8/assets/bike.jpg)