Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into feat/comparison-and…
Browse files Browse the repository at this point in the history
…-equality-operations
  • Loading branch information
LLukas22 committed Oct 26, 2023
2 parents 4fb19fa + c8e197f commit d094028
Show file tree
Hide file tree
Showing 49 changed files with 3,718 additions and 105 deletions.
62 changes: 62 additions & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
name: PyO3-CI

on:
workflow_dispatch:
push:
branches:
- main
paths:
- candle-pyo3/**
pull_request:
paths:
- candle-pyo3/**

jobs:
build_and_test:
name: Check everything builds & tests
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest] # For now, only test on Linux
steps:
- name: Checkout repository
uses: actions/checkout@v2

- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable

- name: Install Python
uses: actions/setup-python@v4
with:
python-version: 3.11
architecture: "x64"

- name: Cache Cargo Registry
uses: actions/cache@v1
with:
path: ~/.cargo/registry
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}

- name: Install
working-directory: ./candle-pyo3
run: |
python -m venv .env
source .env/bin/activate
pip install -U pip
pip install pytest maturin black
python -m maturin develop -r
- name: Check style
working-directory: ./candle-pyo3
run: |
source .env/bin/activate
python stub.py --check
black --check .
- name: Run tests
working-directory: ./candle-pyo3
run: |
source .env/bin/activate
python -m pytest -s -v tests
8 changes: 1 addition & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,7 @@ members = [
"candle-nn",
"candle-pyo3",
"candle-transformers",
"candle-wasm-examples/llama2-c",
"candle-wasm-examples/segment-anything",
"candle-wasm-examples/whisper",
"candle-wasm-examples/yolo",
"candle-wasm-examples/bert",
"candle-wasm-examples/phi",
"candle-wasm-examples/t5",
"candle-wasm-examples/*",
"candle-wasm-tests",
]
exclude = ["candle-flash-attn", "candle-kernels"]
Expand Down
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ These online demos run entirely in your browser:
- [T5](https://huggingface.co/spaces/radames/Candle-T5-Generation-Wasm): text generation.
- [Phi-v1.5](https://huggingface.co/spaces/radames/Candle-Phi-1.5-Wasm): text generation.
- [Segment Anything Model](https://huggingface.co/spaces/radames/candle-segment-anything-wasm): Image segmentation.
- [Blip](https://huggingface.co/spaces/radames/Candle-BLIP-Image-Captioning): image captioning.

We also provide a some command line based examples using state of the art models:

Expand Down Expand Up @@ -99,6 +100,8 @@ We also provide a some command line based examples using state of the art models
- [DINOv2](./candle-examples/examples/dinov2/): computer vision model trained
using self-supervision (can be used for imagenet classification, depth
evaluation, segmentation).
- [BLIP](./candle-examples/examples/blip/): image to text model, can be used to
generate captions for an image.

Run them using commands like:
```
Expand Down Expand Up @@ -133,6 +136,8 @@ And then head over to
## Useful External Resources
- [`candle-tutorial`](https://github.com/ToluClassics/candle-tutorial): a
very detailed tutorial showing how to convert a PyTorch model to Candle.
- [`optimisers`](https://github.com/KGrewal1/optimisers): a collection of optimisers
including SGD with momentum, AdaGrad, AdaDelta, AdaMax, NAdam, RAdam, and RMSprop.
- [`candle-lora`](https://github.com/EricLBuehler/candle-lora): a LoRA implementation
that conforms to the official `peft` implementation.

Expand Down Expand Up @@ -163,8 +168,11 @@ If you have an addition to this list, please submit a pull request.
- T5.
- Bert.
- Whisper (multi-lingual support).
- Stable Diffusion v1.5, v2.1, XL v1.0.
- Wurstchen v2.
- Text to image.
- Stable Diffusion v1.5, v2.1, XL v1.0.
- Wurstchen v2.
- Image to text.
- BLIP.
- Computer Vision Models.
- DINOv2, ConvMixer, EfficientNet, ResNet, ViT.
- yolo-v3, yolo-v8.
Expand Down
10 changes: 9 additions & 1 deletion candle-core/src/backprop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,15 @@ impl Tensor {
Op::Unary(_, UnaryOp::Round) => {
Err(Error::BackwardNotSupported { op: "round" })?
}
Op::Unary(_, UnaryOp::Gelu) => Err(Error::BackwardNotSupported { op: "gelu" })?,
Op::Unary(arg, UnaryOp::Gelu) => {
let sum_grad = grads.or_insert(arg)?;
let cube = arg.powf(3.)?;
let tanh = (0.0356774 * &cube + (0.797885 * arg)?)?.tanh()?;
let gelu_grad = (((0.5 * &tanh)?
+ (0.0535161 * cube + (0.398942 * arg)?)? * (1. - tanh.powf(2.)?))?
+ 0.5)?;
*sum_grad = sum_grad.add(&(&grad * gelu_grad)?)?
}
Op::Unary(_, UnaryOp::Erf) => Err(Error::BackwardNotSupported { op: "erf" })?,
Op::Unary(_, UnaryOp::GeluErf) => {
Err(Error::BackwardNotSupported { op: "gelu-erf" })?
Expand Down
2 changes: 1 addition & 1 deletion candle-core/src/cuda_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2171,7 +2171,7 @@ impl BackendStorage for CudaStorage {
if src_l.is_contiguous() {
dev.dtod_copy(&src, &mut dst).w()?
} else {
let func = dev.get_or_load_func("ucopy_64", kernels::UNARY)?;
let func = dev.get_or_load_func("ucopy_f64", kernels::UNARY)?;
// SAFETY: Set later by running the kernel.
let params = (el_count, dims.len(), &ds, &src, &mut dst);
// SAFETY: ffi.
Expand Down
3 changes: 3 additions & 0 deletions candle-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ pub enum Error {
#[error("{op} expects at least one tensor")]
OpRequiresAtLeastOneTensor { op: &'static str },

#[error("{op} expects at least two tensors")]
OpRequiresAtLeastTwoTensors { op: &'static str },

#[error("backward is not supported for {op}")]
BackwardNotSupported { op: &'static str },

Expand Down
1 change: 1 addition & 0 deletions candle-core/src/pickle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ fn rebuild_args(args: Object) -> Result<(Layout, DType, String, usize)> {
"HalfStorage" => DType::F16,
"BFloat16Storage" => DType::BF16,
"ByteStorage" => DType::U8,
"LongStorage" => DType::I64,
other => {
crate::bail!("unsupported storage type {other}")
}
Expand Down
87 changes: 86 additions & 1 deletion candle-core/src/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ impl Tensor {

/// Returns true if the computation graph should track this op, that is if it is
/// a variable or if it has some variable as dependencies.
pub(crate) fn track_op(&self) -> bool {
pub fn track_op(&self) -> bool {
self.is_variable || self.op.is_some()
}

Expand Down Expand Up @@ -540,6 +540,73 @@ impl Tensor {
Ok(inp)
}

/// Creates grids of coordinates specified by the 1D inputs.
///
/// # Arguments
///
/// * `args` - A slice of 1D tensors.
/// * `xy_indexing` - Whether to use xy indexing or ij indexing. If xy is selected, the
/// first dimension corresponds to the cardinality of the second input and the second
/// dimension corresponds to the cardinality of the first input. If ij is selected, the
/// dimensions are in the same order as the cardinality of the inputs.
///
/// # Examples
///
/// ```rust
/// use candle_core::{Tensor, Device, Shape};
/// let x = Tensor::new(&[1f32, 2., 3.], &Device::Cpu)?;
/// let y = Tensor::new(&[4f32, 5., 6.], &Device::Cpu)?;
///
/// let grids_xy = Tensor::meshgrid(&[&x, &y], true)?;
///
/// assert_eq!(grids_xy.len(), 2);
/// assert_eq!(grids_xy[0].dims(), &[3, 3]);
///
/// assert_eq!(grids_xy[0].to_vec2::<f32>()?, &[[1., 2., 3.], [1., 2., 3.], [1., 2., 3.]]);
/// assert_eq!(grids_xy[1].to_vec2::<f32>()?, &[[4., 4., 4.], [5., 5., 5.], [6., 6., 6.]]);
///
/// let grids_ij = Tensor::meshgrid(&[&x, &y], false)?;
///
/// assert_eq!(grids_ij[0].to_vec2::<f32>()?, &[[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]);
/// assert_eq!(grids_ij[1].to_vec2::<f32>()?, &[[4., 5., 6.], [4., 5., 6.], [4., 5., 6.]]);
/// # Ok::<(), candle_core::Error>(())
/// ```
///
/// # Errors
///
/// * Will return `Err` if `args` contains less than 2 tensors.
///
pub fn meshgrid<A: AsRef<Tensor>>(args: &[A], xy_indexing: bool) -> Result<Vec<Self>> {
if args.len() <= 1 {
Err(Error::OpRequiresAtLeastTwoTensors { op: "meshgrid" }.bt())?
}
let args: Vec<_> = if xy_indexing {
args.iter().rev().collect()
} else {
args.iter().collect()
};

let mut shape = Vec::with_capacity(args.len());
for arg in args.iter() {
shape.push(arg.as_ref().dims1()?)
}

let mut grids = Vec::with_capacity(args.len());
for idx in 0..args.len() {
let mut ones = vec![1usize; args.len()];
ones[idx] = shape[idx];
let arg = args[idx].as_ref().reshape(ones)?;
let mut repeats = shape.clone();
repeats[idx] = 1;
let repeated_tensor = arg.repeat(repeats)?;
grids.push(repeated_tensor);
}
if xy_indexing {
grids.reverse();
}
Ok(grids)
}

/// This operation multiplies the input tensor by `mul` then adds `add` and return the result.
/// The input values `mul` and `add` are casted to the appropriate type so some rounding might
/// be performed.
Expand Down Expand Up @@ -1598,6 +1665,24 @@ impl Tensor {
}
}

/// Returns the sub-tensor fixing the index at `index` on the dimension `dim`.
///
/// ```rust
/// use candle_core::{Tensor, Device};
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let t = tensor.get_on_dim(1, 0)?;
/// assert_eq!(t.to_vec1::<f32>()?, &[0., 2., 4.]);
/// let t = tensor.get_on_dim(1, 1)?;
/// assert_eq!(t.to_vec1::<f32>()?, &[1., 3., 5.]);
/// let t = tensor.get_on_dim(0, 1)?;
/// assert_eq!(t.to_vec1::<f32>()?, &[2., 3.]);
/// # Ok::<(), candle_core::Error>(())
/// ```
pub fn get_on_dim<D: Dim>(&self, dim: D, index: usize) -> Result<Tensor> {
let dim = dim.to_index(self.shape(), "get_on_dim")?;
self.narrow(dim, index, 1)?.squeeze(dim)
}

/// Returns a tensor that is a transposed version of the input, the two last dimensions of the
/// input are swapped.
///
Expand Down
13 changes: 13 additions & 0 deletions candle-core/tests/grad_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,19 @@ fn unary_grad(device: &Device) -> Result<()> {
test_utils::to_vec1_round(grad_x, 2)?,
[0.01, 0.42, 0.0, 0.98],
);

// testing compared to pytorch nn.GELU(approximate = 'tanh')
let y = x.gelu()?;
let grads = y.backward()?;
let grad_x = grads.get(&x).context("no grad for x")?;
assert_eq!(
test_utils::to_vec1_round(&y, 4)?,
[2.9964, 0.8412, 3.9999, 0.0839]
);
assert_eq!(
test_utils::to_vec1_round(grad_x, 4)?,
[1.0116, 1.0830, 1.0003, 0.6188],
);
Ok(())
}

Expand Down
47 changes: 20 additions & 27 deletions candle-examples/examples/bert/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ extern crate intel_mkl_src;
extern crate accelerate_src;
use candle_transformers::models::bert::{BertModel, Config, DTYPE};

use anyhow::{anyhow, Error as E, Result};
use anyhow::{Error as E, Result};
use candle::Tensor;
use candle_nn::VarBuilder;
use clap::Parser;
use hf_hub::{api::sync::Api, Cache, Repo, RepoType};
use hf_hub::{api::sync::Api, Repo, RepoType};
use tokenizers::{PaddingParams, Tokenizer};

#[derive(Parser, Debug)]
Expand All @@ -19,10 +19,6 @@ struct Args {
#[arg(long)]
cpu: bool,

/// Run offline (you must have the files already cached)
#[arg(long)]
offline: bool,

/// Enable tracing (generates a trace-timestamp.json file).
#[arg(long)]
tracing: bool,
Expand All @@ -38,6 +34,10 @@ struct Args {
#[arg(long)]
prompt: Option<String>,

/// Use the pytorch weights rather than the safetensors ones
#[arg(long)]
use_pth: bool,

/// The number of times to run the prompt.
#[arg(long, default_value = "1")]
n: usize,
Expand All @@ -60,34 +60,27 @@ impl Args {
};

let repo = Repo::with_revision(model_id, RepoType::Model, revision);
let (config_filename, tokenizer_filename, weights_filename) = if self.offline {
let cache = Cache::default().repo(repo);
(
cache
.get("config.json")
.ok_or(anyhow!("Missing config file in cache"))?,
cache
.get("tokenizer.json")
.ok_or(anyhow!("Missing tokenizer file in cache"))?,
cache
.get("model.safetensors")
.ok_or(anyhow!("Missing weights file in cache"))?,
)
} else {
let (config_filename, tokenizer_filename, weights_filename) = {
let api = Api::new()?;
let api = api.repo(repo);
(
api.get("config.json")?,
api.get("tokenizer.json")?,
api.get("model.safetensors")?,
)
let config = api.get("config.json")?;
let tokenizer = api.get("tokenizer.json")?;
let weights = if self.use_pth {
api.get("pytorch_model.bin")?
} else {
api.get("model.safetensors")?
};
(config, tokenizer, weights)
};
let config = std::fs::read_to_string(config_filename)?;
let config: Config = serde_json::from_str(&config)?;
let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;

let vb =
unsafe { VarBuilder::from_mmaped_safetensors(&[weights_filename], DTYPE, &device)? };
let vb = if self.use_pth {
VarBuilder::from_pth(&weights_filename, DTYPE, &device)?
} else {
unsafe { VarBuilder::from_mmaped_safetensors(&[weights_filename], DTYPE, &device)? }
};
let model = BertModel::load(vb, &config)?;
Ok((model, tokenizer))
}
Expand Down
19 changes: 19 additions & 0 deletions candle-examples/examples/blip/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# candle-blip

The
[blip-image-captioning](https://huggingface.co/Salesforce/blip-image-captioning-base)
model can generate captions for an input image.

## Running on an example

```bash
cargo run --example blip --release -- --image candle-examples/examples/yolo-v8/assets/bike.jpg
```

```
Running on CPU, to run on GPU, build this example with `--features cuda`
loaded image Tensor[dims 3, 384, 384; f32]
model built
several cyclists are riding down a road with cars behind them%
```
![Leading group, Giro d'Italia 2021](../yolo-v8/assets/bike.jpg)
Loading

0 comments on commit d094028

Please sign in to comment.