huggingface · JoseCarlosGarcia95 · Dec 22, 2024 · Dec 22, 2024 · Dec 23, 2024 · Dec 25, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -66,7 +66,7 @@ serde = { version = "1.0.171", features = ["derive"] }
 serde_plain = "1.0.2"
 serde_json = "1.0.99"
 thiserror = "1"
-tokenizers = { version = "0.19.1", default-features = false }
+tokenizers = { version = "0.21.0", default-features = false }
 tracing = "0.1.37"
 tracing-chrome = "0.7.1"
 tracing-subscriber = "0.3.7"

diff --git a/candle-core/src/quantized/ggml_file.rs b/candle-core/src/quantized/ggml_file.rs
@@ -183,6 +183,18 @@ pub fn qtensor_from_ggml(
         GgmlDType::Q6K => {
             from_raw_data::<k_quants::BlockQ6K>(raw_data, size_in_bytes, dims, device)
         }
+        GgmlDType::Q8K => {
+            from_raw_data::<k_quants::BlockQ8K>(raw_data, size_in_bytes, dims, device)
+        }
+        GgmlDType::Q2b0 => {
+            from_raw_data::<k_quants::BlockQ2b0>(raw_data, size_in_bytes, dims, device)
+        }
+        GgmlDType::QI8 => {
+            from_raw_data::<k_quants::BlockQI8>(raw_data, size_in_bytes, dims, device)
+        }
+        GgmlDType::Q2b1 => {
+            from_raw_data::<k_quants::BlockQ2b1>(raw_data, size_in_bytes, dims, device)
+        }
         _ => crate::bail!("quantized type {ggml_dtype:?} is not supported yet"),
     }
 }

diff --git a/candle-core/src/quantized/gguf_file.rs b/candle-core/src/quantized/gguf_file.rs
@@ -174,6 +174,26 @@ impl Value {
         }
     }
 
+    pub fn from_u8(v: u8) -> Self { // Constructor: wraps `v` in the `U8` variant.
+        Self::U8(v)
+    }
+
+    pub fn from_u64(v: u64) -> Self { // Constructor: wraps `v` in the `U64` variant.
+        Self::U64(v)
+    }
+
+    pub fn from_u32(v: u32) -> Self { // Constructor: wraps `v` in the `U32` variant.
+        Self::U32(v)
+    }
+
+    pub fn from_f32(v: f32) -> Self { // Constructor: wraps `v` in the `F32` variant.
+        Self::F32(v)
+    }
+
+    pub fn from_string(v: String) -> Self { // Constructor: moves the owned `v` into the `String` variant.
+        Self::String(v)
+    }
+
     pub fn to_u8(&self) -> Result<u8> {
         match self {
             Self::U8(v) => Ok(*v),
@@ -489,7 +509,7 @@ fn write_string<W: std::io::Write>(w: &mut W, str: &str) -> Result<()> {
 
 pub fn write<W: std::io::Seek + std::io::Write>(
     w: &mut W,
-    metadata: &[(&str, &Value)],
+    metadata: &[(&str, Value)],
     tensors: &[(&str, &QTensor)],
 ) -> Result<()> {
     w.write_u32::<LittleEndian>(0x46554747)?;