
Commit

Merge pull request #12 from vinlemon/feat-face-landmark
Add face landmark task
hydai authored Oct 2, 2024
2 parents 68462d2 + 3605103 commit 8afdca2
Showing 14 changed files with 3,797 additions and 3 deletions.
77 changes: 76 additions & 1 deletion README.md
@@ -33,7 +33,7 @@
* [x] Hand Landmark Detection
* [x] Image Embedding
* [x] Face Detection
* [ ] Face Landmark Detection
* [x] Face Landmark Detection
* [ ] Pose Landmark Detection
* [x] Audio Classification
* [x] Text Classification
@@ -85,6 +85,8 @@ Every task has three types: ```XxxBuilder```, ```Xxx```, ```XxxSession```. (``Xx
* image embedding: `ImageEmbedderBuilder` -> `ImageEmbedder` -> `ImageEmbedderSession`
* image segmentation: `ImageSegmenterBuilder` -> `ImageSegmenter` -> `ImageSegmenterSession`
* object detection: `ObjectDetectorBuilder` -> `ObjectDetector` -> `ObjectDetectorSession`
* face detection: `FaceDetectorBuilder` -> `FaceDetector` -> `FaceDetectorSession`
* face landmark detection: `FaceLandmarkerBuilder` -> `FaceLandmarker` -> `FaceLandmarkerSession`
* audio:
* audio classification: `AudioClassifierBuilder` -> `AudioClassifier` -> `AudioClassifierSession`
* text:
@@ -278,6 +280,79 @@ $ cargo run --release --example gesture_recognition -- ./assets/models/gesture_r
Index: 6
```

### Face Landmark Detection

```rust
use mediapipe_rs::postprocess::utils::DrawLandmarksOptions;
use mediapipe_rs::tasks::vision::{FaceLandmarkConnections, FaceLandmarkerBuilder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
let (model_path, img_path, output_path) = parse_args()?;

let mut input_img = image::open(img_path)?;
let face_landmark_results = FaceLandmarkerBuilder::new()
.num_faces(1) // set max number of faces to detect
.min_face_detection_confidence(0.5)
.min_face_presence_confidence(0.5)
.min_tracking_confidence(0.5)
.output_face_blendshapes(true)
.build_from_file(model_path)? // create a face landmarker
.detect(&input_img)?; // do inference and generate results

// show formatted result message
println!("{}", face_landmark_results);

if let Some(output_path) = output_path {
// draw face landmarks result to image
let options = DrawLandmarksOptions::default()
.connections(FaceLandmarkConnections::get_connections(
&FaceLandmarkConnections::FacemeshTesselation,
))
.landmark_radius_percent(0.003);

for result in face_landmark_results.iter() {
result.draw_with_options(&mut input_img, &options);
}
// save output image
input_img.save(output_path)?;
}

Ok(())
}
```
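
To run the same model over many images, the task only needs to be built once. The following is a minimal sketch of that pattern; it assumes `FaceLandmarker` exposes a `new_session()` method and that the session's `detect()` behaves like the task-level call, following the `XxxBuilder` -> `Xxx` -> `XxxSession` convention listed above.

```rust
use mediapipe_rs::tasks::vision::FaceLandmarkerBuilder;

fn detect_many(model_path: String, img_paths: &[String]) -> Result<(), Box<dyn std::error::Error>> {
    // Build the task once; loading the model is the expensive step.
    let face_landmarker = FaceLandmarkerBuilder::new()
        .num_faces(1)
        .build_from_file(model_path)?;

    // Assumed API: reuse one session for every input image.
    let mut session = face_landmarker.new_session()?;
    for path in img_paths {
        let img = image::open(path)?;
        let results = session.detect(&img)?;
        println!("{}:\n{}", path, results);
    }
    Ok(())
}
```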

Example input (the image is downloaded from https://storage.googleapis.com/mediapipe-assets/portrait.jpg):

<img height="30%" src="https://storage.googleapis.com/mediapipe-assets/portrait.jpg" width="30%" alt="face_detection_full_range_image.jpg" />

Example output in console:

```console
$ cargo run --release --example face_landmark -- ./assets/models/face_landmark/face_landmarker.task ./assets/testdata/img/face.jpg ./assets/doc/face_landmark_output.jpg

Finished release [optimized] target(s) in 4.50s
Running `./scripts/wasmedge-runner.sh target/wasm32-wasi/release/examples/face_landmark.wasm ./assets/models/face_landmark/face_landmarker.task ./assets/testdata/img/face.jpg ./assets/doc/face_landmark_output.jpg`

FaceLandmarkResult #0
Landmarks:
Normalized Landmark #0:
x: 0.49687287
y: 0.24964334
z: -0.029807145
Normalized Landmark #1:
x: 0.49801534
y: 0.22689381
z: -0.05928771
Normalized Landmark #2:
x: 0.49707597
y: 0.23421054
z: -0.03364953
```

Example output image:
<img height="30%" src="./assets/doc/face_landmark_output.jpg" width="30%"/>

### Audio Input

Any audio type that implements the ```AudioData``` trait can be used as input to the audio tasks.
Binary file added assets/doc/face_landmark_output.jpg
49 changes: 49 additions & 0 deletions examples/face_landmark.rs
@@ -0,0 +1,49 @@
fn parse_args() -> Result<(String, String, Option<String>), Box<dyn std::error::Error>> {
let args: Vec<String> = std::env::args().collect();
if args.len() != 3 && args.len() != 4 {
return Err(format!(
"Usage {} model_path image_path [output image path]",
args[0]
)
.into());
}
Ok((args[1].clone(), args[2].clone(), args.get(3).cloned()))
}

use mediapipe_rs::postprocess::utils::DrawLandmarksOptions;
use mediapipe_rs::tasks::vision::{FaceLandmarkConnections, FaceLandmarkerBuilder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
let (model_path, img_path, output_path) = parse_args()?;

let mut input_img = image::open(img_path)?;
let face_landmark_results = FaceLandmarkerBuilder::new()
.num_faces(1) // set max number of faces to detect
.min_face_detection_confidence(0.5)
.min_face_presence_confidence(0.5)
.min_tracking_confidence(0.5)
.output_face_blendshapes(true)
.build_from_file(model_path)? // create a face landmarker
.detect(&input_img)?; // do inference and generate results

// show formatted result message
println!("{}", face_landmark_results);

if let Some(output_path) = output_path {
// draw face landmarks result to image
let options = DrawLandmarksOptions::default()
.connections(FaceLandmarkConnections::get_connections(
&FaceLandmarkConnections::FacemeshTesselation,
))
.landmark_radius_percent(0.003);

for result in face_landmark_results.iter() {
result.draw_with_options(&mut input_img, &options);
}
// save output image
input_img.save(output_path)?;
}

Ok(())
}
25 changes: 23 additions & 2 deletions scripts/download-models.sh
@@ -39,8 +39,13 @@ image_classification_init() {
curl -sLO "${url}"
done

# for custom model downloaded from tf hub
curl -sL "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite" -o "lite-model_aiy_vision_classifier_birds_V1_3.tflite"
# for custom model downloaded from kaggle
bird_model_name="lite-model_aiy_vision_classifier_birds_V1_3"
kaggle_tflite_filename="3.tflite"
curl -sL "https://www.kaggle.com/api/v1/models/google/aiy/tfLite/vision-classifier-birds-v1/3/download" -o "${bird_model_name}.tar.gz"
tar -zxvf "${bird_model_name}.tar.gz"
mv "${kaggle_tflite_filename}" "${bird_model_name}.tflite"
rm -rf "${bird_model_name}.tar.gz"

popd
}
@@ -126,6 +131,21 @@ face_detection_init() {
popd
}

face_landmark_init() {
face_landmark_dir="${model_path}/face_landmark"
mkdir -p "${face_landmark_dir}"
pushd "${face_landmark_dir}"

model_urls=("https://storage.googleapis.com/mediapipe-tasks/face_landmarker/face_landmarker.task"
)

for url in "${model_urls[@]}"; do
curl -sLO "${url}"
done

popd
}


audio_classification_init() {
audio_classification_dir="${model_path}/audio_classification"
@@ -182,6 +202,7 @@ hand_landmark_detection_init
image_segmentation_init
image_embedding_init
face_detection_init
face_landmark_init
audio_classification_init
text_classification_init
text_embedding_init
6 changes: 6 additions & 0 deletions src/postprocess/utils/vision/draw_landmarks.rs
@@ -51,6 +51,12 @@ impl<'a, P: Pixel> DrawLandmarksOptions<'a, P> {
self.visibility_threshold = visibility_threshold;
self
}

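/// Set the radius used to draw each landmark, scaled relative to the image size.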
#[inline(always)]
pub fn landmark_radius_percent(mut self, landmark_radius_percent: f32) -> Self {
self.landmark_radius_percent = landmark_radius_percent;
self
}
}

impl<'a, P: Pixel + DefaultPixel> Default for DrawLandmarksOptions<'a, P> {
158 changes: 158 additions & 0 deletions src/tasks/common/options/face_landmark_options.rs
@@ -0,0 +1,158 @@
#[derive(Clone)]
pub(crate) struct FaceLandmarkOptions {
/// The maximum number of faces that can be detected by the FaceLandmarker.
pub num_faces: i32,

/// The minimum confidence score for the face detection to be considered successful.
pub min_face_detection_confidence: f32,

/// The minimum confidence score of face presence in face landmark detection.
pub min_face_presence_confidence: f32,

/// The minimum confidence score for the face tracking to be considered successful.
pub min_tracking_confidence: f32,

/// Whether Face Landmarker outputs face blendshapes.
/// Face blendshapes are used for rendering the 3D face model.
pub output_face_blendshapes: bool,

/// Whether FaceLandmarker outputs the facial transformation matrix.
/// FaceLandmarker uses the matrix to transform the face landmarks from a canonical face model
/// to the detected face, so users can apply effects on the detected landmarks.
pub output_facial_transformation_matrixes: bool,
}

impl Default for FaceLandmarkOptions {
#[inline(always)]
fn default() -> Self {
Self {
num_faces: 1,
min_face_detection_confidence: 0.5,
min_face_presence_confidence: 0.5,
min_tracking_confidence: 0.5,
output_face_blendshapes: false,
output_facial_transformation_matrixes: false,
}
}
}

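// Builder-style setter methods for FaceLandmarkOptions; expanded into the task builder.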
macro_rules! face_landmark_options_impl {
() => {
/// Set the maximum number of faces that can be detected by the FaceLandmarker.
#[inline(always)]
pub fn num_faces(mut self, num_faces: i32) -> Self {
self.face_landmark_options.num_faces = num_faces;
self
}

/// Set the minimum confidence score for the face detection to be considered successful.
#[inline(always)]
pub fn min_face_detection_confidence(mut self, min_face_detection_confidence: f32) -> Self {
self.face_landmark_options.min_face_detection_confidence =
min_face_detection_confidence;
self
}

/// Set the minimum confidence score of face presence in face landmark detection.
#[inline(always)]
pub fn min_face_presence_confidence(mut self, min_face_presence_confidence: f32) -> Self {
self.face_landmark_options.min_face_presence_confidence = min_face_presence_confidence;
self
}

/// Set the minimum confidence score for the face tracking to be considered successful.
#[inline(always)]
pub fn min_tracking_confidence(mut self, min_tracking_confidence: f32) -> Self {
self.face_landmark_options.min_tracking_confidence = min_tracking_confidence;
self
}

/// Set whether FaceLandmarker outputs face blendshapes.
pub fn output_face_blendshapes(mut self, output_face_blendshapes: bool) -> Self {
self.face_landmark_options.output_face_blendshapes = output_face_blendshapes;
self
}

/// Set whether FaceLandmarker outputs the facial transformation matrix.
pub fn output_facial_transformation_matrixes(
mut self,
output_facial_transformation_matrixes: bool,
) -> Self {
self.face_landmark_options.output_facial_transformation_matrixes =
output_facial_transformation_matrixes;
self
}
};
}

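// Validates the configured FaceLandmarkOptions, returning an ArgumentError for out-of-range values.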
macro_rules! face_landmark_options_check {
( $self:ident ) => {{
if $self.face_landmark_options.num_faces == 0 {
return Err(crate::Error::ArgumentError(
"The number of max faces cannot be zero".into(),
));
}
if $self.face_landmark_options.min_face_presence_confidence < 0.
|| $self.face_landmark_options.min_face_presence_confidence > 1.
{
return Err(crate::Error::ArgumentError(format!(
"The min_face_presence_confidence must in range [0.0, 1.0], but got `{}`",
$self.face_landmark_options.min_face_presence_confidence
)));
}
if $self.face_landmark_options.min_face_detection_confidence < 0.
|| $self.face_landmark_options.min_face_detection_confidence > 1.
{
return Err(crate::Error::ArgumentError(format!(
"The min_face_detection_confidence must in range [0.0, 1.0], but got `{}`",
$self.face_landmark_options.min_face_detection_confidence
)));
}
}};
}

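// Read-only getters exposing the FaceLandmarkOptions stored in the task's build options.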
macro_rules! face_landmark_options_get_impl {
() => {
/// Get the maximum number of faces that can be detected by the FaceLandmarker.
#[inline(always)]
pub fn num_faces(&self) -> i32 {
self.build_options.face_landmark_options.num_faces
}

/// Get the minimum confidence score for the face detection to be considered successful.
#[inline(always)]
pub fn min_face_detection_confidence(&self) -> f32 {
self.build_options
.face_landmark_options
.min_face_detection_confidence
}

/// Get the minimum confidence score of face presence in face landmark detection.
#[inline(always)]
pub fn min_face_presence_confidence(&self) -> f32 {
self.build_options
.face_landmark_options
.min_face_presence_confidence
}

/// Get the minimum confidence score for the face tracking to be considered successful.
#[inline(always)]
pub fn min_tracking_confidence(&self) -> f32 {
self.build_options
.face_landmark_options
.min_tracking_confidence
}

/// Get whether FaceLandmarker outputs face blendshapes.
pub fn output_face_blendshapes(&self) -> bool {
self.build_options.face_landmark_options.output_face_blendshapes
}

/// Get whether FaceLandmarker outputs the facial transformation matrix.
pub fn output_facial_transformation_matrixes(&self) -> bool {
self.build_options
.face_landmark_options
.output_facial_transformation_matrixes
}
};
}
5 changes: 5 additions & 0 deletions src/tasks/common/options/mod.rs
@@ -12,9 +12,14 @@ mod embedding_options;
#[macro_use]
#[cfg(feature = "vision")]
mod hand_landmark_options;
#[macro_use]
#[cfg(feature = "vision")]
mod face_landmark_options;

pub(crate) use base_task_options::BaseTaskOptions;
pub(crate) use classification_options::ClassificationOptions;
pub(crate) use embedding_options::EmbeddingOptions;
#[cfg(feature = "vision")]
pub(crate) use hand_landmark_options::HandLandmarkOptions;
#[cfg(feature = "vision")]
pub(crate) use face_landmark_options::FaceLandmarkOptions;