diff --git a/backend/src/download.py b/backend/src/download.py
index c7098ff..3589490 100644
--- a/backend/src/download.py
+++ b/backend/src/download.py
@@ -10,19 +10,24 @@
 AWS_MODELS_FOLDER = "models/"
 AWS_PREPROCESSORS_FOLDER = "preprocessors/"
 
-AWS_FINE_TUNED_BERT_MODEL = "fine_tuned_bert_model/"
-AWS_MULTIMODAL_ENCODER_MODEL = "multimodal_encoder_model.pth"
+AWS_FINE_TUNED_BERT_MODEL = "fine_tuned_bert_model.pth"
+AWS_FINE_TUNED_FASTVIT_MODEL = "fine_tuned_fastvit_model.pth"
+AWS_MLP_MODEL = "mlp_model.pth"
 AWS_MULTIMODAL_MODEL = "multimodal_model.pth"
+
 AWS_SCALER_NUMERICAL = "numerical_scaler.pkl"
 AWS_SCALER_CATEGORICAL = "categorical_scaler.pkl"
 AWS_ENCODER = "encoder.pkl"
+AWS_TARGET_SCALER = "target_scaler.pkl"
 
 FINE_TUNED_BERT_MODEL = Path(PARENT_DIR_PATH + AWS_MODELS_FOLDER + AWS_FINE_TUNED_BERT_MODEL)
-MULTIMODAL_ENCODER_MODEL = Path(PARENT_DIR_PATH + AWS_MODELS_FOLDER + AWS_MULTIMODAL_ENCODER_MODEL)
+FINE_TUNED_FASTVIT_MODEL = Path(PARENT_DIR_PATH + AWS_MODELS_FOLDER + AWS_FINE_TUNED_FASTVIT_MODEL)
 MULTIMODAL_MODEL = Path(PARENT_DIR_PATH + AWS_MODELS_FOLDER + AWS_MULTIMODAL_MODEL)
+
 SCALER_NUMERICAL = Path(PARENT_DIR_PATH + AWS_PREPROCESSORS_FOLDER + AWS_SCALER_NUMERICAL)
 SCALER_CATEGORICAL = Path(PARENT_DIR_PATH + AWS_PREPROCESSORS_FOLDER + AWS_SCALER_CATEGORICAL)
 ENCODER = Path(PARENT_DIR_PATH + AWS_PREPROCESSORS_FOLDER + AWS_ENCODER)
+TARGET_SCALER = Path(PARENT_DIR_PATH + AWS_PREPROCESSORS_FOLDER + AWS_TARGET_SCALER)
 
 
 def download_dirs_from_s3():
diff --git a/backend/src/main.py b/backend/src/main.py
index a8ff811..b7dc994 100644
--- a/backend/src/main.py
+++ b/backend/src/main.py
@@ -52,6 +52,7 @@ async def lifespan(app: FastAPI):
         ml_ops.target_encoder,
         ml_ops.scaler_numerical,
         ml_ops.scaler_categorical,
+        ml_ops.target_scaler,
     ) = get_models_and_preprocessors()
 
     yield
diff --git a/backend/src/ml.py b/backend/src/ml.py
index 9758389..45ad41a 100644
--- a/backend/src/ml.py
+++ b/backend/src/ml.py
@@ -1,45 +1,77 @@
 import pickle
 import torch
-from src.download import ENCODER, FINE_TUNED_BERT_MODEL, MULTIMODAL_ENCODER_MODEL, MULTIMODAL_MODEL, SCALER_CATEGORICAL, SCALER_NUMERICAL
+from src.download import (
+    ENCODER,
+    FINE_TUNED_BERT_MODEL,
+    FINE_TUNED_FASTVIT_MODEL,
+    MULTIMODAL_MODEL,
+    SCALER_CATEGORICAL,
+    SCALER_NUMERICAL,
+    TARGET_SCALER,
+)
 from timm import create_model
 from timm.data import create_transform, resolve_model_data_config
 from torch import nn
-from torch.nn import TransformerEncoder, TransformerEncoderLayer
-from transformers import AutoModelForMaskedLM, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer
 
 
 def get_models_and_preprocessors():
     DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
     torch.cuda.empty_cache()
 
-    fastvit_model = create_model("fastvit_t8.apple_in1k", pretrained=True, num_classes=0)
-    fastvit_model = fastvit_model.eval()
-    fastvit_model.to(DEVICE)
-    fastvit_model.eval()
+    # Load FastViT model
+    fastvit = create_model("fastvit_t8.apple_in1k", pretrained=True, num_classes=0)
+    fastvit.head = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(fastvit.num_features, 1))
+    fastvit.load_state_dict(torch.load(FINE_TUNED_FASTVIT_MODEL, map_location=DEVICE))
+
+    fastvit.eval()
+    fastvit.to(DEVICE)
 
-    data_config = resolve_model_data_config(fastvit_model)
+    data_config = resolve_model_data_config(fastvit)
     transforms = create_transform(**data_config, is_training=False)
 
-    tokenizer = AutoTokenizer.from_pretrained("dumitrescustefan/bert-base-romanian-cased-v1")
-    bert_model = AutoModelForMaskedLM.from_pretrained(FINE_TUNED_BERT_MODEL)
-    bert_model.config.output_hidden_states = True
-    bert_model.to(DEVICE)
-    bert_model.eval()
+    class FastViTEmbedding(nn.Module):
+        def __init__(self, model):
+            super(FastViTEmbedding, self).__init__()
+            self.model = model
+            self.pool = nn.AdaptiveAvgPool2d(1)
+
+        def forward(self, x):
+            # This accesses the last layer before the regression head.
+            x = self.model.forward_features(x)
+            x = self.pool(x)
+            x = x.view(x.size(0), -1)
+            return x
 
-    encoder_layer = TransformerEncoderLayer(
-        d_model=768 * 2,
-        nhead=8,
-        dim_feedforward=512,
-        dropout=0.1,
-        activation="relu",
+    fastvit_model = FastViTEmbedding(fastvit).to(DEVICE)
+    fastvit_model.eval()
+
+    # Load BERT model
+    tokenizer = AutoTokenizer.from_pretrained(
+        "dumitrescustefan/bert-base-romanian-uncased-v1", do_lower_case=True, add_special_tokens=True, max_length=512, padding=True, truncation=True
     )
+    bert_model = AutoModel.from_pretrained("dumitrescustefan/bert-base-romanian-uncased-v1")
+    bert_model.eval()
+    bert_model.to(DEVICE)
 
-    multimodal_encoder = TransformerEncoder(encoder_layer, num_layers=6)
-    multimodal_encoder.to(DEVICE)
-    multimodal_encoder.load_state_dict(torch.load(MULTIMODAL_ENCODER_MODEL, map_location=DEVICE))
+    class BERTEmbeddings(nn.Module):
+        def __init__(self):
+            super(BERTEmbeddings, self).__init__()
+            self.bert = bert_model
+            self.fc = nn.Linear(768, 1)
 
-    model = Net(multimodal_encoder=multimodal_encoder)
+        def forward(self, input_ids, attention_mask):
+            outputs = self.bert(input_ids, attention_mask)
+            outputs = outputs[1]  # Use the output of the [CLS] token
+            return outputs
+
+    bert_model = BERTEmbeddings().to(DEVICE)
+
+    bert_model.load_state_dict(torch.load(FINE_TUNED_BERT_MODEL, map_location=DEVICE))
+    bert_model.eval()
+
+    model = Net()
     model.to(DEVICE)
     model.load_state_dict(torch.load(MULTIMODAL_MODEL, map_location=DEVICE))
 
@@ -52,30 +84,27 @@ def get_models_and_preprocessors():
     with open(SCALER_CATEGORICAL, "rb") as f:
         scaler_categorical = pickle.load(f)
 
-    return fastvit_model, transforms, tokenizer, bert_model, model, target_encoder, scaler_numerical, scaler_categorical
+    with open(TARGET_SCALER, "rb") as f:
+        target_scaler = pickle.load(f)
+
+    return fastvit_model, transforms, tokenizer, bert_model, model, target_encoder, scaler_numerical, scaler_categorical, target_scaler
 
 
 class Net(nn.Module):
-    def __init__(self, multimodal_encoder):
+    def __init__(self):
         super(Net, self).__init__()
-        self.fc1 = nn.Linear(12, 512)
-        self.fc2 = nn.Linear(512, 256)
-        self.fc3 = nn.Linear(256, 128)
-        self.fc4 = nn.Linear(128, 1)
+        self.fc1 = nn.Linear(1546, 128)
+        self.fc2 = nn.Linear(128, 64)
+        self.fc3 = nn.Linear(64, 1)
         self.dropout = nn.Dropout(0.2)
-        self.transformer = multimodal_encoder
 
     def forward(self, x):
-        structured_data = x[:, -10:]
-        embeddings = x[:, :-10]
-        x = self.transformer(embeddings)
-        cls_tokens = x[:, [0, 768]]
-        x = torch.cat((cls_tokens, structured_data), dim=1)
         x = torch.relu(self.fc1(x))
+        x = self.dropout(x)
         x = torch.relu(self.fc2(x))
-        x = torch.relu(self.fc3(x))
         x = self.dropout(x)
-        x = self.fc4(x)
+        x = self.fc3(x)
+
         return x
 
 
@@ -89,6 +118,7 @@ def __init__(self):
         self.target_encoder = None
         self.scaler_numerical = None
         self.scaler_categorical = None
+        self.target_scaler = None
 
 
 ml_ops = MLOps()
diff --git a/backend/src/services/inference_srv.py b/backend/src/services/inference_srv.py
index cfbbdd8..92c5372 100644
--- a/backend/src/services/inference_srv.py
+++ b/backend/src/services/inference_srv.py
@@ -24,8 +24,7 @@ async def get_text_features(self, text):
         input_ids = text["input_ids"].to(self.DEVICE)
         attention_mask = text["attention_mask"].to(self.DEVICE)
         with torch.no_grad():
-            outputs = ml_ops.bert_model(input_ids, attention_mask=attention_mask)
-            text_features = outputs.hidden_states[-1].mean(dim=1)
+            text_features = ml_ops.bert_model(input_ids, attention_mask=attention_mask)
         return text_features
 
     async def get_structured_data_features(self, numerical_data, categorical_data):
@@ -56,13 +55,13 @@ async def predict(
         structured_data = await self.get_structured_data_features(numerical_data, categorical_data)
         structured = tensor(structured_data, dtype=float32).to(self.DEVICE)
-        booleans = tensor([[1 if sold_by == "company" else 0, 1 if gearbox == "automatic" else 0]]).to(self.DEVICE)
+        booleans = tensor([[1 if gearbox == "automatic" else 0, 1 if sold_by == "company" else 0]]).to(self.DEVICE)
         structured = cat([structured, booleans], dim=1)
 
         features = cat([image, text, structured], dim=1)
 
         with torch.no_grad():
             prediction = ml_ops.model(features)
 
-        prediction = int(prediction.item())
+        prediction = int(ml_ops.target_scaler.inverse_transform([[prediction.item()]]))
 
         return prediction
diff --git a/backend/src/services/preprocess_srv.py b/backend/src/services/preprocess_srv.py
index dfde319..ed533b7 100644
--- a/backend/src/services/preprocess_srv.py
+++ b/backend/src/services/preprocess_srv.py
@@ -43,12 +43,16 @@ def preprocess_image(self, image_url: str):
         return image
 
     def preprocess_structured_data(self, numerical_data: list, categorical_data: list):
-        categorical_df = pd.DataFrame([categorical_data], columns=["marca", "model", "combustibil", "tip caroserie"])
-        numerical_df = pd.DataFrame([numerical_data], columns=["km", "putere", "capacitate cilindrica", "anul producției"])
+        categorical_df = pd.DataFrame([categorical_data], columns=["manufacturer", "model", "fuel", "chassis"])
+        numerical_df = pd.DataFrame([numerical_data], columns=["km", "power", "engine_capacity", "year"])
         categorical_encoded = ml_ops.target_encoder.transform(categorical_df)
         numerical_scaled = ml_ops.scaler_numerical.transform(numerical_df)
+        numerical_scaled_df = pd.DataFrame(numerical_scaled, columns=["km", "power", "engine_capacity", "year"])
         categorical_scaled = ml_ops.scaler_categorical.transform(categorical_encoded)
-        data = cat([tensor(numerical_scaled, dtype=float32), tensor(categorical_scaled, dtype=float32)], dim=1)
+        categorical_scaled_df = pd.DataFrame(categorical_scaled, columns=["manufacturer", "model", "fuel", "chassis"])
+        df = pd.concat([numerical_scaled_df, categorical_scaled_df], axis=1)
+        df = df[["manufacturer", "model", "year", "km", "power", "engine_capacity", "fuel", "chassis"]]
+        data = tensor(df.values, dtype=float32)
         return data
 
     def tokenize(self, text: str) -> dict:
diff --git a/backend/src/tests/integration_tests/conftest.py b/backend/src/tests/integration_tests/conftest.py
index 2516649..83c60e0 100644
--- a/backend/src/tests/integration_tests/conftest.py
+++ b/backend/src/tests/integration_tests/conftest.py
@@ -33,6 +33,7 @@ def init_models():
         ml_ops.target_encoder,
         ml_ops.scaler_numerical,
         ml_ops.scaler_categorical,
+        ml_ops.target_scaler,
    ) = get_models_and_preprocessors()
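
For reference, a minimal usage sketch of how the components returned by get_models_and_preprocessors() fit together after this change, assuming the input layout implied by the new Net (fc1 takes 1546 features = 768 FastViT features + 768 pooled BERT features + 10 scaled structured/boolean values). The inputs listing_image, listing_text and structured below are dummy placeholders, not code from the repository.

import torch
from PIL import Image

from src.ml import get_models_and_preprocessors

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

(
    fastvit_model,
    transforms,
    tokenizer,
    bert_model,
    model,
    target_encoder,
    scaler_numerical,
    scaler_categorical,
    target_scaler,
) = get_models_and_preprocessors()

listing_image = Image.new("RGB", (256, 256))        # placeholder for a downloaded car photo
listing_text = "placeholder listing description"    # placeholder for the ad text
structured = torch.zeros(1, 10, device=DEVICE)      # placeholder for scaled structured data + the two booleans

image = transforms(listing_image).unsqueeze(0).to(DEVICE)
tokens = tokenizer(listing_text, return_tensors="pt", truncation=True, max_length=512)

with torch.no_grad():
    image_features = fastvit_model(image)  # FastViTEmbedding output, expected shape (1, 768)
    text_features = bert_model(tokens["input_ids"].to(DEVICE), tokens["attention_mask"].to(DEVICE))  # pooled [CLS] output, (1, 768)
    prediction = model(torch.cat([image_features, text_features, structured], dim=1))

# Map the scaled regression output back to a price, as inference_srv now does.
price = target_scaler.inverse_transform([[prediction.item()]])[0][0]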