TIFNGK_E41222719/services.py

334 lines
12 KiB
Python

# import re
# from connection import prisma
# from prisma.enums import Sentiment
# from schemas import ProductCandidate, ProductAnalysisResult
# import config
# import ml_core
# from fastapi import Request
# def clean_product_name(name: str) -> str:
# name = re.sub(r'【.*?】', '', name)
# return name.strip()
# async def process_product_reviews(candidate: ProductCandidate, user_email: str, metric_id: int, brand_id: int, request: Request):
# aspect_stats = {
# aspect: {"positive": 0, "total": 0}
# for aspect in config.ASPECT_KEYWORDS.keys()
# }
# print(f"🔍 Memulai Analisis ABSA: {candidate.name[:30]}...")
# model_db = await prisma.model.find_first(where={"modelName": "Model XGBoost (Optimized)"})
# if not model_db:
# print("❌ ERROR: Model XGBoost tidak ditemukan di database!")
# return None
# user_db = await prisma.user.find_unique(where={"email": user_email})
# if not user_db:
# print(f"⚠️ User {user_email} tidak ditemukan!")
# return None
# brand_name = clean_product_name(candidate.name.split()[0]) if candidate.name.strip() else "Unknown"
# product_name = clean_product_name(candidate.name)
# product_db = await prisma.product.find_first(where={"url": candidate.url})
# if not product_db:
# product_db = await prisma.product.create(
# data={
# "name": product_name,
# "url": candidate.url,
# "brand": {
# "connect_or_create": {
# "where": {"name": brand_name},
# "create": {"name": brand_name}
# }
# },
# }
# )
# total_reviews = len(candidate.reviews)
# if total_reviews == 0: return None
# pos_count, neg_count = 0, 0
# reviews_data_to_save = []
# processed_texts_for_kwd = []
# for raw_text in candidate.reviews:
# if await request.is_disconnected():
# print(f"🛑 ABSA Dibatalkan di tengah jalan untuk {candidate.name}")
# return None
# clean_text = ml_core.preprocess_text(raw_text)
# original_lower = raw_text.lower()
# vec = ml_core.vectorizer.transform([clean_text])
# pred_idx = ml_core.model_optimized.predict(vec)[0]
# label = ml_core.label_encoder.inverse_transform([pred_idx])[0].lower()
# try:
# prob = ml_core.model_optimized.predict_proba(vec)[0]
# confidence_score = float(max(prob))
# except:
# confidence_score = 1.0
# is_positive = (label == "positif")
# if is_positive:
# pos_count += 1
# processed_texts_for_kwd.append(clean_text)
# else:
# neg_count += 1
# detected_keywords_from_review = []
# for aspect, keywords in config.ASPECT_KEYWORDS.items():
# matched_words = [k for k in keywords if k in original_lower or k in clean_text]
# if matched_words:
# detected_keywords_from_review.extend(matched_words)
# aspect_stats[aspect]["total"] += 1
# if is_positive:
# aspect_stats[aspect]["positive"] += 1
# final_keywords = list(set(detected_keywords_from_review))
# reviews_data_to_save.append({
# "content": raw_text,
# "sentiment": Sentiment.POSITIVE if is_positive else Sentiment.NEGATIVE,
# "confidenceScore": confidence_score,
# "keywords": final_keywords,
# "productId": product_db.productId,
# "modelId": model_db.modelId,
# "userId": user_db.id
# })
# if reviews_data_to_save:
# async with prisma.tx() as transaction:
# await transaction.review.delete_many(where={"productId": product_db.productId})
# await transaction.review.create_many(data=reviews_data_to_save)
# final_aspect_scores = {}
# for aspect, stat in aspect_stats.items():
# score = (stat["positive"] / stat["total"] * 100) if stat["total"] > 0 else 0
# final_aspect_scores[aspect] = round(score, 1)
# general_sentiment_pct = (pos_count / total_reviews) * 100
# valid_aspects = {k: v for k, v in final_aspect_scores.items() if aspect_stats[k]["total"] > 0}
# if valid_aspects:
# best_aspect = max(valid_aspects, key=valid_aspects.get)
# worst_aspect = min(valid_aspects, key=valid_aspects.get)
# verdict_summary = f"Produk ini sangat unggul pada aspek {best_aspect.upper()}."
# if valid_aspects[worst_aspect] < 60:
# verdict_summary += f" Namun, pengguna sering mengeluhkan {worst_aspect.upper()}."
# else:
# verdict_summary = "Ulasan umum tidak spesifik membahas aspek teknis tertentu."
# if general_sentiment_pct > 80:
# verdict_label = "Sangat Direkomendasikan"
# elif general_sentiment_pct > 60:
# verdict_label = "Layak Dipertimbangkan"
# else:
# verdict_label = "Kurang Disarankan"
# new_analysis = await prisma.analysis.create(
# data={
# "userId": user_db.id,
# }
# )
# await prisma.metric.create(
# data={
# "generalSentiment": general_sentiment_pct,
# "compatibilityScore": general_sentiment_pct,
# "verdict": verdict_label,
# "topKeywords": [verdict_summary],
# "analysisId": new_analysis.analysisId,
# "productId": product_db.productId,
# "modelId": model_db.modelId,
# }
# )
# return ProductAnalysisResult(
# name=candidate.name,
# url=candidate.url,
# general_score=general_sentiment_pct,
# aspect_scores=final_aspect_scores,
# total_reviews=total_reviews,
# description=verdict_summary,
# positive_count=pos_count,
# negative_count=neg_count,
# verdict=verdict_label
# )
import re
from connection import prisma
from prisma.enums import Sentiment
from schemas import ProductCandidate, ProductAnalysisResult
import config
import ml_core
from fastapi import Request
def clean_product_name(name: str) -> str:
name = re.sub(r'【.*?】', '', name)
return name.strip()
async def process_product_reviews(candidate: ProductCandidate, user_email: str, metric_id: int, brand_id: int, request: Request):
user_db = await prisma.user.find_unique(where={"email": user_email})
if not user_db:
print(f"⚠️ User tidak ditemukan untuk email: {user_email}")
raise ValueError(f"User dengan email {user_email} tidak ditemukan di database")
aspect_stats = {
aspect: {"positive": 0, "total": 0}
for aspect in config.ASPECT_KEYWORDS.keys()
}
print(f"🔍 Memulai Analisis ABSA: {candidate.name[:30]}...")
model_db = await prisma.model.find_first(where={"modelName": "Model XGBoost (Optimized)"})
if not model_db:
print("❌ ERROR: Model XGBoost (Optimized) tidak ditemukan di database!")
return None
user_db = await prisma.user.find_unique(where={"email": user_email})
if not user_db:
print(f"⚠️ User {user_email} tidak ditemukan!")
return None
brand_name = clean_product_name(candidate.name.split()[0]) if candidate.name.strip() else "Unknown"
product_name = clean_product_name(candidate.name)
product_db = await prisma.product.find_first(where={"url": candidate.url})
if not product_db:
product_db = await prisma.product.create(
data={
"name": product_name,
"url": candidate.url,
"brand": {
"connect_or_create": {
"where": {"name": brand_name},
"create": {"name": brand_name}
}
},
}
)
total_reviews = len(candidate.reviews)
if total_reviews == 0: return None
pos_count, neg_count = 0, 0
reviews_data_to_save = []
processed_texts_for_kwd = []
for raw_text in candidate.reviews:
if await request.is_disconnected():
print(f"🛑 ABSA Dibatalkan di tengah jalan untuk {candidate.name}")
return None
clean_text = ml_core.preprocess_text(raw_text)
original_lower = raw_text.lower()
vec = ml_core.vectorizer.transform([clean_text])
pred_idx = ml_core.model_optimized.predict(vec)[0]
label = ml_core.label_encoder.inverse_transform([pred_idx])[0].lower()
try:
prob = ml_core.model_optimized.predict_proba(vec)[0]
confidence_score = float(max(prob))
except:
confidence_score = 1.0
is_positive = (label == "positif")
if is_positive:
pos_count += 1
processed_texts_for_kwd.append(clean_text)
else:
neg_count += 1
detected_keywords_from_review = []
for aspect, keywords in config.ASPECT_KEYWORDS.items():
matched_words = [k for k in keywords if k in original_lower or k in clean_text]
if matched_words:
detected_keywords_from_review.extend(matched_words)
aspect_stats[aspect]["total"] += 1
if is_positive:
aspect_stats[aspect]["positive"] += 1
final_keywords = list(set(detected_keywords_from_review))
reviews_data_to_save.append({
"content": raw_text,
"sentiment": Sentiment.POSITIVE if is_positive else Sentiment.NEGATIVE,
"confidenceScore": confidence_score,
"keywords": final_keywords,
"productId": product_db.productId,
"modelId": model_db.modelId,
"userId": user_db.id
})
if reviews_data_to_save:
async with prisma.tx() as transaction:
await transaction.review.delete_many(where={"productId": product_db.productId})
await transaction.review.create_many(data=reviews_data_to_save)
final_aspect_scores = {}
for aspect, stat in aspect_stats.items():
score = (stat["positive"] / stat["total"] * 100) if stat["total"] > 0 else 0
final_aspect_scores[aspect] = round(score, 1)
general_sentiment_pct = (pos_count / total_reviews) * 100
valid_aspects = {k: v for k, v in final_aspect_scores.items() if aspect_stats[k]["total"] > 0}
if valid_aspects:
best_aspect = max(valid_aspects, key=valid_aspects.get)
worst_aspect = min(valid_aspects, key=valid_aspects.get)
verdict_summary = f"Produk ini sangat unggul pada aspek {best_aspect.upper()}."
if valid_aspects[worst_aspect] < 60:
verdict_summary += f" Namun, pengguna sering mengeluhkan {worst_aspect.upper()}."
else:
verdict_summary = "Ulasan umum tidak spesifik membahas aspek teknis tertentu."
if general_sentiment_pct > 80:
verdict_label = "Sangat Direkomendasikan"
elif general_sentiment_pct > 60:
verdict_label = "Layak Dipertimbangkan"
else:
verdict_label = "Kurang Disarankan"
new_analysis = await prisma.analysis.create(
data={
"userId": user_db.id,
}
)
await prisma.metric.create(
data={
"generalSentiment": general_sentiment_pct,
"compatibilityScore": general_sentiment_pct,
"verdict": verdict_label,
"topKeywords": [verdict_summary],
"analysisId": new_analysis.analysisId,
"productId": product_db.productId,
"modelId": model_db.modelId,
}
)
return ProductAnalysisResult(
name=candidate.name,
url=candidate.url,
general_score=general_sentiment_pct,
aspect_scores=final_aspect_scores,
total_reviews=total_reviews,
description=verdict_summary,
positive_count=pos_count,
negative_count=neg_count,
verdict=verdict_label
)