Upload files to "/"
commit f43ab0695c

@@ -0,0 +1,95 @@
from flask import Flask, render_template, Response, jsonify
import numpy as np
import os
import cv2
import tensorflow as tf
from keras import models, utils, backend as K
from custom_object import YOLOLoss, YOLOActivation
from function import predictImage
import time


os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

app = Flask(__name__)
camera = cv2.VideoCapture(0)
prediction_text = ''  # initialised so /stream and /fetchtext do not fail before the first frame

bio = [
    {
        'penulis': 'Adilah Qurrotu\'aini',
        'angkatan': '2020',
        'prodi': 'Teknik Informatika',
        'jurusan': 'Teknologi Informasi',
        'ptn': 'Politeknik Negeri Jember',
        'email': 'adilah.qurrotu.aini@gmail.com'
    }
]

# Load YOLOv2 model
model = tf.keras.models.load_model('model/SIBIDetection_YOLOv2_780_2.h5', custom_objects={'YOLOLoss': YOLOLoss, 'YOLOActivation': YOLOActivation})

# Load hand gesture recognition model
# gesture_model = tf.keras.models.load_model('model/SIBIDetection_YOLOv2_780_2.h5')

# Load anchor box dimensions
anchor_box_dimensions = np.array([[7.34375, 7.96875],
                                  [6.65625, 10.46875],
                                  [9.28125, 10.40625],
                                  [5.34375, 9.21875],
                                  [10.28125, 6.65625]])


def generate_frames():
    # Read webcam frames, run the detector, and yield annotated JPEGs as an MJPEG stream
    global prediction_text
    while True:
        success, frame = camera.read()
        if not success:
            break
        else:
            grayscale_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            grayscale_frame = cv2.cvtColor(grayscale_frame, cv2.COLOR_GRAY2BGR)
            predicted_img, class_pred = predictImage(model, grayscale_frame, threshold = 0.1)
            ret, buffer = cv2.imencode('.jpg', predicted_img)
            frame = buffer.tobytes()
            prediction_text = chr(65 + class_pred)
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


current_index = 0


# Function for streaming the predicted text
def generate_text():
    global prediction_text
    yield prediction_text


@app.route('/stream')
def stream():
    return Response(generate_text(), mimetype='text/html')


@app.route('/video')
def video():
    return Response(generate_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')


@app.route('/fetchtext')
def fetchtext():
    #return jsonify({'prediction': prediction_text})
    return Response(generate_text(), content_type='text/plain')


@app.route('/')
def home():
    return render_template('home.html', title='Beranda')


@app.route('/dictionary')
def dictionary():
    return render_template('dictionary.html', title='Kamus Isyarat')


@app.route('/detection')
def detection():
    return render_template('detection.html', title='Deteksi Isyarat')


@app.route('/about')
def about():
    return render_template('about.html', title='Tentang Penulis', bio=bio)


if __name__ == "__main__":
    app.run(debug=True)
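For reference, a minimal client sketch for the plain-text prediction endpoint above. It assumes the app is running locally on Flask's default port 5000 and that the `requests` package is available; neither is specified in the commit, and the file name is hypothetical.

# poll_predictions.py -- illustrative sketch; mirrors the template's 500 ms polling loop.
import time
import requests

def poll_predictions(url='http://127.0.0.1:5000/fetchtext', interval=0.5, count=10):
    # Fetch the latest predicted letter a few times and print it.
    for _ in range(count):
        resp = requests.get(url, timeout=5)
        if resp.ok and resp.text:
            print('Predicted letter:', resp.text.strip())
        time.sleep(interval)

if __name__ == '__main__':
    poll_predictions()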
@@ -0,0 +1,26 @@
import tensorflow as tf
import numpy as np
# from keras import utils
# from tensorflow.keras.utils.generic_utils import get_custom_objects
from yololoss import YOLOLoss


class YOLOActivation(tf.keras.layers.Layer):
    def __init__(self, name = 'yolo_activation', **kwargs):
        super(YOLOActivation, self).__init__(name = name)

    def call(self, inputs):
        # Confidence score: sigmoid keeps it in (0, 1)
        conf = tf.sigmoid(inputs[..., 0])
        conf = tf.expand_dims(conf, axis = -1)

        # Box deltas (x, y, w, h): sigmoid keeps them in (0, 1)
        coord = tf.sigmoid(inputs[..., 1:5])

        # Class scores: softmax over the class axis
        class_probs = tf.nn.softmax(inputs[..., 5:])

        outputs = tf.concat([conf, coord, class_probs], axis = -1)

        return outputs


tf.keras.utils.get_custom_objects().update({'YOLOActivation': YOLOActivation})
tf.keras.utils.get_custom_objects().update({'YOLOLoss': YOLOLoss})

model = tf.keras.models.load_model('model/SIBIDetection_YOLOv2_780_2.h5')
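A quick shape sanity check for the activation layer defined above, as a sketch. The 13x13 grid and 5 anchors match the rest of this commit; the 26-class count (one per letter, since predictions are rendered as chr(65 + class)) is an assumption.

# Sanity-check sketch for YOLOActivation (the class count of 26 is assumed).
import tensorflow as tf
from custom_object import YOLOActivation

raw = tf.random.normal((1, 13, 13, 5, 5 + 26))   # raw detection-head output
act = YOLOActivation()(raw)

conf = act[..., 0:1]        # sigmoid -> values in (0, 1)
coord = act[..., 1:5]       # sigmoid -> values in (0, 1)
class_probs = act[..., 5:]  # softmax -> sums to 1 along the last axis

print(conf.shape, coord.shape, class_probs.shape)
print(float(tf.reduce_max(tf.abs(tf.reduce_sum(class_probs, axis=-1) - 1.0))))  # approximately 0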
@@ -0,0 +1,78 @@
{% extends "index.html" %}
{% block content %}

<h4 class="title" style="font-family: 'Roboto';
                         font-size: 28px;
                         color: #FFFFFF;
                         padding: 12px 10px;
                         border-radius: 12px;
                         background-color: #007BFF;">
    Sistem Isyarat Bahasa Indonesia / {% if title %} {{ title }} {% endif %}</h4>

<img id="video_stream" src="{{ url_for('video') }}" width="640" height="480">

<button id="reloadPage" onclick="reloadPage()"
        style="padding: 10px;
               width: fit-content;
               border-radius: 10px;
               background-color: #007BFF;
               color: #FFFFFF;
               border: none;
               outline: none;">
    Segarkan</button>

<!-- <div>
    <h2>Output</h2>
    <p id="text">Result : </p>
</div> -->

<div class="form-floating mb-3">
    <textarea class="form-control" placeholder="Leave a comment here" id="floatingText" style="height: 100px" disabled>Result : </textarea>
    <label for="floatingText" style="padding-top: 20px;">Output</label>
</div>

<script>
    // Access the camera using WebRTC
    function loadCamera(){
        navigator.mediaDevices.getUserMedia({ video: true })
            .then(function(stream) {
                var videoElement = document.getElementById('videoElement');
                videoElement.srcObject = stream;
            })
            .catch(function(err) {
                console.log("Error: " + err);
            });
        document.getElementById("startCameraBtn").style.display = "none";
        document.getElementById("reloadPage").style.display = "block";
    }

    function fetchStreamedText() {
        var xhr = new XMLHttpRequest();
        xhr.open('GET', '/stream', true);

        xhr.onreadystatechange = function() {
            if (xhr.readyState === XMLHttpRequest.DONE) {
                if (xhr.status === 200) {
                    var newText = xhr.responseText; // latest predicted letter from /stream
                    document.getElementById("floatingText").innerHTML += newText;
                }
            }
        };

        xhr.send();
    }
    // Call fetchStreamedText every 500 ms
    setInterval(fetchStreamedText, 500);

    // Object detection inside the video
    // TensorFlow.js or OpenCV.js could be used here, for example:
    // const model = await tf.loadModel('model_url');
    // const predictions = await model.predict(videoElement);

    function reloadPage() {
        location.reload();
    }
</script>

{% endblock %}
@@ -0,0 +1,199 @@
import cv2
import numpy as np
import tensorflow as tf
import math

anchor_grid = np.array([
    [ 7.34375,  7.96875],
    [ 6.65625, 10.46875],
    [ 9.28125, 10.40625],
    [ 5.34375,  9.21875],
    [10.28125,  6.65625]
])

min_max_delta_xy = (-0.1, 0.1)
min_max_delta_wh = (-1.21, 1.21)


def getCoordinate(x, y, w, h):
    # Convert a centre point and size into corner coordinates
    w_half = w // 2
    h_half = h // 2

    x1 = int(x - w_half)
    y1 = int(y - h_half)
    x2 = int(x + w_half)
    y2 = int(y + h_half)

    return x1, y1, x2, y2


def getBoxSize(x1, y1, x2, y2):
    w = x2 - x1
    h = y2 - y1

    return w, h


def getScale(currentSize, targetSize):
    w, h = currentSize
    w1, h1 = targetSize

    scale_w = w1 / w
    scale_h = h1 / h

    return scale_w, scale_h


def getGridIndex(xc, yc):
    # Index of the 32x32 grid cell that contains the box centre
    cx = xc // 32
    cy = yc // 32

    return cx, cy


def convToGridCoord(xc, yc, cx, cy):
    # Box centre relative to its grid cell, in cell units
    xg = (xc - (cx * 32)) / 32
    yg = (yc - (cy * 32)) / 32

    return xg, yg


def convToGridSize(w, h):
    wg = w / 32
    hg = h / 32

    return wg, hg


def convToImageCoord(xg, yg, cx, cy):
    x = int(round(32 * xg + 32 * cx))
    y = int(round(32 * yg + 32 * cy))

    return x, y


def convToImageSize(wg, hg):
    w_img = int(round(wg * 32))
    h_img = int(round(hg * 32))

    return w_img, h_img


def getDelta(xg, yg, wg, hg, w_anchorGrid, h_anchorGrid):
    # Encode a grid-space box as offsets relative to an anchor box
    delta_x = (xg - 0.5) / w_anchorGrid
    delta_y = (yg - 0.5) / h_anchorGrid
    delta_w = math.log(wg / w_anchorGrid)
    delta_h = math.log(hg / h_anchorGrid)

    return delta_x, delta_y, delta_w, delta_h


def convDeltaCoord(delta_x, delta_y, w_anchorGrid, h_anchorGrid, cx, cy):
    # Decode anchor-relative offsets back to an image-space centre
    xg = delta_x * w_anchorGrid + 0.5
    yg = delta_y * h_anchorGrid + 0.5

    x, y = convToImageCoord(xg, yg, cx, cy)

    return x, y


def convDeltaSize(delta_w, delta_h, w_anchorGrid, h_anchorGrid):
    # Decode anchor-relative log offsets back to an image-space size
    wg = w_anchorGrid * np.exp(delta_w)
    hg = h_anchorGrid * np.exp(delta_h)

    w, h = convToImageSize(wg, hg)

    return w, h


# normalize: map a delta value into the 0-1 range
def normalize_delta(val, min_val, max_val):
    return (val - min_val) / (max_val - min_val)


# denormalize: map a normalized delta back to its original range
def denormalize_delta(val, min_val, max_val):
    return val * (max_val - min_val) + min_val
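# A round-trip sketch of the encode/decode helpers above (not called anywhere;
# the box values are illustrative only): encode a box against anchor 0 with
# getDelta, then recover it with convDeltaCoord / convDeltaSize.
def _delta_roundtrip_demo():
    xc, yc, w, h = 208, 176, 120, 150          # hypothetical box centre/size in image pixels
    cx, cy = getGridIndex(xc, yc)              # grid cell that owns the centre
    xg, yg = convToGridCoord(xc, yc, cx, cy)   # centre in cell-relative units
    wg, hg = convToGridSize(w, h)              # size in grid units

    w_a, h_a = anchor_grid[0]
    delta_x, delta_y, delta_w, delta_h = getDelta(xg, yg, wg, hg, w_a, h_a)

    x_dec, y_dec = convDeltaCoord(delta_x, delta_y, w_a, h_a, cx, cy)
    w_dec, h_dec = convDeltaSize(delta_w, delta_h, w_a, h_a)

    # (208, 176, 120, 150) comes back unchanged, up to the int(round(...)) steps
    print((xc, yc, w, h), '->', (x_dec, y_dec, w_dec, h_dec))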


def showPredictionResult(image, y_pred, ground_truth = None, threshold = 0.5):
    img = np.copy(image)
    class_pred = 0  # default class index, used when no box passes the threshold

    for i in range(13):
        for j in range(13):
            for b in range(5):
                cur_y_pred = y_pred[i, j, b]

                conf = cur_y_pred[0]

                if conf < threshold:
                    continue

                colors = [
                    (1, 0, 0),  # red
                    (0, 1, 0),  # green
                    (1, 1, 0)   # yellow
                ]

                delta_x = denormalize_delta(cur_y_pred[1], *min_max_delta_xy)  # denormalize delta_x
                delta_y = denormalize_delta(cur_y_pred[2], *min_max_delta_xy)  # denormalize delta_y
                delta_w = denormalize_delta(cur_y_pred[3], *min_max_delta_wh)  # denormalize delta_w
                delta_h = denormalize_delta(cur_y_pred[4], *min_max_delta_wh)  # denormalize delta_h

                x, y = convDeltaCoord(delta_x, delta_y, *anchor_grid[b], i, j)
                w, h = convDeltaSize(delta_w, delta_h, *anchor_grid[b])

                x1, y1, x2, y2 = getCoordinate(x, y, w, h)

                class_pred = np.argmax(cur_y_pred[5:])  # cur_y_pred[5:] holds the score for each class

                # Draw a background box for the label text
                text_size, _ = cv2.getTextSize(chr(65 + class_pred), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv2.rectangle(img, (x1, y1 - text_size[1] - 5), (x1 + text_size[0] + 5, y1), (1, 1, 1), -1)

                # Add the predicted class as text
                class_text = chr(65 + class_pred)  # convert the class index into an uppercase letter
                cv2.putText(img, class_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 1, 0), 2)

                if ground_truth is not None:
                    cur_gt = ground_truth[i, j, b]

                    gt_delta_x = denormalize_delta(cur_gt[1], *min_max_delta_xy)  # denormalize delta_x
                    gt_delta_y = denormalize_delta(cur_gt[2], *min_max_delta_xy)  # denormalize delta_y
                    gt_delta_w = denormalize_delta(cur_gt[3], *min_max_delta_wh)  # denormalize delta_w
                    gt_delta_h = denormalize_delta(cur_gt[4], *min_max_delta_wh)  # denormalize delta_h

                    gt_x, gt_y = convDeltaCoord(gt_delta_x, gt_delta_y, *anchor_grid[b], i, j)
                    gt_w, gt_h = convDeltaSize(gt_delta_w, gt_delta_h, *anchor_grid[b])

                    gt_x1, gt_y1, gt_x2, gt_y2 = getCoordinate(gt_x, gt_y, gt_w, gt_h)

                    cv2.rectangle(img, (gt_x1, gt_y1), (gt_x2, gt_y2), (1, 0, 0.95), 1)

    return img, class_pred


def predictImage(model, image, threshold = 0.5):
    img = np.copy(image)

    img = cv2.resize(img, (416, 416))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img / 255
    img = tf.expand_dims(img, axis = 0)

    y_pred = model.predict(img)

    # Keep only the single most confident box by using its confidence as the threshold
    max_conf = tf.reduce_max(y_pred[..., 0], keepdims = True)

    img, class_pred = showPredictionResult(img[0].numpy(), y_pred[0], threshold = max_conf)
    img = (img * 255).astype(np.uint8)

    return img, class_pred


def generate_html(predictions):
    predictions_str = ''.join(predictions)
    html_content = f'''
    <html>
    <head>
        <title>Hasil Prediksi</title>
    </head>
    <body>
        <h1>Huruf yang Diprediksi</h1>
        <p>{predictions_str}</p>
    </body>
    </html>
    '''

    return html_content
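A minimal offline usage sketch for predictImage, assuming the model file from this commit and the custom objects defined alongside it; the image and output paths are placeholders, not files from the commit.

# Offline usage sketch for predictImage (image paths are placeholders).
import cv2
import tensorflow as tf
from custom_object import YOLOLoss, YOLOActivation
from function import predictImage

model = tf.keras.models.load_model(
    'model/SIBIDetection_YOLOv2_780_2.h5',
    custom_objects={'YOLOLoss': YOLOLoss, 'YOLOActivation': YOLOActivation})

frame = cv2.imread('sample_hand.jpg')               # hypothetical test image
annotated, class_idx = predictImage(model, frame, threshold = 0.1)
print('Predicted letter:', chr(65 + class_idx))
cv2.imwrite('annotated.jpg', annotated)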
@@ -0,0 +1,120 @@
import tensorflow as tf
import numpy as np


class YOLOLoss(tf.keras.losses.Loss):
    def __init__(self, name='yolo_loss', *args, **kwargs):
        super(YOLOLoss, self).__init__(name = name)

        self.anchor_box = np.array([[ 7.34375,  7.96875],
                                    [ 6.65625, 10.46875],
                                    [ 9.28125, 10.40625],
                                    [ 5.34375,  9.21875],
                                    [10.28125,  6.65625]])
        self.lambda_coord = 5.0
        self.lambda_noobj = 1.0
        self.threshold = 0.5
        self.min_max_delta_xy = (-0.1, 0.1)
        self.min_max_delta_wh = (-1.21, 1.21)

        # Grid-cell indices for the 13x13 feature map, tiled over the 5 anchors
        cy, cx = tf.meshgrid(tf.range(13), tf.range(13))

        self.cell = tf.stack([cx, cy], axis = -1)
        self.cell = tf.expand_dims(self.cell, axis = -2)
        self.cell = tf.tile(self.cell, [1, 1, 5, 1])
        self.cell = tf.cast(self.cell, tf.float32)

    def call(self, y_true, y_pred):
        true_conf = y_true[..., 0]
        pred_conf = y_pred[..., 0]
        pred_conf = tf.cast(pred_conf, tf.float32)

        true_coord = y_true[..., 1:5]
        pred_coord = y_pred[..., 1:5]

        true_xy = y_true[..., 1:3]
        pred_xy = y_pred[..., 1:3]

        true_wh = y_true[..., 3:5]
        pred_wh = y_pred[..., 3:5]

        true_prob = y_true[..., 5:]
        pred_prob = y_pred[..., 5:]

        # 1 where an object is assigned to the anchor, 0 elsewhere
        objectness = tf.where(true_conf == 1, 1., 0.)
        objectness = tf.cast(objectness, tf.float32)

        ious = self.iou(true_conf, true_coord, pred_conf, pred_coord)
        ious = tf.cast(ious, tf.float32)

        # Localisation loss (centre and size), weighted by lambda_coord
        xy_loss = tf.reduce_sum(tf.square(pred_xy - true_xy), axis = -1)
        xy_loss = tf.cast(xy_loss, tf.float32)
        xy_loss = self.lambda_coord * tf.reduce_sum(objectness * xy_loss)

        wh_loss = tf.reduce_sum(tf.square(tf.sqrt(pred_wh) - tf.sqrt(true_wh)), axis = -1)
        wh_loss = tf.cast(wh_loss, tf.float32)
        wh_loss = self.lambda_coord * tf.reduce_sum(objectness * wh_loss)

        coord_loss = xy_loss + wh_loss

        # Confidence loss: predicted confidence should match the IoU for object cells
        object_loss = tf.reduce_sum(objectness * tf.square(pred_conf - ious))
        object_loss = tf.cast(object_loss, tf.float32)

        no_object_loss = self.lambda_noobj * tf.reduce_sum((1 - objectness) * tf.square(pred_conf - 0))
        no_object_loss = tf.cast(no_object_loss, tf.float32)

        # Classification loss, only for cells that contain an object
        class_loss = tf.reduce_sum(tf.square(pred_prob - true_prob), axis = -1)
        class_loss = tf.cast(class_loss, tf.float32)
        class_loss = tf.reduce_sum(objectness * class_loss)

        total_loss = coord_loss + object_loss + no_object_loss + class_loss

        return total_loss

    def convXY(self, delta_xy):
        # Decode anchor-relative centre offsets to image-space coordinates
        xy_grid = delta_xy * self.anchor_box + 0.5
        xy = 32 * xy_grid + 32 * self.cell

        return tf.round(xy)

    def convWH(self, delta_wh):
        # Decode anchor-relative log offsets to image-space sizes
        wh_grid = self.anchor_box * tf.math.exp(delta_wh)
        wh = wh_grid * 32

        return tf.round(wh)

    def iou(self, true_conf, true_coord, pred_conf, pred_coord):
        true_delta_xy = true_coord[..., :2]
        pred_delta_xy = pred_coord[..., :2]

        true_delta_wh = true_coord[..., 2:]
        pred_delta_wh = pred_coord[..., 2:]

        true_xy = self.convXY(true_delta_xy)
        true_wh = self.convWH(true_delta_wh)

        pred_xy = self.convXY(pred_delta_xy)
        pred_wh = self.convWH(pred_delta_wh)

        x1, y1 = true_xy[..., 0], true_xy[..., 1]
        w1, h1 = true_wh[..., 0], true_wh[..., 1]

        x2, y2 = pred_xy[..., 0], pred_xy[..., 1]
        w2, h2 = pred_wh[..., 0], pred_wh[..., 1]

        intersection = tf.math.minimum(w1, w2)
        intersection *= tf.math.minimum(h1, h2)

        area1 = w1 * h1
        area2 = w2 * h2

        union = area1 + area2 - intersection

        iou = intersection / union

        return iou

    def normalize_delta(self, val, min_val, max_val):
        return (val - min_val) / (max_val - min_val)

    def denormalize_delta(self, val, min_val, max_val):
        return val * (max_val - min_val) + min_val
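A brief sketch of how this loss could be attached at training time. The optimizer choice and learning rate are assumptions, and `model` stands for whatever detection network produces the 13x13x5x(5+classes) output this loss expects; the training code itself is not part of the commit.

# Training-time sketch (optimizer and learning rate are assumptions).
import tensorflow as tf
from yololoss import YOLOLoss

def compile_with_yolo_loss(model: tf.keras.Model) -> tf.keras.Model:
    # Attach the custom loss; the model itself is built elsewhere.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss=YOLOLoss())
    return model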