GG: Gaze Gauge β version
This program identifies individual people and estimates, for each person, the time spent gazing at the camera, gender, age, and one of seven emotions (angry, disgust, fear, happy, sad, surprise, neutral). It has been confirmed to run on macOS Sonoma 14.5 (MacBook Pro 16-inch, 2021, Python 3.11.11) and on Windows 11 (GE73 Raider RGB 8RF-001JP, Python 3.11.9). Use it at your own risk.
When a face is detected, a bounding box appears around it. While the person is gazing at the camera, the box turns green and the recognition result (in the figure below, "ID 0: 292.8s, Man, 33.2y, happy") is shown above the box.
When the person is not gazing at the camera, the box turns red.
This is what it looks like while running.
On Windows it looks like this.
Some prior experience, such as having run Python on your own PC before, is probably required.
Python and several libraries need to be installed. On macOS, after installing Python, type the following command in Terminal to install the libraries.
pip install opencv-python mediapipe face_recognition numpy deepface PyQt5 tf-keras
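If you want to confirm that everything installed correctly, a quick import check like the one below should finish without errors (a minimal sketch; it simply imports the packages from the pip command above and prints a few version numbers).

import cv2
import mediapipe
import face_recognition
import numpy
import deepface
import PyQt5
print("OpenCV", cv2.__version__)
print("MediaPipe", mediapipe.__version__)
print("NumPy", numpy.__version__)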
On Windows, after installing Python, first install a precompiled Dlib. Download the whl file that matches your Python version from Dlib_Windows_Python3.x and install it. For example, for Python 3.11, download dlib-19.24.1-cp311-cp311-win_amd64.whl and install it with the following command.
python -m pip install dlib-19.24.1-cp311-cp311-win_amd64.whl
After that, type the following command in Command Prompt to install the remaining libraries.
pip install opencv-python mediapipe face_recognition numpy deepface PyQt5 tf-keras msvc-runtime
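As a quick sanity check (a minimal sketch, assuming the wheel above was installed), dlib should import and report the version from the wheel's file name, and face_recognition, which depends on dlib, should import as well.

import dlib
import face_recognition
print("dlib", dlib.__version__)   # expected to print 19.24.1 for the wheel above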
To run the program, just type the following command in Terminal or Command Prompt to execute gg4.py.
python ./gg4.py
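If the program stops with a message that the camera cannot be opened, the camera index chosen in the GUI (0, 1, or 2) may not exist on your machine. The following sketch (an illustration only, using the same OpenCV API as gg4.py) reports which of those indices can actually be opened.

import cv2
for index in range(3):  # gg4.py lets you pick camera index 0, 1, or 2
    cap = cv2.VideoCapture(index)
    print(index, "available" if cap.isOpened() else "not available")
    cap.release()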
The full source of gg4.py is shown below.
import sys
import os
import cv2
import time
import csv
import face_recognition
import numpy as np
from PyQt5 import QtWidgets, QtCore, QtGui
# Import the DeepFace library
from deepface import DeepFace
import mediapipe as mp
# ----- Suppress unnecessary log output -----
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["GLOG_minloglevel"] = "3"
try:
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
except ImportError:
pass
# ----- Initial parameters for gaze detection -----
GAZE_LOWER_THRESHOLD = 0.40
GAZE_UPPER_THRESHOLD = 0.60
def compute_gaze_ratio(iris_center_x, eye_left_x, eye_right_x):
if eye_right_x - eye_left_x == 0:
return 0.5
return (iris_center_x - eye_left_x) / (eye_right_x - eye_left_x)
def is_looking_center(left_ratio, right_ratio, lower=GAZE_LOWER_THRESHOLD, upper=GAZE_UPPER_THRESHOLD):
"""左右の虹彩中心の比率が指定範囲内なら「注視中」と判定"""
return lower < left_ratio < upper and lower < right_ratio < upper
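# Worked example (illustration only): if the eye corners are at x = 100 and x = 160 and the
# iris center is at x = 130, compute_gaze_ratio() returns (130 - 100) / (160 - 100) = 0.5.
# That value lies between GAZE_LOWER_THRESHOLD (0.40) and GAZE_UPPER_THRESHOLD (0.60),
# so is_looking_center() reports the eye as looking toward the camera.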
# ----- Person identification and gaze-time bookkeeping -----
persons = {}
next_person_id = 0
FACE_TOLERANCE = 0.5  # initial value
class MainWindow(QtWidgets.QWidget):
def __init__(self, parent=None):
super(MainWindow, self).__init__(parent)
self.setWindowTitle("Gaze Tracking with DeepFace")
self.setStyleSheet("font-size: 14px;")
        # --- Initial parameter values ---
self.gaze_lower_threshold = 0.40
self.gaze_upper_threshold = 0.60
self.face_tolerance = 0.5
        # --- DeepFace processing-interval widget ---
self.deepface_interval_spin = QtWidgets.QSpinBox()
self.deepface_interval_spin.setRange(1, 60)
self.deepface_interval_spin.setSingleStep(1)
        self.deepface_interval_spin.setValue(10)  # e.g. run DeepFace once every 10 frames
self.deepface_interval_spin.valueChanged.connect(self.update_parameters)
self.deepface_interval = self.deepface_interval_spin.value()
        # --- Video display area ---
self.video_label = QtWidgets.QLabel()
self.video_label.setFixedSize(960, 540)
        # --- Camera settings group ---
camera_settings_box = QtWidgets.QGroupBox("Camera Settings")
cam_select_label = QtWidgets.QLabel("カメラ選択:")
self.camera_combo = QtWidgets.QComboBox()
self.camera_combo.addItems(["0", "1", "2"])
self.camera_combo.currentIndexChanged.connect(self.on_camera_changed)
res_select_label = QtWidgets.QLabel("解析解像度:")
self.resolution_combo = QtWidgets.QComboBox()
self.resolution_combo.addItems(["1280x720", "1920x1080"])
self.resolution_combo.currentIndexChanged.connect(self.on_resolution_changed)
cam_settings_layout = QtWidgets.QFormLayout()
cam_settings_layout.addRow(cam_select_label, self.camera_combo)
cam_settings_layout.addRow(res_select_label, self.resolution_combo)
cam_settings_layout.setLabelAlignment(QtCore.Qt.AlignLeft)
camera_settings_box.setLayout(cam_settings_layout)
        # --- Parameter settings group ---
param_group = QtWidgets.QGroupBox("Parameter Settings")
self.gaze_lower_spin = QtWidgets.QDoubleSpinBox()
self.gaze_lower_spin.setRange(0.0, 1.0)
self.gaze_lower_spin.setSingleStep(0.01)
self.gaze_lower_spin.setValue(self.gaze_lower_threshold)
self.gaze_lower_spin.valueChanged.connect(self.update_parameters)
self.gaze_upper_spin = QtWidgets.QDoubleSpinBox()
self.gaze_upper_spin.setRange(0.0, 1.0)
self.gaze_upper_spin.setSingleStep(0.01)
self.gaze_upper_spin.setValue(self.gaze_upper_threshold)
self.gaze_upper_spin.valueChanged.connect(self.update_parameters)
self.face_tol_spin = QtWidgets.QDoubleSpinBox()
self.face_tol_spin.setRange(0.0, 1.0)
self.face_tol_spin.setSingleStep(0.01)
self.face_tol_spin.setValue(self.face_tolerance)
self.face_tol_spin.valueChanged.connect(self.update_parameters)
self.max_faces_spin = QtWidgets.QSpinBox()
self.max_faces_spin.setRange(1, 10)
self.max_faces_spin.setValue(5)
self.max_faces_spin.valueChanged.connect(self.update_parameters)
param_layout = QtWidgets.QFormLayout()
param_layout.addRow("注視下限:", self.gaze_lower_spin)
param_layout.addRow("注視上限:", self.gaze_upper_spin)
param_layout.addRow("顔照合閾値:", self.face_tol_spin)
param_layout.addRow("最大検出顔数:", self.max_faces_spin)
param_layout.addRow("DeepFace処理間隔:", self.deepface_interval_spin)
param_layout.setLabelAlignment(QtCore.Qt.AlignLeft)
param_group.setLayout(param_layout)
        # --- Exit button ---
self.exit_button = QtWidgets.QPushButton("終了")
self.exit_button.setFixedHeight(40)
self.exit_button.clicked.connect(self.close_app)
        # --- Right-hand control panel ---
        # Camera settings, parameter settings and the exit button are stacked vertically.
controls_widget = QtWidgets.QWidget()
controls_layout = QtWidgets.QVBoxLayout()
controls_layout.addWidget(camera_settings_box)
controls_layout.addWidget(param_group)
controls_layout.addStretch()
controls_layout.addWidget(self.exit_button)
controls_widget.setLayout(controls_layout)
        controls_widget.setFixedWidth(300)  # fixed width so the empty space does not grow
        # --- Overall layout: video on the left, controls on the right ---
main_layout = QtWidgets.QHBoxLayout()
main_layout.addWidget(self.video_label)
main_layout.addWidget(controls_widget)
self.setLayout(main_layout)
        # ----- Initialize camera capture -----
self.cap = None
self.current_resolution = (1280, 720)
self.init_camera(int(self.camera_combo.currentText()), self.current_resolution)
self.prev_time = time.time()
        # ----- Initialize MediaPipe Face Mesh (max number of faces follows the UI setting) -----
self.max_num_faces = self.max_faces_spin.value()
self.face_mesh = mp.solutions.face_mesh.FaceMesh(
static_image_mode=False,
max_num_faces=self.max_num_faces,
refine_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5
)
        # Frame counter (used to time DeepFace analysis)
self.frame_count = 0
        # ----- Update frames with a timer (roughly 33 fps) -----
self.timer = QtCore.QTimer()
self.timer.timeout.connect(self.update_frame)
self.timer.start(30)
def update_parameters(self):
"""パラメータ設定ウィジェットの値をインスタンス変数に反映"""
self.gaze_lower_threshold = self.gaze_lower_spin.value()
self.gaze_upper_threshold = self.gaze_upper_spin.value()
self.face_tolerance = self.face_tol_spin.value()
self.deepface_interval = self.deepface_interval_spin.value()
new_max = self.max_faces_spin.value()
if new_max != self.max_num_faces:
self.max_num_faces = new_max
self.face_mesh.close()
self.face_mesh = mp.solutions.face_mesh.FaceMesh(
static_image_mode=False,
max_num_faces=self.max_num_faces,
refine_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5
)
def init_camera(self, cam_index, resolution):
"""
指定されたカメラインデックスと解析用解像度で VideoCapture を初期化する。
"""
if self.cap is not None:
self.cap.release()
self.cap = cv2.VideoCapture(cam_index)
if not self.cap.isOpened():
QtWidgets.QMessageBox.critical(self, "Error", f"カメラ {cam_index} が開けません")
sys.exit(1)
width, height = resolution
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
def on_camera_changed(self):
cam_index = int(self.camera_combo.currentText())
self.init_camera(cam_index, self.current_resolution)
def on_resolution_changed(self):
res_text = self.resolution_combo.currentText()
try:
width_str, height_str = res_text.split("x")
width = int(width_str)
height = int(height_str)
self.current_resolution = (width, height)
cam_index = int(self.camera_combo.currentText())
self.init_camera(cam_index, self.current_resolution)
except Exception:
pass
def update_frame(self):
global persons, next_person_id
ret, frame = self.cap.read()
if not ret:
return
        # Mirror the image
frame = cv2.flip(frame, 1)
h, w, _ = frame.shape
        # Convert to RGB (MediaPipe and face_recognition expect RGB input)
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
current_time = time.time()
dt = current_time - self.prev_time
self.prev_time = current_time
        # Update the frame counter (used to time DeepFace analysis)
self.frame_count += 1
        # Face detection with MediaPipe
results = self.face_mesh.process(rgb_frame)
if results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
pts = []
for lm in face_landmarks.landmark:
pts.append((int(lm.x * w), int(lm.y * h)))
x_vals = [p[0] for p in pts]
y_vals = [p[1] for p in pts]
bbox = (min(x_vals), min(y_vals), max(x_vals), max(y_vals))
x_min, y_min, x_max, y_max = bbox
                # ----- Gaze detection from the iris landmarks -----
                left_iris_center = None
                right_iris_center = None
try:
left_eye_left = pts[33]
left_eye_right = pts[133]
left_iris_pts = pts[468:468+5]
left_iris_center = (sum(p[0] for p in left_iris_pts) / 5.0,
sum(p[1] for p in left_iris_pts) / 5.0)
for point in left_iris_pts:
cv2.circle(frame, (point[0], point[1]), 3, (255, 0, 0), -1)
except Exception:
left_ratio = 0.5
try:
right_eye_left = pts[362]
right_eye_right = pts[263]
right_iris_pts = pts[473:473+5]
right_iris_center = (sum(p[0] for p in right_iris_pts) / 5.0,
sum(p[1] for p in right_iris_pts) / 5.0)
for point in right_iris_pts:
cv2.circle(frame, (point[0], point[1]), 3, (255, 0, 0), -1)
except Exception:
right_ratio = 0.5
try:
left_ratio = compute_gaze_ratio(left_iris_center[0], left_eye_left[0], left_eye_right[0])
except Exception:
left_ratio = 0.5
try:
right_ratio = compute_gaze_ratio(right_iris_center[0], right_eye_left[0], right_eye_right[0])
except Exception:
right_ratio = 0.5
gaze_flag = is_looking_center(left_ratio, right_ratio,
lower=self.gaze_lower_threshold,
upper=self.gaze_upper_threshold)
                # ----- Face recognition -----
face_location = (max(y_min, 0), min(x_max, w), min(y_max, h), max(x_min, 0))
if (face_location[2] - face_location[0]) < 20 or (face_location[1] - face_location[3]) < 20:
continue
encodings = face_recognition.face_encodings(rgb_frame, known_face_locations=[face_location])
if not encodings:
continue
candidate_encoding = encodings[0]
best_match_id = None
best_distance = float('inf')
for pid, person in persons.items():
distance = face_recognition.face_distance([person["face_encoding"]], candidate_encoding)[0]
if distance < self.face_tolerance and distance < best_distance:
best_distance = distance
best_match_id = pid
if best_match_id is None:
                    # New person: initialize all fields, including those used by the DeepFace analysis
persons[next_person_id] = {
"face_encoding": candidate_encoding,
"gaze_time": dt if gaze_flag else 0.0,
"last_seen": current_time,
"bbox": bbox,
"age_sum": 0.0,
"age_count": 0,
"age_avg": 0.0,
"gender": None,
"emotion_duration": {
"angry": 0.0, "disgust": 0.0, "fear": 0.0,
"happy": 0.0, "sad": 0.0, "surprise": 0.0, "neutral": 0.0
},
"last_emotion": None
}
this_id = next_person_id
next_person_id += 1
else:
this_id = best_match_id
person = persons[this_id]
person["last_seen"] = current_time
person["bbox"] = bbox
if gaze_flag:
person["gaze_time"] += dt
                    # Weighted (moving-average) update of the stored face encoding
alpha = 0.9
person["face_encoding"] = alpha * person["face_encoding"] + (1 - alpha) * candidate_encoding
                person = persons[this_id]  # fetch the up-to-date record
                # ----- DeepFace analysis (run every N frames) -----
if self.frame_count % self.deepface_interval == 0:
x_min_disp = max(x_min, 0)
y_min_disp = max(y_min, 0)
x_max_disp = min(x_max, w)
y_max_disp = min(y_max, h)
if x_max_disp - x_min_disp > 0 and y_max_disp - y_min_disp > 0:
face_img = frame[y_min_disp:y_max_disp, x_min_disp:x_max_disp]
try:
                            analysis = DeepFace.analyze(face_img, actions=['age', 'gender', 'emotion'], enforce_detection=False, silent=True)
                            # Update the running average of the age
person["age_sum"] += analysis[0]["age"]
person["age_count"] += 1
person["age_avg"] = person["age_sum"] / person["age_count"]
                            # Gender (use the most recent result)
person["gender"] = analysis[0]["dominant_gender"]
                            # Update the most recent emotion
person["last_emotion"] = analysis[0]["dominant_emotion"]
except Exception:
pass
                # ----- Accumulate gaze time per emotion -----
if gaze_flag and person.get("last_emotion") is not None:
emo = person["last_emotion"]
if emo in person["emotion_duration"]:
person["emotion_duration"][emo] += dt
                # ----- Draw the results (ID, gaze time, gender, age, emotion on the video) -----
color = (0, 255, 0) if gaze_flag else (0, 0, 255)
cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
info_text = f"ID {this_id}: {person['gaze_time']:.1f}s"
if person["gender"] is not None:
info_text += f", {person['gender']}, {person['age_avg']:.1f}y"
if person["last_emotion"] is not None:
info_text += f", {person['last_emotion']}"
cv2.putText(frame, info_text,
(x_min, max(y_min - 10, 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
label_y = max(y_min - 30, 30)
if gaze_flag:
cv2.putText(frame, "Looking", (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
else:
cv2.putText(frame, "Not Looking", (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
if gaze_flag:
if left_iris_center is not None:
cv2.circle(frame, (int(left_iris_center[0]), int(left_iris_center[1])), 7, (0, 255, 0), -1)
if right_iris_center is not None:
cv2.circle(frame, (int(right_iris_center[0]), int(right_iris_center[1])), 7, (0, 255, 0), -1)
        # Scale the frame down and show it in the GUI display area
rgb_display = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image = QtGui.QImage(rgb_display.data, w, h, 3 * w, QtGui.QImage.Format_RGB888)
pixmap = QtGui.QPixmap.fromImage(image)
scaled_pixmap = pixmap.scaled(self.video_label.size(), QtCore.Qt.KeepAspectRatio)
self.video_label.setPixmap(scaled_pixmap)
def write_csv(self):
        # Append the date and time to the file name (e.g. gg_output_20230415_153045.csv)
timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
output_filename = f"gg_output_{timestamp}.csv"
current_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
with open(output_filename, "w", newline="", encoding="utf-8") as csvfile:
writer = csv.writer(csvfile)
header = ["User ID", "Gaze Duration (sec)", "Gender", "Average Age",
"Angry (sec)", "Disgust (sec)", "Fear (sec)", "Happy (sec)", "Sad (sec)", "Surprise (sec)", "Neutral (sec)"]
writer.writerow(header)
for pid, person in persons.items():
row = [
# current_time_str,
pid,
f"{person['gaze_time']:.1f}",
person['gender'] if person['gender'] is not None else "N/A",
f"{person['age_avg']:.1f}" if person["age_count"] > 0 else "N/A",
f"{person['emotion_duration']['angry']:.1f}",
f"{person['emotion_duration']['disgust']:.1f}",
f"{person['emotion_duration']['fear']:.1f}",
f"{person['emotion_duration']['happy']:.1f}",
f"{person['emotion_duration']['sad']:.1f}",
f"{person['emotion_duration']['surprise']:.1f}",
f"{person['emotion_duration']['neutral']:.1f}"
]
writer.writerow(row)
def close_app(self):
self.timer.stop()
self.cap.release()
self.face_mesh.close()
self.write_csv()
QtWidgets.QApplication.quit()
if __name__ == "__main__":
app = QtWidgets.QApplication(sys.argv)
window = MainWindow()
window.show()
sys.exit(app.exec_())
Pressing the 終了 (Exit) button writes a CSV file like the one shown below, containing the user ID, gaze duration, gender, average age, and the time spent in each emotion. The file name is gg_output_(date)_(time).csv.
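As an illustration only (the file name below is a made-up example; substitute the CSV that gg4.py actually wrote), the output can be read back with Python's standard csv module, for instance to list people in order of gaze duration:

import csv

# Hypothetical file name; replace it with the gg_output_(date)_(time).csv that was produced.
with open("gg_output_20250101_120000.csv", newline="", encoding="utf-8") as f:
    rows = list(csv.DictReader(f))

# Sort by total gaze time, longest first, using the column names written by write_csv().
rows.sort(key=lambda r: float(r["Gaze Duration (sec)"]), reverse=True)
for r in rows:
    print(r["User ID"], r["Gaze Duration (sec)"], r["Gender"], r["Average Age"])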