Linux & Android Dialy – ページ 5 – Just another WordPress site

在庫管理のための検出画像ディレクトリと推論モデルのディレクトリ設定

モデルと
推論する画像を格納するディレクトリを
設定ファイルconfig.iniで指定しておき
切り替えを簡単にできるようにする

これはモデルを今後作成しなおすのと
画像の対象をwebカメラで撮影したものにするか
もしくはリアルタイムで監視したものにするかを切り替えるため

mkdir inventory_images

で画像ファイルの置き場所を作成

vim config.ini

で設定ファイルを作成

[Settings]
model_path = inventory_model/best.pt
image_directory = inventory_images

として保存

count_inventory.py
の中身を
import json
from collections import defaultdict

import cv2
from ultralytics import YOLO

# Load the class-label -> display-name mapping from the JSON file
with open('label_mapping.json', 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model
model = YOLO('inventory_model/best.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so stop here rather than handing None to the model.
image_path = 'path_to_your_image.jpg'
image = cv2.imread(image_path)
if image is None:
    raise SystemExit(f'Could not read image: {image_path}')

# Run detection
results = model(image)

# Take the first result and the class id of every detected box
detections = results[0]
classes = detections.boxes.cls

# Count detections per display label (fall back to the raw class name
# when the mapping has no entry for it)
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Print the counts
for label, count in object_counts.items():
    print(f'{label}: {count}個')

から

import configparser
import json
import os
from collections import defaultdict
from datetime import datetime

import cv2
from ultralytics import YOLO

from inventory_database_module import save_detection_to_db  # stores detection results in the DB
from line_notify import send_line_notify  # sends the LINE notification

# Read the settings file. ConfigParser.read() returns the list of files it
# parsed, so an empty list means config.ini is missing or unreadable.
config = configparser.ConfigParser()
if not config.read('config.ini'):
    raise SystemExit('config.ini could not be read')

# Model path and image directory come from the settings file
model_path = config['Settings']['model_path']
image_directory = config['Settings']['image_directory']

# Path of the label-mapping file
label_mapping_path = 'label_mapping.json'

# Load the class-label -> display-name mapping from the JSON file
with open(label_mapping_path, 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model from the configured path
model = YOLO(model_path)

# Process every image file in the image directory
for image_filename in os.listdir(image_directory):
    image_path = os.path.join(image_directory, image_filename)
    is_image = image_path.lower().endswith(('.png', '.jpg', '.jpeg'))
    if not (os.path.isfile(image_path) and is_image):
        continue

    # cv2.imread returns None (it does not raise) when a file cannot be read
    image = cv2.imread(image_path)
    if image is None:
        print("Could not read image file:", image_filename)
        continue

    # Run detection; save=True also writes the annotated image to disk
    results = model(image, save=True, conf=0.2, iou=0.5)

    # Class id of every detected box in the first result
    classes = results[0].boxes.cls

    # Count detections per display label
    object_counts = defaultdict(int)
    for cls in classes:
        class_label = model.names[int(cls)]
        object_counts[label_mapping.get(class_label, class_label)] += 1

    # Keep only the items whose count is 1 or less.
    # NOTE: labels with no detection never enter object_counts, so in
    # practice this selects the labels that were detected exactly once.
    filtered_object_counts = {label: count for label, count in object_counts.items() if count <= 1}

    # One message line per remaining item
    message_lines = [f'{label}: {count}個' for label, count in filtered_object_counts.items()]

    # Show the result on the console
    for line in message_lines:
        print(line)

    if not message_lines:
        print("No objects with counts of 1 or less detected in file:", image_filename)
        continue

    # Append the current time, then notify via LINE and store the result
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    message = '\n'.join(message_lines)
    message = f"{message}\n\nMessage sent at: {current_time}"
    send_line_notify(message)
    save_detection_to_db(filtered_object_counts)

へ変更

試しに

cp data_bak/Baskulin4.jpg inventory_images

でデータを写し

python count_inventory.py

を実行すると

0: 640x512 1 baskulin, 131.0ms
Speed: 5.6ms preprocess, 131.0ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 512)
Results saved to runs/detect/predict22
バスクリン: 1個
File: runs/detect/predict22/image0.jpg
200
{"status":200,"message":"ok"}

というようにLINEへ送信される

とりあえずここまでできたので
githubで公開し
モデルは roboflowなどを使って改良して後々公開する

yolov8の検出結果のDB格納

検出結果のDB格納

vim create_table.py

を作成

import sqlite3

def create_table(db_path='detections.db'):
    """Create the ``detections`` table if it does not exist yet.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'detections.db'``, the file this script has always used.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Create the table (no-op when it is already there)
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS detections (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            label TEXT NOT NULL,
            count INTEGER NOT NULL,
            timestamp TEXT NOT NULL
        )
    ''')

        conn.commit()
    finally:
        # Close even when the statement fails so the handle is not leaked
        conn.close()

if __name__ == '__main__':
    create_table()

これを実行し
DBを作成

次にDBへ保存するモジュールの作成

vim inventory_database_module.py

でファイルを作成

import sqlite3
from datetime import datetime

def save_detection_to_db(detections, db_path='detections.db'):
    """Insert one row per detected label into the ``detections`` table.

    All rows written by a single call share the same timestamp, taken
    when the call starts.

    Args:
        detections: Mapping of label -> count.
        db_path: Path of the SQLite database file. Defaults to
            ``'detections.db'``.
    """
    # Current local time, formatted the same way for every row
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    rows = [(label, count, current_time) for label, count in detections.items()]

    conn = sqlite3.connect(db_path)
    try:
        conn.executemany(
            'INSERT INTO detections (label, count, timestamp) VALUES (?, ?, ?)',
            rows,
        )
        conn.commit()
    finally:
        # Close even when the insert fails (e.g. the table does not exist)
        conn.close()

として保存

import argparse
import json
from collections import defaultdict
from datetime import datetime

import cv2
from ultralytics import YOLO

from inventory_database_module import save_detection_to_db  # stores detection results in the DB
from line_notify import send_line_notify  # sends the LINE notification

# Parse the command-line arguments
parser = argparse.ArgumentParser(description="YOLOv8 Object Detection")
parser.add_argument('image_path', type=str, help='Path to the input image file')
args = parser.parse_args()

# Path of the label-mapping file
label_mapping_path = 'label_mapping.json'

# Load the class-label -> display-name mapping from the JSON file
with open(label_mapping_path, 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model
model = YOLO('inventory_model/best.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so report that as a usage error.
image = cv2.imread(args.image_path)
if image is None:
    parser.error(f'could not read image: {args.image_path}')

# Run detection; save=True also writes the annotated image to disk
results = model(image, save=True, conf=0.1, iou=0.5)

# Class id of every detected box in the first result
classes = results[0].boxes.cls

# Count detections per display label
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Keep only the items whose count is 1 or less
filtered_object_counts = {label: count for label, count in object_counts.items() if count <= 1}

# One message line per remaining item
message_lines = [f'{label}: {count}個' for label, count in filtered_object_counts.items()]

# Show the result on the console
for line in message_lines:
    print(line)

# Notify via LINE and store the result only when something is left
if message_lines:
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    message = '\n'.join(message_lines)
    message = f"{message}\n在庫チェックの時刻: {current_time}"
    send_line_notify(message)
    save_detection_to_db(filtered_object_counts)
else:
    print("No objects with counts of 1 or less detected.")

というように
結果をDBに保存し
在庫チェックの時刻も送信するようにコード変更

なお実行した後に
DBの中身を見るには

vim view_detections.py

で

import sqlite3

def view_detections(db_path='detections.db'):
    """Print every row of the ``detections`` table as a simple text table.

    Args:
        db_path: Path of the SQLite database file to read.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Fetch the table contents
        cursor = conn.execute('SELECT * FROM detections')
        rows = cursor.fetchall()

        # Column names come from the cursor metadata of the SELECT
        column_names = [description[0] for description in cursor.description]
    finally:
        # Close even when the query fails (e.g. the table does not exist)
        conn.close()

    # Header, separator, then one line per row
    print(' | '.join(column_names))
    print("-" * 50)
    for row in rows:
        print(" | ".join(str(value) for value in row))

if __name__ == '__main__':
    view_detections()

として保存

python view_detections.py

を実行すると

id | label | count | timestamp
--------------------------------------------------
1 | バスクリン | 1 | 2024-07-07 06:45:47
2 | バスクリン | 1 | 2024-07-07 06:50:42
3 | バスクリン | 1 | 2024-07-07 06:51:43

となり検出結果の確認ができる

認識精度が低いため
信頼度のしきい値（conf）を0.1まで下げないと認識しないし
並べた時の複数の検出ができていない

とりあえず指定ディレクトリの画像から検出するようにコード変更する

LINE notifyのモジュール化

別のメソッドでも使えるようにモジュール化する
また

message = 'ファイルパス自動取得テスト'

の部分は
他のプログラムで
生成された文字列を受け取って実行するようにコードを変更する

vim line_notify.py

で

import requests
import os
from PIL import Image
from io import BytesIO
from utils import load_config, get_latest_directory, get_image_files

def resize_image_if_needed(image_data, max_size=3 * 1024 * 1024):
    """Return ``image_data``, halved in width and height when it is too big.

    Args:
        image_data: Encoded image file contents as bytes.
        max_size: Size limit in bytes; data at or below it is returned
            unchanged.

    Returns:
        The original bytes, or the re-encoded bytes of the half-size image.
        The image is halved once; the result is not re-checked against
        ``max_size``.
    """
    if len(image_data) <= max_size:
        return image_data

    image = Image.open(BytesIO(image_data))
    # Read the format before resizing: images produced by Image.resize()
    # have format None, which would force every file to be saved as JPEG.
    image_format = image.format or 'JPEG'

    new_size = (max(1, image.width // 2), max(1, image.height // 2))
    resized = image.resize(new_size, Image.LANCZOS)

    # JPEG has no alpha/palette support, so convert such images first
    if image_format == 'JPEG' and resized.mode not in ('RGB', 'L', 'CMYK'):
        resized = resized.convert('RGB')

    output = BytesIO()
    resized.save(output, format=image_format)
    return output.getvalue()

def send_line_notify(message, config_path='config.json', timeout=30):
    """Send ``message`` to LINE Notify, once per image in the latest predict dir.

    The token and the base directory are read from the JSON settings file.
    Every image found in the newest ``predict*`` directory is attached to
    its own request. When that directory holds no image, the message is
    sent once as text only.

    Args:
        message: Text of the notification.
        config_path: Path of the JSON settings file with the keys
            ``token`` and ``image_file_path``.
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    # Load the settings file
    config = load_config(config_path)

    # Token and base directory come from the settings file
    token = config['token']
    base_path = config['image_file_path']

    # Find the newest predict directory and the images inside it
    latest_dir = get_latest_directory(base_path)
    image_files = get_image_files(latest_dir)

    url = 'https://notify-api.line.me/api/notify'

    headers = {'Authorization': f"Bearer {token}"}
    params = {'message': message}

    # No image to attach: still deliver the text instead of sending nothing
    if not image_files:
        res = requests.post(url, headers=headers, params=params, timeout=timeout)
        print(res.status_code)
        print(res.text)
        return

    # One request per image file in the newest predict directory
    for image_file_path in image_files:
        with open(image_file_path, 'rb') as img_file:
            img_data = resize_image_if_needed(img_file.read())

        # Upload the (possibly resized) bytes under the original file name
        files = {'imageFile': (os.path.basename(image_file_path), img_data)}

        # Send the request to the LINE Notify API
        res = requests.post(url, headers=headers, params=params, files=files, timeout=timeout)

        # Print the response
        print(f"File: {image_file_path}")
        print(res.status_code)
        print(res.text)

とりあえずこれを使えるかテストする

import argparse
import json
from collections import defaultdict

import cv2
from ultralytics import YOLO

# Parse the command-line arguments
parser = argparse.ArgumentParser(description="YOLOv8 Object Detection")
parser.add_argument('image_path', type=str, help='Path to the input image file')
args = parser.parse_args()

# Path of the label-mapping file
label_mapping_path = 'label_mapping.json'

# Load the class-label -> display-name mapping from the JSON file
with open(label_mapping_path, 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model
model = YOLO('inventory_model/best.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so report that as a usage error.
image = cv2.imread(args.image_path)
if image is None:
    parser.error(f'could not read image: {args.image_path}')

# Run detection; save=True also writes the annotated image to disk
results = model(image, save=True, conf=0.2, iou=0.5)

# Class id of every detected box in the first result
classes = results[0].boxes.cls

# Count detections per display label
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Print the counts
for label, count in object_counts.items():
    print(f'{label}: {count}個')

の中で呼び出すようにする

import argparse
import json
from collections import defaultdict

import cv2
from ultralytics import YOLO

from line_notify import send_line_notify  # sends the LINE notification

# Parse the command-line arguments
parser = argparse.ArgumentParser(description="YOLOv8 Object Detection")
parser.add_argument('image_path', type=str, help='Path to the input image file')
args = parser.parse_args()

# Path of the label-mapping file
label_mapping_path = 'label_mapping.json'

# Load the class-label -> display-name mapping from the JSON file
with open(label_mapping_path, 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model
model = YOLO('inventory_model/best.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so report that as a usage error.
image = cv2.imread(args.image_path)
if image is None:
    parser.error(f'could not read image: {args.image_path}')

# Run detection; save=True also writes the annotated image to disk
results = model(image, save=True, conf=0.2, iou=0.5)

# Class id of every detected box in the first result
classes = results[0].boxes.cls

# Count detections per display label
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Build the message: one line per detected label
message_lines = [f'{label}: {count}個' for label, count in object_counts.items()]
message = '\n'.join(message_lines)

# Show the result on the console
for line in message_lines:
    print(line)

# Send the message to LINE Notify
send_line_notify(message)

これを

python count_inventory_terminal.py data_bak/Baskulin4.jpg

で実行すると

0: 640x512 1 baskulin, 125.4ms
Speed: 7.7ms preprocess, 125.4ms inference, 7.6ms postprocess per image at shape (1, 3, 640, 512)
Results saved to runs/detect/predict4
バスクリン: 1個
File: runs/detect/predict4/image0.jpg
200
{"status":200,"message":"ok"}

となり画像つきメッセージが送信される

次は在庫の数が１以下のものをリストにして送信するようにする

import argparse
import json
from collections import defaultdict

import cv2
from ultralytics import YOLO

from line_notify import send_line_notify  # sends the LINE notification

# Parse the command-line arguments
parser = argparse.ArgumentParser(description="YOLOv8 Object Detection")
parser.add_argument('image_path', type=str, help='Path to the input image file')
args = parser.parse_args()

# Path of the label-mapping file
label_mapping_path = 'label_mapping.json'

# Load the class-label -> display-name mapping from the JSON file
with open(label_mapping_path, 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model
model = YOLO('inventory_model/best.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so report that as a usage error.
image = cv2.imread(args.image_path)
if image is None:
    parser.error(f'could not read image: {args.image_path}')

# Run detection; save=True also writes the annotated image to disk
results = model(image, save=True, conf=0.2, iou=0.5)

# Class id of every detected box in the first result
classes = results[0].boxes.cls

# Count detections per display label
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Keep only the items whose count is 1 or less
filtered_object_counts = {label: count for label, count in object_counts.items() if count <= 1}

# Build the message from the filtered result
message_lines = [f'{label}: {count}個' for label, count in filtered_object_counts.items()]
message = '\n'.join(message_lines)

# Show the result on the console
for line in message_lines:
    print(line)

# Notify via LINE only when the filtered result is not empty
if message:
    send_line_notify(message)
else:
    print("No objects with counts of 1 or less detected.")

これで今度は

python count_inventory_terminal.py data_bak/potato_starch1.jpg

として検出されない時には

0: 640x512 (no detections), 123.0ms
Speed: 5.7ms preprocess, 123.0ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 512)
Results saved to runs/detect/predict6
No objects with counts of 1 or less detected.

となって
LINE送信はされなくなる

今回の画像はモデルの学習不足のためか
片栗粉の検出ができなかったので
それを認識できない場合のテストに使った

しかし、画像読み取りエラーなどを考慮し
今後何らかのアクションを取るようにした方が良いかもしれない

エラーログ以外のものを考えるようにする

また、送信するタイミングは、在庫数が１以下になった時に送るようにしました。

この場合、画像が検出できなかったりした時に判定ができないため
今後の課題とします
解決方法としては、検出結果をDBへ格納しておき
実行したタイムスタンプも記録、検出結果が０の時にはアラートを飛ばすなどがありそうです

とりあえず、ターミナル実行のみの状態なので
今後はどこから画像を撮ってくるのか、またwebカメラで行うのか、それとも
ラズパイゼロなどで撮影した画像を使うのか、それを考えてからまた改良していこうと思います

LINE Notify を通知で使う

LINEで通知できるようにする

在庫管理をできるようにしたら
足りないものを知らせる機能が必要

LINEで買い物リストとして昼ぐらいに送信すれば
帰りに購入して帰ることができる

過去記事を参考に
LINE Messaging API　でメッセージ送信

を参考にリンクをしたら

LINE Business ID
になってしまうので
再度調べることにする

LINE: LINE Notifyを用いてWindowsのcurlコマンドからメッセージを投稿する
を参考に再度設定をしていく

スマホのLINEアプリで
トーク > トークルームの作成 > グループ

友達は誰も選択せずに次へ

次に友達をグループに追加があるけど
デフォルトで友達をグループに自動で追加がチェックされているので
チェックを外す

グループ名は買い物リスト
とした

次にLINE Notifyにログインする
https://notify-bot.line.me/ja/

ログインの時にQRコードからログインできるので
スマホのLINEアプリでQRコードを読み取る
もしくはスマホのカメラアプリでQRコードを読み取ると
LINEアプリでQRコードを読み取るように出るので
そのまま実行していくと認証画面になり
PCの画面に表示された数字をスマホで打ち込めば
ログインできる

ログインできたらトークンを発行する

マイページ > アクセストークンの発行

トークンを発行する
をクリックし
トークン名を入力し
通知を送信するトークルームを選択

今回は両方とも
買い物リストを選択

これでトークンが発行される

次にスマホで
買い物リストの
トークルームを開き
設定 > 招待で
LINE notifyを選択し招待する

ここへはcurlコマンドでメッセージを送信できる

https://notify-bot.line.me/doc/ja/
のサンプルは

 curl -X POST -H 'Authorization: Bearer <access_token>' -F 'message=foobar' \
https://notify-api.line.me/api/notify
{"status":200,"message":"ok"}

 curl -v -X POST -H 'Authorization: Bearer invalidtoken' -F 'message=foobar' \
https://notify-api.line.me/api/notify
{"status":401,"message":"Invalid access token"}

https://qiita.com/frozencatpisces/items/679d66ab1d617b7a40cb#1-投稿先トークルームの作成
では

curl -X POST -H "Authorization: Bearer 発行したトークン" -F "message=foobar" https://notify-api.line.me/api/notify

これをリストにする場合は複数行必要なので
LINE Notify で curl で改行する
https://blog.framinal.life/entry/2023/06/14/151933
を参考に

URLエンコーディングされた改行文字(%0A) に変換して送ることでできそう

message="こんにちは\n元気ですか?\n\n"

# Convert newline characters to URL-encoded form.
# "$message" is quoted so the shell neither word-splits it nor expands the
# "?" as a glob pattern; printf '%b' interprets the \n escapes without
# relying on the non-portable `echo -e`.
message_encoded=$(printf '%b\n' "$message" | awk '{printf "%s%%0A", $0}')

curl -X POST -H "Authorization: Bearer XXXXXX" --data-binary "message=$message_encoded" https://notify-api.line.me/api/notify

ちょっと古い情報で３年前のだと

LINE NotifyからのLINE通知を改行する方法【Python/LINE Notify(API)】
だと
Lineで通知する文章を改行したい場合、「\n」を入れると改行できるらしい

send_contents = f'\n今日は\n{strftime}({day_of_the_week[weekday]})です。'

また

send_contents = '\n今日は\n' + str(strftime) + '(' + str(day_of_the_week[weekday]) + ')です。'

というようにしてもOK

“Line Notify”を利用してPythonでLineに通知を送る
では
requests
を使っている

pip install requests

テキストだけなら

import requests

token = '発行したトークン'
message = '通知したいメッセージ'

# Text-only notification: the bearer token goes in the Authorization
# header and the text is passed as the `message` parameter.
requests.post(
    'https://notify-api.line.me/api/notify',
    headers={'Authorization': f"Bearer {token}"},
    params={'message': message},
)

テキストと画像付きなら

import requests

url = 'https://notify-api.line.me/api/notify'

token = '発行したトークン'
message = '通知したいテキスト'
image_file_path = 'イメージファイルパス'

headers = {'Authorization': f"Bearer {token}"}
params = {'message': message}

# Open the image in a with-block so the file handle is closed once the
# upload has finished instead of staying open.
with open(image_file_path, 'rb') as image_file:
    files = {'imageFile': image_file}
    res = requests.post(url, headers=headers, params=params, files=files)

テキストの改行なら

message = 'ここで改行\n改行後のテキスト'

PythonでLINE Notifyを使ってみよう
によれば
・メッセージを改行したい場合は「\n」を挿入 ・メッセージは最大１０００文字まで

import requests
def notify(message):
    """Send ``message`` as a text-only LINE Notify notification."""
    token = '発行されたトークン'
    requests.post(
        'https://notify-api.line.me/api/notify',
        headers={'Authorization': 'Bearer ' + token},
        params={'message': message},
    )

if __name__ == '__main__':
    notify('テスト')

がサンプル

こちらもrequestsを使用

[Python]LINEで天気を自動通知させてみた[初心者]
によれば
Webから天気情報を取得してLINEで自動通知をしてみました。定期実行までやります
では
Cronで定期実行

実行するコードは

import datetime
import urllib.request as req
import requests
from bs4 import BeautifulSoup
import re

# Configure LINE Notify
url = "https://notify-api.line.me/api/notify"
access_token = '○○○○○'
headers = {'Authorization': 'Bearer ' + access_token}

# Fetch the wanted information from the weather site
url2 = "https://tenki.jp/forecast/3/17/4610/14100/"   # URL of the page that holds the data
res = requests.get(url2)                              # download that page
soup = BeautifulSoup(res.content, 'html.parser')      # parse the downloaded content as HTML

# Pick out the individual pieces of information
ddd = soup.find(class_="left-style")                  

telop = soup.find("p", class_="weather-telop").string

highlists = soup.find("dd",class_="high-temp temp")

lowlists = soup.find("dd",class_="low-temp temp")

ttt = soup.find(class_="rain-probability")

# Copy whatever iterating ttt yields into a list so it can be indexed below
row=[]
for t in ttt:
    row.append(t)

# Build the notification text in `message`; put "\n" (in double quotes) where a line break is wanted
message="\n" + ddd.text + "\n" + telop + "\n" + "最高　" + highlists.text + "\n" + "最低　" + lowlists.text + "\n"+ "---------" + "\n" +row[1].text +"\n" + "~6  : " + row[3].text + "\n" + "~12 : " + row[5].text +"\n" + "~18 : " + row[7].text +"\n" + "~24 : " + row[9].text +"\n" +"今日も元気に٩( 'ω' )و "

# POST the message to LINE Notify
payload = {'message': message}
r = requests.post(url, headers=headers, params=payload,)

というように requestsを使っている

あと
1人のユーザーにつき、1時間に通知できる回数は1000回まで
という縛りがあるが
買い物リストは１日1回程度だと思うし
カメラ画像からとして考えても３箇所程度だと思うので問題なし

とりあえずchatgptで調べた結果
curl でも requestsでも問題はなさそう

コードメンテを考えるとrequestsの方が良さそう

ということで

vim line_order.py

でファイルを作成し

vim config.json

で設定ファイルを作成

{
  "token": "発行したトークン",
  "image_file_path": "イメージファイルパス"
}

import requests
import json

# Helper that loads the JSON settings file
def load_config(file_path):
    """Return the parsed contents of the JSON file at ``file_path``.

    The file is read as UTF-8 so non-ASCII values load the same way
    regardless of the platform's default encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Load the settings file
config = load_config('config.json')

# Read the token and the image file path from the settings
token = config['token']
image_file_path = config['image_file_path']

url = 'https://notify-api.line.me/api/notify'
message = '通知したいテキスト'

headers = {'Authorization': f"Bearer {token}"}
params = {'message': message}
# NOTE: image_file_path must point at a file; a directory here makes open() raise IsADirectoryError
files = {'imageFile': open(image_file_path, 'rb')}

# Send the request to the LINE Notify API
res = requests.post(url, headers=headers, params=params, files=files)

# Print the response
print(res.status_code)
print(res.text)

と
line_order.py
の内容を書き換えても

Traceback (most recent call last):
  File "/Users/snowpool/aw10s/inventory/line_order.py", line 21, in <module>
    files = {'imageFile': open(image_file_path, 'rb')}
IsADirectoryError: [Errno 21] Is a directory: 'image/'

となる

とりあえず画像を指定する

{
  "token": "発行したトークン",
  "image_file_path": "runs/detect/predict7/Baskulin1.jpg"
}

とすれば成功

改良点としては
メッセージの文章を
検出結果のラベルを変換した文字列にすること
検出結果は
runs/detect/
の中にどんどん新しい番号が付けられて増えていくため
動的にパスを取得するスクリプトにすること

osモジュールを使用して、指定されたディレクトリ内のサブディレクトリをリストアップし、その中で最新の番号を持つディレクトリを特定できる

1. os.listdir(base_path)を使用して、指定されたディレクトリ内の全てのファイルとディレクトリのリストを取得します。
2. リスト内の要素がディレクトリであるかどうかを確認するためにos.path.isdir()を使用します。
3. predictプレフィックスを削除して数値に変換し、max()関数を使用して最大の数値を持つディレクトリを特定します。
4. os.path.join(base_path, latest_dir)を使用して、フルパスを生成します。

import os

def get_latest_directory(base_path):
    """Return the path of the highest-numbered ``predict*`` subdirectory.

    NOTE: every subdirectory name is passed to int() after removing the
    text 'predict', so a directory named exactly 'predict' (empty
    remainder) makes this raise ValueError.
    """
    # Collect every subdirectory of the given directory
    subdirs = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    
    # Convert each name to a number and pick the one with the largest value
    latest_dir = max(subdirs, key=lambda x: int(x.replace('predict', '')))
    
    return os.path.join(base_path, latest_dir)

# Usage example: look up the newest predict directory under runs/detect
base_path = 'runs/detect'
latest_dir = get_latest_directory(base_path)
print(f"Latest directory: {latest_dir}")

実行すると

File "/Users/snowpool/aw10s/inventory/utils.py", line 14, in <module> latest_dir = get_latest_directory(base_path) File "/Users/snowpool/aw10s/inventory/utils.py", line 8, in get_latest_directory latest_dir = max(subdirs, key=lambda x: int(x.replace('predict', ''))) File "/Users/snowpool/aw10s/inventory/utils.py", line 8, in <lambda> latest_dir = max(subdirs, key=lambda x: int(x.replace('predict', ''))) ValueError: invalid literal for int() with base 10: ''

となる

原因は

ValueError: invalid literal for int() with base 10: ''

というエラーは
int()関数が空文字列を処理しようとしたときに発生

これは、predictという文字列をreplaceで削除した結果が
空文字列になる場合に起こる

例えば、predictという名前のディレクトリがある場合など

この問題を解決するために
ディレクトリ名がpredictのプレフィックスを持っているかどうかをチェックし
それ以外のディレクトリ名を無視するようにする

また、predictの後の文字列が数字であることを確認するために
追加のチェックを行う

対処として
1. predictで始まり、その後に数字が続くディレクトリのみを対象とするようにフィルタリングしています。
2. 有効なディレクトリが存在しない場合に適切なエラーメッセージを出力します。

import os

def get_latest_directory(base_path):
    """Return the path of the highest-numbered ``predictN`` subdirectory.

    Only subdirectories named ``predict`` followed by decimal digits are
    considered; anything else (including a bare ``predict``) is ignored.

    Args:
        base_path: Directory that contains the ``predictN`` directories.

    Raises:
        ValueError: If no matching subdirectory exists.
    """
    prefix = 'predict'
    best_name, best_number = None, -1

    for name in os.listdir(base_path):
        suffix = name[len(prefix):]
        # isdecimal() (not isdigit()) so that only text int() can parse
        # passes; isdigit() also accepts characters such as '²'.
        if not (name.startswith(prefix) and suffix.isdecimal()):
            continue
        if not os.path.isdir(os.path.join(base_path, name)):
            continue
        number = int(suffix)
        if number > best_number:
            best_name, best_number = name, number

    if best_name is None:
        raise ValueError("No valid 'predict' directories found")

    return os.path.join(base_path, best_name)

# Usage example: look up the newest predict directory under runs/detect
base_path = 'runs/detect'
latest_dir = get_latest_directory(base_path)
print(f"Latest directory: {latest_dir}")

これで実行すると

Latest directory: runs/detect/predict7

となった

次は画像ファイルパスの取得

import os

def get_latest_directory(base_path):
    """Return the path of the highest-numbered ``predictN`` subdirectory.

    Only subdirectories named ``predict`` followed by decimal digits are
    considered; anything else (including a bare ``predict``) is ignored.

    Args:
        base_path: Directory that contains the ``predictN`` directories.

    Raises:
        ValueError: If no matching subdirectory exists.
    """
    prefix = 'predict'
    best_name, best_number = None, -1

    for name in os.listdir(base_path):
        suffix = name[len(prefix):]
        # isdecimal() (not isdigit()) so that only text int() can parse
        # passes; isdigit() also accepts characters such as '²'.
        if not (name.startswith(prefix) and suffix.isdecimal()):
            continue
        if not os.path.isdir(os.path.join(base_path, name)):
            continue
        number = int(suffix)
        if number > best_number:
            best_name, best_number = name, number

    if best_name is None:
        raise ValueError("No valid 'predict' directories found")

    return os.path.join(base_path, best_name)

def get_image_files(directory):
    """Return the paths of the image files directly inside ``directory``.

    A file counts as an image when its name ends with one of the known
    extensions (case-insensitive). Directories are skipped even if their
    name looks like an image, and the result is sorted by file name so
    the order does not depend on what os.listdir() happens to return.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    candidates = (
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(image_extensions)
    )
    return [path for path in candidates if os.path.isfile(path)]

# Usage example: list the image files in the newest predict directory
base_path = 'runs/detect'
latest_dir = get_latest_directory(base_path)
image_files = get_image_files(latest_dir)

print(f"Latest directory: {latest_dir}")
print("Image files:")
for image_file in image_files:
    print(image_file)

とすることで画像ファイルのパスが取得できた

Latest directory: runs/detect/predict7
Image files:
runs/detect/predict7/Baskulin1.jpg

次にこれをモジュールにして
LINEの画像パスにして送信テストする

import os
import json

def load_config(file_path):
    """Return the parsed contents of the JSON file at ``file_path``.

    The file is read as UTF-8 so non-ASCII values load the same way
    regardless of the platform's default encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def get_latest_directory(base_path):
    """Return the path of the highest-numbered ``predictN`` subdirectory.

    Only subdirectories named ``predict`` followed by decimal digits are
    considered; anything else (including a bare ``predict``) is ignored.

    Args:
        base_path: Directory that contains the ``predictN`` directories.

    Raises:
        ValueError: If no matching subdirectory exists.
    """
    prefix = 'predict'
    best_name, best_number = None, -1

    for name in os.listdir(base_path):
        suffix = name[len(prefix):]
        # isdecimal() (not isdigit()) so that only text int() can parse
        # passes; isdigit() also accepts characters such as '²'.
        if not (name.startswith(prefix) and suffix.isdecimal()):
            continue
        if not os.path.isdir(os.path.join(base_path, name)):
            continue
        number = int(suffix)
        if number > best_number:
            best_name, best_number = name, number

    if best_name is None:
        raise ValueError("No valid 'predict' directories found")

    return os.path.join(base_path, best_name)

def get_image_files(directory):
    """Return the paths of the image files directly inside ``directory``.

    A file counts as an image when its name ends with one of the known
    extensions (case-insensitive). Directories are skipped even if their
    name looks like an image, and the result is sorted by file name so
    the order does not depend on what os.listdir() happens to return.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    candidates = (
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(image_extensions)
    )
    return [path for path in candidates if os.path.isfile(path)]

として

config.jsonの中の

  "image_file_path": "runs/detect/predict7/Baskulin1.jpg"

という指定を

  "image_file_path": "runs/detect"

に変更

line_order.pyの中身を

import os

import requests

from utils import load_config, get_latest_directory, get_image_files

# Load the settings file
config = load_config('config.json')

# Token and base directory come from the settings file
token = config['token']
base_path = config['image_file_path']

# Find the newest predict directory and the images inside it
latest_dir = get_latest_directory(base_path)
image_files = get_image_files(latest_dir)

url = 'https://notify-api.line.me/api/notify'
message = '通知したいテキスト'

headers = {'Authorization': f"Bearer {token}"}
params = {'message': message}

# Send one LINE Notify request per image file in the newest predict directory
for image_file_path in image_files:
    # The with-block closes each image once its upload is done, so file
    # handles do not pile up while looping; the timeout keeps a stalled
    # connection from hanging the script forever.
    with open(image_file_path, 'rb') as image_file:
        files = {'imageFile': image_file}
        res = requests.post(url, headers=headers, params=params, files=files, timeout=30)

    # Print the response
    print(f"File: {image_file_path}")
    print(res.status_code)
    print(res.text)

として保存

これで

python line_order.py

を実行すれば画像付きでLINEで送信してくれる

試しに再度新しいyolov8での推論をして
できたディレクトリを対象にするか実験する

しかし

python count_inventory_terminal.py data_bak/Baskulin1.jpg

0: 640x512 1 baskulin, 93.9ms
Speed: 2.7ms preprocess, 93.9ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 512)
バスクリン: 1個

の後に

python line_order.py

を実行しても

File: runs/detect/predict7/Baskulin1.jpg
200
{"status":200,"message":"ok"}

となる

ls runs/detect
の結果も
predict		predict3	predict5	predict7
predict2	predict4	predict6

となる

どうやらyoloのコマンドで実行した時だけ
runs/detect/predict 以下に保存されるらしい

つまりカウントした後に画像を保存するプログラムを追加しないとだめ
あと、カウントした時に残り１以下になった時に
ラベルを書き出すプログラムが必要

Yolov8検出結果と文字列の結び付け

在庫管理のため yolov8で在庫対象とするものを検出し
その在庫数が２以下になれば
LINEで買い物リストを送信するものを作成したい
このためには検出結果のカウント
そして
検出されたものを日本語にする必要がある

YOLOv8の検出結果と対応する文字列を結びつけるためには、
検出されたクラスのラベルを
日本語の対応するラベルに変換するマッピングを作成する必要があり

まずYOLOv8の結果取得:
画像をYOLOv8に入力し、検出結果を取得
検出結果には
各物体のクラスラベル
バウンディングボックスの座標などが含まれている

そして
クラスラベルのマッピング作成
検出されたクラスラベルを
日本語の対応するラベルに変換する辞書を作成

例として

label_mapping = {
    'baskulin': 'バスクリン',
    'potato_starch': '片栗粉',
    'shampoo': 'シャンプー'
}

という感じ

以下はchatgptのコード例

import json

# Example YOLOv8 detection results.
# They are written as JSON-like dicts here; adapt this to the data format
# you actually get from YOLOv8.
detection_results = [
    {"class": "baskulin", "bbox": [100, 100, 150, 150]},
    {"class": "potato_starch", "bbox": [200, 200, 250, 250]},
    {"class": "shampoo", "bbox": [300, 300, 350, 350]},
]

# Class label -> Japanese display name
label_mapping = {
    'baskulin': 'バスクリン',
    'potato_starch': '片栗粉',
    'shampoo': 'シャンプー',
}

# Apply the mapping in place; labels without an entry stay as they are
for detection in detection_results:
    detection['class'] = label_mapping.get(detection['class'], detection['class'])

# Show the result
print(json.dumps(detection_results, ensure_ascii=False, indent=2))

yolov8の検出結果を表示するには？
とすると

import cv2
import matplotlib.pyplot as plt
from ultralytics import YOLO

# Class label -> Japanese display name
label_mapping = {
    'baskulin': 'バスクリン',
    'potato_starch': '片栗粉',
    'shampoo': 'シャンプー'
}

# Load the YOLOv8 model
model = YOLO('yolov8s.pt')  # choose the appropriate model here

# Load the image and convert it from BGR to RGB
image_path = 'path_to_your_image.jpg'
image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)

# Run detection and take the boxes of the first result
results = model(image)
found = results[0].boxes

# Draw a bounding box and a "label score" caption for every detection
for cls, box, score in zip(found.cls, found.xyxy, found.conf):
    class_label = model.names[int(cls)]
    label = label_mapping.get(class_label, class_label)
    x1, y1, x2, y2 = map(int, box)
    cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
    cv2.putText(image, f'{label} {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

# Show the annotated image
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis('off')
plt.show()

となるが

バウンディングボックスとラベルを画像に描画
結果をMatplotlibを使って表示
となる

結果の表示で画像での結果表示は不要

import cv2
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO('yolov8s.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so stop here rather than handing None to the model.
image_path = 'path_to_your_image.jpg'
image = cv2.imread(image_path)
if image is None:
    raise SystemExit(f'Could not read image: {image_path}')

# Run detection
results = model(image)

# Take the first result and its class ids, boxes and confidence scores
detections = results[0]
classes = detections.boxes.cls
boxes = detections.boxes.xyxy
scores = detections.boxes.conf

# Class label -> Japanese display name
label_mapping = {
    'baskulin': 'バスクリン',
    'potato_starch': '片栗粉',
    'shampoo': 'シャンプー'
}

# Print every detection as text
for cls, box, score in zip(classes, boxes, scores):
    class_label = model.names[int(cls)]
    label = label_mapping.get(class_label, class_label)
    x1, y1, x2, y2 = map(int, box)
    print(f'検出: {label}, 信頼度: {score:.2f}, バウンディングボックス: ({x1}, {y1}, {x2}, {y2})')

バウンディングボックスの座標も不要

import cv2
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO('yolov8s.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so stop here rather than handing None to the model.
image_path = 'path_to_your_image.jpg'
image = cv2.imread(image_path)
if image is None:
    raise SystemExit(f'Could not read image: {image_path}')

# Run detection
results = model(image)

# Take the first result and its class ids and confidence scores
detections = results[0]
classes = detections.boxes.cls
scores = detections.boxes.conf

# Class label -> Japanese display name
label_mapping = {
    'baskulin': 'バスクリン',
    'potato_starch': '片栗粉',
    'shampoo': 'シャンプー'
}

# Print every detection as text
for cls, score in zip(classes, scores):
    class_label = model.names[int(cls)]
    label = label_mapping.get(class_label, class_label)
    print(f'検出: {label}, 信頼度: {score:.2f}')

次に
検出した物体のそれぞれの個数をカウントして表示

from collections import defaultdict

import cv2
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO('yolov8s.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so stop here rather than handing None to the model.
image_path = 'path_to_your_image.jpg'
image = cv2.imread(image_path)
if image is None:
    raise SystemExit(f'Could not read image: {image_path}')

# Run detection
results = model(image)

# Take the first result and the class id of every detected box
detections = results[0]
classes = detections.boxes.cls

# Class label -> Japanese display name
label_mapping = {
    'baskulin': 'バスクリン',
    'potato_starch': '片栗粉',
    'shampoo': 'シャンプー'
}

# Count detections per display label (fall back to the raw class name)
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Print the counts
for label, count in object_counts.items():
    print(f'{label}: {count}個')

これでほぼ目的のものに近くなった

from collections import defaultdict

これは
Pythonの標準ライブラリであるcollectionsモジュールから
defaultdictクラスをインポートしています

defaultdictは、
キーが存在しない場合にデフォルト値を
自動的に提供する辞書を作成するために使用

この場合、物体のカウントを行うために使用している

ただ、在庫管理のものはどんどん追加していくため
別のファイルにして読み込むようにする

vim label_mapping.json

でJSONファイルを作成

{
    "baskulin": "バスクリン",
    "potato_starch": "片栗粉",
    "shampoo": "シャンプー"
}

次にJSON ファイルの読み込み

import json
from collections import defaultdict

import cv2
from ultralytics import YOLO

# Load the class-label -> display-name mapping from the JSON file
with open('label_mapping.json', 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model
model = YOLO('yolov8s.pt')  # choose the appropriate model here

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so stop here rather than handing None to the model.
image_path = 'path_to_your_image.jpg'
image = cv2.imread(image_path)
if image is None:
    raise SystemExit(f'Could not read image: {image_path}')

# Run detection
results = model(image)

# Take the first result and the class id of every detected box
detections = results[0]
classes = detections.boxes.cls

# Count detections per display label (fall back to the raw class name)
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Print the counts
for label, count in object_counts.items():
    print(f'{label}: {count}個')

これを書き換える

コマンドラインで動作するようにする

コマンドラインから画像ファイルを指定して使用するには
Pythonのargparseモジュールを使用

これにより
コマンドラインから画像ファイルのパスを指定できるようになる

また使用するモデルは

inventory_model/best.pt

とする

vim count_inventory_terminal.py

で中身を

import argparse
import json
import cv2
from ultralytics import YOLO
from collections import defaultdict

# Parse the command-line arguments: a single positional image path.
parser = argparse.ArgumentParser(description="YOLOv8 Object Detection")
parser.add_argument('image_path', type=str, help='Path to the input image file')
args = parser.parse_args()

# Path of the label-mapping file.
label_mapping_path = 'label_mapping.json'

# Read the class-name -> display-label mapping from the JSON file.
with open(label_mapping_path, 'r', encoding='utf-8') as f:
    label_mapping = json.load(f)

# Load the YOLOv8 model.
model = YOLO('inventory_model/best.pt')  # pick the appropriate weights here

# Load the image. cv2.imread() reports failure by returning None instead of
# raising, so a wrong path given on the command line is reported here rather
# than being passed on to the model.
image = cv2.imread(args.image_path)
if image is None:
    parser.error(f'画像が読み込めませんでした: {args.image_path}')

# Run detection on the image.
results = model(image)

# One image was passed, so take the first result and read the class index
# of every detected box.
detections = results[0]
classes = detections.boxes.cls

# Count detections per display label; classes missing from the JSON mapping
# are counted under their original class name.
object_counts = defaultdict(int)
for cls in classes:
    class_label = model.names[int(cls)]
    object_counts[label_mapping.get(class_label, class_label)] += 1

# Print the per-label counts.
for label, count in object_counts.items():
    print(f'{label}: {count}個')

これで実行すると

python count_inventory_terminal.py data_bak/Baskulin1.jpg

0: 640x512 1 baskulin, 114.8ms
Speed: 9.4ms preprocess, 114.8ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 512)
バスクリン: 1個

となって検出結果の日本語表示ができる

とりあえず対象物の日本語化とカウントができたので
次はLINEで送信機能を作成する

FaceRecognizerSFによる顔の認識の実践

スマホの写真のサイズを1/4にすることで

generate_aligned_faces.py

による顔の切り出しが成功した

取り出した顔画像は
face00x.jpg
となっているので、個人ごとの名前ファイルに改名する

次に
顔画像から特徴を抽出、特徴辞書として保存する

python generate_feature_dictionary.py snowpool.jpg

これで
snowpool.npy
が作成される

同様に家族分も実行する

python resize_save.py PXL_20240218_063620749.jpg

で画像ファイルを1/4にして

python generate_aligned_faces.py PXL_20240218_063620749_quarter.jpg

で写真から顔を抽出

mv face001.jpg child.jpg

python generate_feature_dictionary.py child.jpg

これでそれぞれのnpyファイルができる

次に識別
モデルが変更になっているので

     weights = os.path.join(directory, "yunet.onnx")

を

    weights = os.path.join(directory, "face_detection_yunet_2023mar.onnx")

というように
指定するモデルを
変更する

編集するファイルは

face_recognizer.py

これで

python face_recognizer.py PXL_20240218_063620749.jpg

を実行したら

OpenCV: Couldn't read video stream from file "/Users/snowpool/aw10s/face_recog/image.jpg"

となる

https://sites.google.com/iot-com.net/home/ホーム/実験室/jetson-nano/jetson-nanoのopen-cvで顔認証
によれば

#            return True, (user_id, cos_score)　　　←オリジナルのtypo
            return True, (user_id, score)

とあるので

            # return True, (user_id, cos_score)
            return True, (user_id, score)

というように修正

そして
そのままだとファイル名が指定されているので
コマンドラインからファイル名を指定して実行できるように
ソースを変更する

Mainの部分を

# main関数の引数を追加
def main(image_path):
    # captureの初期化を変更
    capture = cv2.VideoCapture(image_path)  # コマンドラインから指定された画像ファイル

として

if __name__ == '__main__':
    # Take the image path from the command line and hand it to main().
    cli = argparse.ArgumentParser(description="Face Recognition")
    cli.add_argument('image', help="Path to the image file")
    main(cli.parse_args().image)

と書き換える

これで再度実行してみる

Traceback (most recent call last):
  File "/Users/snowpool/aw10s/face_recog/face_recognizer.py", line 108, in <module>
    main(args.image)
  File "/Users/snowpool/aw10s/face_recog/face_recognizer.py", line 37, in main
    files = glob.glob(os.path.join(directory, "*.npy"))
NameError: name 'directory' is not defined

となった

原因は

    directory = os.path.dirname(__file__)

を削除したことでディレクトリの指定ができなくなっていた

import os
import sys
import glob
import numpy as np
import cv2
import argparse

COSINE_THRESHOLD = 0.363
NORML2_THRESHOLD = 1.128

# Compare a feature against the dictionary and return the matched user and score.
def match(recognizer, feature1, dictionary):
    """Find the registered user whose feature best matches ``feature1``.

    Args:
        recognizer: Object whose ``match(feature1, feature2, dis_type)``
            returns a similarity score.
        feature1: Feature of the face to identify.
        dictionary: Iterable of ``(user_id, feature)`` pairs.

    Returns:
        ``(True, (user_id, score))`` for the highest-scoring entry whose
        cosine score exceeds ``COSINE_THRESHOLD``, or ``(False, ("", 0.0))``
        when no entry exceeds it.
    """
    best = None
    for user_id, feature2 in dictionary:
        score = recognizer.match(feature1, feature2, cv2.FaceRecognizerSF_FR_COSINE)
        # Keep the strongest match instead of the first one above the
        # threshold, so the answer does not depend on dictionary order when
        # several registered faces are similar.
        if score > COSINE_THRESHOLD and (best is None or score > best[1]):
            best = (user_id, score)

    if best is None:
        return False, ("", 0.0)
    return True, best

# def main():
#     # キャプチャを開く
#     directory = os.path.dirname(__file__)
#     capture = cv2.VideoCapture(os.path.join(directory, "image.jpg")) # 画像ファイル
# main関数の引数を追加
def _load_feature_dictionary(directory):
    """Load every ``*.npy`` feature file found in ``directory``.

    Returns:
        A list of ``(user_id, feature)`` pairs, where ``user_id`` is the
        file name without its extension.
    """
    dictionary = []
    for file in glob.glob(os.path.join(directory, "*.npy")):
        user_id = os.path.splitext(os.path.basename(file))[0]
        dictionary.append((user_id, np.load(file)))
    return dictionary


def main(image_path):
    """Detect and identify faces in ``image_path`` and show the annotated frames.

    Feature files (``*.npy``) and the ONNX models are read from the directory
    containing this script. Each detected face is drawn with a green box when
    it matches a registered user and a red box otherwise, labelled with the
    user id and score.

    Args:
        image_path: Path passed to ``cv2.VideoCapture`` (given on the
            command line).

    Raises:
        SystemExit: If the input cannot be opened.
    """
    directory = os.path.dirname(__file__)

    # Open the input given on the command line.
    capture = cv2.VideoCapture(image_path)
    if not capture.isOpened():
        # Exit with a message and a non-zero status rather than silently.
        sys.exit(f"入力を開けませんでした: {image_path}")

    try:
        # Load the registered features.
        dictionary = _load_feature_dictionary(directory)

        # Load the detection and recognition models.
        weights = os.path.join(directory, "face_detection_yunet_2023mar.onnx")
        face_detector = cv2.FaceDetectorYN_create(weights, "", (0, 0))
        weights = os.path.join(directory, "face_recognizer_fast.onnx")
        face_recognizer = cv2.FaceRecognizerSF_create(weights, "")

        while True:
            # Grab the next frame.
            result, image = capture.read()
            if not result:
                # No more frames: wait for a key press so the last displayed
                # frame stays on screen, then stop.
                cv2.waitKey(0)
                break

            # Convert anything that is not 3-channel into 3-channel BGR.
            channels = 1 if len(image.shape) == 2 else image.shape[2]
            if channels == 1:
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
            if channels == 4:
                image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

            # Tell the detector the size of this frame.
            height, width, _ = image.shape
            face_detector.setInputSize((width, height))

            # Detect faces; detect() yields None for "no faces".
            result, faces = face_detector.detect(image)
            faces = faces if faces is not None else []

            for face in faces:
                # Crop/align the face and extract its feature.
                aligned_face = face_recognizer.alignCrop(image, face)
                feature = face_recognizer.feature(aligned_face)

                # Look the feature up in the dictionary.
                result, user = match(face_recognizer, feature, dictionary)

                # Draw the bounding box: green for a match, red otherwise.
                box = list(map(int, face[:4]))
                color = (0, 255, 0) if result else (0, 0, 255)
                thickness = 2
                cv2.rectangle(image, box, color, thickness, cv2.LINE_AA)

                # Draw the recognition result just above the box.
                user_id, score = user if result else ("unknown", 0.0)
                text = "{0} ({1:.2f})".format(user_id, score)
                position = (box[0], box[1] - 10)
                font = cv2.FONT_HERSHEY_SIMPLEX
                scale = 0.6
                cv2.putText(image, text, position, font, scale, color, thickness, cv2.LINE_AA)

            # Show the annotated frame; 'q' stops the loop.
            cv2.imshow("face recognition", image)
            key = cv2.waitKey(1)
            if key == ord('q'):
                break
    finally:
        # Always release the capture and close the windows, even on error.
        capture.release()
        cv2.destroyAllWindows()

# if __name__ == '__main__':
#     main()
if __name__ == '__main__':
    # Take the image path from the command line and hand it to main().
    cli = argparse.ArgumentParser(description="Face Recognition")
    cli.add_argument('image', help="Path to the image file")
    main(cli.parse_args().image)

で
再度

python face_recognizer.py PXL_20240218_063620749.jpg

を
実行したら顔の認識ができた

yolov8 を Google Colab で実行

yolov8のテスト

自動ラベルで作成したものが間違っているのか
それとも変換したのが問題なのかを知りたいので
一度試す

Colabで実験する

# Install ultralytics
!pip install ultralytics

でyolov8インストール

from google.colab import drive
drive.mount('/content/drive')

でマウント

!yolo obb train data=/content/drive/MyDrive/InventoryControl/daily_necessities_label/data.yaml pretrained=yolov8n-obb.pt epochs=100 imgsz=640 exist_ok=True

で学習

このコマンドは、YOLO (You Only Look Once) モデルを用いて物体検出の学習を行うためのものです。特に、YOLOv8n-obbモデルを用いて、向き付き境界ボックス（Oriented Bounding Boxes, OBB）を使用して物体を検出する訓練を行います。以下は各パラメータの詳細です：
* train: このオプションは、モデルを訓練モードに設定します。
* data=/content/drive/MyDrive/InventoryControl/daily_necessities_label/data.yaml: 訓練に使用するデータセットの設定ファイルのパスです。このYAMLファイルには、訓練データ、検証データのパスや、クラス名が含まれています。
* pretrained=yolov8n-obb.pt: 事前訓練済みのモデルファイル。このファイルを初期の重みとして使用して、訓練を開始します。
* epochs=100: モデルが訓練データを何回繰り返して学習するかを指定します。この場合、100回繰り返します。
* imgsz=640: 入力画像のサイズを640ピクセルにリサイズします。
* exist_ok=True: 既に訓練結果のフォルダが存在しても、エラーを出さずに上書きまたは新たに訓練を開始することを許可します。
このコマンドを実行することで、指定されたパラメータでYOLOモデルの訓練が行われ、物体検出の精度を向上させることができます。

とりあえず、バスクリンだけでなく
バスロマンも学習させる

そして肌おもいも学習させて、その状態から実行してみる

とりあえずバスロマンと肌おもいの画像からは
バスロマンをバスクリンと誤認識してるけど
カウントはできた

バスクリンの在庫を使い切ったため
バスロマンと肌おもいの写真で識別してみました

バスクリンとバスロマンを誤認識してますが
数は合っていますので
在庫管理には使えるとは思います

いっそバスロマンも学習すれば誤認識はなくなるかもしれません

以下ログと使用したテストの画像です

# Install ultralytics
!pip install ultralytics

でyolov8インストール

from google.colab import drive
drive.mount('/content/drive')

でgoogle driveマウント

!yolo obb train data=/content/drive/MyDrive/InventoryControl/daily_necessities_label/data.yaml pretrained=yolov8n-obb.pt epochs=400 exist_ok=True

で前回３７０程度のエポックで停止したので
今回は４００にしてA１００で実行

import os
import subprocess

source_file = '/content/drive/MyDrive/PXL_20240617_182349485.jpg'

# テキストファイルのパスを構築（画像ファイル名と同じ）
file_name, file_extension = os.path.splitext(source_file)
label_file_path = '/content/runs/obb/predict/labels/' + os.path.basename(file_name) + '.txt'

# ファイルの存在を確認し、存在する場合は削除
if os.path.exists(label_file_path):
    os.remove(label_file_path)

# YOLOを使用して予測を実行
!yolo obb predict model=/content/runs/obb/train/weights/best.pt source='{source_file}' save=True save_txt=True exist_ok=True

# ファイルが存在する場合のみ、テキストファイルの行数を取得して表示
if os.path.exists(label_file_path):
    num_lines = subprocess.check_output(["wc", "-l", label_file_path]).decode().split()[0]
    print("バスクリンの数は", num_lines)
else:
    print("ファイルが見つかりませんでした。")

実行結果は

Ultralytics YOLOv8.2.35 :rocket: Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
YOLOv8n-obb summary (fused): 187 layers, 3077804 parameters, 0 gradients, 8.3 GFLOPs

image 1/1 /content/drive/MyDrive/PXL_20240617_182349485.jpg: 1024x800 143.7ms
Speed: 12.4ms preprocess, 143.7ms inference, 197.3ms postprocess per image at shape (1, 3, 1024, 800)
Results saved to runs/obb/predict
1 label saved to runs/obb/predict/labels
:bulb: Learn more at https://docs.ultralytics.com/modes/predict
バスクリンの数は 1

日用品の買い物の時に少しずつ写真を撮影し
学習データに使っていこうと思います

Yolov8 を webカメラで使う

import cv2
from yolov8.utils.webcam import Webcam

def main():
    """Read webcam frames in a loop, run detection on each and show the result.

    NOTE(review): relies on ``Webcam`` from ``yolov8.utils.webcam``; running
    this script fails at that import with ModuleNotFoundError, so the
    behaviour of ``Webcam`` described below is unverified.
    """
    webcam = Webcam(source=0)  # 0 normally refers to the default webcam
    while True:
        frame = webcam.get_frame()
        # Stop when no frame is returned.
        if frame is None:
            break
        # Run YOLOv8 object detection on the frame
        results = webcam.model(frame)
        # Display the detection result
        results.show()
        # Quit when the 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

if __name__ == '__main__':
    main()

が
chatgptの答えだが

実行すると

Traceback (most recent call last):
  File "/Users/snowpool/aw10s/inventory/webcam_yv8.py", line 2, in <module>
    from yolov8.utils.webcam import Webcam
ModuleNotFoundError: No module named 'yolov8'

となるので
yolov8 webカメラ
で検索し調べる

【やってみた】YOLOv8の機能試す＆Webカメラでリアルタイム推論

によれば
Webカメラの場合はカメラ番号を入れれば実行可能とのこと

from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Source 0 is the camera number; show=True displays the annotated frames.
# stream=True makes the call return a generator yielding one result per
# frame, so the loop below prints results as they arrive. Without it the
# results are collected into a list first, which never completes on an
# endless webcam stream and keeps every result in memory.
results = model(0, show=True, stream=True)
for indexed_result in enumerate(results):
    print(indexed_result)

を実行したら
M1macbookair のカメラからyolov8が起動し
該当するものが判定された

なおこのコードの場合
Ctrl + c で止めるまでずっと動きます

OpenCVのSFaceで顔認証の準備

OpenCVのSFaceで顔認証

https://sites.google.com/iot-com.net/home/ホーム/実験室/jetson-nano/jetson-nanoのopen-cvで顔認証
を参考に
OpenCVにDNNを使用した顔認識SFaceが実装され、誰の顔かを認識できる様になったとの記事を見つけて試してみました。
記事 : OpenCVの新しい顔認識を試してみる https://qiita.com/UnaNancyOwen/items/8c65a976b0da2a558f06
Github : OpenCV ObjDetect Module Face Recognition (SFace) Sample https://gist.github.com/UnaNancyOwen/49df508ad8b6d9520024354df0e3e740

顔認識は OpenCV 4.5.4 以上からの導入になる

基本的にはGithub のPython をコピペすればそのまま動作する

generate_aligned_faces.py
入力した写真から人の顔の部分を切り出して保存するプログラム
複数の人物が写っている場合は全員を切り出して
face001.jpg , face002.jpg ･･･　と名前を付けて保存する
出力されたファイル名を人の名前に変更しておくと後々便利です。　
face001.jpg → taro.jpg
というようにリネームする

generate_feature_dictionary.py
切り出した顔のjpgファイルを読み込んで、顔の特徴量に変換するプログラム
顔写真 taro.jpg を入力すると　顔の特徴量 taro.npy が出力される
このnumpyファイルに各個人の顔の特徴量が128次元ベクトルに変換されて入る

face_recognizer.py
入力された写真に上記で作成した顔の特徴量が近い人が写っているかを判別するプログラム
特徴量の npyファイルは同じフォルダに入っているものが全て自動で読み込まれる
表示される名前は特徴量ファイル名となるので人物名をファイル名にした方がわかりやすい
類似した顔が無い場合には Unknown と表示

これらを元に実践する

generate_aligned_faces.py
で写真のファイルを引数にして実行

python generate_aligned_faces.py image.jpg

とすれば
写真に写っている人の分だけファイルができる
そのファイル名を人の名前に変更する

つまり全て
face001.jpg
という感じで
face00x.jpg
となっているので写真ごとに名前を変える

次に
generate_feature_dictionary.py

切り出した顔のjpgファイルを読み込んで、顔の特徴量に変換するプログラムです。
例えば顔写真 taro.jpg を入力すると　顔の特徴量 taro.npy が出力されます。
このnumpyファイルに各個人の顔の特徴量が128次元ベクトルに変換されて入ってます。

例

python generate_feature_dictionary.py face001.jpg
python generate_feature_dictionary.py face002.jpg

写真の人の分だけ実行すればOK
人物名なら

python generate_feature_dictionary.py taro.jpg
python generate_feature_dictionary.py jiro.jpg
python generate_feature_dictionary.py hoge.jpg
python generate_feature_dictionary.py hogehoge.jpg

これで
顔の特徴量 taro.npy
というようなnpyファイルが作成される
実際には画像ファイル名.npyファイルになる

実行するにあたり
写真を用意する必要がある
横も認識したいのなら、横の写真も必要になる

とりあえず写真を探すこと
まずは自分の写真を撮影し
GooglePhotoからダウンロード

 cp ~/Downloads/PXL_20240612_091410912.jpg .

でコピー

作業ディレクトリは

/Users/snowpool/aw10s/face_recog

で行う

https://gist.github.com/UnaNancyOwen/49df508ad8b6d9520024354df0e3e740#file-face_recognizer-pyのコードをそのまま使う

Download Zip
でダウンロードし展開
中身を

 cp ~/Downloads/49df508ad8b6d9520024354df0e3e740-54e7dbd2f15b6137dc2b6d4ef6ce3143528c3978/* .

でコピー

ソースだけでなくモデルのダウンロードが必要

https://github.com/ShiqiYu/libfacedetection.train/blob/master/tasks/task1/onnx/yunet.onnx
をクリックしたら

404 - page not found
The 
master
 branch of 
libfacedetection.train
 does not contain the path 

https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view
については
プレビューできません

となる

とりあえずモデルのダウンロードを調べることにする

https://www.eranger.co.jp/blog/news/face-detection-recognition-by-opencv
の記事を参考に

face_detection_yunet_2023mar.onnx
で検索

https://github.com/opencv/opencv_zoo/blob/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx
にデータがあったので

https://github.com/opencv/opencv_zoo/tree/main
のreadmeを見てから

git clone https://github.com/opencv/opencv_zoo.git

でリポジトリのclone

cp opencv_zoo/models/face_detection_yunet/face_detection_yunet_2023mar.onnx .

で作業ディレクトリにコピー

python generate_aligned_faces.py PXL_20240612_091410912.jpg

を実行したが

Traceback (most recent call last):
  File "/Users/snowpool/aw10s/face_recog/generate_aligned_faces.py", line 60, in <module>
    main()
  File "/Users/snowpool/aw10s/face_recog/generate_aligned_faces.py", line 33, in main
    face_detector = cv2.FaceDetectorYN_create(weights, "", (0, 0))
cv2.error: OpenCV(4.10.0) /Users/xperience/GHA-Actions-OpenCV/_work/opencv-python/opencv-python/opencv/modules/dnn/src/onnx/onnx_importer.cpp:277: error: (-5:Bad argument) Can't read ONNX file: /Users/snowpool/aw10s/face_recog/yunet.onnx in function 'ONNXImporter'

となる

python

でPythonインタープリターを使用し

import cv2
print(cv2.__version__)

でバージョンを確認すると
4.10.0
となった

pip show opencv-python

Name: opencv-python
Version: 4.8.0.74
Summary: Wrapper package for OpenCV python bindings.
Home-page: https://github.com/opencv/opencv-python
Author: 
Author-email: 
License: Apache 2.0
Location: /Users/snowpool/.pyenv/versions/3.10.6/lib/python3.10/site-packages
Requires: numpy, numpy, numpy, numpy, numpy
Required-by: ultralytics

だとバージョンが違う

Pythonインタープリターで
現在のインポートされているOpenCVの場所を確認

import cv2
print(cv2.__file__)

の結果は

/Users/snowpool/.pyenv/versions/3.10.6/lib/python3.10/site-packages/cv2/__init__.py

システムパスの確認

import sys
print(sys.path)

の結果は

['', '/Users/snowpool/.pyenv/versions/3.10.6/lib/python310.zip', '/Users/snowpool/.pyenv/versions/3.10.6/lib/python3.10', '/Users/snowpool/.pyenv/versions/3.10.6/lib/python3.10/lib-dynload', '/Users/snowpool/.pyenv/versions/3.10.6/lib/python3.10/site-packages']

その前によくREADMEを読んで再度実行

rm face_detection_yunet_2023mar.onnx

で一度削除し

cd opencv_zoo

git lfs install

これで
Git LFSが有効化され、大きなファイルを扱う準備が整う

git lfs pull
Git LFSを用いて管理されているファイル（大容量のファイルなど）をダウンロードします。git clone はリポジトリのメタデータと小さなファイルのみをダウンロードするため、git lfs pull を使用してLFSを介して管理されている大きなファイルを取得する必要があります
とのこと

cp opencv_zoo/models/face_detection_yunet/face_detection_yunet_2023mar.onnx .

でファイルをコピー

次に
* face_recognizer_fast.onnx
これもダウンロードできなかったので
githubで検索

https://github.com/MYJLAB-2022-HackThon/FaceServer/blob/27ce7099eb3ec46bb07d988b9681e9cc2a6b291c/app/face_recognizer_fast.onnx
にあったので

git clone https://github.com/MYJLAB-2022-HackThon/FaceServer.git

でリポジトリをclone

cp FaceServer/app/face_recognizer_fast.onnx .

でファイルをコピー

これで再度

 python generate_aligned_faces.py PXL_20240612_091410912.jpg

を実行したら

Traceback (most recent call last):
  File "/Users/snowpool/aw10s/face_recog/generate_aligned_faces.py", line 60, in <module>
    main()
  File "/Users/snowpool/aw10s/face_recog/generate_aligned_faces.py", line 33, in main
    face_detector = cv2.FaceDetectorYN_create(weights, "", (0, 0))
cv2.error: OpenCV(4.10.0) /Users/xperience/GHA-Actions-OpenCV/_work/opencv-python/opencv-python/opencv/modules/dnn/src/onnx/onnx_importer.cpp:277: error: (-5:Bad argument) Can't read ONNX file: /Users/snowpool/aw10s/face_recog/yunet.onnx in function 'ONNXImporter'

となる

多分

    # モデルを読み込む
    weights = os.path.join(directory, "yunet.onnx")
    face_detector = cv2.FaceDetectorYN_create(weights, "", (0, 0))
    weights = os.path.join(directory, "face_recognizer_fast.onnx")

のonnxファイルの指定を変えればいけるはず

generate_aligned_faces.py
の中の

    # weights = os.path.join(directory, "yunet.onnx")

を

    weights = os.path.join(directory, "face_detection_yunet_2023mar.onnx")

にして実行

とりあえず

import os
import argparse
import numpy as np
import cv2

def main():
    """Detect faces in an image and save each aligned crop as ``faceNNN.jpg``.

    The image path comes from the command line. A bare file name is resolved
    relative to the directory of this script. The crops are written next to
    the input image, and each one is also shown in its own window until a key
    is pressed.
    """
    # Parse the arguments.
    parser = argparse.ArgumentParser("generate aligned face images from an image")
    parser.add_argument("image", help="input image file path (./image.jpg)")
    args = parser.parse_args()

    # The ONNX models live next to this script, regardless of where the
    # input image is located.
    script_directory = os.path.dirname(os.path.abspath(__file__))

    # Resolve the image path; `directory` is where the crops are written.
    path = args.image
    directory = os.path.dirname(args.image)
    if not directory:
        directory = script_directory
        path = os.path.join(directory, args.image)

    # Open the image. cv2.imread() returns None on failure, so report it
    # instead of exiting silently.
    image = cv2.imread(path)
    if image is None:
        parser.error(f"画像が読み込めませんでした: {path}")

    # Convert anything that is not 3-channel into 3-channel BGR.
    channels = 1 if len(image.shape) == 2 else image.shape[2]
    if channels == 1:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if channels == 4:
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

    # Load the detection and recognition models from the script directory.
    weights = os.path.join(script_directory, "face_detection_yunet_2023mar.onnx")
    face_detector = cv2.FaceDetectorYN_create(weights, "", (0, 0))
    weights = os.path.join(script_directory, "face_recognizer_fast.onnx")
    face_recognizer = cv2.FaceRecognizerSF_create(weights, "")

    # Tell the detector the size of the image.
    height, width, _ = image.shape
    face_detector.setInputSize((width, height))

    # Detect faces; detect() yields None for "no faces".
    _, faces = face_detector.detect(image)
    if faces is None:
        faces = []

    # Crop and align every detected face.
    aligned_faces = [face_recognizer.alignCrop(image, face) for face in faces]

    if not aligned_faces:
        # Nothing to show or save. Return here instead of reaching
        # cv2.waitKey(0) with no window open.
        # NOTE(review): waiting for a key with no window may block forever
        # and look like a hang - confirm on the target platform.
        print("顔が検出されませんでした。")
        return

    # Show and save the crops as face001.jpg, face002.jpg, ...
    for i, aligned_face in enumerate(aligned_faces, start=1):
        cv2.imshow("aligned_face {:03}".format(i), aligned_face)
        cv2.imwrite(os.path.join(directory, "face{:03}.jpg".format(i)), aligned_face)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()

として

サンプル画像をダウンロード後

python generate_aligned_faces.py 136777535-36d6bce1-91bf-446c-9377-645cc60b9c65.jpg

とすると
face001.jpg
face002.jpg
が作成されますが

スマホで撮影した画像で

python generate_aligned_faces.py PXL_20240612_091410912.jpg

とすると処理が終わりません

ファイルサイズなども関連しているかもしれないため一度ファイルサイズなども調べてみます

ファイル情報を調べたいので

 vim file_info.py

で

import cv2
import os
import argparse

def main():
    """Print the width, height, channel count and file size of an image.

    The image path is taken from the command line. If the image cannot be
    read, a message is printed and nothing else is reported.
    """
    # Read the image path from the command line.
    cli = argparse.ArgumentParser(description="Display image properties")
    cli.add_argument("image_path", help="Path to the image file")
    image_path = cli.parse_args().image_path

    # Load the image; imread() gives None when it cannot be read.
    image = cv2.imread(image_path)
    if image is None:
        print("画像が読み込めませんでした。")
    else:
        # image.shape is (height, width, channels).
        height, width, channels = image.shape
        print(f"画像の幅: {width} ピクセル")
        print(f"画像の高さ: {height} ピクセル")
        print(f"色チャネル数: {channels}")

        # Size of the file on disk, in bytes.
        file_size = os.stat(image_path).st_size
        print(f"ファイルサイズ: {file_size} バイト")

if __name__ == '__main__':
    main()

で
python file_info.py PXL_20240612_091410912.jpg

結果は

画像の幅: 2736 ピクセル
画像の高さ: 3648 ピクセル
色チャネル数: 3
ファイルサイズ: 2319152 バイト

python file_info.py 136777535-36d6bce1-91bf-446c-9377-645cc60b9c65.jpg

だと

画像の幅: 450 ピクセル
画像の高さ: 312 ピクセル
色チャネル数: 3
ファイルサイズ: 26914 バイト

リサイズして半分にしてから実行したいので

vim resize_save.py

中身は

import cv2
import os
import argparse

def main():
    """Resize an image and save it next to the original as ``*_resized.jpg``.

    Command-line arguments:
        image_path: Path to the image file.
        --scale: Factor applied to both width and height. Defaults to 0.5
            (half size); for example 0.25 produces a quarter-size image in
            one step.
    """
    # Build the command-line parser.
    parser = argparse.ArgumentParser(description="Resize and save an image")
    parser.add_argument("image_path", help="Path to the image file")
    parser.add_argument("--scale", type=float, default=0.5,
                        help="Scale factor for width and height (default: 0.5)")
    args = parser.parse_args()
    if args.scale <= 0:
        parser.error("--scale must be greater than 0")

    # Load the image; imread() gives None when it cannot be read.
    image = cv2.imread(args.image_path)
    if image is None:
        print("画像が読み込めませんでした。")
        return

    # Original height and width.
    height, width = image.shape[:2]

    # New dimensions; never let a side collapse to 0 pixels, which
    # cv2.resize() would reject.
    new_width = max(1, int(width * args.scale))
    new_height = max(1, int(height * args.scale))

    # Resize the image.
    resized_image = cv2.resize(image, (new_width, new_height))

    # Output file name: the original name with "_resized.jpg" appended.
    new_file_path = os.path.splitext(args.image_path)[0] + "_resized.jpg"

    # Save the result. imwrite() returns False on failure instead of raising,
    # so only report success when the file was actually written.
    if not cv2.imwrite(new_file_path, resized_image):
        print(f"リサイズした画像を保存できませんでした: {new_file_path}")
        return
    print(f"リサイズされた画像が保存されました: {new_file_path}")

if __name__ == '__main__':
    main()

これを実行し

python resize_save.py PXL_20240612_091410912.jpg

でファイルサイズを半分にしたが結果は同じのため

python resize_save.py PXL_20240612_091410912_resized.jpg

でさらに半分にすることでようやく

face001.jpg

が作成できた

これにより
Pixel8で撮影したスマホの写真で顔データを作る場合には
元画像の1/4にする必要があることが判明

ちなみにファイルの比較は

python file_info.py PXL_20240612_091410912_resized_resized.jpg

画像の幅: 684 ピクセル
画像の高さ: 912 ピクセル
色チャネル数: 3
ファイルサイズ: 228769 バイト

python file_info.py PXL_20240612_091410912.jpg

画像の幅: 2736 ピクセル
画像の高さ: 3648 ピクセル
色チャネル数: 3
ファイルサイズ: 2319152 バイト