Cloud vision APIで文字列として商品情報が取得できるか試す
vim purchase_list.py
中身を
from google.cloud import vision
from PIL import Image
import io


def resize_image_if_needed(image_path, max_size_mb=40):
    """Return the image encoded as bytes, halving its dimensions until the
    encoded size is at or below ``max_size_mb`` megabytes.

    The Vision API rejects request payloads above 40 MB, so the default
    keeps the bytes just under that limit.
    """
    with open(image_path, "rb") as fb:
        image = Image.open(fb)
        image_format = image.format  # remember before resizing; resize() drops it
        image_io = io.BytesIO()
        image.save(image_io, format=image_format)
        image_size_mb = image_io.tell() / (1024 * 1024)
        # Halve repeatedly: a single halving (the original behavior) may still
        # leave the encoded image above the limit for very large flyers.
        while image_size_mb > max_size_mb and image.width > 1 and image.height > 1:
            new_size = (image.width // 2, image.height // 2)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is its replacement.
            image = image.resize(new_size, Image.LANCZOS)
            image_io = io.BytesIO()
            image.save(image_io, format=image_format)
            image_size_mb = image_io.tell() / (1024 * 1024)
        return image_io.getvalue()


client = vision.ImageAnnotatorClient()
# image_path = "combined_image.jpg"
image_path = "image_0.jpg"
resized_image = resize_image_if_needed(image_path)
image = vision.Image(content=resized_image)
response = client.document_text_detection(image=image)
texts = response.text_annotations
if texts:
    print(texts[0].description)
else:
    print("No text detected.")
で実行したが
raise exceptions.from_grpc_error(exc) from exc google.api_core.exceptions.InvalidArgument: 400 Request payload size exceeds the limit: 41943040 bytes.
となるため容量を40M以下になるようにする
from google.cloud import vision
from PIL import Image
import io


def compress_image(image_path, max_size_mb=40):
    """Re-encode an image at decreasing quality until it fits under
    ``max_size_mb`` megabytes, returning the encoded bytes.

    Quality only affects lossy formats such as JPEG; for PNG the loop may
    never shrink the payload below the limit — TODO confirm input is JPEG.
    """
    with open(image_path, "rb") as fb:
        image = Image.open(fb)
        image_format = image.format
        image_io = io.BytesIO()
        # Try different quality settings to get under the size limit.
        for quality in range(95, 10, -5):
            # BUG FIX: the original reused one buffer with seek(0) but never
            # truncated it, so a smaller re-encode left stale trailing bytes
            # from the previous attempt and getvalue() returned a corrupt,
            # oversized payload. A fresh buffer per attempt avoids that.
            image_io = io.BytesIO()
            image.save(image_io, format=image_format, quality=quality)
            size_mb = image_io.tell() / (1024 * 1024)
            if size_mb <= max_size_mb:
                break
        return image_io.getvalue()


client = vision.ImageAnnotatorClient()
image_path = "preprocessed_image.jpg"
compressed_image = compress_image(image_path)
image = vision.Image(content=compressed_image)
response = client.document_text_detection(image=image)
texts = response.text_annotations
if texts:
    print(texts[0].description)
else:
    print("No text detected.")
よくみたらファイル名が違っていた
杏林堂のチラシは
combined_image.jpg
だったので
from google.cloud import vision
from PIL import Image
import io


def resize_image_if_needed(image_path, max_size_mb=40):
    """Return the image encoded as bytes, halving its dimensions until the
    encoded size is at or below ``max_size_mb`` megabytes (the Vision API
    payload limit is 40 MB).
    """
    with open(image_path, "rb") as fb:
        image = Image.open(fb)
        image_format = image.format  # remember before resizing; resize() drops it
        image_io = io.BytesIO()
        image.save(image_io, format=image_format)
        image_size_mb = image_io.tell() / (1024 * 1024)
        # Halve repeatedly: one halving (the original behavior) may still
        # leave a very large combined flyer image above the limit.
        while image_size_mb > max_size_mb and image.width > 1 and image.height > 1:
            new_size = (image.width // 2, image.height // 2)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is its replacement.
            image = image.resize(new_size, Image.LANCZOS)
            image_io = io.BytesIO()
            image.save(image_io, format=image_format)
            image_size_mb = image_io.tell() / (1024 * 1024)
        return image_io.getvalue()


client = vision.ImageAnnotatorClient()
image_path = "combined_image.jpg"
resized_image = resize_image_if_needed(image_path)
image = vision.Image(content=resized_image)
response = client.document_text_detection(image=image)
texts = response.text_annotations
if texts:
    print(texts[0].description)
else:
    print("No text detected.")
で
python purchase_list.py > purchase_list.txt
で結果をテキストファイルに保存した
次に
https://github.com/yakipudding/flyer-ocr
のソース
スーパーのチラシをOCRしてSlackに通知したら便利だった
を参考に
OCRをかけて自分が狙っている商品が出たら通知が来るようにする
使う内容は
# OCR
def detect_text(image_paths):
    """Run Cloud Vision text detection over every path in ``image_paths``
    and return the concatenated description text of all annotations.

    Raises:
        Exception: when the API reports a per-request error.
    """
    from google.cloud import vision
    import io

    client = vision.ImageAnnotatorClient()
    all_text = ''
    for image_path in image_paths:
        with io.open(image_path, 'rb') as image_file:
            content = image_file.read()
        image = vision.Image(content=content)
        response = client.text_detection(image=image)
        # BUG FIX: check the API error BEFORE consuming annotations; the
        # original appended text first, so a failed request could leave
        # partial text in the accumulator before raising.
        if response.error.message:
            raise Exception(
                '{}\nFor more info on error messages, check: '
                'https://cloud.google.com/apis/design/errors'.format(
                    response.error.message))
        for text in response.text_annotations:
            all_text += str(text.description)
    return all_text
で
OCR結果を変数に格納
そしてこの結果から指定したキーワードが存在するかチェック
# キーワード検索
def search_words(all_text):
    """Return the watched keywords (in list order) that occur in the
    OCR text ``all_text``."""
    # ★任意のキーワード(商品名)を設定
    keywords = ['ヨーグルト', '若鶏もも肉']
    # Keep every configured keyword that appears anywhere in the text.
    return [keyword for keyword in keywords if keyword in all_text]
これでリストに商品名を入れている
Slackに通知を送る
の部分をLINE Notifyで送る処理に変えればOK
def slack_notice(results):
    """Post one Slack message per result via an incoming webhook.

    Each result appears to be a dict with "date", "hitwords" (list of
    matched keyword strings) and "url" keys — TODO confirm against the
    caller that builds the results.
    """
    import slackweb
    # ★ replace the placeholder with your actual webhook URL
    slack = slackweb.Slack(url='★WebhookのURL')
    for result in results:
        text = f'{result["date"]} チラシ掲載商品:{",".join(result["hitwords"])}\n<{result["url"]}|チラシを見る>'
        slack.notify(text=text)
とりあえずコードを書いていく
商品リストを
settings.json
に記述する
これはチラシの文字列とマッチさせるため
とりあえずは杏林堂のチラシをOCRかけて一致するようなキーワードにする
まずチラシ統合画像がかぶるので
日付をファイル名につけるように変更
import time
import requests
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.safari.service import Service as SafariService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime


def open_link_in_safari(url):
    """Launch Safari via WebDriver, open ``url`` and return the driver."""
    # Launch the browser using the Safari driver
    service = SafariService()
    driver = webdriver.Safari(service=service)
    driver.get(url)
    time.sleep(3)  # wait 3 seconds after opening the link
    return driver


def click_date_element(driver, base_xpath):
    """Click the first link under ``base_xpath`` whose title contains '日替'."""
    try:
        # Find the date element inside the container and click it
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        links = container.find_elements(By.XPATH, ".//a[contains(@title, '日替')]")
        for link in links:
            if '日替' in link.get_attribute('title'):
                link.click()
                print(f'Clicked on link with title: {link.get_attribute("title")}')
                time.sleep(3)  # wait 3 seconds after clicking
                return
        print('No link found with title containing: 日替')
    except Exception as e:
        print(f'Error clicking on element: {e}')


def get_images_from_container(driver, base_xpath):
    """Collect flyer image URLs (src containing 'index/img') under ``base_xpath``."""
    image_urls = []
    try:
        # Find the image elements inside the container
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        images = container.find_elements(By.TAG_NAME, 'img')
        for img in images:
            src = img.get_attribute('src')
            # Filter images on a specific condition
            if 'index/img' in src:
                image_urls.append(src)
                print(f'Found image: {src}')
    except Exception as e:
        print(f'Error finding images: {e}')
    return image_urls


def download_images(image_urls):
    """Download each URL into a PIL Image; failed downloads are skipped."""
    images = []
    for i, url in enumerate(image_urls):
        response = requests.get(url)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content))
            images.append(image)
            print(f'Downloaded image_{i}.jpg')
        else:
            print(f'Failed to download {url}')
    return images


def merge_images(images, output_path):
    """Stack the images vertically into one image saved at ``output_path``."""
    widths, heights = zip(*(img.size for img in images))
    total_height = sum(heights)
    max_width = max(widths)
    combined_image = Image.new('RGB', (max_width, total_height))
    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.height
    combined_image.save(output_path)
    print(f'Saved combined image as {output_path}')


def main():
    """Scrape today's flyer, download its pages and save one combined image."""
    url = 'https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc'
    driver = open_link_in_safari(url)
    # Click the specific link
    base_xpath_click = '/html/body/div[1]/div[3]/div[1]/div/div[4]/div/div/div/div/div/div/ul'
    click_date_element(driver, base_xpath_click)
    # Find and download the images
    base_xpath_images = '/html/body/div[1]/div[3]/div[1]/div/div[2]/div[2]'
    image_urls = get_images_from_container(driver, base_xpath_images)
    driver.quit()
    if image_urls:
        images = download_images(image_urls)
        if images:
            # Get the current date and format it
            current_date = datetime.now().strftime('%Y%m%d')
            # NOTE(review): '/mnt/data/' does not exist on a local machine,
            # so save() raises FileNotFoundError here — use a local directory
            # instead (this is fixed in the later revision of the script).
            output_path = f'/mnt/data/combined_image_{current_date}.jpg'
            merge_images(images, output_path)


if __name__ == '__main__':
    main()
しかし
Clicked on link with title: 8/2 日替 Found image: https://ipqcache2.shufoo.net/c/2024/07/30/15607036918828/index/img/0_100_0.jpg Found image: https://ipqcache2.shufoo.net/c/2024/07/30/15607036918828/index/img/0_100_1.jpg Found image: https://ipqcache2.shufoo.net/c/2024/07/30/15607036918828/index/img/0_100_2.jpg Found image: https://ipqcache2.shufoo.net/c/2024/07/30/15607036918828/index/img/0_100_3.jpg Downloaded image_0.jpg Downloaded image_1.jpg Downloaded image_2.jpg Downloaded image_3.jpg Traceback (most recent call last): File "/Users/snowpool/aw10s/store_adversting_list/clik_allget_image.py", line 107, in <module> main() File "/Users/snowpool/aw10s/store_adversting_list/clik_allget_image.py", line 104, in main merge_images(images, output_path) File "/Users/snowpool/aw10s/store_adversting_list/clik_allget_image.py", line 83, in merge_images combined_image.save(output_path) File "/Users/snowpool/.pyenv/versions/3.10.6/lib/python3.10/site-packages/PIL/Image.py", line 2428, in save fp = builtins.open(filename, "w+b") FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/combined_image_20240802.jpg'
となるので
import os


def main():
    """Scrape today's flyer and save the combined image under ~/images."""
    url = 'https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc'
    driver = open_link_in_safari(url)

    # Click the daily-deals link inside its container.
    base_xpath_click = '/html/body/div[1]/div[3]/div[1]/div/div[4]/div/div/div/div/div/div/ul'
    click_date_element(driver, base_xpath_click)

    # Collect the flyer image URLs, then release the browser.
    base_xpath_images = '/html/body/div[1]/div[3]/div[1]/div/div[2]/div[2]'
    image_urls = get_images_from_container(driver, base_xpath_images)
    driver.quit()

    # Guard clauses: nothing to do when scraping or downloading found nothing.
    if not image_urls:
        return
    images = download_images(image_urls)
    if not images:
        return

    # Date-stamp the output name so runs on different days do not collide.
    current_date = datetime.now().strftime('%Y%m%d')
    target_dir = os.path.expanduser('~/images')
    os.makedirs(target_dir, exist_ok=True)  # create the folder on first run
    output_path = os.path.join(target_dir, f'combined_image_{current_date}.jpg')
    merge_images(images, output_path)


if __name__ == '__main__':
    main()
へコード変更
さっきの原因は
ローカルシステム上に/mnt/data/というディレクトリが存在しないため
解決策:
1. ディレクトリを作成:ローカルマシンにディレクトリを作成します(例: imagesというディレクトリをユーザーディレクトリ内に作成)。
2. output_pathを更新:スクリプトの中で画像を保存するパスを、この新しいディレクトリに設定します。
これでエラーはなくなるが
ホームディレクトリに画像が保存される
これだと分かりにくいので
カレントディレクトリにimagesフォルダを作成し
1つのファイルにまとめた統合画像ファイルを保存する
import os
import time
import requests
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.safari.service import Service as SafariService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime


def open_link_in_safari(url):
    """Launch Safari via WebDriver, open ``url`` and return the driver."""
    service = SafariService()
    driver = webdriver.Safari(service=service)
    driver.get(url)
    time.sleep(3)  # wait for the page to settle after navigation
    return driver


def click_date_element(driver, base_xpath):
    """Click the first link under ``base_xpath`` whose title contains '日替'."""
    try:
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        links = container.find_elements(By.XPATH, ".//a[contains(@title, '日替')]")
        for link in links:
            if '日替' in link.get_attribute('title'):
                link.click()
                print(f'Clicked on link with title: {link.get_attribute("title")}')
                time.sleep(3)  # wait for the flyer view to load
                return
        print('No link found with title containing: 日替')
    except Exception as e:
        print(f'Error clicking on element: {e}')


def get_images_from_container(driver, base_xpath):
    """Collect flyer image URLs (src containing 'index/img') under ``base_xpath``."""
    image_urls = []
    try:
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        images = container.find_elements(By.TAG_NAME, 'img')
        for img in images:
            src = img.get_attribute('src')
            # BUG FIX: get_attribute can return None, which would make the
            # 'in' test raise TypeError; guard before the substring check.
            if src and 'index/img' in src:
                image_urls.append(src)
                print(f'Found image: {src}')
    except Exception as e:
        print(f'Error finding images: {e}')
    return image_urls


def download_images(image_urls):
    """Download each URL into a PIL Image; failed downloads are skipped."""
    images = []
    for i, url in enumerate(image_urls):
        # BUG FIX: without a timeout a stalled server hangs the run forever.
        response = requests.get(url, timeout=30)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content))
            images.append(image)
            print(f'Downloaded image_{i}.jpg')
        else:
            print(f'Failed to download {url}')
    return images


def merge_images(images, output_path):
    """Stack the images vertically into one image saved at ``output_path``.

    Raises:
        ValueError: when ``images`` is empty (clearer than the opaque
            unpack error that zip(*...) would otherwise produce).
    """
    if not images:
        raise ValueError('merge_images() requires at least one image')
    widths, heights = zip(*(img.size for img in images))
    total_height = sum(heights)
    max_width = max(widths)
    combined_image = Image.new('RGB', (max_width, total_height))
    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.height
    combined_image.save(output_path)
    print(f'Saved combined image as {output_path}')


def main():
    """Scrape today's flyer, download its pages and save one combined JPEG
    under ./images with a date-stamped file name."""
    url = 'https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc'
    driver = open_link_in_safari(url)
    # Click the daily-deals link
    base_xpath_click = '/html/body/div[1]/div[3]/div[1]/div/div[4]/div/div/div/div/div/div/ul'
    click_date_element(driver, base_xpath_click)
    # Collect the flyer image URLs
    base_xpath_images = '/html/body/div[1]/div[3]/div[1]/div/div[2]/div[2]'
    image_urls = get_images_from_container(driver, base_xpath_images)
    driver.quit()
    if image_urls:
        images = download_images(image_urls)
        if images:
            # Date-stamp the name so runs on different days do not collide
            current_date = datetime.now().strftime('%Y%m%d')
            # Create an images folder in the current directory if missing
            output_dir = 'images'
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, f'combined_image_{current_date}.jpg')
            merge_images(images, output_path)


if __name__ == '__main__':
    main()
これでimagesの中に被らずに保存できる
次に文字列との一致だが
杏林堂のチラシで欲しいものがなかった
とりあえず適当に検出できた文字列をキーワードにして
一致したら
チラシの画像ファイルと文字列をLINEで送るようにする