チラシ詳細の取得 – Linux & Android Dialy

チラシ詳細の取得

Shufooで直リンクすれば
Gmail処理は不要

https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc
杏林堂
キーワード
日替

https://www.shufoo.net/pntweb/shopDetail/197728/?cid=nmail_pc
ユーコープ
キーワード
ユーコープのお買い得！

https://www.shufoo.net/pntweb/shopDetail/15782/?cid=nmail_pc
ぴあご

というように
それぞれみたいチラシのURLとキーワードがペアになっている
ならばこれをJSONとかにして当てはめることができるはず

もしかしたらxpathも同じかもしれないので

cp clik_allget_image.py piago.py

でコピーして
ぴあごとコープで試す

どうやらxpathも同じ
ただし coopのチラシがOCRの精度が良くない

とりあえずぴあごのチラシのリンクはできたけど
画像のダウンロードができていない

とりあえず杏林堂は毎日テストできるので
先に杏林堂のチラシでLINE送信を試す

def wait_for_page_load(driver, timeout=30):
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, '//img'))  # ページに画像が表示されるまで待機
    )

を追加してみる

import os
import time
import requests
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.safari.service import Service as SafariService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime

def open_link_in_safari(url):
    # Safariドライバーを使用してブラウザを起動
    service = SafariService()
    driver = webdriver.Safari(service=service)
    driver.get(url)
    time.sleep(3)  # リンクを開いた後に3秒間待機
    return driver

def click_date_element(driver, base_xpath):
    try:
        # コンテナ内の日付要素を探してクリック
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        links = container.find_elements(By.XPATH, ".//a[contains(@title, '日替')]")

        for link in links:
            if '日替' in link.get_attribute('title'):
                link.click()
                print(f'Clicked on link with title: {link.get_attribute("title")}')
                time.sleep(3)  # クリックした後に3秒間待機
                return

        print('No link found with title containing: 日替')
    except Exception as e:
        print(f'Error clicking on element: {e}')

def get_images_from_container(driver, base_xpath):
    image_urls = []
    try:
        # コンテナ内の画像要素を探す
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        images = container.find_elements(By.TAG_NAME, 'img')
        
        for img in images:
            src = img.get_attribute('src')
            # 特定の条件に基づいて画像をフィルタリング
            if 'index/img' in src:
                image_urls.append(src)
                print(f'Found image: {src}')
    except Exception as e:
        print(f'Error finding images: {e}')
    return image_urls

def download_images(image_urls):
    images = []
    for i, url in enumerate(image_urls):
        response = requests.get(url)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content))
            images.append(image)
            print(f'Downloaded image_{i}.jpg')
        else:
            print(f'Failed to download {url}')
    return images

def merge_images(images, output_path):
    widths, heights = zip(*(img.size for img in images))

    total_height = sum(heights)
    max_width = max(widths)

    combined_image = Image.new('RGB', (max_width, total_height))

    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.height

    combined_image.save(output_path)
    print(f'Saved combined image as {output_path}')

def main():
    url = 'https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc'
    driver = open_link_in_safari(url)
    # 特定のリンクをクリックする
    base_xpath_click = '/html/body/div[1]/div[3]/div[1]/div/div[4]/div/div/div/div/div/div/ul'
    click_date_element(driver, base_xpath_click)
    
    # 画像を取得してダウンロードする
    base_xpath_images = '/html/body/div[1]/div[3]/div[1]/div/div[2]/div[2]'
    image_urls = get_images_from_container(driver, base_xpath_images)
    driver.quit()
    
    if image_urls:
        images = download_images(image_urls)
        if images:
            # 現在の日付を取得してフォーマット
            current_date = datetime.now().strftime('%Y%m%d')
            # カレントディレクトリにimagesフォルダを作成
            output_dir = 'images'
            os.makedirs(output_dir, exist_ok=True)  # ディレクトリが存在しない場合は作成
            output_path = os.path.join(output_dir, f'combined_image_{current_date}.jpg')
            merge_images(images, output_path)

if __name__ == '__main__':
    main()

を

import os
import time
import requests
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.safari.service import Service as SafariService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime

def open_link_in_safari(url):
    # Safariドライバーを使用してブラウザを起動
    service = SafariService()
    driver = webdriver.Safari(service=service)
    driver.get(url)
    wait_for_page_load(driver)  # ページの読み込みを待機
    return driver

def wait_for_page_load(driver, timeout=30):
    """
    ページに画像が表示されるまで待機する関数。
    """
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, '//img'))
        )
        print("Page loaded successfully.")
    except Exception as e:
        print(f"Error waiting for page to load: {e}")

def click_date_element(driver, base_xpath):
    try:
        # コンテナ内の日付要素を探してクリック
        container = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        links = container.find_elements(By.XPATH, ".//a[contains(@title, '日替')]")

        for link in links:
            if '日替' in link.get_attribute('title'):
                link.click()
                print(f'Clicked on link with title: {link.get_attribute("title")}')
                wait_for_page_load(driver)  # クリック後のページ読み込みを待機
                return

        print('No link found with title containing: 日替')
    except Exception as e:
        print(f'Error clicking on element: {e}')

def get_images_from_container(driver, base_xpath):
    image_urls = []
    try:
        # コンテナ内の画像要素を探す
        container = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        images = container.find_elements(By.TAG_NAME, 'img')
        
        for img in images:
            src = img.get_attribute('src')
            # 特定の条件に基づいて画像をフィルタリング
            if 'index/img' in src:
                image_urls.append(src)
                print(f'Found image: {src}')
    except Exception as e:
        print(f'Error finding images: {e}')
    return image_urls

def download_images(image_urls):
    images = []
    for i, url in enumerate(image_urls):
        response = requests.get(url)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content))
            images.append(image)
            print(f'Downloaded image_{i}.jpg')
        else:
            print(f'Failed to download {url}')
    return images

def merge_images(images, output_path):
    widths, heights = zip(*(img.size for img in images))

    total_height = sum(heights)
    max_width = max(widths)

    combined_image = Image.new('RGB', (max_width, total_height))

    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.height

    combined_image.save(output_path)
    print(f'Saved combined image as {output_path}')

def main():
    url = 'https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc'
    driver = open_link_in_safari(url)
    # 特定のリンクをクリックする
    base_xpath_click = '/html/body/div[1]/div[3]/div[1]/div/div[4]/div/div/div/div/div/div/ul'
    click_date_element(driver, base_xpath_click)
    
    # 画像を取得してダウンロードする
    base_xpath_images = '/html/body/div[1]/div[3]/div[1]/div/div[2]/div[2]'
    image_urls = get_images_from_container(driver, base_xpath_images)
    driver.quit()
    
    if image_urls:
        images = download_images(image_urls)
        if images:
            # 現在の日付を取得してフォーマット
            current_date = datetime.now().strftime('%Y%m%d')
            # カレントディレクトリにimagesフォルダを作成
            output_dir = 'images'
            os.makedirs(output_dir, exist_ok=True)  # ディレクトリが存在しない場合は作成
            output_path = os.path.join(output_dir, f'combined_image_{current_date}.jpg')
            merge_images(images, output_path)

if __name__ == '__main__':
    main()

としてみる

コメントを残す コメントをキャンセル

コメントを残すコメントをキャンセル