チラシ詳細の取得
Shufooで直リンクすれば
Gmail処理は不要
https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc
杏林堂
キーワード
日替
https://www.shufoo.net/pntweb/shopDetail/197728/?cid=nmail_pc
ユーコープ
キーワード
ユーコープのお買い得!
https://www.shufoo.net/pntweb/shopDetail/15782/?cid=nmail_pc
ぴあご
というように
それぞれみたいチラシのURLとキーワードがペアになっている
ならばこれをJSONとかにして当てはめることができるはず
もしかしたらxpathも同じかもしれないので
cp clik_allget_image.py piago.py
でコピーして
ぴあごとコープで試す
どうやらxpathも同じ
ただし coopのチラシはOCRの精度が良くない
とりあえずぴあごのチラシのリンクはできたけど
画像のダウンロードができていない
とりあえず杏林堂は毎日テストできるので
先に杏林堂のチラシでLINE送信を試す
def wait_for_page_load(driver, timeout=30): WebDriverWait(driver, timeout).until( EC.presence_of_element_located((By.XPATH, '//img')) # ページに画像が表示されるまで待機 )
を追加してみる
import os
import time
import requests
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.safari.service import Service as SafariService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime


def open_link_in_safari(url):
    """Launch Safari via WebDriver, navigate to *url* and return the driver."""
    service = SafariService()
    driver = webdriver.Safari(service=service)
    driver.get(url)
    time.sleep(3)  # give the page a moment to render after navigation
    return driver


def click_date_element(driver, base_xpath):
    """Click the first link under *base_xpath* whose title contains '日替'.

    Prints a message whether or not a matching link is found; any WebDriver
    error is caught and reported rather than propagated.
    """
    try:
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        links = container.find_elements(By.XPATH, ".//a[contains(@title, '日替')]")
        for link in links:
            # The XPath already filters on title, but get_attribute can
            # still return None — guard before the 'in' test.
            title = link.get_attribute('title') or ''
            if '日替' in title:
                link.click()
                print(f'Clicked on link with title: {title}')
                time.sleep(3)  # wait for the flyer page to load after the click
                return
        print('No link found with title containing: 日替')
    except Exception as e:
        print(f'Error clicking on element: {e}')


def get_images_from_container(driver, base_xpath):
    """Return the list of flyer image URLs (src containing 'index/img')
    found under *base_xpath*. Returns an empty list on error."""
    image_urls = []
    try:
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        for img in container.find_elements(By.TAG_NAME, 'img'):
            src = img.get_attribute('src')
            # src is None for lazy-loaded <img> tags; 'in' on None raises
            # TypeError, so skip those explicitly.
            if src and 'index/img' in src:
                image_urls.append(src)
                print(f'Found image: {src}')
    except Exception as e:
        print(f'Error finding images: {e}')
    return image_urls


def download_images(image_urls):
    """Download each URL into a PIL Image; failures are reported and skipped."""
    images = []
    for i, url in enumerate(image_urls):
        try:
            # Without a timeout a stalled server would hang the script forever.
            response = requests.get(url, timeout=30)
        except requests.RequestException as e:
            print(f'Failed to download {url}: {e}')
            continue
        if response.status_code == 200:
            images.append(Image.open(BytesIO(response.content)))
            print(f'Downloaded image_{i}.jpg')
        else:
            print(f'Failed to download {url}')
    return images


def merge_images(images, output_path):
    """Stack *images* vertically into one canvas and save it to *output_path*.

    An empty list is a no-op (the original zip(*()) would raise ValueError).
    """
    if not images:
        print('No images to merge')
        return
    widths, heights = zip(*(img.size for img in images))
    combined_image = Image.new('RGB', (max(widths), sum(heights)))
    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.height
    combined_image.save(output_path)
    print(f'Saved combined image as {output_path}')


def main():
    """Open the Kyorindo Shufoo page, click the daily flyer, download its
    images and save them as one combined JPEG under ./images/."""
    url = 'https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc'
    driver = open_link_in_safari(url)
    try:
        # 特定のリンクをクリックする
        base_xpath_click = '/html/body/div[1]/div[3]/div[1]/div/div[4]/div/div/div/div/div/div/ul'
        click_date_element(driver, base_xpath_click)
        # 画像を取得してダウンロードする
        base_xpath_images = '/html/body/div[1]/div[3]/div[1]/div/div[2]/div[2]'
        image_urls = get_images_from_container(driver, base_xpath_images)
    finally:
        driver.quit()  # always release the browser, even if scraping failed
    if image_urls:
        images = download_images(image_urls)
        if images:
            # Date-stamped filename, e.g. combined_image_20240101.jpg
            current_date = datetime.now().strftime('%Y%m%d')
            output_dir = 'images'
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, f'combined_image_{current_date}.jpg')
            merge_images(images, output_path)


if __name__ == '__main__':
    main()
を
import os
import time
import requests
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.safari.service import Service as SafariService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime


def open_link_in_safari(url):
    """Launch Safari via WebDriver, navigate to *url*, wait for the page
    to show an image, and return the driver."""
    service = SafariService()
    driver = webdriver.Safari(service=service)
    driver.get(url)
    wait_for_page_load(driver)  # ページの読み込みを待機
    return driver


def wait_for_page_load(driver, timeout=30):
    """Block until at least one <img> is present on the page (or *timeout*
    seconds elapse). Timeouts are reported, not raised."""
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, '//img'))
        )
        print("Page loaded successfully.")
    except Exception as e:
        print(f"Error waiting for page to load: {e}")


def click_date_element(driver, base_xpath):
    """Click the first link under *base_xpath* whose title contains '日替',
    then wait for the resulting page to load. Errors are caught and printed."""
    try:
        container = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        links = container.find_elements(By.XPATH, ".//a[contains(@title, '日替')]")
        for link in links:
            # get_attribute can return None even though the XPath filtered
            # on title — guard before the 'in' test.
            title = link.get_attribute('title') or ''
            if '日替' in title:
                link.click()
                print(f'Clicked on link with title: {title}')
                wait_for_page_load(driver)  # クリック後のページ読み込みを待機
                return
        print('No link found with title containing: 日替')
    except Exception as e:
        print(f'Error clicking on element: {e}')


def get_images_from_container(driver, base_xpath):
    """Return the list of flyer image URLs (src containing 'index/img')
    found under *base_xpath*. Returns an empty list on error."""
    image_urls = []
    try:
        container = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.XPATH, base_xpath))
        )
        for img in container.find_elements(By.TAG_NAME, 'img'):
            src = img.get_attribute('src')
            # src is None for lazy-loaded <img> tags; 'in' on None raises
            # TypeError — this was likely why image download silently failed.
            if src and 'index/img' in src:
                image_urls.append(src)
                print(f'Found image: {src}')
    except Exception as e:
        print(f'Error finding images: {e}')
    return image_urls


def download_images(image_urls):
    """Download each URL into a PIL Image; failures are reported and skipped."""
    images = []
    for i, url in enumerate(image_urls):
        try:
            # Without a timeout a stalled server would hang the script forever.
            response = requests.get(url, timeout=30)
        except requests.RequestException as e:
            print(f'Failed to download {url}: {e}')
            continue
        if response.status_code == 200:
            images.append(Image.open(BytesIO(response.content)))
            print(f'Downloaded image_{i}.jpg')
        else:
            print(f'Failed to download {url}')
    return images


def merge_images(images, output_path):
    """Stack *images* vertically into one canvas and save it to *output_path*.

    An empty list is a no-op (the original zip(*()) would raise ValueError).
    """
    if not images:
        print('No images to merge')
        return
    widths, heights = zip(*(img.size for img in images))
    combined_image = Image.new('RGB', (max(widths), sum(heights)))
    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.height
    combined_image.save(output_path)
    print(f'Saved combined image as {output_path}')


def main():
    """Open the Kyorindo Shufoo page, click the daily flyer, download its
    images and save them as one combined JPEG under ./images/."""
    url = 'https://www.shufoo.net/pntweb/shopDetail/860323/?cid=nmail_pc'
    driver = open_link_in_safari(url)
    try:
        # 特定のリンクをクリックする
        base_xpath_click = '/html/body/div[1]/div[3]/div[1]/div/div[4]/div/div/div/div/div/div/ul'
        click_date_element(driver, base_xpath_click)
        # 画像を取得してダウンロードする
        base_xpath_images = '/html/body/div[1]/div[3]/div[1]/div/div[2]/div[2]'
        image_urls = get_images_from_container(driver, base_xpath_images)
    finally:
        driver.quit()  # always release the browser, even if scraping failed
    if image_urls:
        images = download_images(image_urls)
        if images:
            # Date-stamped filename, e.g. combined_image_20240101.jpg
            current_date = datetime.now().strftime('%Y%m%d')
            output_dir = 'images'
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, f'combined_image_{current_date}.jpg')
            merge_images(images, output_path)


if __name__ == '__main__':
    main()
としてみる