pyttsx3 音声読み上げ機能をモジュール化

module/module_speaker.py

で

import pyttsx3
import emoji
import re

class Speaker:
    """Text-to-speech helper built on a pyttsx3 engine.

    Emoji and a handful of markup symbols are stripped from the text, which
    is then queued on the engine one sentence at a time.
    """

    def __init__(self):
        """Create the pyttsx3 engine and apply the speech settings."""
        self.engine = pyttsx3.init()
        # Speech settings; a voice could also be configured here.
        # Original author's notes: the default rate is 200 and the volume
        # ranges from 0.0 to 1.0.
        settings = (
            ("rate", 170),
            ("volume", 1.0),
        )
        for key, value in settings:
            self.engine.setProperty(key, value)

    def remove_emoji(self, text: str) -> str:
        """Return *text* with every emoji replaced by an empty string."""
        without_emoji = emoji.replace_emoji(text, replace='')
        return without_emoji

    def speak(self, text: str):
        """Print the cleaned-up text and read it aloud sentence by sentence.

        Emoji and the characters ``* _ ` ~ ^`` are removed first. The result
        is printed under a banner, split after each ``。``, ``！``, ``？`` or
        newline, and every non-blank piece is queued on the engine before
        ``runAndWait()`` is called once.
        """
        # Drop emoji, then the markup symbols.
        spoken = re.sub(r"[*_`~^]", "", self.remove_emoji(text))
        print("\n【読み上げるテキスト】")
        print(spoken)

        # Queue one utterance per sentence (author's note: this improves
        # stability). The lookbehind keeps the terminator with its sentence.
        pieces = re.split(r"(?<=[。！？\n])", spoken)
        for utterance in (piece.strip() for piece in pieces):
            if utterance:
                self.engine.say(utterance)
        self.engine.runAndWait()

として保存

pyttsx3 関連の処理をモジュール化した module_speaker.py を使うように、メインスクリプトを新しく作成する。

touch main5.py

で

from module.module_audio_to_text import AudioToTextCorrector
from module.module_speaker import Speaker
from ollama import chat, ChatResponse

# Name of the Ollama model that ask_ollama() sends prompts to
OLLAMA_MODEL = 'gemma3:4b'

# Initialise the text-to-speech helper once at import time; main() uses it
# to read replies aloud
speaker = Speaker()

def ask_ollama(prompt: str) -> str:
    """Send *prompt* to the Ollama model as a single user message.

    Returns the reply content with surrounding whitespace stripped. If
    anything goes wrong during the request or while reading the reply, the
    error is printed and a fixed Japanese error message is returned instead.
    """
    user_message = {
        'role': 'user',
        'content': prompt,
    }
    try:
        reply: ChatResponse = chat(model=OLLAMA_MODEL, messages=[user_message])
        answer = reply.message.content.strip()
    except Exception as err:
        # Best-effort boundary: report the failure and fall back to a
        # canned message so the caller always receives a string.
        print(f"Ollamaエラー: {err}")
        return "エラーが発生しました。"
    return answer

def main():
    """Run the voice chat loop: listen, ask the Ollama model, speak the reply.

    Each iteration records and corrects one utterance, prints it, sends it
    to ask_ollama(), prints the reply and reads it aloud through the
    module-level ``speaker``. The loop ends when record_and_correct()
    returns None.
    """
    audio_to_text = AudioToTextCorrector("config.json")

    while True:
        corrected_text = audio_to_text.record_and_correct(timeout_seconds=10)

        # None is treated as the end condition (judging by the sample log,
        # it is returned when no speech arrives within the timeout; confirm
        # against AudioToTextCorrector).
        if corrected_text is None:
            print("終了条件に達したため、ループを抜けます。")
            break

        print("\n【認識・補正したテキスト】")
        print(corrected_text)

        # Ask the model
        ollama_reply = ask_ollama(corrected_text)

        # Build the banner from OLLAMA_MODEL so the label stays correct
        # when the configured model is changed.
        print(f"\n【{OLLAMA_MODEL}の返答】")
        print(ollama_reply)

        # Read the reply aloud
        speaker.speak(ollama_reply)

# Start the conversation loop only when this file is executed as a script
if __name__ == "__main__":
    main()

として検証

[2025-05-10 06:23:46.009] [ctranslate2] [thread 64614] [warning] The compute type inferred from the saved model is float16, but the target device or backend do not support efficient float16 computation. The model weights have been automatically converted to use the float32 compute type instead.
stand by ready OK
recording...
finished

【認識・補正したテキスト】
こんにちは。

【gemma3:4bの返答】
こんにちは！何かお手伝いできることはありますか？😊

【読み上げるテキスト】
こんにちは！何かお手伝いできることはありますか？
stand by ready OK
recording...
finished
10秒間音声が入力されなかったため、処理を終了します。
終了条件に達したため、ループを抜けます。

となり、音声認識から返答の読み上げまで無事に動作していることが確認できる。

次はウェイクワードエンジンの導入

コメントを残す コメントをキャンセル

コメントを残すコメントをキャンセル