ウェイクワードエンジンと ollama gemma3 4b の組み合わせ
とりあえずopenwakeword で alexa なら動作するので
次に音声入力が起動するように組み合わせる
これならAlexa と言ったら
音声入力を開始
という
常にマイクを監視
「ねえラマ」などのウェイクワードが話されたら録音開始
認識→質問→読み上げを実行
終了後、またウェイクワード待ちに戻る
ができるはず
import pyaudio
import numpy as np
from openwakeword.model import Model
import sys
"""Minimal openWakeWord demo: watch the mic and report "Alexa" detections.

Reads 16 kHz mono int16 audio from the default microphone in CHUNK-sized
frames, feeds each frame to the openWakeWord model, and prints once per
rising edge of the detection score crossing 0.5.
"""
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000   # openWakeWord expects 16 kHz mono int16 input
CHUNK = 1024

audio = pyaudio.PyAudio()
mic_stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

model_name = "alexa_v0.1.onnx"
model = Model(
    wakeword_models=[model_name],
    inference_framework="onnx"
)

print("Listening for wakeword \"Alexa\"...")
print()

prev_detect = False
while True:
    # Bug fix: the original rebound `audio` here, shadowing the PyAudio
    # instance; use a distinct name for the sample buffer.
    frame = np.frombuffer(mic_stream.read(CHUNK), dtype=np.int16)
    model.predict(frame)
    scores = model.prediction_buffer[model_name]
    # Compare numerically — no need for the '.20f' string round-trip.
    curr_score = float(scores[-1])
    detect = curr_score > 0.5
    if detect and not prev_detect:
        # Rising edge only: print once per detection, not once per frame.
        print(f"Detected!({curr_score:.3f})")
    prev_detect = detect
を元にGPTの提案したコードを書き換える
touch module/module_wakeword.py
で
import pyaudio
import numpy as np
from openwakeword.model import Model
class WakeWordDetector:
    """Blocking wake-word detector built on openWakeWord + PyAudio.

    listen_for_wakeword() reads the microphone until the model's score
    crosses ``threshold`` on a rising edge, then returns True so the
    caller can start speech recognition.
    """

    def __init__(self, model_path="alexa_v0.1.onnx", threshold=0.5):
        # NOTE(review): openwakeword raises ValueError("Could not find
        # pretrained model ...") if this path does not exist on disk —
        # run openwakeword.utils.download_models() and copy the .onnx
        # file first.
        self.model_name = model_path
        self.threshold = threshold
        # Model initialization (list form: each entry is a model file path
        # or a pretrained model name).
        self.model = Model(
            wakeword_models=[self.model_name],
            inference_framework="onnx"
        )
        # PyAudio settings: 16 kHz mono int16, as openWakeWord expects.
        self.format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000
        self.chunk = 1024
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk
        )

    def listen_for_wakeword(self):
        """Block until the wake word is detected; returns True."""
        print(f"ウェイクワード待機中...(モデル: {self.model_name})")
        prev_detect = False
        while True:
            audio_chunk = self.stream.read(self.chunk, exception_on_overflow=False)
            audio_np = np.frombuffer(audio_chunk, dtype=np.int16)
            prediction = self.model.predict(audio_np)
            # Bug fix: read the score from predict()'s return value
            # ({model_name: score}) instead of prediction_buffer — the
            # buffer key is derived by the library from the file name,
            # so indexing it with the raw path we were given can miss.
            score = max(prediction.values())
            detect = score > self.threshold
            if detect and not prev_detect:
                print(f"ウェイクワード検出!(スコア: {score:.3f})")
                return True  # stop once detected
            prev_detect = detect
✅ 特徴と使い方
* alexa_v0.1.onnx をモデルとして使う(変更可能)
* listen_for_wakeword() を呼び出すと、検出されるまでループし、検出されたら return True
✅ モデルファイルが models/ にある場合の使い方
wakeword_detector = WakeWordDetector(model_path="models/alexa_v0.1.onnx", threshold=0.5)
✅ 使い方例(main側)
from module.module_wakeword import WakeWordDetector
wakeword_detector = WakeWordDetector("models/alexa_v0.1.onnx", threshold=0.5)
wakeword_detector.listen_for_wakeword()
これらを元に
touch main6.py
を作成し
from module.module_audio_to_text import AudioToTextCorrector
from module.module_speaker import Speaker
from module.module_wakeword import WakeWordDetector
from ollama import chat, ChatResponse
# Ollama model name (local LLM used to answer the recognized question)
OLLAMA_MODEL = 'gemma3:4b'
# Initialize each pipeline module: TTS speaker, speech-to-text with
# Japanese correction, and the wake-word detector ("alexa" model).
speaker = Speaker()
audio_to_text = AudioToTextCorrector("config.json")
wakeword_detector = WakeWordDetector(model_path="models/alexa_v0.1.onnx", threshold=0.5)
def ask_ollama(prompt: str) -> str:
    """Send *prompt* to the local Ollama model and return the reply text.

    Never raises: on any Ollama/network failure it logs the error and
    returns a fixed Japanese error message so the main loop keeps running.
    """
    try:
        response: ChatResponse = chat(model=OLLAMA_MODEL, messages=[
            {'role': 'user', 'content': prompt}
        ])
        # content may be None on an empty response; guard before strip()
        # to avoid an AttributeError.
        return (response.message.content or "").strip()
    except Exception as e:
        print(f"Ollamaエラー: {e}")
        return "エラーが発生しました。"
def main():
    """Run the assistant loop: wake word -> STT -> LLM -> TTS, forever."""
    while True:
        # Step 1: block here until the wake word is heard.
        wakeword_detector.listen_for_wakeword()

        # Step 2: capture speech and convert it to corrected text.
        text = audio_to_text.record_and_correct(timeout_seconds=10)
        if text is None:
            print("無音またはタイムアウトで中断。再びウェイクワード待ちに戻ります。")
            continue

        print("\n【認識・補正したテキスト】")
        print(text)

        # Step 3: ask the local LLM (gemma3:4b via Ollama).
        reply = ask_ollama(text)
        print("\n【gemma3:4bの返答】")
        print(reply)

        # Step 4: speak the answer aloud, then loop back to waiting.
        speaker.speak(reply)


if __name__ == "__main__":
    main()
でウェイクワード対応させる
実行すると
[2025-05-14 06:23:54.568] [ctranslate2] [thread 311016] [warning] The compute type inferred from the saved model is float16, but the target device or backend do not support efficient float16 computation. The model weights have been automatically converted to use the float32 compute type instead.
Traceback (most recent call last):
File "/Users/snowpool/aw10s/gemma/main6.py", line 12, in <module>
wakeword_detector = WakeWordDetector(model_path="models/alexa_v0.1.onnx", threshold=0.5)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/snowpool/aw10s/gemma/module/module_wakeword.py", line 11, in __init__
self.model = Model(
^^^^^^
File "/Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword/utils.py", line 686, in wrapped
return func(*args, **new_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword/model.py", line 97, in __init__
raise ValueError("Could not find pretrained model for model name '{}'".format(i))
ValueError: Could not find pretrained model for model name 'models/alexa_v0.1.onnx'
これは
openWakeWord の Model(…) に渡されたモデルパスが正しく認識されていない
openwakeword.Model(…) に渡す wakeword_models は、**ファイル名ではなく「モデル名 or 辞書形式」**で渡す必要がある、という仮説を立てた
(注: 後で判明するが、実際の根本原因は models/alexa_v0.1.onnx というファイルがまだ存在しなかったこと。ファイルが存在すればパスのリストでも動作する)
なので
self.model = Model(
wakeword_models=[self.model_name], # これはNG
inference_framework="onnx"
)
を
self.model = Model(
wakeword_models={ "alexa": self.model_name },
inference_framework="onnx"
)
にする
import pyaudio
import numpy as np
from openwakeword.model import Model
class WakeWordDetector:
    """Wake-word detector ("Alexa") using openWakeWord and PyAudio.

    NOTE(review): passing a dict as ``wakeword_models`` crashes this
    openwakeword version with "dictionary changed size during iteration"
    (the library mutates the mapping while iterating it). Passing a plain
    list of model file paths avoids that bug.
    """

    def __init__(self, model_path="models/alexa_v0.1.onnx", threshold=0.5):
        self.model_name = "alexa"  # display alias only, not a buffer key
        self.threshold = threshold
        # Bug fix: list form instead of {name: path} dict (see class note).
        self.model = Model(
            wakeword_models=[model_path],
            inference_framework="onnx"
        )
        # PyAudio settings: 16 kHz mono int16, as openWakeWord expects.
        self.format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000
        self.chunk = 1024
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk
        )

    def listen_for_wakeword(self):
        """Block until the wake word is detected; returns True."""
        print(f"ウェイクワード待機中...(アレクサ)")
        prev_detect = False
        while True:
            audio_chunk = self.stream.read(self.chunk, exception_on_overflow=False)
            audio_np = np.frombuffer(audio_chunk, dtype=np.int16)
            prediction = self.model.predict(audio_np)
            # Read the score from predict()'s return ({model_name: score})
            # rather than prediction_buffer[self.model_name]: the buffer
            # key is derived by the library from the file name, not from
            # our "alexa" alias, so that lookup would miss.
            score = max(prediction.values())
            detect = score > self.threshold
            if detect and not prev_detect:
                print(f"「アレクサ」検出!(スコア: {score:.3f})")
                return True
            prev_detect = detect
が全体コード
今度は
[2025-05-14 06:28:28.142] [ctranslate2] [thread 314783] [warning] The compute type inferred from the saved model is float16, but the target device or backend do not support efficient float16 computation. The model weights have been automatically converted to use the float32 compute type instead.
Traceback (most recent call last):
File "/Users/snowpool/aw10s/gemma/main6.py", line 12, in <module>
wakeword_detector = WakeWordDetector(model_path="models/alexa_v0.1.onnx", threshold=0.5)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/snowpool/aw10s/gemma/module/module_wakeword.py", line 11, in __init__
self.model = Model(
^^^^^^
File "/Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword/utils.py", line 686, in wrapped
return func(*args, **new_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword/model.py", line 90, in __init__
for ndx, i in enumerate(wakeword_models):
RuntimeError: dictionary changed size during iteration
コード生成がうまくいかないため
test_openwakeword.py
のコードをモジュール化する
module/module_wakeword_simple.py
内容は
import pyaudio
import numpy as np
from openwakeword.model import Model
class SimpleWakeWordDetector:
    """Minimal wake-word detector, modularized from test_openwakeword.py.

    listen_for_wakeword() blocks until the model's detection score crosses
    ``threshold`` on a rising edge, then returns True.
    """

    def __init__(self, model_path="alexa_v0.1.onnx", threshold=0.5):
        self.model_path = model_path
        self.threshold = threshold
        self.format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000   # openWakeWord expects 16 kHz mono int16 input
        self.chunk = 1024
        # Kept for the log message only — NOT used as a buffer key (the
        # library derives prediction_buffer keys from the file name, so
        # a path like "models/alexa_v0.1.onnx" would not match).
        self.model_name = model_path
        # Load the openWakeWord model (list of file paths / model names).
        self.model = Model(
            wakeword_models=[self.model_path],
            inference_framework="onnx"
        )
        # Microphone initialization.
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk
        )

    def listen_for_wakeword(self):
        """Block until the wake word is detected; returns True."""
        print(f"Listening for wakeword \"{self.model_name}\"...")
        prev_detect = False
        while True:
            data = self.stream.read(self.chunk, exception_on_overflow=False)
            audio = np.frombuffer(data, dtype=np.int16)
            prediction = self.model.predict(audio)
            # Bug fix: use predict()'s return value ({model_name: score})
            # instead of prediction_buffer[self.model_name], which fails
            # when model_path is a relative path rather than a bare name.
            curr_score = max(prediction.values())
            detect = curr_score > self.threshold
            if detect and not prev_detect:
                print(f"Wakeword Detected! (score: {curr_score:.3f})")
                return True
            prev_detect = detect
使い方は
from module.module_wakeword_simple import SimpleWakeWordDetector
wake_detector = SimpleWakeWordDetector(model_path="models/alexa_v0.1.onnx", threshold=0.5)
while True:
wake_detector.listen_for_wakeword()
print("処理を実行します...")
# → 音声認識など次の処理へ
しかし、そもそもの前提として
models ディレクトリにはモデルファイル(alexa_v0.1.onnx)が存在していない(これが最初の ValueError の根本原因)
touch download.py
内容は
# Download all bundled openWakeWord models (embedding, melspectrogram,
# silero VAD, and the pretrained wake words such as alexa_v0.1).
# They are saved inside the installed package, under
# <site-packages>/openwakeword/resources/models/ — not ~/.cache.
import openwakeword
openwakeword.utils.download_models()
で
実行すると
python download.py
embedding_model.tflite: 100%|█████████████████| 1.33M/1.33M [00:00<00:00, 9.92MiB/s]
embedding_model.onnx: 100%|███████████████████| 1.33M/1.33M [00:00<00:00, 7.13MiB/s]
melspectrogram.tflite: 100%|██████████████████| 1.09M/1.09M [00:00<00:00, 6.30MiB/s]
melspectrogram.onnx: 100%|████████████████████| 1.09M/1.09M [00:00<00:00, 6.54MiB/s]
silero_vad.onnx: 100%|████████████████████████| 1.81M/1.81M [00:00<00:00, 7.82MiB/s]
alexa_v0.1.tflite: 100%|████████████████████████| 855k/855k [00:00<00:00, 5.67MiB/s]
alexa_v0.1.onnx: 100%|██████████████████████████| 854k/854k [00:00<00:00, 5.36MiB/s]
hey_mycroft_v0.1.tflite: 100%|██████████████████| 860k/860k [00:00<00:00, 6.84MiB/s]
hey_mycroft_v0.1.onnx: 100%|████████████████████| 858k/858k [00:00<00:00, 6.52MiB/s]
hey_jarvis_v0.1.tflite: 100%|█████████████████| 1.28M/1.28M [00:00<00:00, 6.96MiB/s]
hey_jarvis_v0.1.onnx: 100%|███████████████████| 1.27M/1.27M [00:00<00:00, 6.26MiB/s]
hey_rhasspy_v0.1.tflite: 100%|██████████████████| 416k/416k [00:00<00:00, 4.76MiB/s]
hey_rhasspy_v0.1.onnx: 100%|████████████████████| 204k/204k [00:00<00:00, 3.06MiB/s]
timer_v0.1.tflite: 100%|██████████████████████| 1.74M/1.74M [00:00<00:00, 7.98MiB/s]
timer_v0.1.onnx: 100%|████████████████████████| 1.74M/1.74M [00:00<00:00, 8.96MiB/s]
weather_v0.1.tflite: 100%|████████████████████| 1.15M/1.15M [00:00<00:00, 6.99MiB/s]
weather_v0.1.onnx: 100%|██████████████████████| 1.15M/1.15M [00:00<00:00, 6.44MiB/s]
となってダウンロードされているがパスが不明
GPTで
ls ~/.cache/openwakeword/models/
で存在するというが
ls: /Users/snowpool/.cache/openwakeword/models/: No such file or directory
となるのでこれではない
touch show_model_path.py
で
"""Print the directory where openWakeWord stores its downloaded models."""
import os

import openwakeword

# Bug fix: openwakeword.utils has no `default_cache_dir` (importing it
# raises ImportError). The models live inside the installed package,
# under resources/models — locate them via the package's __file__.
print("モデル保存先:")
print(os.path.join(os.path.dirname(openwakeword.__file__), "resources", "models"))
で実行する
しかし
Traceback (most recent call last):
File "/Users/snowpool/aw10s/gemma/show_model_path.py", line 1, in <module>
from openwakeword.utils import default_cache_dir
ImportError: cannot import name 'default_cache_dir' from 'openwakeword.utils' (/Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword/utils.py)
となる
参考サイトとして
https://zenn.dev/kun432/scraps/1a987de4943c65
によれば
pip show openwakeword | grep Location
で調べることができる
Location: /Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages
モデルはsite-packagesの下にダウンロードされる
Treeコマンドはインストールされていないので
brew install tree
でインストール
しかしエラーになるので
arch -arm64 brew install tree
でインストールする
tree /Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword
/Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword
├── __init__.py
├── __pycache__
│ ├── __init__.cpython-311.pyc
│ ├── custom_verifier_model.cpython-311.pyc
│ ├── data.cpython-311.pyc
│ ├── metrics.cpython-311.pyc
│ ├── model.cpython-311.pyc
│ ├── train.cpython-311.pyc
│ ├── utils.cpython-311.pyc
│ └── vad.cpython-311.pyc
├── custom_verifier_model.py
├── data.py
├── metrics.py
├── model.py
├── resources
│ └── models
│ ├── alexa_v0.1.onnx
│ ├── alexa_v0.1.tflite
│ ├── embedding_model.onnx
│ ├── embedding_model.tflite
│ ├── hey_jarvis_v0.1.onnx
│ ├── hey_jarvis_v0.1.tflite
│ ├── hey_mycroft_v0.1.onnx
│ ├── hey_mycroft_v0.1.tflite
│ ├── hey_rhasspy_v0.1.onnx
│ ├── hey_rhasspy_v0.1.tflite
│ ├── melspectrogram.onnx
│ ├── melspectrogram.tflite
│ ├── silero_vad.onnx
│ ├── timer_v0.1.onnx
│ ├── timer_v0.1.tflite
│ ├── weather_v0.1.onnx
│ └── weather_v0.1.tflite
├── train.py
├── utils.py
└── vad.py
これで場所が把握できたのでモデルをコピーする
mkdir -p models
cp /Users/snowpool/.pyenv/versions/3.11.0/lib/python3.11/site-packages/openwakeword/resources/models/alexa_v0.1.onnx models/
これで実行したけど
アレクサ
と言っても検知しない
試しに
python test_openwakeword.py
を実行してもダメだった