Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 41 additions & 18 deletions config/client_aux.yaml
Original file line number Diff line number Diff line change
@@ -1,28 +1,51 @@
huri_url: ws://localhost:8000/session

topic_list: [question]
interface_path: src.interfaces.cli_interface:cli_interface

senders:
audio:
name: audio
# audio:
# name: audio
# topic: audio_in
# args:
# sample_rate: 16000
# frame_duration: 0.030
text:
name: text
topic: question
args:
sample_rate: 16000
frame_duration: 0.030

modules:
mic:
name: mic
hooks:
text:
name: text
topics: [question, answer]
audio:
name: audio
topics: [audio]
args:
vad_agressiveness: 3
silence_duration: 1.5
block_duration: ${inputs.audio.args.frame_duration}
logging: INFO
stt:
name: stt
incoming_sample_rate: ${senders.text.args.sample_rate}
sample_rate: 44100
save_audio_dir: "uuid"

modules:
# mic:
# name: mic
# args:
# vad_agressiveness: 3
# silence_duration: 1.5
# block_duration: ${senders.audio.args.frame_duration}
# stt:
# name: stt
# args:
# language: en
# block_duration: ${senders.audio.args.frame_duration}
# tag:
# name: tag
rag:
name: rag
args:
language: "en"
block_duration: ${inputs.audio.args.frame_duration}
logging: INFO
tag:
name: tag
logging: INFO
language: en
tone: formal
tts:
name: tts
32 changes: 0 additions & 32 deletions config/client_aux2.yaml

This file was deleted.

25 changes: 0 additions & 25 deletions config/client_auxio.yaml

This file was deleted.

21 changes: 18 additions & 3 deletions config/client_template.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,34 @@
# HuRI websocket server url
huri_url: ws://localhost:8000/session

# List of event topics the client will receive
topic_list: [topic1, topic2]
# Import path of the interface to use
interface_path: src.interfaces.cli_interface:cli_interface

# Define senders to be used and their custom args
senders:
# sender tag can be anything
example:
# sender name must be in the list of available ClientSender in Client instance (src.client_sender:get_senders)
# sender name must be in the list of available ClientSender in chosen Interface (Interface.get_senders)
name: my_sender
# topic the sender will send to HuRI, it must match output_type event data structure
topic: my_event
# if my_sender's init accepts "model", "sample_rate" and "refresh_rate" params, they can be customized here
args:
refresh_rate: infinite

# Define hooks to be used and their custom args
hooks:
# hook tag can be anything
example:
# hook name must be in the list of available ClientHook in chosen Interface (Interface.get_senders)
name: my_hook
# topics the hook will process from HuRI, it must match input_type event data structure
topics: [my_event, llm_response]
# if my_hook's init accepts "model", "sample_rate" and "refresh_rate" params, they can be customized here
args:
sample_rate: 0
no: beat

# Define modules to be used and their custom args
modules:
# module tag can be anything
Expand Down
8 changes: 7 additions & 1 deletion config/client_text.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
huri_url: ws://localhost:8000/session

topic_list: [question, rag_response]
interface_path: src.interfaces.cli_interface:cli_interface

senders:
text:
name: text
topic: question

hooks:
text:
name: text
topics: [rag_response]

modules:
rag:
Expand Down
119 changes: 119 additions & 0 deletions config/huri_cpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# HuRI — local Ray Serve config (no Kubernetes)
# ============================================================================
# This is the standalone equivalent of the inline `ray.serveConfig` in
# deploy/examples/local_nvidia_amd/values.yaml, adapted to run on a single
# local Ray node started with `ray start` instead of KubeRay.
#
# Run it with:
#
# # 1. Start a local Ray head:
# ray start --head --num-cpus=8 --num-gpus=1
#
# # 2. Deploy this config:
# serve deploy config/huri_cpu.yaml
# # ...or run it in the foreground (starts its own Ray if none is running):
# serve run config/huri_cpu.yaml
#
# # 3. Tear down when done:
# serve shutdown -y && ray stop
#
# NOTE: The Helm chart uses GPU_TYPE_NVIDIA / GPU_TYPE_AMD custom resources to
# pin deployments to vendor-specific worker groups / container images. On a
# single local machine there is only one environment, so those are dropped and
# scheduling is done purely with num_gpus fractions.
#
# Likewise there are no PVC volume mounts here, so the model-path /
# voice-sample / GPU env vars the chart injects from .Values.models,
# .Values.voiceAssets and workerGroups[*].containerEnv are folded into
# runtime_env.env_vars below. Adjust the paths to wherever the weights
# actually live on this machine.
# ============================================================================

proxy_location: EveryNode
http_options:
host: 0.0.0.0
port: 8000

applications:
- name: huri-app
route_prefix: /
import_path: src.app:app
runtime_env:
env_vars:
RAY_COLOR_PREFIX: "1"

# --- Gesture sliding-window defaults (run in the HuRI CPU actor) ---
HURI_GESTURE_CONTEXT_SEC: "2.0"
HURI_GESTURE_MIN_CHUNK_SEC: "0.5"
# Caps the EMAGE process to a fraction of GPU memory so TTS keeps the
# rest. "0" disables the cap. Keep roughly in line with GestureGeneration
# num_gpus below. (Helm sets this on the nvidia worker's containerEnv.)
HURI_GESTURE_GPU_MEM_FRACTION: "0.2"

# --- CosyVoice3 / TTS ---
# CosyVoice3 contract: "<instruction><|endofprompt|><transcript-of-voice.wav>".
# The reference transcript MUST come AFTER the marker, or the LM treats
# it as an instruction and intermittently speaks it (prompt leakage).
HURI_VOICE_TRANSCRIPT: "You are a helpful assistant.<|endofprompt|>Instinct creates its own oppressors and bids us rise up against them."
# From .Values.models.cosytts.env (mountPath/modelId) — edit for local layout.
HURI_MODEL_PATH: /models/cosytts/FunAudioLLM/Fun-CosyVoice3-0.5B-2512
# Path to the CosyVoice repo root containing third_party/Matcha-TTS.
HURI_COSY_DIR: /app/cosyvoice
# From .Values.voiceAssets.env — the reference voice sample.
HURI_VOICE_SAMPLE_PATH: /assets/voice.wav

# --- STT (faster-whisper) ---
# From .Values.models.whisper.env (mountPath/repoId) — edit for local layout.
HURI_STT_MODEL_PATH: /models/whisper/Systran/faster-whisper-base

# --- Gesture (EMAGE) ---
# From .Values.models.emage.env (mountPath/repoId) — edit for local layout.
HURI_EMAGE_REPO: /models/emage/H-Liu1997/emage_audio

# --- GPU-vendor runtime env (Helm puts these on the worker containers) ---
NVIDIA_VISIBLE_DEVICES: "all"
NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
HF_HUB_DOWNLOAD_TIMEOUT: "10"

deployments:
# HuRI: FastAPI/WebSocket ingress + per-session router. CPU only —
# all GPU work is offloaded to the handle-backed deployments below.
- name: HuRI
ray_actor_options:
num_cpus: 1
num_gpus: 0

# STT: shared faster-whisper actor.
- name: STT
num_replicas: 1
ray_actor_options:
num_cpus: 1
num_gpus: 0

# RAG: embeddings (API) + LLM client. No GPU needed.
- name: RAGHandle
num_replicas: 1
ray_actor_options:
num_cpus: 1
num_gpus: 0
user_config:
embedding_model: "bge-large-en-v1.5-gguf-Q4_K_M"
llm_model: "Qwen3.5-4B-GGUF"

# GPU split (manual override knob): num_gpus are Ray *scheduling*
# fractions that let replicas pack onto the same device and bias the
# split. TTS gets the lion's share so streamed speech stays low-latency;
# gesture gets the remainder. To also cap gesture's actual VRAM, set
# HURI_GESTURE_GPU_MEM_FRACTION above.
#
# These fractions must sum to <= the --num-gpus you pass to `ray start`.
# As written, every deployment in this file requests num_gpus: 0 (CPU-only),
# so the sum is 0 and any --num-gpus value is sufficient. If you raise the
# fractions to use a GPU, keep their total within --num-gpus.
- name: TTS
ray_actor_options:
num_cpus: 1
num_gpus: 0
- name: GestureGeneration
ray_actor_options:
num_cpus: 1
num_gpus: 0
17 changes: 6 additions & 11 deletions src/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from src.core.client import Client
from src.core.dataclasses.config import ClientConfig
from src.core.user_config import get_or_create_and_save_user_id


def load_client_config(path: str) -> ClientConfig:
Expand All @@ -16,6 +17,10 @@ def load_client_config(path: str) -> ClientConfig:
if not isinstance(raw_resolved, Dict):
raise RuntimeError("error yaml does not output a dict")

user_id_file_path = raw_resolved.get("user_id_file_path")
user_id = get_or_create_and_save_user_id(user_id_file_path)
raw_resolved["user_id"] = user_id

return ClientConfig.from_dict(raw_resolved)


Expand All @@ -26,21 +31,11 @@ async def launch_client():
required=True,
help="Path to Client config file (YAML)",
)
parser.add_argument(
"--save-audio",
nargs="?",
const="audio_dumps",
default=None,
metavar="DIR",
help="Save streamed TTS audio to .wav files (one per utterance) in DIR "
"for quality-checking. Defaults to ./audio_dumps when the flag is given "
"without a value.",
)

args = parser.parse_args()
config = load_client_config(args.config)

await Client(config=config, save_audio_dir=args.save_audio).run()
await Client(config=config).run()


if __name__ == "__main__":
Expand Down
Loading