Sentience-Robotics · Popochounet · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/config/client_aux.yaml b/config/client_aux.yaml
@@ -1,28 +1,51 @@
 huri_url: ws://localhost:8000/session
 
-topic_list: [question]
+interface_path: src.interfaces.cli_interface:cli_interface
 
 senders:
-  audio:
-    name: audio
+  # audio:
+  #   name: audio
+  #   topic: audio_in
+  #   args:
+  #     sample_rate: 16000
+  #     frame_duration: 0.030
+  text:
+    name: text
+    topic: question
     args:
       sample_rate: 16000
       frame_duration: 0.030
 
-modules:
-  mic:
-    name: mic
+hooks:
+  text:
+    name: text
+    topics: [question, answer]
+  audio:
+    name: audio
+    topics: [audio]
     args:
-      vad_agressiveness: 3
-      silence_duration: 1.5
-      block_duration: ${inputs.audio.args.frame_duration}
-    logging: INFO
-  stt:
-    name: stt
+      incoming_sample_rate: ${senders.text.args.sample_rate}
+      sample_rate: 44100
+      save_audio_dir: "uuid"
+
+modules:
+  # mic:
+  #   name: mic
+  #   args:
+  #     vad_agressiveness: 3
+  #     silence_duration: 1.5
+  #     block_duration: ${senders.audio.args.frame_duration}
+  # stt:
+  #   name: stt
+  #   args:
+  #     language: en
+  #     block_duration: ${senders.audio.args.frame_duration}
+  # tag:
+  #   name: tag
+  rag:
+    name: rag
     args:
-      language: "en"
-      block_duration: ${inputs.audio.args.frame_duration}
-    logging: INFO
-  tag:
-    name: tag
-    logging: INFO
+      language: en
+      tone: formal
+  tts:
+    name: tts
diff --git a/config/client_aux2.yaml b/config/client_aux2.yaml
diff --git a/config/client_auxio.yaml b/config/client_auxio.yaml
diff --git a/config/client_template.yaml b/config/client_template.yaml
@@ -1,19 +1,34 @@
 # HuRI websocket server url
 huri_url: ws://localhost:8000/session
 
-# List of event topic the client will receive
-topic_list: [topic1, topic2]
+# Import path of the interface to use (module.path:attribute)
+interface_path: src.interfaces.cli_interface:cli_interface
 
 # Define senders to be used and their custom args
 senders:
   # sender tag can be anything
   example:
-    # sender name must be in the list of available ClientSender in Client instance (src.client_sender:get_senders)
+    # sender name must be in the list of available ClientSender in chosen Interface (Interface.get_senders)
     name: my_sender
+    # topic the sender will send to HuRI, it must match output_type event data structure
+    topic: my_event
     # if my_sender init with "model", "sample_rate" and "refresh_rate" params, they can be customized here
     args:
       refresh_rate: infinite
 
+# Define hooks to be used and their custom args
+hooks:
+  # hook tag can be anything
+  example:
+    # hook name must be in the list of available ClientHook in chosen Interface (Interface.get_hooks)
+    name: my_hook
+    # topics the hook will process from HuRI; each must match the input_type event data structure
+    topics: [my_event, llm_response]
+    # if my_hook init with "model", "sample_rate" and "refresh_rate" params, they can be customized here
+    args:
+      sample_rate: 0
+      no: beat
+
 # Define module to be used and their custom args
 modules:
   # module tag can be anything

diff --git a/config/client_text.yaml b/config/client_text.yaml
@@ -1,10 +1,16 @@
 huri_url: ws://localhost:8000/session
 
-topic_list: [question, rag_response]
+interface_path: src.interfaces.cli_interface:cli_interface
 
 senders:
   text:
     name: text
+    topic: question
+
+hooks:
+  text:
+    name: text
+    topics: [rag_response]
 
 modules:
   rag:

diff --git a/config/huri_cpu.yaml b/config/huri_cpu.yaml
@@ -0,0 +1,119 @@
+# HuRI — local Ray Serve config (no Kubernetes)
+# ============================================================================
+# This is the standalone equivalent of the inline `ray.serveConfig` in
+# deploy/examples/local_nvidia_amd/values.yaml, adapted to run on a single
+# local Ray node started with `ray start` instead of KubeRay.
+#
+# Run it with:
+#
+#   # 1. Start a local Ray head:
+#   ray start --head --num-cpus=8 --num-gpus=1
+#
+#   # 2. Deploy this config:
+#   serve deploy config/huri_cpu.yaml
+#   #    ...or run it in the foreground (starts its own Ray if none is running):
+#   serve run config/huri_cpu.yaml
+#
+#   # 3. Tear down when done:
+#   serve shutdown -y && ray stop
+#
+# NOTE: The Helm chart uses GPU_TYPE_NVIDIA / GPU_TYPE_AMD custom resources to
+# pin deployments to vendor-specific worker groups / container images. On a
+# single local machine there is only one environment, so those are dropped and
+# scheduling is done purely with num_gpus fractions.
+#
+# Likewise there are no PVC volume mounts here, so the model-path /
+# voice-sample / GPU env vars the chart injects from .Values.models,
+# .Values.voiceAssets and workerGroups[*].containerEnv are folded into
+# runtime_env.env_vars below. Adjust the paths to wherever the weights
+# actually live on this machine.
+# ============================================================================
+
+proxy_location: EveryNode
+http_options:
+  host: 0.0.0.0
+  port: 8000
+
+applications:
+  - name: huri-app
+    route_prefix: /
+    import_path: src.app:app
+    runtime_env:
+      env_vars:
+        RAY_COLOR_PREFIX: "1"
+
+        # --- Gesture sliding-window defaults (run in the HuRI CPU actor) ---
+        HURI_GESTURE_CONTEXT_SEC: "2.0"
+        HURI_GESTURE_MIN_CHUNK_SEC: "0.5"
+        # Caps the EMAGE process to a fraction of GPU memory so TTS keeps the
+        # rest. "0" disables the cap. Keep roughly in line with GestureGeneration
+        # num_gpus below. (Helm sets this on the nvidia worker's containerEnv.)
+        HURI_GESTURE_GPU_MEM_FRACTION: "0.2"
+
+        # --- CosyVoice3 / TTS ---
+        # CosyVoice3 contract: "<instruction><|endofprompt|><transcript-of-voice.wav>".
+        # The reference transcript MUST come AFTER the marker, or the LM treats
+        # it as an instruction and intermittently speaks it (prompt leakage).
+        HURI_VOICE_TRANSCRIPT: "You are a helpful assistant.<|endofprompt|>Instinct creates its own oppressors and bids us rise up against them."
+        # From .Values.models.cosytts.env (mountPath/modelId) — edit for local layout.
+        HURI_MODEL_PATH: /models/cosytts/FunAudioLLM/Fun-CosyVoice3-0.5B-2512
+        # Path to the CosyVoice repo root containing third_party/Matcha-TTS.
+        HURI_COSY_DIR: /app/cosyvoice
+        # From .Values.voiceAssets.env — the reference voice sample.
+        HURI_VOICE_SAMPLE_PATH: /assets/voice.wav
+
+        # --- STT (faster-whisper) ---
+        # From .Values.models.whisper.env (mountPath/repoId) — edit for local layout.
+        HURI_STT_MODEL_PATH: /models/whisper/Systran/faster-whisper-base
+
+        # --- Gesture (EMAGE) ---
+        # From .Values.models.emage.env (mountPath/repoId) — edit for local layout.
+        HURI_EMAGE_REPO: /models/emage/H-Liu1997/emage_audio
+
+        # --- GPU-vendor runtime env (Helm puts these on the worker containers) ---
+        NVIDIA_VISIBLE_DEVICES: "all"
+        NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
+        HF_HUB_DOWNLOAD_TIMEOUT: "10"
+
+    deployments:
+      # HuRI: FastAPI/WebSocket ingress + per-session router. CPU only —
+      # all GPU work is offloaded to the handle-backed deployments below.
+      - name: HuRI
+        ray_actor_options:
+          num_cpus: 1
+          num_gpus: 0
+
+      # STT: shared faster-whisper actor.
+      - name: STT
+        num_replicas: 1
+        ray_actor_options:
+          num_cpus: 1
+          num_gpus: 0
+
+      # RAG: embeddings (API) + LLM client. No GPU needed.
+      - name: RAGHandle
+        num_replicas: 1
+        ray_actor_options:
+          num_cpus: 1
+          num_gpus: 0
+        user_config:
+          embedding_model: "bge-large-en-v1.5-gguf-Q4_K_M"
+          llm_model: "Qwen3.5-4B-GGUF"
+
+      # GPU split (manual override knob): num_gpus are Ray *scheduling*
+      # fractions that let replicas pack onto the same device and bias the
+      # split. TTS gets the lion's share so streamed speech stays low-latency;
+      # gesture gets the remainder. To also cap gesture's actual VRAM, set
+      # HURI_GESTURE_GPU_MEM_FRACTION above.
+      #
+      # These fractions must sum to <= the --num-gpus you pass to `ray start`.
+      # As written every deployment in this CPU config requests num_gpus: 0, so
+      # no GPU is required; raise the fractions below to schedule work on a GPU.
+      - name: TTS
+        ray_actor_options:
+          num_cpus: 1
+          num_gpus: 0
+      - name: GestureGeneration
+        ray_actor_options:
+          num_cpus: 1
+          num_gpus: 0
diff --git a/src/client.py b/src/client.py
@@ -6,6 +6,7 @@
 
 from src.core.client import Client
 from src.core.dataclasses.config import ClientConfig
+from src.core.user_config import get_or_create_and_save_user_id
 
 
 def load_client_config(path: str) -> ClientConfig:
@@ -16,6 +17,10 @@ def load_client_config(path: str) -> ClientConfig:
     if not isinstance(raw_resolved, Dict):
         raise RuntimeError("error yaml does not output a dict")
 
+    user_id_file_path = raw_resolved.get("user_id_file_path")
+    user_id = get_or_create_and_save_user_id(user_id_file_path)
+    raw_resolved["user_id"] = user_id
+
     return ClientConfig.from_dict(raw_resolved)
 
 
@@ -26,21 +31,11 @@ async def launch_client():
         required=True,
         help="Path to Client config file (YAML)",
     )
-    parser.add_argument(
-        "--save-audio",
-        nargs="?",
-        const="audio_dumps",
-        default=None,
-        metavar="DIR",
-        help="Save streamed TTS audio to .wav files (one per utterance) in DIR "
-        "for quality-checking. Defaults to ./audio_dumps when the flag is given "
-        "without a value.",
-    )
 
     args = parser.parse_args()
     config = load_client_config(args.config)
 
-    await Client(config=config, save_audio_dir=args.save_audio).run()
+    await Client(config=config).run()
 
 
 if __name__ == "__main__":