Sentience-Robotics · MatthiasvonRakowski · Jul 3, 2026 · Jul 2, 2026 · Popochounet · Jul 3, 2026
diff --git a/config/client_text.yaml b/config/client_text.yaml
@@ -1,6 +1,6 @@
 huri_url: ws://localhost:8000/session
 
-topic_list: [question, rag_response]
+topic_list: [question, token]
 
 senders:
   text:
@@ -12,4 +12,7 @@ modules:
     args:
       language: en
       tone: formal
-    logging: INFO
+      max_history_turns: 6
+      temperature: 0.7
+      response_format: short
+      persona: ""
diff --git a/config/huri.yaml b/config/huri.yaml
@@ -40,8 +40,15 @@ applications:
     import_path: src.app:app
     runtime_env:
       env_vars:
+        OLLAMA_BASE_URL: "http://localhost:11434"
+        OLLAMA_API_KEY: "ollama"
+
         RAY_COLOR_PREFIX: "1"
 
+        # Make the CosyVoice repo (and its Matcha-TTS submodule) importable
+        # inside every replica, regardless of cwd.
+        PYTHONPATH: "/home/fifster/Tek/Eip/HuRI/assets/cosyvoice:/home/fifster/Tek/Eip/HuRI/assets/cosyvoice/third_party/Matcha-TTS"
+
         # --- Gesture sliding-window defaults (run in the HuRI CPU actor) ---
         HURI_GESTURE_CONTEXT_SEC: "2.0"
         HURI_GESTURE_MIN_CHUNK_SEC: "0.5"
@@ -56,19 +63,19 @@ applications:
         # it as an instruction and intermittently speaks it (prompt leakage).
         HURI_VOICE_TRANSCRIPT: "You are a helpful assistant.<|endofprompt|>Instinct creates its own oppressors and bids us rise up against them."
         # From .Values.models.cosytts.env (mountPath/modelId) — edit for local layout.
-        HURI_MODEL_PATH: /models/cosytts/FunAudioLLM/Fun-CosyVoice3-0.5B-2512
+        HURI_MODEL_PATH: assets/cosyvoice_model
         # Path to the CosyVoice repo root containing third_party/Matcha-TTS.
-        HURI_COSY_DIR: /app/cosyvoice
+        HURI_COSY_DIR: assets/cosyvoice
         # From .Values.voiceAssets.env — the reference voice sample.
-        HURI_VOICE_SAMPLE_PATH: /assets/voice.wav
+        HURI_VOICE_SAMPLE_PATH: assets/voice.wav
 
         # --- STT (faster-whisper) ---
         # From .Values.models.whisper.env (mountPath/repoId) — edit for local layout.
-        HURI_STT_MODEL_PATH: /models/whisper/Systran/faster-whisper-base
+        # HURI_STT_MODEL_PATH: /models/whisper/Systran/faster-whisper-base
 
         # --- Gesture (EMAGE) ---
         # From .Values.models.emage.env (mountPath/repoId) — edit for local layout.
-        HURI_EMAGE_REPO: /models/emage/H-Liu1997/emage_audio
+        HURI_EMAGE_REPO: assets/emage_audio
 
         # --- GPU-vendor runtime env (Helm puts these on the worker containers) ---
         NVIDIA_VISIBLE_DEVICES: "all"
@@ -88,7 +95,7 @@ applications:
         num_replicas: 1
         ray_actor_options:
           num_cpus: 1
-          num_gpus: 0.5
+          num_gpus: 0
 
       # RAG: embeddings (API) + LLM client. No GPU needed.
       - name: RAGHandle
@@ -98,7 +105,10 @@ applications:
           num_gpus: 0
         user_config:
           embedding_model: "bge-large-en-v1.5-gguf-Q4_K_M"
-          llm_model: "Qwen3.5-4B-GGUF"
+          llm_model: "mistral:7b"
+          llm_base_url: "http://localhost:11434/v1"   # Ollama's OpenAI-compatible endpoint
+          llm_api_key: "ollama"
+          memory_maintenance_check_hours: 0.5
 
       # GPU split (manual override knob): num_gpus are Ray *scheduling*
       # fractions that let replicas pack onto the same device and bias the

diff --git a/src/core/huri.py b/src/core/huri.py
@@ -125,5 +125,17 @@ async def receive_loop(session: Session, ws: WebSocket):
             finally:
                 print(f"Client {user_id} disconnected")
 
-        await receive_loop(self.clients[session_id], ws)
-        del self.clients[session_id]
+        try:
+            await receive_loop(self.clients[session_id], ws)
+        finally:
+            # Persist per-session state (e.g. conversation memory) on disconnect.
+            for module in modules:
+                fin = getattr(module, "finalize", None)
+                if fin is None:
+                    continue
+                try:
+                    await fin()
+                except Exception:
+                    import traceback
+                    print(f"[HuRI] finalize failed for {type(module).__name__}:\n{traceback.format_exc()}")
+            self.clients.pop(session_id, None)
diff --git a/src/modules/rag/memory_inspect.py b/src/modules/rag/memory_inspect.py
@@ -0,0 +1,90 @@
+"""Inspect conversation memories: current strength, decay projection, fate.
+
+Usage:
+    python -m src.modules.rag.memory.memory_inspect
+    python -m src.modules.rag.memory.memory_inspect --user-id <id>
+"""
+import argparse
+from datetime import datetime, timedelta
+
+try:
+    from .qdrant_utils import make_qdrant_client
+except ImportError:
+    from qdrant_utils import make_qdrant_client
+
+HALF_LIFE_DAYS = 5.0
+DELETE_BELOW, CONSOLIDATE_BELOW = 0.05, 0.30
+
+
+def strength(payload: dict, at: datetime | None = None) -> float:
+    """Query-independent strength: recency * importance (matches maintenance)."""
+    at = at or datetime.now()
+    imp = payload.get("importance", 3)
+    half = max(HALF_LIFE_DAYS * (imp / 5.0), 0.5)
+    try:
+        last = datetime.fromisoformat(payload.get("last_accessed") or payload["created_at"])
+        age = (at - last).total_seconds() / 86400.0
+    except Exception:
+        age = 0.0
+    return (0.5 ** (max(age, 0) / half)) * (imp / 10.0)
+
+
+def fate(s: float) -> str:
+    if s < DELETE_BELOW:
+        return "DELETE"
+    if s < CONSOLIDATE_BELOW:
+        return "CONSOLIDATE"
+    return "KEEP"
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--qdrant-url", default="http://localhost:6333")
+    ap.add_argument("--collection", default="conversations")
+    ap.add_argument("--user-id", default=None)
+    args = ap.parse_args()
+
+    qdrant = make_qdrant_client(args.qdrant_url)
+    points, offset = [], None
+    while True:
+        batch, offset = qdrant.scroll(collection_name=args.collection, limit=200,
+                                      offset=offset, with_payload=True, with_vectors=False)
+        points.extend(batch)
+        if offset is None:
+            break
+
+    now = datetime.now()
+    rows = []
+    for p in points:
+        pl = p.payload
+        if pl.get("type") == "maintenance_marker":
+            print(f"[marker] last maintenance run: {pl.get('last_run')}\n")
+            continue
+        if args.user_id and pl.get("_user_id") != args.user_id:
+            continue
+        s_now = strength(pl, now)
+        rows.append({
+            "text": pl.get("text", "")[:60].replace("\n", " "),
+            "type": pl.get("type", "?"),
+            "imp": pl.get("importance", "?"),
+            "acc": pl.get("access_count", 0),
+            "age_d": round((now - datetime.fromisoformat(
+                pl.get("last_accessed") or pl["created_at"])).total_seconds() / 86400, 1),
+            "now": round(s_now, 3),
+            "+5d": round(strength(pl, now + timedelta(days=5)), 3),
+            "+15d": round(strength(pl, now + timedelta(days=15)), 3),
+            "fate": fate(s_now),
+        })
+
+    rows.sort(key=lambda r: r["now"], reverse=True)
+    hdr = f"{'strength':>8} {'+5d':>6} {'+15d':>6} {'imp':>3} {'acc':>3} {'age':>5} {'fate':<12} text"
+    print(hdr)
+    print("-" * len(hdr))
+    for r in rows:
+        print(f"{r['now']:>8} {r['+5d']:>6} {r['+15d']:>6} {r['imp']:>3} "
+              f"{r['acc']:>3} {r['age_d']:>5} {r['fate']:<12} {r['text']}")
+    print(f"\n{len(rows)} memories")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/modules/rag/memory_maintenance.py b/src/modules/rag/memory_maintenance.py
@@ -0,0 +1,119 @@
+"""Periodic memory maintenance: decay-based pruning + consolidation.
+
+Run every N days (cron/systemd timer):
+    python -m src.modules.rag.memory_maintenance
+Strong memories are kept, weak ones are merged into a consolidated memory,
+dead ones are deleted. Decay itself is computed lazily at query time in
+rag.py; this job only prunes and compresses.
+"""
+import argparse
+import json
+import uuid
+from collections import defaultdict
+from datetime import datetime
+
+import httpx
+from qdrant_client.models import Distance, PointIdsList, PointStruct, VectorParams
+
+try:
+    from .qdrant_utils import make_qdrant_client
+except ImportError:
+    from qdrant_utils import make_qdrant_client
+
+DELETE_BELOW = 0.05
+CONSOLIDATE_BELOW = 0.30
+HALF_LIFE_DAYS = 5.0
+
+
+def strength(payload: dict) -> float:
+    """Query-independent strength: recency * importance (no relevance term)."""
+    importance = payload.get("importance", 3)
+    half_life = max(HALF_LIFE_DAYS * (importance / 5.0), 0.5)
+    try:
+        last = datetime.fromisoformat(payload.get("last_accessed") or payload["created_at"])
+        age_days = (datetime.now() - last).total_seconds() / 86400.0
+    except Exception:
+        age_days = 0.0
+    recency = 0.5 ** (age_days / half_life)
+    return recency * (importance / 10.0)
+
+
+def embed(client: httpx.Client, url: str, model: str, text: str) -> list[float]:
+    r = client.post(f"{url}/v1/embeddings", json={"model": model, "input": text})
+    r.raise_for_status()
+    return r.json()["data"][0]["embedding"]
+
+
+def llm(client: httpx.Client, url: str, model: str, prompt: str) -> str:
+    r = client.post(f"{url}/api/chat", json={
+        "model": model, "stream": False,
+        "messages": [{"role": "user", "content": prompt}],
+        "options": {"num_predict": 300},
+    })
+    r.raise_for_status()
+    return r.json()["message"]["content"]
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--qdrant-url", default="http://localhost:6333")
+    ap.add_argument("--collection", default="conversations")
+    ap.add_argument("--ollama-url", default="http://localhost:11434")
+    ap.add_argument("--embedding-model", default="bge-large-en-v1.5-gguf-Q4_K_M")
+    ap.add_argument("--llm-model", default="mistral:7b")
+    ap.add_argument("--dry-run", action="store_true")
+    args = ap.parse_args()
+
+    qdrant = make_qdrant_client(args.qdrant_url)
+    http = httpx.Client(timeout=180.0)
+
+    points, offset = [], None
+    while True:
+        batch, offset = qdrant.scroll(collection_name=args.collection, limit=200,
+                                      offset=offset, with_payload=True, with_vectors=False)
+        points.extend(batch)
+        if offset is None:
+            break
+    print(f"{len(points)} memories in '{args.collection}'")
+
+    to_delete, weak_by_user = [], defaultdict(list)
+    for p in points:
+        s = strength(p.payload)
+        if s < DELETE_BELOW:
+            to_delete.append(p)
+        elif s < CONSOLIDATE_BELOW:
+            weak_by_user[p.payload.get("_user_id", "anonymous")].append(p)
+    print(f"delete: {len(to_delete)}, consolidate candidates: {sum(map(len, weak_by_user.values()))}")
+
+    if args.dry_run:
+        return
+
+    for user, weak in weak_by_user.items():
+        if len(weak) < 3:
+            continue  # not worth merging yet; keep decaying
+        texts = [p.payload["text"] for p in weak]
+        merged = llm(http, args.ollama_url, args.llm_model,
+                     "These are old memories about conversations with the same person. "
+                     "Merge them into a single 3-5 sentence memory keeping only durable "
+                     "facts, preferences and recurring themes. Drop one-off small talk.\n\n"
+                     + "\n---\n".join(texts)).strip()
+        vec = embed(http, args.ollama_url, args.embedding_model, merged)
+        now = datetime.now().isoformat()
+        imp = min(max(p.payload.get("importance", 3) for p in weak) + 1, 10)
+        qdrant.upsert(collection_name=args.collection, points=[PointStruct(
+            id=str(uuid.uuid4()), vector=vec, payload={
+                "text": merged, "_user_id": user, "type": "conversation_consolidated",
+                "created_at": now, "last_accessed": now, "access_count": 0,
+                "importance": imp,
+            })])
+        to_delete.extend(weak)
+        print(f"[{user}] consolidated {len(weak)} → 1 (importance={imp})")
+
+    if to_delete:
+        qdrant.delete(collection_name=args.collection,
+                      points_selector=PointIdsList(points=[p.id for p in to_delete]))
+        print(f"deleted {len(to_delete)} points")
+
+
+if __name__ == "__main__":
+    main()